Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle-Lite
提交
97b54fbe
P
Paddle-Lite
项目概览
PaddlePaddle
/
Paddle-Lite
通知
331
Star
4
Fork
1
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
271
列表
看板
标记
里程碑
合并请求
78
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle-Lite
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
271
Issue
271
列表
看板
标记
里程碑
合并请求
78
合并请求
78
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
97b54fbe
编写于
7月 10, 2020
作者:
Q
Qi Li
提交者:
GitHub
7月 10, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[NPU] enhance cache offline model, test=develop (#3805)
* [NPU] enhance cache offline model, test=develop
上级
be7cc8f8
变更
17
隐藏空白更改
内联
并排
Showing
17 changed file
with
793 addition
and
458 deletion
+793
-458
.gitignore
.gitignore
+3
-0
cmake/cross_compiling/android.cmake
cmake/cross_compiling/android.cmake
+5
-1
lite/api/cxx_api_impl.cc
lite/api/cxx_api_impl.cc
+4
-0
lite/backends/npu/device.cc
lite/backends/npu/device.cc
+103
-77
lite/backends/npu/device.h
lite/backends/npu/device.h
+10
-6
lite/core/mir/subgraph/subgraph_detector.cc
lite/core/mir/subgraph/subgraph_detector.cc
+41
-35
lite/kernels/npu/bridges/engine.cc
lite/kernels/npu/bridges/engine.cc
+74
-40
lite/kernels/npu/bridges/engine.h
lite/kernels/npu/bridges/engine.h
+12
-25
lite/kernels/npu/bridges/graph.h
lite/kernels/npu/bridges/graph.h
+1
-1
lite/kernels/npu/bridges/matmul_op.cc
lite/kernels/npu/bridges/matmul_op.cc
+4
-4
lite/kernels/npu/bridges/utility.h
lite/kernels/npu/bridges/utility.h
+16
-15
lite/kernels/npu/subgraph_compute.cc
lite/kernels/npu/subgraph_compute.cc
+297
-211
lite/kernels/npu/subgraph_compute.h
lite/kernels/npu/subgraph_compute.h
+51
-38
lite/utils/env.h
lite/utils/env.h
+2
-0
lite/utils/io.h
lite/utils/io.h
+35
-0
lite/utils/md5.h
lite/utils/md5.h
+104
-0
lite/utils/string.h
lite/utils/string.h
+31
-5
未找到文件。
.gitignore
浏览文件 @
97b54fbe
...
@@ -120,3 +120,6 @@ metal/MobileNetDemo/MobileNetDemo/Resources
...
@@ -120,3 +120,6 @@ metal/MobileNetDemo/MobileNetDemo/Resources
lite/model_parser/flatbuffers/framework_generated.h
lite/model_parser/flatbuffers/framework_generated.h
build*
build*
# hiai libs
ai_ddk_lib*
cmake/cross_compiling/android.cmake
浏览文件 @
97b54fbe
...
@@ -35,7 +35,11 @@ endif()
...
@@ -35,7 +35,11 @@ endif()
if
(
NOT DEFINED ANDROID_API_LEVEL
)
if
(
NOT DEFINED ANDROID_API_LEVEL
)
set
(
ANDROID_API_LEVEL
"23"
)
set
(
ANDROID_API_LEVEL
"23"
)
if
(
ARM_TARGET_ARCH_ABI STREQUAL
"armv7"
)
if
(
ARM_TARGET_ARCH_ABI STREQUAL
"armv7"
)
set
(
ANDROID_API_LEVEL
"22"
)
if
(
LITE_WITH_NPU AND NOT LITE_ON_TINY_PUBLISH
)
set
(
ANDROID_API_LEVEL
"24"
)
# HIAI DDK depends on android-24
else
()
set
(
ANDROID_API_LEVEL
"22"
)
endif
()
endif
()
endif
()
endif
()
endif
()
...
...
lite/api/cxx_api_impl.cc
浏览文件 @
97b54fbe
...
@@ -73,6 +73,10 @@ void CxxPaddleApiImpl::Init(const lite_api::CxxConfig &config) {
...
@@ -73,6 +73,10 @@ void CxxPaddleApiImpl::Init(const lite_api::CxxConfig &config) {
}
}
mode_
=
config
.
power_mode
();
mode_
=
config
.
power_mode
();
threads_
=
config
.
threads
();
threads_
=
config
.
threads
();
#ifdef LITE_WITH_NPU
Context
<
TargetType
::
kNPU
>::
SetSubgraphModelCacheDir
(
config
.
subgraph_model_cache_dir
());
#endif
#if (defined LITE_WITH_X86) && (defined PADDLE_WITH_MKLML) && \
#if (defined LITE_WITH_X86) && (defined PADDLE_WITH_MKLML) && \
!(defined LITE_ON_MODEL_OPTIMIZE_TOOL)
!(defined LITE_ON_MODEL_OPTIMIZE_TOOL)
int
num_threads
=
config
.
x86_math_library_num_threads
();
int
num_threads
=
config
.
x86_math_library_num_threads
();
...
...
lite/backends/npu/device.cc
浏览文件 @
97b54fbe
...
@@ -20,96 +20,122 @@ namespace paddle {
...
@@ -20,96 +20,122 @@ namespace paddle {
namespace
lite
{
namespace
lite
{
namespace
npu
{
namespace
npu
{
bool
WriteToOMFile
(
const
domi
::
ModelBufferData
&
om_model_buff
,
std
::
shared_ptr
<
hiai
::
AiModelMngerClient
>
Device
::
Load
(
std
::
string
om_file_path
)
{
const
std
::
string
&
model_name
,
FILE
*
fp
;
std
::
vector
<
char
>*
model_buffer
,
fp
=
fopen
(
om_file_path
.
c_str
(),
"wb"
);
bool
*
model_comp
)
{
CHECK
(
fp
!=
nullptr
)
<<
om_file_path
<<
" open failed!"
;
uint32_t
write_size
=
(
uint32_t
)
fwrite
(
om_model_buff
.
data
,
1
,
om_model_buff
.
length
,
fp
);
CHECK_EQ
(
write_size
,
om_model_buff
.
length
)
<<
"write om file failed !"
;
fclose
(
fp
);
return
true
;
}
bool
ReadFromOMFile
(
domi
::
ModelBufferData
*
om_model_buff
,
std
::
string
om_file_path
)
{
FILE
*
fp
;
fp
=
fopen
(
om_file_path
.
c_str
(),
"rb"
);
CHECK
(
fp
!=
nullptr
)
<<
om_file_path
<<
" open failed!"
;
fseek
(
fp
,
0
,
SEEK_END
);
uint32_t
model_length
=
(
uint32_t
)
ftell
(
fp
);
fseek
(
fp
,
0
,
SEEK_SET
);
om_model_buff
->
data
=
malloc
(
model_length
);
om_model_buff
->
length
=
model_length
;
uint32_t
read_size
=
(
uint32_t
)
fread
(
om_model_buff
->
data
,
1
,
model_length
,
fp
);
CHECK_EQ
(
read_size
,
model_length
)
<<
"read om file failed !"
;
fclose
(
fp
);
return
true
;
}
std
::
shared_ptr
<
hiai
::
AiModelMngerClient
>
Device
::
Build
(
const
std
::
string
model_name
,
// NOLINT
std
::
vector
<
ge
::
Operator
>&
input_nodes
,
// NOLINT
std
::
vector
<
ge
::
Operator
>&
output_nodes
,
// NOLINT
const
std
::
string
model_cache_full_dir
=
""
// NOLINT
)
{
VLOG
(
3
)
<<
"[NPU] Build model"
;
// Build the HiAI IR graph to the HiAI om model
ge
::
Graph
ir_graph
(
"graph"
);
ir_graph
.
SetInputs
(
input_nodes
).
SetOutputs
(
output_nodes
);
ge
::
Model
om_model
(
"model"
,
"model"
);
om_model
.
SetGraph
(
ir_graph
);
domi
::
HiaiIrBuild
ir_build
;
domi
::
ModelBufferData
om_model_buf
;
if
(
!
model_cache_full_dir
.
empty
()
&&
IsFileExists
(
model_cache_full_dir
))
{
VLOG
(
3
)
<<
"Will read om model from "
<<
model_cache_full_dir
;
ReadFromOMFile
(
&
om_model_buf
,
model_cache_full_dir
);
}
else
{
if
(
!
ir_build
.
CreateModelBuff
(
om_model
,
om_model_buf
))
{
LOG
(
WARNING
)
<<
"[NPU] CreateModelBuff failed!"
;
return
nullptr
;
}
if
(
!
ir_build
.
BuildIRModel
(
om_model
,
om_model_buf
))
{
LOG
(
WARNING
)
<<
"[NPU] BuildIRModel failed!"
;
ir_build
.
ReleaseModelBuff
(
om_model_buf
);
return
nullptr
;
}
if
(
!
model_cache_full_dir
.
empty
())
{
VLOG
(
3
)
<<
"Will write om model to "
<<
model_cache_full_dir
;
WriteToOMFile
(
om_model_buf
,
model_cache_full_dir
);
}
}
// Create a HiAI model manager client to load the HiAI om model
// Create a HiAI model manager client to load the HiAI om model
std
::
shared_ptr
<
hiai
::
AiModelMngerClient
>
model_client
(
auto
model_client
=
std
::
make_shared
<
hiai
::
AiModelMngerClient
>
();
new
hiai
::
AiModelMngerClient
());
if
(
model_client
->
Init
(
nullptr
)
!=
hiai
::
AI_SUCCESS
)
{
if
(
model_client
->
Init
(
nullptr
)
!=
hiai
::
AI_SUCCESS
)
{
LOG
(
WARNING
)
<<
"[NPU] AiModelMngerClient init failed)!"
;
LOG
(
WARNING
)
<<
"[NPU] Init hiai model client failed!"
;
ir_build
.
ReleaseModelBuff
(
om_model_buf
);
return
nullptr
;
return
nullptr
;
}
}
// Check HiAI DDK version
const
char
*
ddk_version
=
model_client
->
GetVersion
();
if
(
ddk_version
)
{
LOG
(
INFO
)
<<
"[NPU] HiAI DDK version: "
<<
ddk_version
;
}
else
{
LOG
(
WARNING
)
<<
"[NPU] Unable to get HiAI DDK version!"
;
}
// Check model compatibility
auto
model_desc
=
std
::
make_shared
<
hiai
::
AiModelDescription
>
(
auto
model_desc
=
std
::
make_shared
<
hiai
::
AiModelDescription
>
(
model_name
,
freq_level
(),
framework_type
(),
model_type
(),
device_type
());
model_name
,
freq_level
(),
framework_type
(),
model_type
(),
device_type
());
model_desc
->
SetModelBuffer
(
om_model_buf
.
data
,
om_model_buf
.
length
);
model_desc
->
SetModelBuffer
(
std
::
vector
<
std
::
shared_ptr
<
hiai
::
AiModelDescription
>>
model_descs
;
reinterpret_cast
<
const
void
*>
(
model_buffer
->
data
()),
model_descs
.
push_back
(
model_desc
);
model_buffer
->
size
());
if
(
!*
model_comp
&&
model_client
->
CheckModelCompatibility
(
*
model_desc
,
*
model_comp
)
!=
hiai
::
AI_SUCCESS
)
{
*
model_comp
=
false
;
VLOG
(
3
)
<<
"[NPU] model is NOT compatiblitiable, setting model_comp to "
<<
*
model_comp
;
}
else
{
*
model_comp
=
true
;
VLOG
(
3
)
<<
"[NPU] model is compatiblitiable, setting model_comp to "
<<
*
model_comp
;
}
// Rebuild and write the data of the compatible model to the model buffer
if
(
!*
model_comp
)
{
std
::
shared_ptr
<
hiai
::
AiModelBuilder
>
model_builder
=
std
::
make_shared
<
hiai
::
AiModelBuilder
>
(
model_client
);
hiai
::
MemBuffer
*
org_model_buffer
=
model_builder
->
InputMemBufferCreate
(
reinterpret_cast
<
void
*>
(
model_buffer
->
data
()),
model_buffer
->
size
());
if
(
org_model_buffer
)
{
std
::
vector
<
hiai
::
MemBuffer
*>
org_model_buffers
;
org_model_buffers
.
push_back
(
org_model_buffer
);
hiai
::
MemBuffer
*
new_model_buffer
=
model_builder
->
OutputMemBufferCreate
(
framework_type
(),
org_model_buffers
);
// VLOG(3) << "[NPU] new model buffer memeory size is " <<
// new_model_buffer->GetMemBufferSize();
if
(
new_model_buffer
)
{
uint32_t
new_model_size
=
0
;
if
(
model_builder
->
BuildModel
(
org_model_buffers
,
new_model_buffer
,
new_model_size
)
==
hiai
::
AI_SUCCESS
)
{
// need to change to new_model_size as GetMemBufferSize is not
// correct.
model_buffer
->
resize
(
new_model_size
);
memcpy
(
reinterpret_cast
<
void
*>
(
model_buffer
->
data
()),
new_model_buffer
->
GetMemBufferData
(),
new_model_size
);
// Reset the model buffer
model_desc
->
SetModelBuffer
(
reinterpret_cast
<
const
void
*>
(
model_buffer
->
data
()),
model_buffer
->
size
());
VLOG
(
3
)
<<
"[NPU] Rebuild the compatible model done."
;
}
else
{
LOG
(
WARNING
)
<<
"[NPU] Rebuild the compatible model failed!"
;
}
model_builder
->
MemBufferDestroy
(
new_model_buffer
);
}
else
{
LOG
(
WARNING
)
<<
"[NPU] OutputMemBufferCreate failed!"
;
}
model_builder
->
MemBufferDestroy
(
org_model_buffer
);
}
else
{
LOG
(
WARNING
)
<<
"[NPU] InputMemBufferCreate failed!"
;
}
}
// Load the compatible model
std
::
vector
<
std
::
shared_ptr
<
hiai
::
AiModelDescription
>>
model_descs
{
model_desc
};
if
(
model_client
->
Load
(
model_descs
)
!=
hiai
::
AI_SUCCESS
)
{
if
(
model_client
->
Load
(
model_descs
)
!=
hiai
::
AI_SUCCESS
)
{
LOG
(
WARNING
)
<<
"[NPU] AiModelMngerClient load model failed!"
;
LOG
(
WARNING
)
<<
"[NPU] AiModelMngerClient load model failed!"
;
ir_build
.
ReleaseModelBuff
(
om_model_buf
);
return
nullptr
;
return
nullptr
;
}
}
ir_build
.
ReleaseModelBuff
(
om_model_buf
);
VLOG
(
3
)
<<
"[NPU] Load model done."
;
VLOG
(
3
)
<<
"[NPU] Build done"
;
return
model_client
;
return
model_client
;
}
}
bool
Device
::
Build
(
std
::
vector
<
ge
::
Operator
>&
input_nodes
,
// NOLINT
std
::
vector
<
ge
::
Operator
>&
output_nodes
,
// NOLINT
std
::
vector
<
char
>*
model_buffer
)
{
// Convert the HiAI IR graph to the HiAI om model
ge
::
Graph
ir_graph
(
"graph"
);
ir_graph
.
SetInputs
(
input_nodes
).
SetOutputs
(
output_nodes
);
ge
::
Model
om_model
(
"model"
,
"model"
);
om_model
.
SetGraph
(
ir_graph
);
// Build the HiAI om model, serialize and output it to the om buffer
domi
::
HiaiIrBuild
ir_build
;
domi
::
ModelBufferData
om_buffer
;
if
(
!
ir_build
.
CreateModelBuff
(
om_model
,
om_buffer
))
{
LOG
(
WARNING
)
<<
"[NPU] CreateModelBuff failed!"
;
return
false
;
}
if
(
!
ir_build
.
BuildIRModel
(
om_model
,
om_buffer
))
{
LOG
(
WARNING
)
<<
"[NPU] BuildIRModel failed!"
;
ir_build
.
ReleaseModelBuff
(
om_buffer
);
return
false
;
}
model_buffer
->
resize
(
om_buffer
.
length
);
memcpy
(
reinterpret_cast
<
void
*>
(
model_buffer
->
data
()),
reinterpret_cast
<
void
*>
(
om_buffer
.
data
),
om_buffer
.
length
);
ir_build
.
ReleaseModelBuff
(
om_buffer
);
VLOG
(
3
)
<<
"[NPU] Build model done."
;
return
true
;
}
}
// namespace npu
}
// namespace npu
}
// namespace lite
}
// namespace lite
}
// namespace paddle
}
// namespace paddle
lite/backends/npu/device.h
浏览文件 @
97b54fbe
...
@@ -38,14 +38,18 @@ class Device {
...
@@ -38,14 +38,18 @@ class Device {
int
model_type
()
{
return
model_type_
;
}
int
model_type
()
{
return
model_type_
;
}
int
device_type
()
{
return
device_type_
;
}
int
device_type
()
{
return
device_type_
;
}
// Load the HiAI om model from buffer, rebuild the model if it's incompatible
// with the current device, then create a HiAI model manager client(from HiAI
// Server) to run inference
std
::
shared_ptr
<
hiai
::
AiModelMngerClient
>
Load
(
const
std
::
string
&
model_name
,
std
::
vector
<
char
>*
model_buffer
,
bool
*
model_comp
);
// Build the HiAI IR graph to om model, return HiAI model manager client to
// Build the HiAI IR graph to om model, return HiAI model manager client to
// load om model and run inference.
// load om model and run inference.
std
::
shared_ptr
<
hiai
::
AiModelMngerClient
>
Build
(
bool
Build
(
std
::
vector
<
ge
::
Operator
>&
input_nodes
,
// NOLINT
const
std
::
string
model_name
,
// NOLINT
std
::
vector
<
ge
::
Operator
>&
output_nodes
,
// NOLINT
std
::
vector
<
ge
::
Operator
>&
input_nodes
,
// NOLINT
std
::
vector
<
char
>*
model_buffer
);
std
::
vector
<
ge
::
Operator
>&
output_nodes
,
// NOLINT
const
std
::
string
model_cache_name
// NOLINT
);
// NOLINT
private:
private:
int
freq_level_
{
3
};
int
freq_level_
{
3
};
...
...
lite/core/mir/subgraph/subgraph_detector.cc
浏览文件 @
97b54fbe
...
@@ -425,42 +425,51 @@ void SubgraphFuser::InsertNewNode(SSAGraph *graph,
...
@@ -425,42 +425,51 @@ void SubgraphFuser::InsertNewNode(SSAGraph *graph,
subgraph_op_desc
.
SetAttr
<
int32_t
>
(
"sub_block"
,
sub_block_idx
);
subgraph_op_desc
.
SetAttr
<
int32_t
>
(
"sub_block"
,
sub_block_idx
);
// Extract input and output nodes from the target subgraph
// Extract input and output nodes from the target subgraph
std
::
set
<
Node
*>
i
nput
_var_nodes
;
std
::
set
<
Node
*>
i
data
_var_nodes
;
std
::
set
<
Node
*>
weight_var_nodes
;
std
::
set
<
Node
*>
weight_var_nodes
;
std
::
set
<
Node
*>
o
utput
_var_nodes
;
std
::
set
<
Node
*>
o
data
_var_nodes
;
std
::
set
<
Node
*>
local_var_nodes
;
std
::
set
<
Node
*>
local_var_nodes
;
std
::
set
<
Node
*>
unused_var_nodes
;
std
::
set
<
Node
*>
unused_var_nodes
;
ExtractInputsOutputs
(
subgraph_nodes
,
ExtractInputsOutputs
(
subgraph_nodes
,
&
i
nput
_var_nodes
,
&
i
data
_var_nodes
,
&
weight_var_nodes
,
&
weight_var_nodes
,
&
o
utput
_var_nodes
,
&
o
data
_var_nodes
,
&
local_var_nodes
,
&
local_var_nodes
,
&
unused_var_nodes
);
&
unused_var_nodes
);
// A simplified model without the original weight/local/unused nodes on the
// subgraph ops will be saved only if 'SUBGRAPH_DISABLE_ONLINE_MODE' is set to
// true and Predictor->Run(...), Predictor->Save(...) is called.
std
::
set
<
Node
*>
input_var_nodes
(
idata_var_nodes
.
begin
(),
idata_var_nodes
.
end
());
std
::
set
<
Node
*>
output_var_nodes
(
odata_var_nodes
.
begin
(),
odata_var_nodes
.
end
());
if
(
!
GetBoolFromEnv
(
SUBGRAPH_DISABLE_ONLINE_MODE
))
{
input_var_nodes
.
insert
(
weight_var_nodes
.
begin
(),
weight_var_nodes
.
end
());
output_var_nodes
.
insert
(
local_var_nodes
.
begin
(),
local_var_nodes
.
end
());
output_var_nodes
.
insert
(
unused_var_nodes
.
begin
(),
unused_var_nodes
.
end
());
}
// Set input and output name mapping which stores the real inputs and
// Set input and output name mapping which stores the real inputs and
// outputs
// outputs
std
::
vector
<
std
::
string
>
i
nput
_var_names
;
std
::
vector
<
std
::
string
>
i
data
_var_names
;
std
::
vector
<
std
::
string
>
o
utput
_var_names
;
std
::
vector
<
std
::
string
>
o
data
_var_names
;
for
(
auto
&
var_node
:
i
nput
_var_nodes
)
{
for
(
auto
&
var_node
:
i
data
_var_nodes
)
{
i
nput
_var_names
.
push_back
(
var_node
->
AsArg
().
name
);
i
data
_var_names
.
push_back
(
var_node
->
AsArg
().
name
);
}
}
for
(
auto
&
var_node
:
o
utput
_var_nodes
)
{
for
(
auto
&
var_node
:
o
data
_var_nodes
)
{
o
utput
_var_names
.
push_back
(
var_node
->
AsArg
().
name
);
o
data
_var_names
.
push_back
(
var_node
->
AsArg
().
name
);
}
}
subgraph_op_desc
.
SetAttr
<
std
::
vector
<
std
::
string
>>
(
"input_data_names"
,
subgraph_op_desc
.
SetAttr
<
std
::
vector
<
std
::
string
>>
(
"input_data_names"
,
i
nput
_var_names
);
i
data
_var_names
);
subgraph_op_desc
.
SetAttr
<
std
::
vector
<
std
::
string
>>
(
"output_data_names"
,
subgraph_op_desc
.
SetAttr
<
std
::
vector
<
std
::
string
>>
(
"output_data_names"
,
output_var_names
);
odata_var_names
);
// Set all of the inputs and outputs to the target subgraph op
// Set all of the inputs and outputs to the target subgraph op
// To prevent vars are removed in RuntimeProgram::UpdateVarsOfProgram()
// To prevent vars are removed in RuntimeProgram::UpdateVarsOfProgram()
for
(
auto
&
var_node
:
weight_var_nodes
)
{
std
::
vector
<
std
::
string
>
input_var_names
;
std
::
vector
<
std
::
string
>
output_var_names
;
for
(
auto
&
var_node
:
input_var_nodes
)
{
input_var_names
.
push_back
(
var_node
->
AsArg
().
name
);
input_var_names
.
push_back
(
var_node
->
AsArg
().
name
);
}
}
for
(
auto
&
var_node
:
local_var_nodes
)
{
for
(
auto
&
var_node
:
output_var_nodes
)
{
output_var_names
.
push_back
(
var_node
->
AsArg
().
name
);
}
for
(
auto
&
var_node
:
unused_var_nodes
)
{
output_var_names
.
push_back
(
var_node
->
AsArg
().
name
);
output_var_names
.
push_back
(
var_node
->
AsArg
().
name
);
}
}
subgraph_op_desc
.
SetInput
(
"Inputs"
,
input_var_names
);
subgraph_op_desc
.
SetInput
(
"Inputs"
,
input_var_names
);
...
@@ -500,26 +509,13 @@ void SubgraphFuser::InsertNewNode(SSAGraph *graph,
...
@@ -500,26 +509,13 @@ void SubgraphFuser::InsertNewNode(SSAGraph *graph,
for
(
auto
&
var_node
:
input_var_nodes
)
{
for
(
auto
&
var_node
:
input_var_nodes
)
{
IR_NODE_LINK_TO
(
var_node
,
subgraph_op_node
);
IR_NODE_LINK_TO
(
var_node
,
subgraph_op_node
);
}
}
for
(
auto
&
var_node
:
weight_var_nodes
)
{
IR_NODE_LINK_TO
(
var_node
,
subgraph_op_node
);
}
for
(
auto
&
var_node
:
output_var_nodes
)
{
for
(
auto
&
var_node
:
output_var_nodes
)
{
IR_OP_VAR_LINK
(
subgraph_op_node
,
var_node
);
IR_OP_VAR_LINK
(
subgraph_op_node
,
var_node
);
}
}
for
(
auto
&
var_node
:
local_var_nodes
)
{
IR_OP_VAR_LINK
(
subgraph_op_node
,
var_node
);
}
for
(
auto
&
var_node
:
unused_var_nodes
)
{
IR_OP_VAR_LINK
(
subgraph_op_node
,
var_node
);
}
// Remove subgraph nodes and unused var nodes
// Remove subgraph nodes and unused var nodes
auto
nodes2rm
=
GetNodes2RM
(
subgraph_nodes
,
auto
nodes2rm
=
{
input_var_nodes
,
GetNodes2RM
(
subgraph_nodes
,
{
input_var_nodes
,
output_var_nodes
});
weight_var_nodes
,
output_var_nodes
,
local_var_nodes
,
unused_var_nodes
});
GraphSafeRemoveNodes
(
graph
,
nodes2rm
);
GraphSafeRemoveNodes
(
graph
,
nodes2rm
);
}
}
...
@@ -594,7 +590,17 @@ std::set<const Node *> GetNodes2RM(
...
@@ -594,7 +590,17 @@ std::set<const Node *> GetNodes2RM(
std
::
set
<
const
Node
*>
nodes2rm
(
op_nodes
.
begin
(),
op_nodes
.
end
());
std
::
set
<
const
Node
*>
nodes2rm
(
op_nodes
.
begin
(),
op_nodes
.
end
());
for
(
auto
&
op_node
:
op_nodes
)
{
for
(
auto
&
op_node
:
op_nodes
)
{
for
(
auto
&
var_node
:
op_node
->
inlinks
)
{
for
(
auto
&
var_node
:
op_node
->
inlinks
)
{
if
(
!
nodes2rm
.
count
(
var_node
))
{
bool
skip
=
false
;
// skip the var node which is used by any other ops that doesn't belong to
// the subgraph ops.
for
(
auto
&
out_op_node
:
var_node
->
outlinks
)
{
if
(
std
::
find
(
op_nodes
.
begin
(),
op_nodes
.
end
(),
out_op_node
)
!=
op_nodes
.
end
())
{
skip
=
true
;
break
;
}
}
if
(
!
skip
&&
!
nodes2rm
.
count
(
var_node
))
{
nodes2rm
.
insert
(
var_node
);
nodes2rm
.
insert
(
var_node
);
}
}
}
}
...
...
lite/kernels/npu/bridges/engine.cc
浏览文件 @
97b54fbe
...
@@ -15,6 +15,7 @@
...
@@ -15,6 +15,7 @@
#include "lite/kernels/npu/bridges/engine.h"
#include "lite/kernels/npu/bridges/engine.h"
#include <sys/time.h>
#include <sys/time.h>
#include <time.h>
#include <time.h>
#include <algorithm>
#include <utility>
#include <utility>
#include "lite/kernels/npu/bridges/registry.h"
#include "lite/kernels/npu/bridges/registry.h"
...
@@ -22,11 +23,50 @@ namespace paddle {
...
@@ -22,11 +23,50 @@ namespace paddle {
namespace
lite
{
namespace
lite
{
namespace
subgraph
{
namespace
subgraph
{
int
Engine
::
BuildDeviceProgram
()
{
return
FAILED
;
}
Engine
::
Engine
(
KernelContext
*
ctx
,
int
block_idx
,
cpp
::
BlockDesc
*
block_desc
,
const
std
::
vector
<
std
::
string
>
&
input_names
,
const
std
::
vector
<
std
::
string
>
&
output_names
,
lite
::
Scope
*
scope
)
:
ctx_
(
ctx
),
block_idx_
(
block_idx
),
block_desc_
(
block_desc
),
scope_
(
scope
)
{
input_names_
=
input_names
;
output_names_
=
output_names
;
// Sort the name of input and output tensors, it's convenient for us to get
// the info of input and output tensors in the same order from the device
// program, because the result of subgraph division may be different but right
// at each call of the subgraph pass.
std
::
stable_sort
(
input_names_
.
begin
(),
input_names_
.
end
());
std
::
stable_sort
(
output_names_
.
begin
(),
output_names_
.
end
());
}
int
Engine
::
LaunchDeviceProgram
()
{
return
0
;
}
bool
Engine
::
Run
()
{
if
(
is_first_epoch_
)
{
PrepareWorkspaceForDeviceProgram
();
is_first_epoch_
=
false
;
}
if
(
InputShapeChanged
())
{
BuildDeviceProgram
();
}
return
LaunchDeviceProgram
();
}
int
Engine
::
BuildOriginProgram
()
{
bool
Engine
::
PrepareWorkspaceForOriginProgram
()
{
origin_idims_
.
resize
(
input_names_
.
size
());
origin_itensors_
.
resize
(
input_names_
.
size
());
for
(
int
i
=
0
;
i
<
input_names_
.
size
();
i
++
)
{
origin_itensors_
[
i
]
=
scope_
->
FindMutableTensor
(
input_names_
[
i
]);
CHECK
(
origin_itensors_
[
i
]);
}
origin_otensors_
.
resize
(
output_names_
.
size
());
for
(
int
i
=
0
;
i
<
output_names_
.
size
();
i
++
)
{
origin_otensors_
[
i
]
=
scope_
->
FindMutableTensor
(
output_names_
[
i
]);
CHECK
(
origin_otensors_
[
i
]);
}
return
true
;
}
bool
Engine
::
BuildOriginProgram
()
{
// TODO(hong19860320) The block_desc need to be divided into subgraphs during
// TODO(hong19860320) The block_desc need to be divided into subgraphs during
// the exection time. But only see them as a subgraph now.
// the exection time. But only see them as a subgraph now.
origin_program_
.
clear
();
origin_program_
.
clear
();
...
@@ -34,11 +74,14 @@ int Engine::BuildOriginProgram() {
...
@@ -34,11 +74,14 @@ int Engine::BuildOriginProgram() {
auto
op_desc
=
block_desc_
->
GetOp
<
cpp
::
OpDesc
>
(
op_idx
);
auto
op_desc
=
block_desc_
->
GetOp
<
cpp
::
OpDesc
>
(
op_idx
);
CHECK
(
op_desc
);
CHECK
(
op_desc
);
std
::
string
op_type
=
op_desc
->
Type
();
std
::
string
op_type
=
op_desc
->
Type
();
// Create op and pick up the best kernel
auto
op
=
LiteOpRegistry
::
Global
().
Create
(
op_desc
->
Type
());
auto
op
=
LiteOpRegistry
::
Global
().
Create
(
op_desc
->
Type
());
CHECK
(
op
)
<<
"no Op found for "
<<
op_type
;
op
->
Attach
(
*
op_desc
,
scope_
);
op
->
Attach
(
*
op_desc
,
scope_
);
std
::
unique_ptr
<
KernelBase
>
picked_kernel
;
std
::
unique_ptr
<
KernelBase
>
picked_kernel
;
if
(
op_desc
->
HasAttr
(
kKernelTypeAttr
))
{
if
(
op_desc
->
HasAttr
(
kKernelTypeAttr
))
{
// Create op and pick up kernel according to the kKernelTypeAttr attribute
// Create op and pick up the best kernel according to the
// kKernelTypeAttr attribute
auto
kernel_type
=
op_desc
->
GetAttr
<
std
::
string
>
(
kKernelTypeAttr
);
auto
kernel_type
=
op_desc
->
GetAttr
<
std
::
string
>
(
kKernelTypeAttr
);
std
::
string
alias
;
std
::
string
alias
;
Place
place
;
Place
place
;
...
@@ -48,12 +91,14 @@ int Engine::BuildOriginProgram() {
...
@@ -48,12 +91,14 @@ int Engine::BuildOriginProgram() {
auto
kernels
=
op
->
CreateKernels
({
place
});
auto
kernels
=
op
->
CreateKernels
({
place
});
CHECK_GT
(
kernels
.
size
(),
0u
)
<<
"No kernels found for "
<<
op_type
;
CHECK_GT
(
kernels
.
size
(),
0u
)
<<
"No kernels found for "
<<
op_type
;
auto
it
=
std
::
find_if
(
auto
it
=
std
::
find_if
(
kernels
.
begin
(),
kernels
.
end
(),
[
&
](
std
::
unique_ptr
<
KernelBase
>
&
it
)
{
kernels
.
begin
(),
kernels
.
end
(),
[
&
](
std
::
unique_ptr
<
KernelBase
>
&
it
)
{
return
it
->
alias
()
==
alias
;
return
it
->
alias
()
==
alias
;
});
});
CHECK
(
it
!=
kernels
.
end
());
CHECK
(
it
!=
kernels
.
end
());
picked_kernel
=
std
::
move
(
*
it
);
picked_kernel
=
std
::
move
(
*
it
);
}
else
{
}
else
{
// TODO(hong19860320) add kernel picking according to the type of input
// and output tensors
VLOG
(
3
)
<<
"The attr '"
<<
kKernelTypeAttr
VLOG
(
3
)
<<
"The attr '"
<<
kKernelTypeAttr
<<
"' not found, pick the first kernel for "
<<
op_type
;
<<
"' not found, pick the first kernel for "
<<
op_type
;
std
::
vector
<
std
::
unique_ptr
<
KernelBase
>>
kernels
;
std
::
vector
<
std
::
unique_ptr
<
KernelBase
>>
kernels
;
...
@@ -74,52 +119,41 @@ int Engine::BuildOriginProgram() {
...
@@ -74,52 +119,41 @@ int Engine::BuildOriginProgram() {
}
}
origin_program_
.
emplace_back
(
std
::
move
(
op
),
std
::
move
(
picked_kernel
));
origin_program_
.
emplace_back
(
std
::
move
(
op
),
std
::
move
(
picked_kernel
));
}
}
return
0
;
CHECK
(
!
origin_program_
.
empty
())
<<
"no instructions"
;
return
true
;
}
}
int
Engine
::
LaunchOriginProgram
()
{
bool
Engine
::
LaunchOriginProgram
()
{
for
(
auto
&
inst
:
origin_program_
)
{
if
(
origin_program_
.
empty
())
{
auto
op_type
=
inst
.
op
()
->
op_info
()
->
Type
();
BuildOriginProgram
();
if
(
op_type
==
"feed"
||
op_type
==
"fetch"
)
continue
;
}
inst
.
Run
();
if
(
!
origin_program_
.
empty
())
{
for
(
auto
&
inst
:
origin_program_
)
{
auto
op_type
=
inst
.
op
()
->
op_info
()
->
Type
();
if
(
op_type
==
"feed"
||
op_type
==
"fetch"
)
continue
;
inst
.
Run
();
}
return
true
;
}
}
return
0
;
return
false
;
}
}
int
Engine
::
Build
()
{
bool
Engine
::
PrepareWorkspaceForDeviceProgram
()
{
// In order to attach all of the ops of the block desc, we need to build the
return
PrepareWorkspaceForOriginProgram
();
// original program firstly.
BuildOriginProgram
();
// Run InferShape() of all of ops, and convert Paddle ops to NPU/XPU IR graph
build_device_program_status_
=
BuildDeviceProgram
();
return
build_device_program_status_
;
}
}
void
Engine
::
InitDeviceTensor
()
{
return
;
}
bool
Engine
::
BuildDeviceProgram
()
{
return
BuildOriginProgram
();
}
bool
Engine
::
LaunchDeviceProgram
()
{
return
LaunchOriginProgram
();
}
bool
Engine
::
InputShapeChanged
()
{
bool
Engine
::
InputShapeChanged
()
{
bool
changed
=
false
;
for
(
size_t
i
=
0
;
i
<
origin_itensors_
.
size
();
i
++
)
{
for
(
size_t
i
=
0
;
i
<
origin_itensors_
.
size
();
i
++
)
{
if
(
origin_itensors_
[
i
]
->
dims
()
!=
origin_idims_
[
i
])
{
auto
origin_idim
=
origin_itensors_
[
i
]
->
dims
().
Vectorize
();
return
true
;
changed
|=
origin_idim
!=
origin_idims_
[
i
];
}
origin_idims_
[
i
]
=
origin_idim
;
}
return
false
;
}
int
Engine
::
Launch
()
{
// Rebuild device program when the shapes of input tensors have been changed.
if
(
CHECK_SUCCESS
(
build_device_program_status_
)
&&
CHECK_REBUILD_WHEN_SHAPE_CHANGED
(
build_device_program_status_
)
&&
InputShapeChanged
())
{
Build
();
InitDeviceTensor
();
}
if
(
CHECK_FAILED
(
build_device_program_status_
))
{
LaunchOriginProgram
();
}
else
{
LaunchDeviceProgram
();
}
}
return
0
;
return
changed
;
}
}
}
// namespace subgraph
}
// namespace subgraph
...
...
lite/kernels/npu/bridges/engine.h
浏览文件 @
97b54fbe
...
@@ -33,49 +33,36 @@ class Engine {
...
@@ -33,49 +33,36 @@ class Engine {
cpp
::
BlockDesc
*
block_desc
,
cpp
::
BlockDesc
*
block_desc
,
const
std
::
vector
<
std
::
string
>
&
input_names
,
const
std
::
vector
<
std
::
string
>
&
input_names
,
const
std
::
vector
<
std
::
string
>
&
output_names
,
const
std
::
vector
<
std
::
string
>
&
output_names
,
lite
::
Scope
*
scope
,
lite
::
Scope
*
scope
);
std
::
string
model_cache_dir
=
""
)
:
ctx_
(
ctx
),
block_idx_
(
block_idx
),
block_desc_
(
block_desc
),
input_names_
(
input_names
),
output_names_
(
output_names
),
scope_
(
scope
),
model_cache_dir_
(
model_cache_dir
)
{}
virtual
~
Engine
()
=
default
;
virtual
~
Engine
()
=
default
;
virtual
int
Build
();
virtual
bool
Run
();
virtual
int
Launch
();
private:
private:
Engine
(
const
Engine
&
)
=
delete
;
Engine
(
const
Engine
&
)
=
delete
;
protected:
protected:
virtual
int
BuildDeviceProgram
();
virtual
bool
PrepareWorkspaceForOriginProgram
();
virtual
int
LaunchDeviceProgram
();
virtual
bool
BuildOriginProgram
();
virtual
bool
LaunchOriginProgram
();
virtual
int
BuildOriginProgram
();
virtual
bool
PrepareWorkspaceForDeviceProgram
();
virtual
int
LaunchOriginProgram
();
virtual
bool
BuildDeviceProgram
();
virtual
bool
LaunchDeviceProgram
();
virtual
void
InitDeviceTensor
();
virtual
bool
InputShapeChanged
();
virtual
bool
InputShapeChanged
();
KernelContext
*
ctx_
{
nullptr
};
KernelContext
*
ctx_
{
nullptr
};
int
block_idx_
;
int
block_idx_
{
-
1
}
;
cpp
::
BlockDesc
*
block_desc_
;
cpp
::
BlockDesc
*
block_desc_
{
nullptr
}
;
std
::
vector
<
std
::
string
>
input_names_
;
std
::
vector
<
std
::
string
>
input_names_
;
std
::
vector
<
std
::
string
>
output_names_
;
std
::
vector
<
std
::
string
>
output_names_
;
Scope
*
scope_
{
nullptr
};
Scope
*
scope_
{
nullptr
};
// SUCCESS: device program build successed. FAILED: device program build
bool
is_first_epoch_
{
true
};
// failed. REBUILD_WHEN_SHAPE_CHANGED: device program build successed but need
std
::
vector
<
std
::
vector
<
int64_t
>>
origin_idims_
;
// to rebuild when input shape changed.
int
build_device_program_status_
{
0
};
std
::
vector
<
DDim
>
origin_idims_
;
std
::
vector
<
DDim
>
origin_odims_
;
std
::
vector
<
Tensor
*>
origin_itensors_
;
std
::
vector
<
Tensor
*>
origin_itensors_
;
std
::
vector
<
Tensor
*>
origin_otensors_
;
std
::
vector
<
Tensor
*>
origin_otensors_
;
std
::
vector
<
Instruction
>
origin_program_
;
std
::
vector
<
Instruction
>
origin_program_
;
std
::
string
model_cache_dir_
{
""
};
};
};
}
// namespace subgraph
}
// namespace subgraph
...
...
lite/kernels/npu/bridges/graph.h
浏览文件 @
97b54fbe
...
@@ -19,7 +19,7 @@
...
@@ -19,7 +19,7 @@
#include <string>
#include <string>
#include <utility>
#include <utility>
#include <vector>
#include <vector>
#include "graph/
op
/all_ops.h"
#include "graph/
compatible
/all_ops.h"
#include "lite/core/op_lite.h"
#include "lite/core/op_lite.h"
#include "lite/core/tensor.h"
#include "lite/core/tensor.h"
...
...
lite/kernels/npu/bridges/matmul_op.cc
浏览文件 @
97b54fbe
...
@@ -94,10 +94,10 @@ int MatMulConverter(void* ctx, OpLite* op, KernelBase* kernel) {
...
@@ -94,10 +94,10 @@ int MatMulConverter(void* ctx, OpLite* op, KernelBase* kernel) {
}
else
{
}
else
{
matmul_node
=
graph
->
Add
<
ge
::
op
::
BatchMatMul
>
(
out_name
);
matmul_node
=
graph
->
Add
<
ge
::
op
::
BatchMatMul
>
(
out_name
);
auto
matmul_op
=
matmul_node
->
data
<
ge
::
op
::
BatchMatMul
>
();
auto
matmul_op
=
matmul_node
->
data
<
ge
::
op
::
BatchMatMul
>
();
matmul_op
->
set_input_x
(
*
x_node
->
data
());
matmul_op
->
set_input_x
1
(
*
x_node
->
data
());
matmul_op
->
set_input_
y
(
*
y_node
->
data
());
matmul_op
->
set_input_
x2
(
*
y_node
->
data
());
matmul_op
->
set_attr_adj_x
(
transpose_x
);
matmul_op
->
set_attr_adj_x
1
(
transpose_x
);
matmul_op
->
set_attr_adj_
y
(
transpose_y
);
matmul_op
->
set_attr_adj_
x2
(
transpose_y
);
}
}
if
(
fabs
(
alpha
-
1.
f
)
>
1e-6
f
)
{
if
(
fabs
(
alpha
-
1.
f
)
>
1e-6
f
)
{
...
...
lite/kernels/npu/bridges/utility.h
浏览文件 @
97b54fbe
...
@@ -20,11 +20,11 @@
...
@@ -20,11 +20,11 @@
#include <string>
#include <string>
#include <vector>
#include <vector>
#include "graph/buffer.h"
#include "graph/buffer.h"
#include "graph/compatible/operator_reg.h"
#include "graph/graph.h"
#include "graph/graph.h"
#include "graph/model.h"
#include "graph/model.h"
#include "graph/op/all_ops.h"
#include "graph/op/all_ops.h"
#include "graph/operator.h"
#include "graph/operator.h"
#include "graph/operator_reg.h"
#include "lite/core/op_lite.h"
#include "lite/core/op_lite.h"
#include "lite/utils/macros.h"
#include "lite/utils/macros.h"
...
@@ -97,25 +97,26 @@ REG_OP(Pad)
...
@@ -97,25 +97,26 @@ REG_OP(Pad)
/*
/*
* Multiplies slices of two tensors in batches.
* Multiplies slices of two tensors in batches.
* <Input>
* <Input>
*
x
: The input tensor
*
x1
: The input tensor
*
y
: The input tensor
*
x2
: The input tensor
* <Output>
* <Output>
*
z
: The output tensor
*
y
: The output tensor
* <Attr>
* <Attr>
* adj_x : adj_x is true, the input tensor x is transposed, otherwise
* adj_x1 : adj_x1 is true, the input tensor x1 is transposed,
* it will not be transposed. Default is false (The current version only
* otherwise it will not be transposed.
* supports false).
* Default is false (The current version only supports false).
* adj_y : adj_y is true, the input tensor y is transposed, otherwise
* adj_x2 : adj_x2 is true, the input tensor x2 is transposed,
* it will not be transposed. Default is false.
* otherwise it will not be transposed.
* Default is false.
* <Added in HiAI version>
* <Added in HiAI version>
*
100.320.010.010
* 100.320.010.010
*/
*/
REG_OP
(
BatchMatMul
)
REG_OP
(
BatchMatMul
)
.
INPUT
(
x
,
TensorType
({
DT_FLOAT
}))
.
INPUT
(
x
1
,
TensorType
({
DT_FLOAT
}))
.
INPUT
(
y
,
TensorType
({
DT_FLOAT
}))
.
INPUT
(
x2
,
TensorType
({
DT_FLOAT
}))
.
OUTPUT
(
z
,
TensorType
({
DT_FLOAT
}))
.
OUTPUT
(
y
,
TensorType
({
DT_FLOAT
}))
.
ATTR
(
adj_x
,
AttrValue
::
BOOL
{
false
})
.
ATTR
(
adj_x
1
,
AttrValue
::
BOOL
{
false
})
.
ATTR
(
adj_
y
,
AttrValue
::
BOOL
{
false
})
.
ATTR
(
adj_
x2
,
AttrValue
::
BOOL
{
false
})
.
OP_END
()
.
OP_END
()
}
// namespace ge
}
// namespace ge
...
...
lite/kernels/npu/subgraph_compute.cc
浏览文件 @
97b54fbe
...
@@ -16,6 +16,7 @@
...
@@ -16,6 +16,7 @@
#include <sys/time.h>
#include <sys/time.h>
#include <time.h>
#include <time.h>
#include <algorithm>
#include <algorithm>
#include <functional>
#include <utility>
#include <utility>
#include "hiai_ir_build.h" // NOLINT
#include "hiai_ir_build.h" // NOLINT
#include "lite/backends/npu/device.h"
#include "lite/backends/npu/device.h"
...
@@ -24,205 +25,275 @@
...
@@ -24,205 +25,275 @@
#include "lite/kernels/npu/bridges/paddle_use_bridges.h"
#include "lite/kernels/npu/bridges/paddle_use_bridges.h"
#include "lite/kernels/npu/bridges/utility.h"
#include "lite/kernels/npu/bridges/utility.h"
#include "lite/utils/io.h"
#include "lite/utils/io.h"
#include "lite/utils/md5.h"
namespace
paddle
{
namespace
paddle
{
namespace
lite
{
namespace
lite
{
namespace
kernels
{
namespace
kernels
{
namespace
npu
{
namespace
npu
{
std
::
string
SubgraphEngine
::
GenerateModelCacheName
()
const
{
// Generate the model name by using md5 hashes based on:
auto
inames
=
device_inames_
;
// 1. the sorted variable input names
auto
onames
=
device_onames_
;
// 2. the shapes of the origin input tensors
std
::
stable_sort
(
inames
.
begin
(),
inames
.
end
());
// 3. the sorted variable output names
std
::
string
DeviceProgram
::
GenerateModelName
(
std
::
string
model_cache_name
=
"subgraph_"
+
std
::
to_string
(
block_idx_
);
const
std
::
vector
<
std
::
string
>&
input_names
,
for
(
auto
iname
:
inames
)
{
const
std
::
vector
<
std
::
string
>&
output_names
,
model_cache_name
+=
"_"
;
const
std
::
vector
<
std
::
vector
<
int64_t
>>&
origin_idims
)
{
auto
itensor
=
scope_
->
FindTensor
(
iname
);
std
::
ostringstream
os
;
int
tmp
=
0
;
CHECK_EQ
(
input_names
.
size
(),
origin_idims
.
size
());
for
(
auto
i
:
itensor
->
dims
().
Vectorize
())
{
for
(
int
i
=
0
;
i
<
input_names
.
size
();
i
++
)
{
tmp
+=
i
*
i
;
os
<<
input_names
[
i
];
for
(
auto
dim
:
origin_idims
[
i
])
{
os
<<
dim
;
}
}
model_cache_name
+=
std
::
to_string
(
tmp
%
1999
);
}
}
model_cache_name
+=
"_.om"
;
for
(
auto
output_name
:
output_names
)
{
os
<<
output_name
;
}
return
MD5
(
os
.
str
());
}
return
model_cache_name
;
// Deserialize the generated model, and the precisions and dimensions of the
// origin output tensors of the subgraph op, from the cached files
bool
DeviceProgram
::
LoadFromCacheFile
(
const
std
::
vector
<
std
::
string
>&
input_names
,
const
std
::
vector
<
std
::
string
>&
output_names
,
const
std
::
vector
<
std
::
vector
<
int64_t
>>&
origin_idims
,
const
std
::
string
&
model_cache_dir
)
{
// Generate the model name if not initialized
if
(
model_name_
.
empty
())
{
model_name_
=
GenerateModelName
(
input_names
,
output_names
,
origin_idims
);
}
// Load from the cached model file, return a HiAI model manager client for
// inference
auto
model_path
=
model_cache_dir
+
"/"
+
model_name_
+
".om"
;
VLOG
(
3
)
<<
"[NPU] Load model from "
<<
model_path
;
std
::
vector
<
char
>
model_buffer
;
if
(
!
ReadFile
(
model_path
,
&
model_buffer
))
{
LOG
(
WARNING
)
<<
"[NPU] read from "
<<
model_path
<<
" failed!"
;
return
false
;
}
bool
model_comp
=
false
;
model_client_
=
lite
::
npu
::
Device
::
Global
().
Load
(
model_name_
,
&
model_buffer
,
&
model_comp
);
if
(
!
model_client_
)
{
LOG
(
WARNING
)
<<
"[NPU] Load model failed!"
;
return
false
;
}
// Rewrite with the compatible model data if the cached
// model file is incompatible with the current device
if
(
!
model_comp
)
{
VLOG
(
3
)
<<
"[NPU] Export the compatible model to "
<<
model_path
;
if
(
!
WriteFile
(
model_path
,
model_buffer
))
{
LOG
(
WARNING
)
<<
"[NPU] Open "
<<
model_path
<<
" for writting failed!"
;
}
}
// Deserialize the precisions and shapes of the origin output tensors from the
// cached configuration file
auto
config_path
=
model_cache_dir
+
"/"
+
model_name_
+
".cfg"
;
VLOG
(
3
)
<<
"[NPU] Load configuration from "
<<
config_path
;
std
::
vector
<
char
>
config_buffer
;
if
(
!
ReadFile
(
config_path
,
&
config_buffer
))
{
LOG
(
WARNING
)
<<
"[NPU] read from "
<<
config_path
<<
" failed!"
;
return
false
;
}
std
::
string
config_str
(
config_buffer
.
begin
(),
config_buffer
.
end
());
// Parse the precision and shapes of the output tensors
auto
output_options
=
Split
<
std
::
string
>
(
config_str
,
";"
);
CHECK_EQ
(
output_options
.
size
(),
output_names
.
size
());
origin_otypes_
.
resize
(
output_names
.
size
());
origin_odims_
.
resize
(
output_names
.
size
());
for
(
int
i
=
0
;
i
<
output_names
.
size
();
i
++
)
{
auto
items
=
Split
<
std
::
string
>
(
output_options
[
i
],
":"
);
CHECK_EQ
(
items
.
size
(),
2
);
// precision and shapes
origin_otypes_
[
i
]
=
static_cast
<
PrecisionType
>
(
std
::
stoi
(
items
[
0
]));
origin_odims_
[
i
]
=
Split
<
int64_t
>
(
items
[
1
],
","
);
}
return
true
;
}
}
int
SubgraphEngine
::
BuildDeviceProgram
()
{
bool
DeviceProgram
::
BuildGraphAndCacheToFile
(
const
std
::
vector
<
Instruction
>&
origin_program
,
const
std
::
vector
<
std
::
string
>&
input_names
,
const
std
::
vector
<
std
::
string
>&
output_names
,
const
std
::
vector
<
std
::
vector
<
int64_t
>>&
origin_idims
,
const
std
::
vector
<
Tensor
*>&
origin_otensors
,
const
std
::
string
&
model_cache_dir
)
{
// Generate the model name if not initialized
if
(
model_name_
.
empty
())
{
model_name_
=
GenerateModelName
(
input_names
,
output_names
,
origin_idims
);
}
// Convert all of ops and their input vars and weights to HiAI IR nodes,
// then added them into the HiAI IR graph
int
status
=
0
;
int
status
=
0
;
// Convert all of ops and their input vars and weights and added into the NPU
CHECK
(
!
origin_program
.
empty
())
<<
"no instructions"
;
// HiAI IR graph
subgraph
::
npu
::
Graph
graph
;
subgraph
::
npu
::
Graph
graph
;
const
auto
&
bridges
=
subgraph
::
Registry
::
Instance
();
const
auto
&
bridges
=
subgraph
::
Registry
::
Instance
();
for
(
auto
&
inst
:
origin_program
_
)
{
for
(
auto
&
inst
:
origin_program
)
{
auto
op
=
const_cast
<
OpLite
*>
(
inst
.
op
());
auto
op
=
const_cast
<
OpLite
*>
(
inst
.
op
());
CHECK
(
op
);
CHECK
(
op
);
op
->
CheckShape
();
op
->
CheckShape
();
op
->
InferShape
();
op
->
InferShape
();
std
::
string
op_type
=
op
->
op_info
()
->
Type
();
std
::
string
op_type
=
op
->
op_info
()
->
Type
();
if
(
!
bridges
.
Exists
(
op_type
,
TARGET
(
kNPU
)))
{
if
(
!
bridges
.
Exists
(
op_type
,
TARGET
(
kNPU
)))
{
return
subgraph
::
FAILED
;
return
false
;
}
}
auto
kernel
=
inst
.
kernel
();
auto
kernel
=
inst
.
kernel
();
status
|=
bridges
.
Select
(
op_type
,
TARGET
(
kNPU
))(
status
|=
bridges
.
Select
(
op_type
,
TARGET
(
kNPU
))(
reinterpret_cast
<
void
*>
(
&
graph
),
op
,
const_cast
<
KernelBase
*>
(
kernel
));
reinterpret_cast
<
void
*>
(
&
graph
),
op
,
const_cast
<
KernelBase
*>
(
kernel
));
if
(
subgraph
::
CHECK_FAILED
(
status
))
{
if
(
subgraph
::
CHECK_FAILED
(
status
))
{
return
subgraph
::
FAILED
;
return
false
;
}
}
}
}
// Collect the valid input and output nodes in the HiAI IR graph and update
// Collect the input and output nodes of the HiAI IR graph
// the input and output names
device_inames_
.
clear
();
device_onames_
.
clear
();
std
::
vector
<
ge
::
Operator
>
device_inodes
;
std
::
vector
<
ge
::
Operator
>
device_inodes
;
for
(
size_t
i
=
0
;
i
<
input_names
.
size
();
i
++
)
{
CHECK
(
graph
.
Has
(
input_names
[
i
])
&&
graph
.
Get
(
input_names
[
i
])
->
is_data
());
device_inodes
.
push_back
(
*
graph
.
Get
(
input_names
[
i
])
->
data
());
}
std
::
vector
<
ge
::
Operator
>
device_onodes
;
std
::
vector
<
ge
::
Operator
>
device_onodes
;
for
(
auto
&
input_name
:
input_names_
)
{
for
(
size_t
i
=
0
;
i
<
output_names
.
size
();
i
++
)
{
if
(
graph
.
Has
(
input_name
))
{
CHECK
(
graph
.
Has
(
output_names
[
i
]));
if
(
graph
.
Get
(
input_name
)
->
is_data
())
{
device_onodes
.
push_back
(
*
graph
.
Get
(
output_names
[
i
])
->
data
());
device_inodes
.
push_back
(
*
graph
.
Get
(
input_name
)
->
data
());
device_inames_
.
push_back
(
input_name
);
}
else
{
LOG
(
WARNING
)
<<
"[NPU] Input node "
<<
input_name
<<
" is ignored because it is not a data node."
;
}
}
else
{
LOG
(
WARNING
)
<<
"[NPU] Input node "
<<
input_name
<<
" is ignored because it does not exist."
;
}
}
}
for
(
auto
&
output_name
:
output_names_
)
{
// Build the HiAI IR graph to the HiAI om model
if
(
graph
.
Has
(
output_name
))
{
std
::
vector
<
char
>
model_buffer
;
device_onodes
.
push_back
(
*
graph
.
Get
(
output_name
)
->
data
());
if
(
!
lite
::
npu
::
Device
::
Global
().
Build
(
device_onames_
.
push_back
(
output_name
);
device_inodes
,
device_onodes
,
&
model_buffer
))
{
}
else
{
LOG
(
WARNING
)
<<
"[NPU] Build model failed!"
;
LOG
(
WARNING
)
<<
"[NPU] Output node "
<<
output_name
return
false
;
<<
" is ignored because it does not exist."
;
}
}
}
CHECK
(
!
device_inames_
.
empty
())
// Load the HiAI om model and create a HiAI model manager client(from HiAI
<<
"[NPU] No input nodes found for building NPU model"
;
// Service) to run inference.
CHECK
(
!
device_onames_
.
empty
())
bool
model_comp
=
true
;
<<
"[NPU] No output nodes found for building NPU model"
;
model_client_
=
lite
::
npu
::
Device
::
Global
().
Load
(
model_name_
,
&
model_buffer
,
&
model_comp
);
// Build the HiAI IR graph to HiAI om model as the device program
if
(
!
model_client_
)
{
if
(
device_program_map_
.
count
(
inputs_shape_
)
>
0
)
{
LOG
(
WARNING
)
<<
"[NPU] Load model failed!"
;
return
status
;
return
false
;
}
}
std
::
string
model_cache_full_dir
=
// Update the precision and dimensions of the origin output tensors
model_cache_dir_
.
empty
()
?
""
:
model_cache_dir_
+
"/"
+
CHECK_EQ
(
origin_otensors
.
size
(),
output_names
.
size
());
GenerateModelCacheName
();
origin_otypes_
.
resize
(
output_names
.
size
());
auto
device_client
=
lite
::
npu
::
Device
::
Global
().
Build
(
origin_odims_
.
resize
(
output_names
.
size
());
model_name_
,
device_inodes
,
device_onodes
,
model_cache_full_dir
);
for
(
size_t
i
=
0
;
i
<
output_names
.
size
();
i
++
)
{
if
(
device_client
==
nullptr
)
{
origin_otypes_
[
i
]
=
graph
.
Get
(
output_names
[
i
])
->
precision
();
LOG
(
WARNING
)
<<
"[NPU] Build model failed!"
;
origin_odims_
[
i
]
=
origin_otensors
[
i
]
->
dims
().
Vectorize
();
return
subgraph
::
FAILED
;
}
}
auto
device_program
=
std
::
make_shared
<
device_program_t
>
(
device_client
);
if
(
!
model_cache_dir
.
empty
())
{
if
(
!
inputs_shape_
.
empty
())
{
// Save the generated model to file, used for the model caching or the
device_program_map_
[
inputs_shape_
]
=
device_program
;
// offline model generation
auto
model_path
=
model_cache_dir
+
"/"
+
model_name_
+
".om"
;
VLOG
(
3
)
<<
"[NPU] Save model to "
<<
model_path
;
if
(
!
WriteFile
(
model_path
,
model_buffer
))
{
LOG
(
WARNING
)
<<
"[NPU] Open "
<<
model_path
<<
" for writting failed!"
;
}
// Serialize the precisions and shapes of the origin output tensors into the
// configuration file
std
::
ostringstream
os
;
for
(
int
i
=
0
;
i
<
output_names
.
size
();
i
++
)
{
os
<<
static_cast
<
int32_t
>
(
origin_otypes_
[
i
])
<<
":"
;
for
(
auto
dim
:
origin_odims_
[
i
])
{
os
<<
dim
<<
","
;
}
os
<<
";"
;
}
auto
str
=
os
.
str
();
std
::
vector
<
char
>
config_buffer
(
str
.
begin
(),
str
.
end
());
auto
config_path
=
model_cache_dir
+
"/"
+
model_name_
+
".cfg"
;
VLOG
(
3
)
<<
"[NPU] Save configuration to "
<<
config_path
;
if
(
!
WriteFile
(
config_path
,
config_buffer
))
{
LOG
(
WARNING
)
<<
"[NPU] Open "
<<
config_path
<<
" for writting failed!"
;
}
}
}
return
true
;
}
// Query and check the dimensions of valid input and output tensors
bool
DeviceProgram
::
ShareBufferWithOriginTensors
(
std
::
vector
<
hiai
::
TensorDimension
>
device_idims
,
device_odims
;
const
std
::
vector
<
std
::
string
>&
input_names
,
if
(
device_program
->
client
->
GetModelIOTensorDim
(
const
std
::
vector
<
std
::
string
>&
output_names
,
model_name_
,
device_idims
,
device_odims
)
!=
hiai
::
AI_SUCCESS
)
{
std
::
vector
<
Tensor
*>*
origin_itensors
,
LOG
(
WARNING
)
std
::
vector
<
Tensor
*>*
origin_otensors
,
<<
"[NPU] Get the dimensions of input and output tensors failed!"
;
std
::
vector
<
std
::
shared_ptr
<
hiai
::
AiTensor
>>*
device_itensors
,
return
subgraph
::
FAILED
;
std
::
vector
<
std
::
shared_ptr
<
hiai
::
AiTensor
>>*
device_otensors
)
{
CHECK
(
!
model_name_
.
empty
()
&&
model_client_
);
// Query the dimensions of the device input and output tensors if not
// initialized
if
(
device_idims_
.
empty
()
||
device_odims_
.
empty
())
{
if
(
model_client_
->
GetModelIOTensorDim
(
model_name_
,
device_idims_
,
device_odims_
)
!=
hiai
::
AI_SUCCESS
)
{
LOG
(
WARNING
)
<<
"[NPU] Get the dimensions of input and output tensors failed!"
;
return
false
;
}
}
}
device_program
->
device_idims
=
device_idims
;
// Check the dimensions of the device tensors and the origin tensors
device_program
->
device_odims
=
device_odims
;
CHECK_EQ
(
device_itensors
->
size
(),
input_names
.
size
());
CHECK_EQ
(
device_otensors
->
size
(),
output_names
.
size
());
CHECK_EQ
(
origin_otypes_
.
size
(),
output_names
.
size
());
CHECK_EQ
(
origin_odims_
.
size
(),
output_names
.
size
());
CHECK_EQ
(
device_idims_
.
size
(),
input_names
.
size
());
CHECK_EQ
(
device_odims_
.
size
(),
output_names
.
size
());
for
(
int
i
=
0
;
i
<
input_names
.
size
();
i
++
)
{
VLOG
(
3
)
<<
"[NPU] Inputs["
<<
i
<<
"] name: "
<<
input_names
[
i
]
<<
" origin dims:"
<<
(
*
origin_itensors
)[
i
]
->
dims
().
repr
()
<<
" device dims: {"
<<
device_idims_
[
i
].
GetNumber
()
<<
","
<<
device_idims_
[
i
].
GetChannel
()
<<
","
<<
device_idims_
[
i
].
GetHeight
()
<<
","
<<
device_idims_
[
i
].
GetWidth
()
<<
"}"
;
CHECK_EQ
((
*
origin_itensors
)[
i
]
->
dims
().
production
(),
device_idims_
[
i
].
GetNumber
()
*
device_idims_
[
i
].
GetChannel
()
*
device_idims_
[
i
].
GetHeight
()
*
device_idims_
[
i
].
GetWidth
());
VLOG
(
3
)
<<
"[NPU] Init the input tensors for the device program and share "
"their buffers with the origin input tensors"
;
// reinit device tensor will free shared buffer, so copy data to a tmp
// tensor
Tensor
tmp
;
tmp
.
CopyDataFrom
(
*
(
*
origin_itensors
)[
i
]);
(
*
device_itensors
)[
i
]
->
Init
(
&
(
device_idims_
[
i
]));
CHECK_EQ
(
device_idims
.
size
(),
device_inames_
.
size
());
std
::
memcpy
(
CHECK_EQ
(
device_odims
.
size
(),
device_onames_
.
size
());
(
*
device_itensors
)[
i
]
->
GetBuffer
(),
tmp
.
raw_data
(),
tmp
.
memory_size
());
origin_idims_
.
resize
(
device_inames_
.
size
());
origin_itensors_
.
resize
(
device_inames_
.
size
());
device_itensors_
.
resize
(
device_inames_
.
size
());
origin_odims_
.
resize
(
device_onames_
.
size
());
origin_otensors_
.
resize
(
device_onames_
.
size
());
device_otensors_
.
resize
(
device_onames_
.
size
());
for
(
int
i
=
0
;
i
<
device_inames_
.
size
();
i
++
)
{
// Share data buf between device_itensor and origin_itensor
auto
node
=
graph
.
Get
(
device_inames_
[
i
]);
std
::
shared_ptr
<
Buffer
>
buffer
=
auto
precision
=
node
->
precision
();
std
::
make_shared
<
Buffer
>
((
*
device_itensors
)[
i
]
->
GetBuffer
(),
auto
layout
=
node
->
layout
();
lite_api
::
TargetType
::
kHost
,
origin_itensors_
[
i
]
=
scope_
->
FindMutableTensor
(
device_inames_
[
i
]);
(
*
device_itensors
)[
i
]
->
GetSize
());
CHECK
(
origin_itensors_
[
i
]);
(
*
origin_itensors
)[
i
]
->
ResetBuffer
(
buffer
,
origin_idims_
[
i
]
=
origin_itensors_
[
i
]
->
dims
();
(
*
device_itensors
)[
i
]
->
GetSize
());
VLOG
(
3
)
<<
"[NPU] Inputs["
<<
i
<<
"] name: "
<<
device_inames_
[
i
]
<<
" precision: "
<<
PrecisionToStr
(
precision
)
<<
" layout: "
<<
DataLayoutToStr
(
layout
)
<<
" dims: {"
<<
device_idims
[
i
].
GetNumber
()
<<
","
<<
device_idims
[
i
].
GetChannel
()
<<
","
<<
device_idims
[
i
].
GetHeight
()
<<
","
<<
device_idims
[
i
].
GetWidth
()
<<
"}"
;
// Prepare the device input tensors
CHECK_EQ
(
origin_idims_
[
i
].
production
(),
device_idims
[
i
].
GetNumber
()
*
device_idims
[
i
].
GetChannel
()
*
device_idims
[
i
].
GetHeight
()
*
device_idims
[
i
].
GetWidth
());
device_itensors_
[
i
].
reset
(
new
hiai
::
AiTensor
);
device_itensors_
[
i
]
->
Init
(
&
(
device_idims
[
i
]));
}
}
device_program
->
origin_idims
=
origin_idims_
;
for
(
int
i
=
0
;
i
<
output_names
.
size
();
i
++
)
{
(
*
origin_otensors
)[
i
]
->
set_precision
(
origin_otypes_
[
i
]);
for
(
int
i
=
0
;
i
<
device_onames_
.
size
();
i
++
)
{
(
*
origin_otensors
)[
i
]
->
Resize
(
origin_odims_
[
i
]);
auto
node
=
graph
.
Get
(
device_onames_
[
i
]);
VLOG
(
3
)
<<
"[NPU] Outputs["
<<
i
<<
"] name: "
<<
output_names
[
i
]
auto
precision
=
node
->
precision
();
<<
" origin dims:"
<<
(
*
origin_otensors
)[
i
]
->
dims
().
repr
()
auto
layout
=
node
->
layout
();
<<
" device dims: {"
<<
device_odims_
[
i
].
GetNumber
()
<<
","
origin_otensors_
[
i
]
=
scope_
->
FindMutableTensor
(
device_onames_
[
i
]);
<<
device_odims_
[
i
].
GetChannel
()
<<
","
CHECK
(
origin_otensors_
[
i
]);
<<
device_odims_
[
i
].
GetHeight
()
<<
","
origin_odims_
[
i
]
=
origin_otensors_
[
i
]
->
dims
();
<<
device_odims_
[
i
].
GetWidth
()
<<
"}"
;
VLOG
(
3
)
<<
"[NPU] Outputs["
<<
i
<<
"] name: "
<<
device_onames_
[
i
]
CHECK_EQ
((
*
origin_otensors
)[
i
]
->
dims
().
production
(),
<<
" precision: "
<<
PrecisionToStr
(
precision
)
device_odims_
[
i
].
GetNumber
()
*
device_odims_
[
i
].
GetChannel
()
*
<<
" layout: "
<<
DataLayoutToStr
(
layout
)
<<
" dims: {"
device_odims_
[
i
].
GetHeight
()
*
device_odims_
[
i
].
GetWidth
());
<<
device_odims
[
i
].
GetNumber
()
<<
","
(
*
device_otensors
)[
i
]
->
Init
(
&
(
device_odims_
[
i
]));
<<
device_odims
[
i
].
GetChannel
()
<<
","
VLOG
(
3
)
<<
"[NPU] Init the output tensors for the device program and share "
<<
device_odims
[
i
].
GetHeight
()
<<
","
<<
device_odims
[
i
].
GetWidth
()
"their buffers with the origin output tensors"
;
<<
"}"
;
// Share data buf between device_itensor and origin_itensor
// Prepare the device output tensors
std
::
shared_ptr
<
Buffer
>
buffer
=
switch
(
precision
)
{
std
::
make_shared
<
Buffer
>
((
*
device_otensors
)[
i
]
->
GetBuffer
(),
case
PRECISION
(
kFloat
):
lite_api
::
TargetType
::
kHost
,
origin_otensors_
[
i
]
->
mutable_data
<
float
>
();
(
*
device_otensors
)[
i
]
->
GetSize
());
break
;
(
*
origin_otensors
)[
i
]
->
ResetBuffer
(
buffer
,
case
PRECISION
(
kBool
):
(
*
device_otensors
)[
i
]
->
GetSize
());
origin_otensors_
[
i
]
->
mutable_data
<
bool
>
();
break
;
case
PRECISION
(
kInt8
):
origin_otensors_
[
i
]
->
mutable_data
<
int8_t
>
();
break
;
case
PRECISION
(
kInt16
):
origin_otensors_
[
i
]
->
mutable_data
<
int16_t
>
();
break
;
case
PRECISION
(
kInt32
):
origin_otensors_
[
i
]
->
mutable_data
<
int32_t
>
();
break
;
case
PRECISION
(
kInt64
):
origin_otensors_
[
i
]
->
mutable_data
<
int64_t
>
();
break
;
default:
LOG
(
FATAL
)
<<
"[NPU] "
<<
device_onames_
[
i
]
<<
" can't mutable data with precision type "
<<
PrecisionToStr
(
precision
);
break
;
}
device_program
->
origin_odims
=
origin_odims_
;
CHECK_EQ
(
origin_odims_
[
i
].
production
(),
device_odims
[
i
].
GetNumber
()
*
device_odims
[
i
].
GetChannel
()
*
device_odims
[
i
].
GetHeight
()
*
device_odims
[
i
].
GetWidth
());
device_otensors_
[
i
].
reset
(
new
hiai
::
AiTensor
);
device_otensors_
[
i
]
->
Init
(
&
(
device_odims
[
i
]));
}
}
return
status
;
return
true
;
}
}
int
SubgraphEngine
::
LaunchDeviceProgram
()
{
bool
DeviceProgram
::
ZeroCopyRun
(
// Copy the data of origin input tensors to the buffer of input HiAI tensors
std
::
vector
<
std
::
shared_ptr
<
hiai
::
AiTensor
>>*
device_itensors
,
// init device_itensors_, device_otensors_, origin_otensors_
std
::
vector
<
std
::
shared_ptr
<
hiai
::
AiTensor
>>*
device_otensors
)
{
auto
device_program
=
device_program_map_
[
inputs_shape_
];
CHECK
(
!
model_name_
.
empty
()
&&
model_client_
);
// Run the HiAI model by name
// Run the HiAI model by name
std
::
string
key
=
"model_name"
;
// Note: key seems must be model_name
std
::
string
key
=
"model_name"
;
// Note: key seems must be model_name
hiai
::
AiContext
model_context
;
hiai
::
AiContext
model_context
;
...
@@ -234,70 +305,87 @@ int SubgraphEngine::LaunchDeviceProgram() {
...
@@ -234,70 +305,87 @@ int SubgraphEngine::LaunchDeviceProgram() {
};
};
int
istamp
;
int
istamp
;
auto
start_time
=
GetCurrentUS
();
auto
start_time
=
GetCurrentUS
();
CHECK_EQ
(
device_program
->
client
->
Process
(
CHECK_EQ
(
model_client_
->
Process
(
model_context
,
device_itensors_
,
device_otensors_
,
1000
,
istamp
),
model_context
,
*
device_itensors
,
*
device_otensors
,
1000
,
istamp
),
hiai
::
AI_SUCCESS
);
hiai
::
AI_SUCCESS
);
VLOG
(
3
)
<<
"[NPU] Process cost "
<<
GetCurrentUS
()
-
start_time
<<
" us"
;
VLOG
(
3
)
<<
"[NPU] Process cost "
<<
GetCurrentUS
()
-
start_time
<<
" us"
;
return
true
;
return
0
;
}
}
int
SubgraphEngine
::
Build
()
{
bool
SubgraphEngine
::
PrepareWorkspaceForDeviceProgram
()
{
if
(
device_program_map_
.
count
(
inputs_shape_
)
>
0
)
{
// Obtain the origin input tensors, and create the origin output
return
subgraph
::
SUCCESS
;
// tensors(Don't try to access them before launch the device program or the
// origin program)
PrepareWorkspaceForOriginProgram
();
// Create the device input and output tensors, but don't initialize them
// with the dimensions
device_itensors_
.
resize
(
input_names_
.
size
());
for
(
int
i
=
0
;
i
<
input_names_
.
size
();
i
++
)
{
device_itensors_
[
i
].
reset
(
new
hiai
::
AiTensor
);
CHECK
(
device_itensors_
[
i
]);
}
device_otensors_
.
resize
(
output_names_
.
size
());
for
(
int
i
=
0
;
i
<
output_names_
.
size
();
i
++
)
{
device_otensors_
[
i
].
reset
(
new
hiai
::
AiTensor
);
CHECK
(
device_otensors_
[
i
]);
}
}
// In order to attach all of the ops of the block desc, we need to build the
return
true
;
// original program firstly.
BuildOriginProgram
();
// Run InferShape() of all of ops, and convert Paddle ops to NPU/XPU IR graph
build_device_program_status_
=
BuildDeviceProgram
();
return
build_device_program_status_
;
}
}
void
SubgraphEngine
::
InitDeviceTensor
()
{
bool
SubgraphEngine
::
BuildDeviceProgram
()
{
auto
device_program
=
device_program_map_
[
inputs_shape_
];
// Check if the cache device program exists
for
(
size_t
i
=
0
;
i
<
device_itensors_
.
size
();
i
++
)
{
if
(
!
device_programs_
.
count
(
origin_idims_
))
{
if
(
device_itensors_
[
i
]
->
GetBuffer
()
!=
origin_itensors_
[
i
]
->
raw_data
())
{
auto
device_program
=
std
::
make_shared
<
DeviceProgram
>
();
VLOG
(
3
)
<<
"init device_itensors and share input tensor buf between "
// Obtain the model cache dir from the NPU Context of the subgraph op
"device and host"
;
auto
model_cache_dir
=
ctx_
->
As
<
NPUContext
>
().
SubgraphModelCacheDir
();
device_itensors_
[
i
]
->
Init
(
&
(
device_program
->
device_idims
[
i
]));
VLOG
(
3
)
<<
"[NPU] Getting subgraph model_cache_dir is: "
<<
model_cache_dir
;
std
::
memcpy
(
device_itensors_
[
i
]
->
GetBuffer
(),
// Check and load if the cached model and configuration file exists
origin_itensors_
[
i
]
->
raw_data
(),
if
(
model_cache_dir
.
empty
()
||
origin_itensors_
[
i
]
->
memory_size
());
!
device_program
->
LoadFromCacheFile
(
// share data buf between device_itensor and origin_itensor
input_names_
,
output_names_
,
origin_idims_
,
model_cache_dir
))
{
std
::
shared_ptr
<
Buffer
>
buffer
=
// Build the model online, including converting the paddle ops to the HiAI
std
::
make_shared
<
Buffer
>
(
device_itensors_
[
i
]
->
GetBuffer
(),
// IR nodes, building the HiAI IR graph to the om model, then load it as a
lite_api
::
TargetType
::
kHost
,
// new HiAI model manager client for inference.
device_itensors_
[
i
]
->
GetSize
());
if
(
origin_program_
.
empty
())
{
origin_itensors_
[
i
]
->
ResetBuffer
(
buffer
,
device_itensors_
[
i
]
->
GetSize
());
BuildOriginProgram
();
}
CHECK
(
!
origin_program_
.
empty
())
<<
"no instructions"
;
if
(
!
device_program
->
BuildGraphAndCacheToFile
(
origin_program_
,
input_names_
,
output_names_
,
origin_idims_
,
origin_otensors_
,
model_cache_dir
))
{
return
false
;
}
}
}
}
if
(
device_program
->
model_client_
==
nullptr
)
{
for
(
size_t
i
=
0
;
i
<
device_otensors_
.
size
();
i
++
)
{
return
false
;
if
(
device_otensors_
[
i
]
->
GetBuffer
()
!=
origin_otensors_
[
i
]
->
raw_data
())
{
VLOG
(
3
)
<<
"init device_otensors and share output tensor buf between "
"device and host"
;
device_otensors_
[
i
]
->
Init
(
&
(
device_program
->
device_odims
[
i
]));
// share data buf between device_itensor and origin_itensor
origin_otensors_
[
i
]
->
Resize
(
device_program
->
origin_odims
[
i
]);
std
::
shared_ptr
<
Buffer
>
buffer
=
std
::
make_shared
<
Buffer
>
(
device_otensors_
[
i
]
->
GetBuffer
(),
lite_api
::
TargetType
::
kHost
,
device_otensors_
[
i
]
->
GetSize
());
origin_otensors_
[
i
]
->
ResetBuffer
(
buffer
,
device_otensors_
[
i
]
->
GetSize
());
}
}
device_programs_
[
origin_idims_
]
=
device_program
;
}
}
auto
device_program
=
device_programs_
[
origin_idims_
];
CHECK
(
device_program
&&
device_program
->
model_client_
);
return
device_program
->
ShareBufferWithOriginTensors
(
input_names_
,
output_names_
,
&
origin_itensors_
,
&
origin_otensors_
,
&
device_itensors_
,
&
device_otensors_
);
}
}
bool
SubgraphEngine
::
InputShapeChanged
()
{
bool
SubgraphEngine
::
LaunchDeviceProgram
()
{
std
::
vector
<
std
::
vector
<
int64_t
>>
new_shape
;
// Roll back to launch the origin program if the device program can't be
for
(
auto
origin_itensor
:
origin_itensors_
)
{
// found or the model client isn't initialized.
new_shape
.
push_back
(
origin_itensor
->
dims
().
Vectorize
());
if
(
device_programs_
.
count
(
origin_idims_
)
==
0
||
device_programs_
[
origin_idims_
]
->
model_client_
==
nullptr
)
{
return
LaunchOriginProgram
();
}
}
if
(
inputs_shape_
==
new_shape
)
{
auto
device_program
=
device_programs_
[
origin_idims_
];
return
false
;
if
(
!
device_program
->
model_client_
)
{
return
LaunchOriginProgram
();
}
}
inputs_shape_
=
new_shape
;
return
device_program
->
ZeroCopyRun
(
&
device_itensors_
,
&
device_otensors_
);
return
true
;
}
}
void
SubgraphCompute
::
PrepareForRun
()
{
void
SubgraphCompute
::
PrepareForRun
()
{
...
@@ -307,15 +395,13 @@ void SubgraphCompute::PrepareForRun() {
...
@@ -307,15 +395,13 @@ void SubgraphCompute::PrepareForRun() {
param
.
sub_block_desc
,
param
.
sub_block_desc
,
param
.
input_data_names
,
param
.
input_data_names
,
param
.
output_data_names
,
param
.
output_data_names
,
param
.
scope
,
param
.
scope
));
NPUContext
::
SubgraphModelCacheDir
()));
CHECK
(
engine_
);
CHECK
(
engine_
);
engine_
->
Build
();
}
}
void
SubgraphCompute
::
Run
()
{
void
SubgraphCompute
::
Run
()
{
CHECK
(
engine_
);
CHECK
(
engine_
);
engine_
->
Launch
();
engine_
->
Run
();
}
}
}
// namespace npu
}
// namespace npu
...
...
lite/kernels/npu/subgraph_compute.h
浏览文件 @
97b54fbe
...
@@ -28,52 +28,65 @@ namespace lite {
...
@@ -28,52 +28,65 @@ namespace lite {
namespace
kernels
{
namespace
kernels
{
namespace
npu
{
namespace
npu
{
class
SubgraphEngine
:
public
subgraph
::
Engine
{
class
DeviceProgram
{
public:
public:
SubgraphEngine
(
KernelContext
*
ctx
,
DeviceProgram
()
{}
int
block_idx
,
~
DeviceProgram
()
{}
cpp
::
BlockDesc
*
block_desc
,
std
::
string
GenerateModelName
(
const
std
::
vector
<
std
::
string
>
&
input_names
,
const
std
::
vector
<
std
::
string
>&
input_names
,
const
std
::
vector
<
std
::
string
>
&
output_names
,
const
std
::
vector
<
std
::
string
>&
output_names
,
Scope
*
scope
,
const
std
::
vector
<
std
::
vector
<
int64_t
>>&
origin_idims
);
std
::
string
model_cache_dir
=
""
)
bool
LoadFromCacheFile
(
const
std
::
vector
<
std
::
string
>&
input_names
,
:
subgraph
::
Engine
(
ctx
,
const
std
::
vector
<
std
::
string
>&
output_names
,
block_idx
,
const
std
::
vector
<
std
::
vector
<
int64_t
>>&
origin_idims
,
block_desc
,
const
std
::
string
&
model_cache_dir
);
input_names
,
bool
BuildGraphAndCacheToFile
(
output_names
,
const
std
::
vector
<
Instruction
>&
origin_program
,
scope
,
const
std
::
vector
<
std
::
string
>&
input_names
,
model_cache_dir
)
{}
const
std
::
vector
<
std
::
string
>&
output_names
,
const
std
::
vector
<
std
::
vector
<
int64_t
>>&
origin_idims
,
const
std
::
vector
<
Tensor
*>&
origin_otensors
,
const
std
::
string
&
model_cache_dir
);
bool
ShareBufferWithOriginTensors
(
const
std
::
vector
<
std
::
string
>&
input_names
,
const
std
::
vector
<
std
::
string
>&
output_names
,
std
::
vector
<
Tensor
*>*
origin_itensors
,
std
::
vector
<
Tensor
*>*
origin_otensors
,
std
::
vector
<
std
::
shared_ptr
<
hiai
::
AiTensor
>>*
device_itensors
,
std
::
vector
<
std
::
shared_ptr
<
hiai
::
AiTensor
>>*
device_otensors
);
bool
ZeroCopyRun
(
std
::
vector
<
std
::
shared_ptr
<
hiai
::
AiTensor
>>*
device_itensors
,
std
::
vector
<
std
::
shared_ptr
<
hiai
::
AiTensor
>>*
device_otensors
);
struct
device_program_t
{
public:
explicit
device_program_t
(
std
::
shared_ptr
<
hiai
::
AiModelMngerClient
>
_client
)
std
::
string
model_name_
{
""
};
:
client
(
_client
)
{}
std
::
shared_ptr
<
hiai
::
AiModelMngerClient
>
model_client_
{
nullptr
};
std
::
shared_ptr
<
hiai
::
AiModelMngerClient
>
client
{
nullptr
};
std
::
vector
<
std
::
vector
<
int64_t
>>
origin_odims_
;
std
::
vector
<
DDim
>
origin_idims
{};
std
::
vector
<
PrecisionType
>
origin_otypes_
;
std
::
vector
<
DDim
>
origin_odims
{};
std
::
vector
<
hiai
::
TensorDimension
>
device_idims_
{};
std
::
vector
<
hiai
::
TensorDimension
>
device_idims
{};
std
::
vector
<
hiai
::
TensorDimension
>
device_odims_
{};
std
::
vector
<
hiai
::
TensorDimension
>
device_odims
{};
};
};
int
Build
()
override
;
class
SubgraphEngine
:
public
subgraph
::
Engine
{
public:
SubgraphEngine
(
KernelContext
*
ctx
,
int
block_idx
,
cpp
::
BlockDesc
*
block_desc
,
const
std
::
vector
<
std
::
string
>&
input_names
,
const
std
::
vector
<
std
::
string
>&
output_names
,
Scope
*
scope
)
:
subgraph
::
Engine
(
ctx
,
block_idx
,
block_desc
,
input_names
,
output_names
,
scope
)
{}
protected:
protected:
int
BuildDeviceProgram
()
override
;
bool
PrepareWorkspaceForDeviceProgram
()
override
;
int
LaunchDeviceProgram
()
override
;
bool
BuildDeviceProgram
()
override
;
bool
LaunchDeviceProgram
()
override
;
void
InitDeviceTensor
()
override
;
bool
InputShapeChanged
()
override
;
std
::
string
GenerateModelCacheName
()
const
;
std
::
string
model_name_
{
"model.om"
};
std
::
vector
<
std
::
vector
<
int64_t
>>
inputs_shape_
{};
std
::
map
<
std
::
vector
<
std
::
vector
<
int64_t
>>
,
std
::
shared_ptr
<
device_program_t
>>
device_program_map_
{};
std
::
vector
<
std
::
string
>
device_inames_
{};
std
::
vector
<
std
::
string
>
device_onames_
{};
std
::
vector
<
std
::
shared_ptr
<
hiai
::
AiTensor
>>
device_itensors_
{};
std
::
vector
<
std
::
shared_ptr
<
hiai
::
AiTensor
>>
device_itensors_
{};
std
::
vector
<
std
::
shared_ptr
<
hiai
::
AiTensor
>>
device_otensors_
{};
std
::
vector
<
std
::
shared_ptr
<
hiai
::
AiTensor
>>
device_otensors_
{};
std
::
map
<
std
::
vector
<
std
::
vector
<
int64_t
>>
,
std
::
shared_ptr
<
DeviceProgram
>>
device_programs_
;
};
};
class
SubgraphCompute
:
public
KernelLite
<
TARGET
(
kNPU
),
PRECISION
(
kAny
)
>
{
class
SubgraphCompute
:
public
KernelLite
<
TARGET
(
kNPU
),
PRECISION
(
kAny
)
>
{
...
...
lite/utils/env.h
浏览文件 @
97b54fbe
...
@@ -22,6 +22,8 @@
...
@@ -22,6 +22,8 @@
#define SUBGRAPH_CUSTOM_PARTITION_CONFIG_FILE \
#define SUBGRAPH_CUSTOM_PARTITION_CONFIG_FILE \
"SUBGRAPH_CUSTOM_PARTITION_CONFIG_FILE"
"SUBGRAPH_CUSTOM_PARTITION_CONFIG_FILE"
#define SUBGRAPH_DISABLE_ONLINE_MODE "SUBGRAPH_DISABLE_ONLINE_MODE"
namespace
paddle
{
namespace
paddle
{
namespace
lite
{
namespace
lite
{
...
...
lite/utils/io.h
浏览文件 @
97b54fbe
...
@@ -120,5 +120,40 @@ static std::vector<std::string> ListDir(const std::string& path,
...
@@ -120,5 +120,40 @@ static std::vector<std::string> ListDir(const std::string& path,
return
paths
;
return
paths
;
}
}
// Reads the whole file `filename` (opened in binary mode) into `*contents`.
//
// Returns true when every byte of the file was read; `*contents` then holds
// exactly the file's bytes (it is empty for an empty file).
// Returns false when the file cannot be opened (`*contents` is left
// untouched), or when its size cannot be determined or the data cannot be
// read completely (`*contents` is cleared).
static bool ReadFile(const std::string& filename,
                     std::vector<char>* contents) {
  FILE* fp = fopen(filename.c_str(), "rb");
  if (!fp) return false;

  // Determine the file size by seeking to the end. ftell() reports failure
  // with a negative value, which must not be converted to size_t.
  bool size_known = false;
  size_t size = 0;
  if (fseek(fp, 0, SEEK_END) == 0) {
    const auto end_pos = ftell(fp);
    if (end_pos >= 0 && fseek(fp, 0, SEEK_SET) == 0) {
      size = static_cast<size_t>(end_pos);
      size_known = true;
    }
  }
  contents->clear();
  if (!size_known) {
    fclose(fp);
    return false;
  }

  contents->resize(size);
  size_t offset = 0;
  while (offset < size) {
    // data() is valid to offset here because size > 0 inside the loop.
    const size_t already_read =
        fread(contents->data() + offset, 1, size - offset, fp);
    // A zero-byte read means EOF or an I/O error; stop instead of spinning.
    if (already_read == 0) break;
    offset += already_read;
  }
  fclose(fp);

  if (offset != size) {
    contents->clear();
    return false;
  }
  return true;
}
// Writes all bytes of `contents` to `filename`, creating or truncating the
// file (opened in binary mode).
//
// Returns true only when the file was opened, every byte was written and the
// file was closed without error. An empty `contents` produces an empty file
// and is treated as success.
static bool WriteFile(const std::string& filename,
                      const std::vector<char>& contents) {
  FILE* fp = fopen(filename.c_str(), "wb");
  if (!fp) return false;

  const size_t size = contents.size();
  size_t offset = 0;
  while (offset < size) {
    // data() is valid to offset here because size > 0 inside the loop.
    const size_t already_written =
        fwrite(contents.data() + offset, 1, size - offset, fp);
    // No progress means a write error (e.g. disk full); stop instead of
    // looping forever.
    if (already_written == 0) break;
    offset += already_written;
  }

  // fclose() flushes buffered data, so its failure means the data may not
  // have reached the file.
  const bool closed_ok = (fclose(fp) == 0);
  return closed_ok && offset == size;
}
}
// namespace lite
}
// namespace lite
}
// namespace paddle
}
// namespace paddle
lite/utils/md5.h
0 → 100644
浏览文件 @
97b54fbe
// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
#pragma once
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <cstring>
#include <string>
namespace
paddle
{
namespace
lite
{
std
::
string
MD5
(
std
::
string
message
)
{
const
uint32_t
shiftAmounts
[]
=
{
7
,
12
,
17
,
22
,
7
,
12
,
17
,
22
,
7
,
12
,
17
,
22
,
7
,
12
,
17
,
22
,
5
,
9
,
14
,
20
,
5
,
9
,
14
,
20
,
5
,
9
,
14
,
20
,
5
,
9
,
14
,
20
,
4
,
11
,
16
,
23
,
4
,
11
,
16
,
23
,
4
,
11
,
16
,
23
,
4
,
11
,
16
,
23
,
6
,
10
,
15
,
21
,
6
,
10
,
15
,
21
,
6
,
10
,
15
,
21
,
6
,
10
,
15
,
21
};
const
uint32_t
partsOfSines
[]
=
{
0xd76aa478
,
0xe8c7b756
,
0x242070db
,
0xc1bdceee
,
0xf57c0faf
,
0x4787c62a
,
0xa8304613
,
0xfd469501
,
0x698098d8
,
0x8b44f7af
,
0xffff5bb1
,
0x895cd7be
,
0x6b901122
,
0xfd987193
,
0xa679438e
,
0x49b40821
,
0xf61e2562
,
0xc040b340
,
0x265e5a51
,
0xe9b6c7aa
,
0xd62f105d
,
0x02441453
,
0xd8a1e681
,
0xe7d3fbc8
,
0x21e1cde6
,
0xc33707d6
,
0xf4d50d87
,
0x455a14ed
,
0xa9e3e905
,
0xfcefa3f8
,
0x676f02d9
,
0x8d2a4c8a
,
0xfffa3942
,
0x8771f681
,
0x6d9d6122
,
0xfde5380c
,
0xa4beea44
,
0x4bdecfa9
,
0xf6bb4b60
,
0xbebfbc70
,
0x289b7ec6
,
0xeaa127fa
,
0xd4ef3085
,
0x04881d05
,
0xd9d4d039
,
0xe6db99e5
,
0x1fa27cf8
,
0xc4ac5665
,
0xf4292244
,
0x432aff97
,
0xab9423a7
,
0xfc93a039
,
0x655b59c3
,
0x8f0ccc92
,
0xffeff47d
,
0x85845dd1
,
0x6fa87e4f
,
0xfe2ce6e0
,
0xa3014314
,
0x4e0811a1
,
0xf7537e82
,
0xbd3af235
,
0x2ad7d2bb
,
0xeb86d391
};
uint32_t
state
[
4
];
state
[
0
]
=
0x67452301
;
state
[
1
]
=
0xefcdab89
;
state
[
2
]
=
0x98badcfe
;
state
[
3
]
=
0x10325476
;
// Pad with zeros
int
size
=
((((
message
.
length
()
+
8
)
/
64
)
+
1
)
*
64
)
-
8
;
uint8_t
*
buf
=
reinterpret_cast
<
uint8_t
*>
(
calloc
(
size
+
64
,
1
));
memcpy
(
buf
,
message
.
c_str
(),
message
.
length
());
buf
[
message
.
length
()]
=
128
;
uint32_t
bits
=
8
*
message
.
length
();
memcpy
(
buf
+
size
,
&
bits
,
4
);
// Process at each 512-bit(64 bytes) chunk
#define LEFTROTATE(x, c) (((x) << (c)) | ((x) >> (32 - (c))))
for
(
int
offset
=
0
;
offset
<
size
;
offset
+=
64
)
{
uint32_t
A
=
state
[
0
];
uint32_t
B
=
state
[
1
];
uint32_t
C
=
state
[
2
];
uint32_t
D
=
state
[
3
];
uint32_t
*
W
=
reinterpret_cast
<
uint32_t
*>
(
buf
+
offset
);
for
(
uint32_t
i
=
0
;
i
<
64
;
i
++
)
{
uint32_t
F
,
g
;
if
(
i
<
16
)
{
F
=
(
B
&
C
)
|
((
~
B
)
&
D
);
g
=
i
;
}
else
if
(
i
<
32
)
{
F
=
(
D
&
B
)
|
((
~
D
)
&
C
);
g
=
(
5
*
i
+
1
)
%
16
;
}
else
if
(
i
<
48
)
{
F
=
B
^
C
^
D
;
g
=
(
3
*
i
+
5
)
%
16
;
}
else
{
F
=
C
^
(
B
|
(
~
D
));
g
=
(
7
*
i
)
%
16
;
}
uint32_t
T
=
D
;
D
=
C
;
C
=
B
;
B
=
B
+
LEFTROTATE
((
A
+
F
+
partsOfSines
[
i
]
+
W
[
g
]),
shiftAmounts
[
i
]);
A
=
T
;
}
state
[
0
]
+=
A
;
state
[
1
]
+=
B
;
state
[
2
]
+=
C
;
state
[
3
]
+=
D
;
}
#undef LEFTROTATE
free
(
buf
);
// Convert digest to string
std
::
string
res
;
res
.
reserve
(
16
<<
1
);
const
uint8_t
*
digest
=
reinterpret_cast
<
uint8_t
*>
(
state
);
char
hex
[
3
];
for
(
size_t
i
=
0
;
i
<
16
;
i
++
)
{
snprintf
(
hex
,
sizeof
(
hex
),
"%02x"
,
digest
[
i
]);
res
.
append
(
hex
);
}
return
res
;
}
}
// namespace lite
}
// namespace paddle
lite/utils/string.h
浏览文件 @
97b54fbe
...
@@ -67,6 +67,31 @@ static std::string to_string(int index) {
...
@@ -67,6 +67,31 @@ static std::string to_string(int index) {
return
std
::
string
(
buffer
);
return
std
::
string
(
buffer
);
}
}
// Converts the text `v` to a value of type T.
//
// The primary template returns `v` itself (converted to T by the return
// statement), which covers the default T = std::string. The explicit
// specializations below parse numeric types with the matching std::sto*
// function.
template <typename T = std::string>
static T parse_string(const std::string& v) {
  return v;
}

// int32_t: parsed with std::stoi (base 10).
template <>
int32_t parse_string<int32_t>(const std::string& v) {
  const int parsed = std::stoi(v, nullptr, 10);
  return parsed;
}

// int64_t: parsed with std::stoll (base 10).
template <>
int64_t parse_string<int64_t>(const std::string& v) {
  const long long parsed = std::stoll(v, nullptr, 10);  // NOLINT
  return parsed;
}

// float: parsed with std::stof.
template <>
float parse_string<float>(const std::string& v) {
  const float parsed = std::stof(v, nullptr);
  return parsed;
}

// double: parsed with std::stod.
template <>
double parse_string<double>(const std::string& v) {
  const double parsed = std::stod(v, nullptr);
  return parsed;
}
template
<
typename
T
>
template
<
typename
T
>
std
::
string
Join
(
const
std
::
vector
<
T
>&
vec
,
const
std
::
string
&
delim
)
{
std
::
string
Join
(
const
std
::
vector
<
T
>&
vec
,
const
std
::
string
&
delim
)
{
if
(
vec
.
empty
())
return
""
;
if
(
vec
.
empty
())
return
""
;
...
@@ -91,19 +116,20 @@ static std::string Repr(const std::vector<std::string>& v) {
...
@@ -91,19 +116,20 @@ static std::string Repr(const std::vector<std::string>& v) {
return
"{"
+
Join
(
tmp
,
","
)
+
"}"
;
return
"{"
+
Join
(
tmp
,
","
)
+
"}"
;
}
}
static
std
::
vector
<
std
::
string
>
Split
(
const
std
::
string
&
original
,
template
<
class
T
=
std
::
string
>
const
std
::
string
&
separator
)
{
static
std
::
vector
<
T
>
Split
(
const
std
::
string
&
original
,
std
::
vector
<
std
::
string
>
results
;
const
std
::
string
&
separator
)
{
std
::
vector
<
T
>
results
;
std
::
string
::
size_type
pos1
,
pos2
;
std
::
string
::
size_type
pos1
,
pos2
;
pos2
=
original
.
find
(
separator
);
pos2
=
original
.
find
(
separator
);
pos1
=
0
;
pos1
=
0
;
while
(
std
::
string
::
npos
!=
pos2
)
{
while
(
std
::
string
::
npos
!=
pos2
)
{
results
.
push_back
(
original
.
substr
(
pos1
,
pos2
-
pos1
));
results
.
push_back
(
parse_string
<
T
>
(
original
.
substr
(
pos1
,
pos2
-
pos1
)
));
pos1
=
pos2
+
separator
.
size
();
pos1
=
pos2
+
separator
.
size
();
pos2
=
original
.
find
(
separator
,
pos1
);
pos2
=
original
.
find
(
separator
,
pos1
);
}
}
if
(
pos1
!=
original
.
length
())
{
if
(
pos1
!=
original
.
length
())
{
results
.
push_back
(
original
.
substr
(
pos1
));
results
.
push_back
(
parse_string
<
T
>
(
original
.
substr
(
pos1
)
));
}
}
return
results
;
return
results
;
}
}
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录