PaddlePaddle / Paddle-Lite
Commit aa05c93e

[NPU] save subgraph model cache (#3589)

Authored May 13, 2020 by zhupengyang; committed via GitHub on May 13, 2020.
Parent commit: 950b7382
Showing 10 changed files with 145 additions and 25 deletions (+145 / -25):
lite/api/light_api_impl.cc                      +5   -0
lite/api/paddle_api.h                           +12  -3
lite/backends/npu/device.cc                     +56  -11
lite/backends/npu/device.h                      +5   -4
lite/core/context.cc                            +4   -0
lite/core/context.h                             +10  -0
lite/core/mir/subgraph/subgraph_pass_test.cc    +1   -0
lite/kernels/npu/bridges/engine.h               +5   -2
lite/kernels/npu/subgraph_compute.cc            +36  -2
lite/kernels/npu/subgraph_compute.h             +11  -3
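Taken together, the change adds an on-disk cache for compiled NPU subgraph models: the user points `MobileConfig` at a directory, the first run builds each HiAI IR subgraph into an OM model and writes it there, and later runs with the same input/output tensor names and shapes load the cached `.om` file instead of rebuilding. A minimal usage sketch against the new API (the model path and cache directory are placeholders, and an NPU-enabled build with `LITE_WITH_NPU` is assumed):

```cpp
#include "lite/api/paddle_api.h"

int main() {
  paddle::lite_api::MobileConfig config;
  config.set_model_from_file("mobilenet_v1.nb");
  // New in this commit: directory where compiled HiAI .om models are
  // cached. First run writes them; later runs with unchanged input/output
  // shapes read them back instead of recompiling.
  config.set_subgraph_model_cache_dir("/data/local/tmp");

  auto predictor = paddle::lite_api::CreatePaddlePredictor(config);
  // ... fill input tensors here ...
  predictor->Run();  // first run compiles and caches each NPU subgraph
  return 0;
}
```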
lite/api/light_api_impl.cc

@@ -36,6 +36,11 @@ void LightPredictorImpl::Init(const lite_api::MobileConfig& config) {
   }
   mode_ = config.power_mode();
   threads_ = config.threads();
+#ifdef LITE_WITH_NPU
+  Context<TargetType::kNPU>::SetSubgraphModelCacheDir(
+      config.subgraph_model_cache_dir());
+#endif
 }

 std::unique_ptr<lite_api::Tensor> LightPredictorImpl::GetInput(int i) {
lite/api/paddle_api.h

@@ -118,18 +118,27 @@ class LITE_API ConfigBase {
   std::string model_dir_;
   int threads_{1};
   PowerMode mode_{LITE_POWER_NO_BIND};
+  // to save subgraph model for npu/xpu/...
+  std::string subgraph_model_cache_dir_{""};

  public:
   explicit ConfigBase(PowerMode mode = LITE_POWER_NO_BIND, int threads = 1);
   // set Model_dir
   void set_model_dir(const std::string& x) { model_dir_ = x; }
   const std::string& model_dir() const { return model_dir_; }
-  // set Power_mode
-  void set_power_mode(PowerMode mode);
-  PowerMode power_mode() const { return mode_; }
   // set Thread
   void set_threads(int threads);
   int threads() const { return threads_; }
+  // set Power_mode
+  void set_power_mode(PowerMode mode);
+  PowerMode power_mode() const { return mode_; }
+  // set subgraph_model_dir
+  void set_subgraph_model_cache_dir(std::string subgraph_model_cache_dir) {
+    subgraph_model_cache_dir_ = subgraph_model_cache_dir;
+  }
+  const std::string& subgraph_model_cache_dir() const {
+    return subgraph_model_cache_dir_;
+  }
 };

 /// CxxConfig is the config for the Full feature predictor.
lite/backends/npu/device.cc

@@ -14,15 +14,50 @@
 #include "lite/backends/npu/device.h"
 #include "lite/utils/cp_logging.h"
+#include "lite/utils/io.h"

 namespace paddle {
 namespace lite {
 namespace npu {

+bool WriteToOMFile(const domi::ModelBufferData& om_model_buff,
+                   std::string om_file_path) {
+  FILE* fp;
+  fp = fopen(om_file_path.c_str(), "wb");
+  CHECK(fp != nullptr) << om_file_path << " open failed!";
+
+  uint32_t write_size =
+      (uint32_t)fwrite(om_model_buff.data, 1, om_model_buff.length, fp);
+  CHECK_EQ(write_size, om_model_buff.length) << "write om file failed !";
+
+  fclose(fp);
+  return true;
+}
+
+bool ReadFromOMFile(domi::ModelBufferData* om_model_buff,
+                    std::string om_file_path) {
+  FILE* fp;
+  fp = fopen(om_file_path.c_str(), "rb");
+  CHECK(fp != nullptr) << om_file_path << " open failed!";
+
+  fseek(fp, 0, SEEK_END);
+  uint32_t model_length = (uint32_t)ftell(fp);
+  fseek(fp, 0, SEEK_SET);
+
+  om_model_buff->data = malloc(model_length);
+  om_model_buff->length = model_length;
+  uint32_t read_size =
+      (uint32_t)fread(om_model_buff->data, 1, model_length, fp);
+  CHECK_EQ(read_size, model_length) << "read om file failed !";
+
+  fclose(fp);
+  return true;
+}
+
 std::shared_ptr<hiai::AiModelMngerClient> Device::Build(
     const std::string model_name,                // NOLINT
     std::vector<ge::Operator>& input_nodes,      // NOLINT
-    std::vector<ge::Operator>& output_nodes      // NOLINT
+    std::vector<ge::Operator>& output_nodes,     // NOLINT
+    const std::string model_cache_full_dir = ""  // NOLINT
     ) {
   VLOG(3) << "[NPU] Build model";
   // Build the HiAI IR graph to the HiAI om model

@@ -32,6 +67,11 @@ std::shared_ptr<hiai::AiModelMngerClient> Device::Build(
   om_model.SetGraph(ir_graph);
   domi::HiaiIrBuild ir_build;
   domi::ModelBufferData om_model_buf;
+
+  if (!model_cache_full_dir.empty() && IsFileExists(model_cache_full_dir)) {
+    VLOG(3) << "Will read om model from " << model_cache_full_dir;
+    ReadFromOMFile(&om_model_buf, model_cache_full_dir);
+  } else {
   if (!ir_build.CreateModelBuff(om_model, om_model_buf)) {
     LOG(WARNING) << "[NPU] CreateModelBuff failed!";
     return nullptr;

@@ -41,6 +81,11 @@ std::shared_ptr<hiai::AiModelMngerClient> Device::Build(
     ir_build.ReleaseModelBuff(om_model_buf);
     return nullptr;
   }
+  if (!model_cache_full_dir.empty()) {
+    VLOG(3) << "Will write om model to " << model_cache_full_dir;
+    WriteToOMFile(om_model_buf, model_cache_full_dir);
+  }
+  }

   // Create a HiAI model manager client to load the HiAI om model
   std::shared_ptr<hiai::AiModelMngerClient> model_client(
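The two new helpers are a plain C-stdio round trip for the serialized OM buffer: write `data`/`length` to disk, or size the file with `fseek`/`ftell`, `malloc` a buffer, and read it back whole. A standalone sketch of the same pattern, with a hypothetical `Buffer` type standing in for `domi::ModelBufferData` (the real helpers `CHECK`-fail instead of returning false):

```cpp
#include <cstdint>
#include <cstdio>
#include <cstdlib>
#include <string>

// Hypothetical stand-in for domi::ModelBufferData: raw bytes plus a length.
struct Buffer {
  void* data{nullptr};
  uint32_t length{0};
};

bool WriteBuffer(const Buffer& buf, const std::string& path) {
  FILE* fp = fopen(path.c_str(), "wb");
  if (fp == nullptr) return false;
  bool ok = fwrite(buf.data, 1, buf.length, fp) == buf.length;
  fclose(fp);
  return ok;
}

bool ReadBuffer(Buffer* buf, const std::string& path) {
  FILE* fp = fopen(path.c_str(), "rb");
  if (fp == nullptr) return false;
  fseek(fp, 0, SEEK_END);           // size the file first,
  buf->length = static_cast<uint32_t>(ftell(fp));
  fseek(fp, 0, SEEK_SET);           // then rewind and read it whole
  buf->data = malloc(buf->length);  // caller owns, as in the helpers above
  bool ok = fread(buf->data, 1, buf->length, fp) == buf->length;
  fclose(fp);
  return ok;
}

int main() {
  char bytes[] = "om-model-bytes";
  Buffer out{bytes, sizeof(bytes)};
  Buffer in;
  if (WriteBuffer(out, "/tmp/demo.om") && ReadBuffer(&in, "/tmp/demo.om")) {
    std::printf("round trip ok: %s\n", static_cast<char*>(in.data));
    std::free(in.data);
  }
  return 0;
}
```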
lite/backends/npu/device.h

@@ -43,7 +43,8 @@ class Device {
   std::shared_ptr<hiai::AiModelMngerClient> Build(
       const std::string model_name,            // NOLINT
       std::vector<ge::Operator>& input_nodes,  // NOLINT
-      std::vector<ge::Operator>& output_nodes  // NOLINT
-      );
+      std::vector<ge::Operator>& output_nodes,  // NOLINT
+      const std::string model_cache_name        // NOLINT
+      );                                        // NOLINT

  private:
lite/core/context.cc

@@ -17,6 +17,10 @@
 namespace paddle {
 namespace lite {

+#ifdef LITE_WITH_NPU
+std::string Context<TargetType::kNPU>::subgraph_model_cache_dir_{""};  // NOLINT
+#endif
+
 #ifdef LITE_WITH_XPU
 std::string Context<TargetType::kXPU>::_multi_encoder_precision;  // NOLINT
 thread_local xdnn::Context* Context<TargetType::kXPU>::_tls_raw_ctx{nullptr};
lite/core/context.h

@@ -85,6 +85,16 @@ class Context<TargetType::kNPU> {
   NPUContext& operator=(const NPUContext& ctx) {}
   std::string name() const { return "NPUContext"; }

+  static void SetSubgraphModelCacheDir(std::string subgraph_model_cache_dir) {
+    subgraph_model_cache_dir_ = subgraph_model_cache_dir;
+  }
+  static std::string SubgraphModelCacheDir() {
+    return subgraph_model_cache_dir_;
+  }
+
+ private:
+  static std::string subgraph_model_cache_dir_;
+
 };
 #endif
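The cache directory lives as a static member of the `kNPU` context specialization: declared here in the header and defined exactly once in context.cc (the paired `#ifdef LITE_WITH_NPU` hunk above). A minimal, self-contained illustration of that declare-in-header/define-in-one-TU pattern, using a hypothetical class rather than the real template specialization:

```cpp
#include <iostream>
#include <string>

// Hypothetical analogue of the members added to Context<TargetType::kNPU>.
class NpuLikeContext {
 public:
  static void SetSubgraphModelCacheDir(std::string dir) { cache_dir_ = dir; }
  static std::string SubgraphModelCacheDir() { return cache_dir_; }

 private:
  static std::string cache_dir_;  // declaration only; needs one definition
};

// The single out-of-class definition, as context.cc provides for kNPU.
std::string NpuLikeContext::cache_dir_{""};

int main() {
  NpuLikeContext::SetSubgraphModelCacheDir("/data/local/tmp");
  std::cout << NpuLikeContext::SubgraphModelCacheDir() << "\n";
  return 0;
}
```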
lite/core/mir/subgraph/subgraph_pass_test.cc

@@ -132,6 +132,7 @@ std::shared_ptr<lite_api::PaddlePredictor> TestModel(
   mobile_config.set_model_from_file(optimized_model_dir + ".nb");
   mobile_config.set_power_mode(lite_api::PowerMode::LITE_POWER_HIGH);
   mobile_config.set_threads(1);
+  // mobile_config.set_subgraph_model_cache_dir("/data/local/tmp");
   predictor = lite_api::CreatePaddlePredictor(mobile_config);
   FillInputTensors(predictor, input_tensor_shape, input_tensor_type, 1);
   // Run optimized model
lite/kernels/npu/bridges/engine.h

@@ -33,13 +33,15 @@ class Engine {
          cpp::BlockDesc* block_desc,
          const std::vector<std::string>& input_names,
          const std::vector<std::string>& output_names,
-         lite::Scope* scope)
+         lite::Scope* scope,
+         std::string model_cache_dir = "")
       : ctx_(ctx),
         block_idx_(block_idx),
         block_desc_(block_desc),
         input_names_(input_names),
         output_names_(output_names),
-        scope_(scope) {}
+        scope_(scope),
+        model_cache_dir_(model_cache_dir) {}
   virtual ~Engine() = default;

   virtual int Build();

@@ -73,6 +75,7 @@ class Engine {
   std::vector<Tensor*> origin_itensors_;
   std::vector<Tensor*> origin_otensors_;
   std::vector<Instruction> origin_program_;
+  std::string model_cache_dir_{""};
 };

 }  // namespace subgraph
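Appending the new constructor parameter with a default of `""` keeps every existing `Engine` subclass and call site compiling unchanged: caching stays off unless a directory is passed explicitly. A tiny sketch of that compatibility property, with a hypothetical cut-down class:

```cpp
#include <cstdio>
#include <string>

// Hypothetical cut-down Engine: only the new, defaulted parameter is shown.
class EngineLike {
 public:
  explicit EngineLike(std::string model_cache_dir = "")
      : model_cache_dir_(model_cache_dir) {}
  bool caching_enabled() const { return !model_cache_dir_.empty(); }

 private:
  std::string model_cache_dir_{""};
};

int main() {
  EngineLike legacy;                     // pre-existing call site: still valid
  EngineLike cached("/data/local/tmp");  // new call site: opts into caching
  std::printf("%d %d\n", legacy.caching_enabled(), cached.caching_enabled());
  return 0;
}
```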
lite/kernels/npu/subgraph_compute.cc

@@ -15,6 +15,7 @@
 #include "lite/kernels/npu/subgraph_compute.h"
 #include <sys/time.h>
 #include <time.h>
+#include <algorithm>
 #include <utility>
 #include "hiai_ir_build.h"  // NOLINT
 #include "lite/backends/npu/device.h"

@@ -22,12 +23,41 @@
 #include "lite/kernels/npu/bridges/graph.h"
 #include "lite/kernels/npu/bridges/paddle_use_bridges.h"
 #include "lite/kernels/npu/bridges/utility.h"
+#include "lite/utils/io.h"

 namespace paddle {
 namespace lite {
 namespace kernels {
 namespace npu {

+std::string SubgraphEngine::GenerateModelCacheName() const {
+  auto inames = device_inames_;
+  auto onames = device_onames_;
+  std::sort(inames.begin(), inames.end());
+  std::sort(onames.begin(), onames.end());
+
+  std::string model_cache_name = "";
+  for (auto iname : inames) {
+    auto itensor = scope_->FindTensor(iname);
+    std::replace(iname.begin(), iname.end(), '/', '_');
+    model_cache_name += "_" + iname;
+    for (auto i : itensor->dims().Vectorize()) {
+      model_cache_name += "_" + std::to_string(i);
+    }
+  }
+  for (auto oname : onames) {
+    auto otensor = scope_->FindTensor(oname);
+    std::replace(oname.begin(), oname.end(), '/', '_');
+    model_cache_name += "_" + oname;
+    for (auto i : otensor->dims().Vectorize()) {
+      model_cache_name += "_" + std::to_string(i);
+    }
+  }
+  model_cache_name += "_.om";
+
+  return model_cache_name;
+}
+
 int SubgraphEngine::BuildDeviceProgram() {
   int status = 0;
   // Convert all of ops and their input vars and weights and added into the NPU

@@ -88,8 +118,11 @@ int SubgraphEngine::BuildDeviceProgram() {
   if (device_program_map_.count(inputs_shape_) > 0) {
     return status;
   }
+  std::string model_cache_full_dir =
+      model_cache_dir_.empty() ? ""
+                               : model_cache_dir_ + "/" + GenerateModelCacheName();
   auto device_client = lite::npu::Device::Global().Build(
-      model_name_, device_inodes, device_onodes);
+      model_name_, device_inodes, device_onodes, model_cache_full_dir);
   if (device_client == nullptr) {
     LOG(WARNING) << "[NPU] Build model failed!";
     return subgraph::FAILED;

@@ -280,7 +313,8 @@ void SubgraphCompute::PrepareForRun() {
                                 param.sub_block_desc,
                                 param.input_data_names,
                                 param.output_data_names,
-                                param.scope));
+                                param.scope,
+                                NPUContext::SubgraphModelCacheDir()));
   CHECK(engine_);
   engine_->Build();
 }
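`GenerateModelCacheName` keys the cache on the subgraph's sorted input and output tensor names (with `/` mapped to `_`, since it is not filename-safe) plus their dimensions, so a different subgraph or a different input shape resolves to a different `.om` file. A worked sketch of the same naming scheme over hypothetical name/shape pairs (plain maps stand in for the scope/tensor lookups):

```cpp
#include <algorithm>
#include <cstdint>
#include <cstdio>
#include <map>
#include <string>
#include <vector>

// Re-derivation of the scheme in GenerateModelCacheName(); std::map
// iteration supplies the sorted-name order that std::sort provides above.
std::string CacheName(std::map<std::string, std::vector<int64_t>> inputs,
                      std::map<std::string, std::vector<int64_t>> outputs) {
  std::string name;
  auto append = [&name](const std::map<std::string, std::vector<int64_t>>& ts) {
    for (const auto& t : ts) {
      std::string n = t.first;
      std::replace(n.begin(), n.end(), '/', '_');
      name += "_" + n;
      for (int64_t d : t.second) name += "_" + std::to_string(d);
    }
  };
  append(inputs);
  append(outputs);
  return name + "_.om";
}

int main() {
  // Input "image" of shape [1, 3, 224, 224]; output "fc7/out" of [1, 1000].
  std::printf("%s\n",
              CacheName({{"image", {1, 3, 224, 224}}}, {{"fc7/out", {1, 1000}}})
                  .c_str());
  // Prints: _image_1_3_224_224_fc7_out_1_1000_.om
  return 0;
}
```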
lite/kernels/npu/subgraph_compute.h

@@ -35,9 +35,15 @@ class SubgraphEngine : public subgraph::Engine {
                  cpp::BlockDesc* block_desc,
                  const std::vector<std::string>& input_names,
                  const std::vector<std::string>& output_names,
-                 Scope* scope)
-      : subgraph::Engine(
-            ctx, block_idx, block_desc, input_names, output_names, scope) {}
+                 Scope* scope,
+                 std::string model_cache_dir = "")
+      : subgraph::Engine(ctx,
+                         block_idx,
+                         block_desc,
+                         input_names,
+                         output_names,
+                         scope,
+                         model_cache_dir) {}

   struct device_program_t {
     explicit device_program_t(std::shared_ptr<hiai::AiModelMngerClient> _client)

@@ -58,6 +64,8 @@ class SubgraphEngine : public subgraph::Engine {
   void InitDeviceTensor() override;
   bool InputShapeChanged() override;

+  std::string GenerateModelCacheName() const;
+
   std::string model_name_{"model.om"};
   std::vector<std::vector<int64_t>> inputs_shape_{};
   std::map<std::vector<std::vector<int64_t>>, std::shared_ptr<device_program_t>>