Commit 2aaa417e (unverified)

[cherry-pick] [Inference] Support NNAdapter and ascend310 (#35882)

Authored by Wilber on Sep 22, 2021; committed via GitHub on Sep 22, 2021.
Parent commit: c0535200
Showing 15 changed files with 310 additions and 9 deletions (+310, -9):
cmake/external/lite.cmake                                        +31  -0
paddle/fluid/inference/analysis/argument.h                       +16  -0
paddle/fluid/inference/analysis/ir_pass_manager.cc               +21  -0
paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc  +29  -0
paddle/fluid/inference/api/analysis_config.cc                    +57  -1
paddle/fluid/inference/api/analysis_predictor.cc                 +36  -0
paddle/fluid/inference/api/analysis_predictor_tester.cc          +20  -0
paddle/fluid/inference/api/paddle_analysis_config.h              +33  -0
paddle/fluid/inference/lite/CMakeLists.txt                       +3   -3
paddle/fluid/inference/lite/engine.cc                            +19  -0
paddle/fluid/inference/lite/engine.h                             +9   -0
paddle/fluid/operators/lite/lite_engine_op_test.cc               +6   -2
paddle/fluid/pybind/inference_api.cc                             +22  -1
paddle/scripts/paddle_build.sh                                   +2   -2
python/setup.py.in                                               +6   -0
cmake/external/lite.cmake

@@ -35,6 +35,14 @@ if (LITE_WITH_XPU)
   ENDIF()
 endif()

+if (LITE_WITH_NNADAPTER)
+  add_definitions(-DLITE_SUBGRAPH_WITH_NNADAPTER)
+  if (NNADAPTER_WITH_HUAWEI_ASCEND_NPU)
+    add_definitions(-DLITE_SUBGRAPH_WITH_NPU)
+    set(NPU_SDK_ROOT "/usr/local/Ascend/ascend-toolkit/latest" CACHE STRING "default NPU SDK ROOT")
+  endif()
+endif()
+
 if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
   include(ExternalProject)
   set(LITE_PROJECT extern_lite)

@@ -67,6 +75,9 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
       -DLITE_WITH_XPU=${LITE_WITH_XPU}
       -DXPU_SDK_URL=${XPU_BASE_URL}
       -DXPU_SDK_ENV=${XPU_SDK_ENV}
+      -DLITE_WITH_NNADAPTER=${LITE_WITH_NNADAPTER}
+      -DNNADAPTER_WITH_HUAWEI_ASCEND_NPU=${NNADAPTER_WITH_HUAWEI_ASCEND_NPU}
+      -DNNADAPTER_HUAWEI_ASCEND_NPU_SDK_ROOT=${NPU_SDK_ROOT}
       -DLITE_WITH_CODE_META_INFO=OFF
       -DLITE_WITH_ARM=ON)
   ExternalProject_Add(

@@ -110,6 +121,9 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
       -DLITE_WITH_XPU=${LITE_WITH_XPU}
       -DXPU_SDK_URL=${XPU_BASE_URL}
       -DXPU_SDK_ENV=${XPU_SDK_ENV}
+      -DLITE_WITH_NNADAPTER=${LITE_WITH_NNADAPTER}
+      -DNNADAPTER_WITH_HUAWEI_ASCEND_NPU=${NNADAPTER_WITH_HUAWEI_ASCEND_NPU}
+      -DNNADAPTER_HUAWEI_ASCEND_NPU_SDK_ROOT=${NPU_SDK_ROOT}
       -DLITE_WITH_CODE_META_INFO=OFF
       -DLITE_WITH_ARM=OFF)

@@ -120,6 +134,7 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
     GIT_TAG         ${LITE_GIT_TAG}
     PREFIX          ${LITE_SOURCES_DIR}
     UPDATE_COMMAND  ""
+    PATCH_COMMAND sed -i "s?NNadapter_bridges_path = os.path.abspath('..')+\"\/lite\/kernels\/nnadapter\/bridges\/paddle_use_bridges.h\"?NNadapter_bridges_path = os.path.abspath(\'..\')+\"\/extern_lite\/lite\/kernels\/nnadapter\/bridges\/paddle_use_bridges.h\"?" ${LITE_SOURCES_DIR}/src/extern_lite//lite/tools/cmake_tools/record_supported_kernel_op.py && sed -i "/general::ssa::ConvertToSSA(cpp_prog)$<SEMICOLON>/d" ${LITE_SOURCES_DIR}/src/extern_lite/lite/model_parser/model_parser.cc
     BUILD_COMMAND   ${LITE_BUILD_COMMAND}
     INSTALL_COMMAND ""
     CMAKE_ARGS      -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}

@@ -146,6 +161,11 @@ endif()
 if (WITH_ARM)
   if (LITE_WITH_XPU)
     set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8.xpu)
+  elseif (LITE_WITH_NNADAPTER)
+    message("Enable LITE_WITH_NNADAPTER")
+    if (NNADAPTER_WITH_HUAWEI_ASCEND_NPU)
+      set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8.nnadapter)
+    endif()
   else()
     set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8)
   endif()

@@ -174,5 +194,16 @@ endfunction()
 external_lite_libs(lite_full_static ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libpaddle_full_api_shared.so)
 set(LITE_SHARED_LIB ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libpaddle_full_api_shared.so)
+
+if (LITE_WITH_NNADAPTER)
+  set(LITE_NNADAPTER_LIB ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter.so)
+  if (NNADAPTER_WITH_HUAWEI_ASCEND_NPU)
+    external_lite_libs(lite_nnadapter ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter.so
+                       ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter_driver_huawei_ascend_npu.so)
+    set(LITE_DEPS lite_full_static lite_nnadapter)
+    set(LITE_NNADAPTER_NPU_LIB ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter_driver_huawei_ascend_npu.so)
+  endif()
+else()
+  set(LITE_DEPS lite_full_static)
+endif()
+
 add_definitions(-DPADDLE_WITH_LITE)
 add_definitions(-DLITE_WITH_LOG)
paddle/fluid/inference/analysis/argument.h

@@ -239,6 +239,22 @@ struct Argument {
   DECL_ARGUMENT_FIELD(xpu_precision, XpuPrecision, std::string);
   DECL_ARGUMENT_FIELD(xpu_adaptive_seqlen, XpuAdaptiveSeqlen, bool);

+  DECL_ARGUMENT_FIELD(use_nnadapter, UseNNAdapter, bool);
+  DECL_ARGUMENT_FIELD(nnadapter_model_cache_dir, NNAdapterModelCacheDir, std::string);
+  DECL_ARGUMENT_FIELD(nnadapter_device_names, NNAdapterDeviceNames, std::vector<std::string>);
+  DECL_ARGUMENT_FIELD(nnadapter_context_properties, NNAdapterContextProperties, std::string);
+  DECL_ARGUMENT_FIELD(nnadapter_subgraph_partition_config_buffer, NNAdapterSubgraphPartitionConfigBuffer, std::string);
+  DECL_ARGUMENT_FIELD(nnadapter_subgraph_partition_config_path, NNAdapterSubgraphPartitionConfigPath, std::string);
+  DECL_ARGUMENT_FIELD(nnadapter_model_cache_token, NNAdapterModelCacheToken, std::vector<std::string>);
+  DECL_ARGUMENT_FIELD(nnadapter_model_cache_buffer, NNAdapterModelCacheBuffer, std::vector<std::vector<char>>);
+
   // Memory optimized related.
   DECL_ARGUMENT_FIELD(enable_memory_optim, EnableMemoryOptim, bool);
paddle/fluid/inference/analysis/ir_pass_manager.cc

@@ -202,6 +202,27 @@ void IRPassManager::CreatePasses(Argument *argument,
                 new std::string(argument->xpu_autotune_file()));
       pass->Set("precision", new std::string(argument->xpu_precision()));
       pass->Set("adaptive_seqlen", new bool(argument->xpu_adaptive_seqlen()));
+      // NNAdapter Related
+      pass->Set("use_nnadapter", new bool(argument->use_nnadapter()));
+      pass->Set("nnadapter_model_cache_dir",
+                new std::string(argument->nnadapter_model_cache_dir()));
+      pass->Set("nnadapter_device_names",
+                new std::vector<std::string>(argument->nnadapter_device_names()));
+      pass->Set("nnadapter_context_properties",
+                new std::string(argument->nnadapter_context_properties()));
+      pass->Set("nnadapter_subgraph_partition_config_buffer",
+                new std::string(argument->nnadapter_subgraph_partition_config_buffer()));
+      pass->Set("nnadapter_subgraph_partition_config_path",
+                new std::string(argument->nnadapter_subgraph_partition_config_path()));
+      pass->Set("nnadapter_model_cache_buffer",
+                new std::vector<std::vector<char>>(argument->nnadapter_model_cache_buffer()));
+      pass->Set("nnadapter_model_cache_token",
+                new std::vector<std::string>(argument->nnadapter_model_cache_token()));
     }
     disable_logs_ = argument->disable_logs();
     if (pass_name == "fc_fuse_pass") {
paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc

@@ -250,12 +250,30 @@ void LiteSubgraphPass::SetUpEngine(
   std::string autotune_file = Get<std::string>("autotune_file");
   std::string precision = Get<std::string>("precision");
   bool adaptive_seqlen = Get<bool>("adaptive_seqlen");
+  // NNAdapter Related
+  bool use_nnadapter = Get<bool>("use_nnadapter");
+  std::string nnadapter_model_cache_dir =
+      Get<std::string>("nnadapter_model_cache_dir");
+  auto nnadapter_device_names =
+      Get<std::vector<std::string>>("nnadapter_device_names");
+  std::string nnadapter_context_properties =
+      Get<std::string>("nnadapter_context_properties");
+  std::string nnadapter_subgraph_partition_config_buffer =
+      Get<std::string>("nnadapter_subgraph_partition_config_buffer");
+  std::string nnadapter_subgraph_partition_config_path =
+      Get<std::string>("nnadapter_subgraph_partition_config_path");
+  auto nnadapter_model_cache_buffer =
+      Get<std::vector<std::vector<char>>>("nnadapter_model_cache_buffer");
+  auto nnadapter_model_cache_token =
+      Get<std::vector<std::string>>("nnadapter_model_cache_token");

   lite_api::TargetType target_type;
   if (use_gpu) {
     target_type = TARGET(kCUDA);
   } else if (use_xpu) {
     target_type = TARGET(kXPU);
+  } else if (use_nnadapter) {
+    target_type = TARGET(kNNAdapter);
   } else {
 #ifdef PADDLE_WITH_ARM
     target_type = TARGET(kARM);

@@ -292,6 +310,17 @@ void LiteSubgraphPass::SetUpEngine(
   config.autotune_file = autotune_file;
   config.precision = precision;
   config.adaptive_seqlen = adaptive_seqlen;
+  // NNAdapter Related
+  config.nnadapter_model_cache_dir = nnadapter_model_cache_dir;
+  config.nnadapter_device_names = nnadapter_device_names;
+  config.nnadapter_context_properties = nnadapter_context_properties;
+  config.nnadapter_subgraph_partition_config_buffer =
+      nnadapter_subgraph_partition_config_buffer;
+  config.nnadapter_subgraph_partition_config_path =
+      nnadapter_subgraph_partition_config_path;
+  config.nnadapter_model_cache_buffer = nnadapter_model_cache_buffer;
+  config.nnadapter_model_cache_token = nnadapter_model_cache_token;

   if (dump_model) {
     lite::StrToBinaryFile("./model.bin", config.model);
     lite::StrToBinaryFile("./param.bin", config.param);
paddle/fluid/inference/api/analysis_config.cc

@@ -207,6 +207,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   // NPU related.
   CP_MEMBER(use_npu_);
   CP_MEMBER(npu_device_id_);
+  CP_MEMBER(nnadapter_config_);

   // profile related.
   CP_MEMBER(with_profile_);

@@ -554,7 +555,7 @@ void AnalysisConfig::Update() {
   }
   if (use_npu_) {
-#ifdef PADDLE_WITH_ASCEND_CL
+#if defined(PADDLE_WITH_ASCEND_CL) || defined(LITE_SUBGRAPH_WITH_NPU)
     PADDLE_ENFORCE_EQ(use_gpu_, false,
                       platform::errors::Unavailable(
                           "Currently, NPU and GPU cannot be enabled in the "

@@ -833,6 +834,61 @@ std::string AnalysisConfig::Summary() {
   return os.PrintTable();
 }

+LiteNNAdapterConfig &LiteNNAdapterConfig::SetDeviceNames(
+    const std::vector<std::string> &names) {
+  nnadapter_device_names = names;
+  return *this;
+}
+
+LiteNNAdapterConfig &LiteNNAdapterConfig::SetContextProperties(
+    const std::string &properties) {
+  nnadapter_context_properties = properties;
+  return *this;
+}
+
+LiteNNAdapterConfig &LiteNNAdapterConfig::SetModelCacheDir(
+    const std::string &dir) {
+  nnadapter_model_cache_dir = dir;
+  return *this;
+}
+
+LiteNNAdapterConfig &LiteNNAdapterConfig::SetModelCacheBuffers(
+    const std::string &model_cache_token,
+    const std::vector<char> &model_cache_buffer) {
+  PADDLE_ENFORCE_EQ(model_cache_token.empty(), false,
+                    platform::errors::InvalidArgument(
+                        "model_cache_token should not be empty."));
+  PADDLE_ENFORCE_EQ(model_cache_buffer.empty(), false,
+                    platform::errors::InvalidArgument(
+                        "model_cache_buffer should not be empty."));
+  PADDLE_ENFORCE_EQ(nnadapter_model_cache_buffers.count(model_cache_token),
+                    false,
+                    platform::errors::InvalidArgument(
+                        "model_cache_token has already been set."));
+  nnadapter_model_cache_buffers[model_cache_token] = model_cache_buffer;
+  return *this;
+}
+
+LiteNNAdapterConfig &LiteNNAdapterConfig::SetSubgraphPartitionConfigPath(
+    const std::string &path) {
+  nnadapter_subgraph_partition_config_path = path;
+  return *this;
+}
+
+LiteNNAdapterConfig &LiteNNAdapterConfig::SetSubgraphPartitionConfigBuffer(
+    const std::string &buffer) {
+  nnadapter_subgraph_partition_config_buffer = buffer;
+  return *this;
+}
+
+LiteNNAdapterConfig &LiteNNAdapterConfig::Enable() {
+  use_nnadapter = true;
+  return *this;
+}
+
+LiteNNAdapterConfig &LiteNNAdapterConfig::Disable() {
+  use_nnadapter = false;
+  return *this;
+}
+
 void AnalysisConfig::CollectShapeRangeInfo(
     const std::string &shape_range_info_path) {
   LOG(INFO) << "In CollectShapeInfo mode, we will disable optimizations and "
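Note that each setter above returns *this, so NNAdapter options can be chained, and SetModelCacheBuffers enforces three preconditions: a non-empty token, a non-empty buffer, and a token that has not been registered before. A minimal sketch of that contract, assuming a Paddle build that includes this commit (the token and byte values below are hypothetical illustration values):

    #include "paddle/fluid/inference/api/paddle_analysis_config.h"

    // Sketch: each cache token maps to exactly one cached model buffer.
    void ConfigureModelCache(paddle::AnalysisConfig* config) {
      auto& nnadapter = config->NNAdapter();
      // Both arguments must be non-empty ("token-a" / bytes are hypothetical).
      nnadapter.SetModelCacheBuffers("token-a", {'\x01', '\x02'});
      // Registering "token-a" again would trip the third PADDLE_ENFORCE_EQ
      // ("model_cache_token has already been set."), so callers need a
      // distinct token per cached (sub)model.
    }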
paddle/fluid/inference/api/analysis_predictor.cc

@@ -276,6 +276,22 @@ bool AnalysisPredictor::CreateExecutor() {
         "You tried to use NPU forward propagation, but Paddle was not compiled "
         "with WITH_ASCEND_CL."));
 #endif
+  } else if (config_.NNAdapter().use_nnadapter) {
+    if (config_.lite_engine_enabled()) {
+      place_ = paddle::platform::CPUPlace();
+#ifndef LITE_SUBGRAPH_WITH_NNADAPTER
+      PADDLE_THROW(
+          platform::errors::Unavailable("You tried to use an NNAdapter lite "
+                                        "engine, but Paddle was not compiled "
+                                        "with it."));
+#endif  // LITE_SUBGRAPH_WITH_NNADAPTER
+    } else {
+      PADDLE_THROW(
+          platform::errors::Unavailable("You tried to use NNadapter forward "
+                                        "propagation (inference without lite "
+                                        "engine), but Paddle was not compiled "
+                                        "with LITE_WITH_NNADAPTER."));
+    }
   } else {
     place_ = paddle::platform::CPUPlace();
   }

@@ -601,6 +617,26 @@ void AnalysisPredictor::PrepareArgument() {
     argument_.SetXpuAutotuneFile(config_.xpu_autotune_file_);
     argument_.SetXpuPrecision(config_.xpu_precision_);
     argument_.SetXpuAdaptiveSeqlen(config_.xpu_adaptive_seqlen_);
+    // NNAdapter related
+    argument_.SetUseNNAdapter(config_.NNAdapter().use_nnadapter);
+    argument_.SetNNAdapterDeviceNames(config_.NNAdapter().nnadapter_device_names);
+    argument_.SetNNAdapterContextProperties(
+        config_.NNAdapter().nnadapter_context_properties);
+    argument_.SetNNAdapterModelCacheDir(
+        config_.NNAdapter().nnadapter_model_cache_dir);
+    argument_.SetNNAdapterSubgraphPartitionConfigBuffer(
+        config_.NNAdapter().nnadapter_subgraph_partition_config_buffer);
+    argument_.SetNNAdapterSubgraphPartitionConfigPath(
+        config_.NNAdapter().nnadapter_subgraph_partition_config_path);
+    std::vector<std::string> buffer_keys;
+    std::vector<std::vector<char>> buffer_vals;
+    for (auto it : config_.NNAdapter().nnadapter_model_cache_buffers) {
+      buffer_keys.emplace_back(it.first);
+      buffer_vals.emplace_back(it.second);
+    }
+    argument_.SetNNAdapterModelCacheToken(buffer_keys);
+    argument_.SetNNAdapterModelCacheBuffer(buffer_vals);
     LOG(INFO) << "Lite subgraph engine is enabled";
   }
paddle/fluid/inference/api/analysis_predictor_tester.cc

@@ -61,6 +61,26 @@ TEST(AnalysisPredictor, analysis_off) {
   ASSERT_TRUE(predictor->Run(inputs, &outputs));
 }

+#ifndef WIN32
+TEST(AnalysisPredictor, lite_nn_adapter_npu) {
+  AnalysisConfig config;
+  config.SetModel(FLAGS_dirname);
+  config.EnableLiteEngine();
+  config.NNAdapter()
+      .Disable()
+      .Enable()
+      .SetDeviceNames({"huawei_ascend_npu"})
+      .SetContextProperties("HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS=0")
+      .SetModelCacheDir("cache_dirr")
+      .SetSubgraphPartitionConfigPath("")
+      .SetModelCacheBuffers("c1", {'c'});
+#ifndef LITE_SUBGRAPH_WITH_NNADAPTER
+  EXPECT_THROW(CreatePaddlePredictor<AnalysisConfig>(config),
+               paddle::platform::EnforceNotMet);
+#endif
+}
+#endif
+
 TEST(AnalysisPredictor, analysis_on) {
   AnalysisConfig config;
   config.SetModel(FLAGS_dirname);
paddle/fluid/inference/api/paddle_analysis_config.h

@@ -48,6 +48,34 @@ namespace paddle {

 class AnalysisPredictor;
 struct MkldnnQuantizerConfig;

+struct LiteNNAdapterConfig {
+  bool use_nnadapter{false};
+  std::string nnadapter_model_cache_dir;
+  std::map<std::string, std::vector<char>> nnadapter_model_cache_buffers;
+  std::vector<std::string> nnadapter_device_names;
+  std::string nnadapter_context_properties;
+  std::string nnadapter_subgraph_partition_config_path;
+  std::string nnadapter_subgraph_partition_config_buffer;
+
+  LiteNNAdapterConfig& SetDeviceNames(const std::vector<std::string>& names);
+  LiteNNAdapterConfig& SetContextProperties(const std::string& properties);
+  LiteNNAdapterConfig& SetModelCacheDir(const std::string& dir);
+  LiteNNAdapterConfig& SetModelCacheBuffers(
+      const std::string& model_cache_token,
+      const std::vector<char>& model_cache_buffer);
+  LiteNNAdapterConfig& SetSubgraphPartitionConfigPath(const std::string& path);
+  LiteNNAdapterConfig& SetSubgraphPartitionConfigBuffer(
+      const std::string& buffer);
+  LiteNNAdapterConfig& Enable();
+  LiteNNAdapterConfig& Disable();
+};
+
 ///
 /// \brief configuration manager for AnalysisPredictor.
 /// \since 1.7.0

@@ -692,6 +720,8 @@ struct PD_INFER_DECL AnalysisConfig {
   ///
   std::string Summary();

+  LiteNNAdapterConfig& NNAdapter() { return nnadapter_config_; }
+
  protected:
   // Update the config.
   void Update();

@@ -800,6 +830,9 @@ struct PD_INFER_DECL AnalysisConfig {
   std::string xpu_precision_;
   bool xpu_adaptive_seqlen_;

+  // NNAdapter related
+  LiteNNAdapterConfig nnadapter_config_;
+
   // mkldnn related.
   int mkldnn_cache_capacity_{10};
   bool use_mkldnn_quantizer_{false};
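Combined with the tester added above, the intended end-to-end call pattern for this new public API looks roughly as follows. This is a hedged sketch assuming a build with LITE_SUBGRAPH_WITH_NNADAPTER and the Huawei Ascend NPU driver enabled; the model path and cache directory are placeholders, not values from this commit:

    #include "paddle/fluid/inference/api/paddle_analysis_config.h"
    #include "paddle/fluid/inference/api/paddle_inference_api.h"

    // Sketch of the fluent LiteNNAdapterConfig usage introduced here.
    void BuildNNAdapterPredictor() {
      paddle::AnalysisConfig config;
      config.SetModel("./mobilenet_v1");  // placeholder model directory
      config.EnableLiteEngine();          // NNAdapter runs through the Lite subgraph engine
      config.NNAdapter()
          .Enable()
          .SetDeviceNames({"huawei_ascend_npu"})
          .SetContextProperties("HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS=0")
          .SetModelCacheDir("./nnadapter_cache");  // placeholder cache dir
      auto predictor = paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(config);
      (void)predictor;  // run inference via predictor->Run(...) as usual
    }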
paddle/fluid/inference/lite/CMakeLists.txt

@@ -2,8 +2,8 @@ if(XPU_SDK_ROOT)
   set(XPU_DEPS xpuapi xpurt)
 endif()

-cc_library(lite_op_teller SRCS op_teller.cc DEPS lite_full_static framework_proto device_context boost xxhash)
-cc_library(lite_engine SRCS engine.cc DEPS lite_full_static framework_proto ${XPU_DEPS})
-cc_library(lite_tensor_utils SRCS tensor_utils.cc DEPS memcpy lite_full_static framework_proto boost device_context ${XPU_DEPS})
+cc_library(lite_op_teller SRCS op_teller.cc DEPS ${LITE_DEPS} framework_proto device_context boost xxhash)
+cc_library(lite_engine SRCS engine.cc DEPS ${LITE_DEPS} framework_proto ${XPU_DEPS})
+cc_library(lite_tensor_utils SRCS tensor_utils.cc DEPS memcpy ${LITE_DEPS} framework_proto boost device_context ${XPU_DEPS})
 cc_test(test_lite_engine SRCS test_engine_lite.cc DEPS lite_engine protobuf framework_proto glog gtest analysis)
 cc_test(test_lite_tensor_utils SRCS test_tensor_utils.cc DEPS lite_engine lite_tensor_utils)
paddle/fluid/inference/lite/engine.cc

@@ -69,6 +69,25 @@ paddle::lite_api::PaddlePredictor* EngineManager::Create(
       cfg.adaptive_seqlen);
 #endif

+#ifdef LITE_SUBGRAPH_WITH_NPU
+  lite_cxx_config.set_nnadapter_device_names(cfg.nnadapter_device_names);
+  lite_cxx_config.set_nnadapter_context_properties(
+      cfg.nnadapter_context_properties);
+  lite_cxx_config.set_nnadapter_model_cache_dir(cfg.nnadapter_model_cache_dir);
+  if (!cfg.nnadapter_subgraph_partition_config_path.empty()) {
+    lite_cxx_config.set_nnadapter_subgraph_partition_config_path(
+        cfg.nnadapter_subgraph_partition_config_path);
+  }
+  if (!cfg.nnadapter_subgraph_partition_config_buffer.empty()) {
+    lite_cxx_config.set_nnadapter_subgraph_partition_config_buffer(
+        cfg.nnadapter_subgraph_partition_config_buffer);
+  }
+  for (size_t i = 0; i < cfg.nnadapter_model_cache_token.size(); ++i) {
+    lite_cxx_config.set_nnadapter_model_cache_buffers(
+        cfg.nnadapter_model_cache_token[i], cfg.nnadapter_model_cache_buffer[i]);
+  }
+#endif
+
   // create predictor
   std::shared_ptr<paddle::lite_api::PaddlePredictor> p =
       paddle::lite_api::CreatePaddlePredictor(lite_cxx_config);
paddle/fluid/inference/lite/engine.h

@@ -53,6 +53,15 @@ struct EngineConfig {
   // for cuda
   bool use_multi_stream{false};

+  // for nnadapter or npu.
+  std::string nnadapter_model_cache_dir;
+  std::vector<std::string> nnadapter_device_names;
+  std::string nnadapter_context_properties;
+  std::string nnadapter_subgraph_partition_config_buffer;
+  std::string nnadapter_subgraph_partition_config_path;
+  std::vector<std::string> nnadapter_model_cache_token;
+  std::vector<std::vector<char>> nnadapter_model_cache_buffer;
 };

 class EngineManager {
paddle/fluid/operators/lite/lite_engine_op_test.cc

@@ -30,6 +30,8 @@ using paddle::inference::lite::CreateTensor;
 using paddle::inference::lite::serialize_params;
 namespace paddle {
 namespace operators {

+#if defined(PADDLE_WITH_CUDA)
 TEST(LiteEngineOp, engine_op) {
   framework::ProgramDesc program;
   auto *block_ = program.Proto()->mutable_blocks(0);

@@ -75,8 +77,8 @@ TEST(LiteEngineOp, engine_op) {
   platform::CPUDeviceContext ctx(place);
 #endif
   // Prepare variables.
-  CreateTensor(&scope, "x", std::vector<int64_t>({2, 4}), false);
-  CreateTensor(&scope, "y", std::vector<int64_t>({2, 4}), false);
+  CreateTensor(&scope, "x", std::vector<int64_t>({2, 4}), true);
+  CreateTensor(&scope, "y", std::vector<int64_t>({2, 4}), true);
   CreateTensor(&scope, "out", std::vector<int64_t>({2, 4}), false);

   ASSERT_EQ(block_->ops_size(), 4);

@@ -113,5 +115,7 @@ TEST(LiteEngineOp, engine_op) {
   engine_op->Run(scope, place);
   LOG(INFO) << "done";
 }
+#endif

 }  // namespace operators
 }  // namespace paddle
paddle/fluid/pybind/inference_api.cc

@@ -87,6 +87,7 @@ void BindPaddlePlace(py::module *m);
 void BindPaddlePredictor(py::module *m);
 void BindNativeConfig(py::module *m);
 void BindNativePredictor(py::module *m);
+void BindLiteNNAdapterConfig(py::module *m);
 void BindAnalysisConfig(py::module *m);
 void BindAnalysisPredictor(py::module *m);
 void BindZeroCopyTensor(py::module *m);

@@ -303,6 +304,7 @@ void BindInferenceApi(py::module *m) {
   BindPaddlePredictor(m);
   BindNativeConfig(m);
   BindNativePredictor(m);
+  BindLiteNNAdapterConfig(m);
   BindAnalysisConfig(m);
   BindAnalysisPredictor(m);
   BindPaddleInferPredictor(m);

@@ -624,7 +626,26 @@ void BindAnalysisConfig(py::module *m) {
           [](AnalysisConfig &self) {
             return dynamic_cast<PaddlePassBuilder *>(self.pass_builder());
           },
-          py::return_value_policy::reference);
+          py::return_value_policy::reference)
+      .def("nnadapter", &AnalysisConfig::NNAdapter);
 }

+void BindLiteNNAdapterConfig(py::module *m) {
+  py::class_<LiteNNAdapterConfig> lite_nnadapter_config(*m,
+                                                        "LiteNNAdapterConfig");
+
+  lite_nnadapter_config
+      .def("set_device_names", &LiteNNAdapterConfig::SetDeviceNames)
+      .def("set_context_properties", &LiteNNAdapterConfig::SetContextProperties)
+      .def("set_model_cache_dir", &LiteNNAdapterConfig::SetModelCacheDir)
+      .def("set_model_cache_buffers", &LiteNNAdapterConfig::SetModelCacheBuffers)
+      .def("set_subgraph_partition_config_path",
+           &LiteNNAdapterConfig::SetSubgraphPartitionConfigPath)
+      .def("set_subgraph_partition_config_buffer",
+           &LiteNNAdapterConfig::SetSubgraphPartitionConfigBuffer)
+      .def("enable", &LiteNNAdapterConfig::Enable)
+      .def("disable", &LiteNNAdapterConfig::Disable);
+}
+
 #ifdef PADDLE_WITH_MKLDNN
paddle/scripts/paddle_build.sh

@@ -223,7 +223,7 @@ function cmake_base() {
             -DWITH_GLOO=${gloo_flag}
             -DWITH_LITE=${WITH_LITE:-OFF}
             -DWITH_XPU=${WITH_XPU:-OFF}
-            -DLITE_GIT_TAG=release/v2.8
+            -DLITE_GIT_TAG=_release/v2.10
            -DWITH_UNITY_BUILD=${WITH_UNITY_BUILD:-OFF}
            -DWITH_XPU_BKCL=${WITH_XPU_BKCL:-OFF}
            -DWITH_ARM=${WITH_ARM:-OFF}

@@ -266,7 +266,7 @@ EOF
         -DWITH_PSCORE=${distibuted_flag} \
         -DWITH_PSLIB=${WITH_PSLIB:-OFF} \
         -DWITH_GLOO=${gloo_flag} \
-        -DLITE_GIT_TAG=release/v2.8 \
+        -DLITE_GIT_TAG=_release/v2.10 \
        -DWITH_XPU=${WITH_XPU:-OFF} \
        -DXPU_SDK_ROOT=${XPU_SDK_ROOT:-""} \
        -DWITH_LITE=${WITH_LITE:-OFF} \
python/setup.py.in

@@ -338,6 +338,12 @@ else:
     if '${WITH_LITE}' == 'ON':
         shutil.copy('${LITE_SHARED_LIB}', libs_path)
         package_data['paddle.libs']+=['libpaddle_full_api_shared' + ext_name]
+        if '${LITE_WITH_NNADAPTER}' == 'ON':
+            shutil.copy('${LITE_NNADAPTER_LIB}', libs_path)
+            package_data['paddle.libs']+=['libnnadapter' + ext_name]
+            if '${NNADAPTER_WITH_HUAWEI_ASCEND_NPU}' == 'ON':
+                shutil.copy('${LITE_NNADAPTER_NPU_LIB}', libs_path)
+                package_data['paddle.libs']+=['libnnadapter_driver_huawei_ascend_npu' + ext_name]
     if '${WITH_PSLIB}' == 'ON':
         shutil.copy('${PSLIB_LIB}', libs_path)