Unverified commit 10e53044, authored by JingZhuangzhuang, committed by GitHub

[Inference] Support NNAdapter and ascend310 (#35226)

Parent 5cda6b2b
@@ -35,6 +35,14 @@ if (LITE_WITH_XPU)
ENDIF()
endif()
if (LITE_WITH_NNADAPTER)
add_definitions(-DLITE_SUBGRAPH_WITH_NNADAPTER)
if (NNADAPTER_WITH_HUAWEI_ASCEND_NPU)
add_definitions(-DLITE_SUBGRAPH_WITH_NPU)
set(NPU_SDK_ROOT "/usr/local/Ascend/ascend-toolkit/latest" CACHE STRING "default NPU SDK ROOT")
endif()
endif()
if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
include(ExternalProject)
set(LITE_PROJECT extern_lite)
@@ -67,6 +75,9 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
-DLITE_WITH_XPU=${LITE_WITH_XPU}
-DXPU_SDK_URL=${XPU_BASE_URL}
-DXPU_SDK_ENV=${XPU_SDK_ENV}
-DLITE_WITH_NNADAPTER=${LITE_WITH_NNADAPTER}
-DNNADAPTER_WITH_HUAWEI_ASCEND_NPU=${NNADAPTER_WITH_HUAWEI_ASCEND_NPU}
-DNNADAPTER_HUAWEI_ASCEND_NPU_SDK_ROOT=${NPU_SDK_ROOT}
-DLITE_WITH_CODE_META_INFO=OFF
-DLITE_WITH_ARM=ON)
ExternalProject_Add(
@@ -110,6 +121,9 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
-DLITE_WITH_XPU=${LITE_WITH_XPU}
-DXPU_SDK_URL=${XPU_BASE_URL}
-DXPU_SDK_ENV=${XPU_SDK_ENV}
-DLITE_WITH_NNADAPTER=${LITE_WITH_NNADAPTER}
-DNNADAPTER_WITH_HUAWEI_ASCEND_NPU=${NNADAPTER_WITH_HUAWEI_ASCEND_NPU}
-DNNADAPTER_HUAWEI_ASCEND_NPU_SDK_ROOT=${NPU_SDK_ROOT}
-DLITE_WITH_CODE_META_INFO=OFF
-DLITE_WITH_ARM=OFF)
@@ -120,6 +134,7 @@ if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
GIT_TAG ${LITE_GIT_TAG}
PREFIX ${LITE_SOURCES_DIR}
UPDATE_COMMAND ""
PATCH_COMMAND sed -i "s?NNadapter_bridges_path = os.path.abspath('..')+\"\/lite\/kernels\/nnadapter\/bridges\/paddle_use_bridges.h\"?NNadapter_bridges_path = os.path.abspath(\'..\')+\"\/extern_lite\/lite\/kernels\/nnadapter\/bridges\/paddle_use_bridges.h\"?" ${LITE_SOURCES_DIR}/src/extern_lite//lite/tools/cmake_tools/record_supported_kernel_op.py && sed -i "/general::ssa::ConvertToSSA(cpp_prog)$<SEMICOLON>/d" ${LITE_SOURCES_DIR}/src/extern_lite/lite/model_parser/model_parser.cc
BUILD_COMMAND ${LITE_BUILD_COMMAND}
INSTALL_COMMAND ""
CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
@@ -146,6 +161,11 @@ endif()
if (WITH_ARM)
if(LITE_WITH_XPU)
set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8.xpu)
elseif(LITE_WITH_NNADAPTER)
message("Enable LITE_WITH_NNADAPTER")
if (NNADAPTER_WITH_HUAWEI_ASCEND_NPU)
set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8.nnadapter)
endif()
else()
set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8)
endif()
@@ -174,5 +194,16 @@ endfunction()
external_lite_libs(lite_full_static ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libpaddle_full_api_shared.so)
set(LITE_SHARED_LIB ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libpaddle_full_api_shared.so)
if (LITE_WITH_NNADAPTER)
set(LITE_NNADAPTER_LIB ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter.so)
if (NNADAPTER_WITH_HUAWEI_ASCEND_NPU)
external_lite_libs(lite_nnadapter ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter.so ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter_driver_huawei_ascend_npu.so)
set(LITE_DEPS lite_full_static lite_nnadapter)
set(LITE_NNADAPTER_NPU_LIB ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter_driver_huawei_ascend_npu.so)
endif()
else()
set(LITE_DEPS lite_full_static)
endif()
add_definitions(-DPADDLE_WITH_LITE)
add_definitions(-DLITE_WITH_LOG)
@@ -239,6 +239,22 @@ struct Argument {
DECL_ARGUMENT_FIELD(xpu_precision, XpuPrecision, std::string);
DECL_ARGUMENT_FIELD(xpu_adaptive_seqlen, XpuAdaptiveSeqlen, bool);
DECL_ARGUMENT_FIELD(use_nnadapter, UseNNAdapter, bool);
DECL_ARGUMENT_FIELD(nnadapter_model_cache_dir, NNAdapterModelCacheDir,
std::string);
DECL_ARGUMENT_FIELD(nnadapter_device_names, NNAdapterDeviceNames,
std::vector<std::string>);
DECL_ARGUMENT_FIELD(nnadapter_context_properties, NNAdapterContextProperties,
std::string);
DECL_ARGUMENT_FIELD(nnadapter_subgraph_partition_config_buffer,
NNAdapterSubgraphPartitionConfigBuffer, std::string);
DECL_ARGUMENT_FIELD(nnadapter_subgraph_partition_config_path,
NNAdapterSubgraphPartitionConfigPath, std::string);
DECL_ARGUMENT_FIELD(nnadapter_model_cache_token, NNAdapterModelCacheToken,
std::vector<std::string>);
DECL_ARGUMENT_FIELD(nnadapter_model_cache_buffer, NNAdapterModelCacheBuffer,
std::vector<std::vector<char>>);
// Memory optimized related.
DECL_ARGUMENT_FIELD(enable_memory_optim, EnableMemoryOptim, bool);
......
@@ -202,6 +202,27 @@ void IRPassManager::CreatePasses(Argument *argument,
new std::string(argument->xpu_autotune_file()));
pass->Set("precision", new std::string(argument->xpu_precision()));
pass->Set("adaptive_seqlen", new bool(argument->xpu_adaptive_seqlen()));
// NNAdapter Related
pass->Set("use_nnadapter", new bool(argument->use_nnadapter()));
pass->Set("nnadapter_model_cache_dir",
new std::string(argument->nnadapter_model_cache_dir()));
pass->Set(
"nnadapter_device_names",
new std::vector<std::string>(argument->nnadapter_device_names()));
pass->Set("nnadapter_context_properties",
new std::string(argument->nnadapter_context_properties()));
pass->Set("nnadapter_subgraph_partition_config_buffer",
new std::string(
argument->nnadapter_subgraph_partition_config_buffer()));
pass->Set("nnadapter_subgraph_partition_config_path",
new std::string(
argument->nnadapter_subgraph_partition_config_path()));
pass->Set("nnadapter_model_cache_buffer",
new std::vector<std::vector<char>>(
argument->nnadapter_model_cache_buffer()));
pass->Set("nnadapter_model_cache_token",
new std::vector<std::string>(
argument->nnadapter_model_cache_token()));
}
disable_logs_ = argument->disable_logs();
if (pass_name == "fc_fuse_pass") {
......
@@ -250,12 +250,30 @@ void LiteSubgraphPass::SetUpEngine(
std::string autotune_file = Get<std::string>("autotune_file");
std::string precision = Get<std::string>("precision");
bool adaptive_seqlen = Get<bool>("adaptive_seqlen");
// NNAdapter Related
bool use_nnadapter = Get<bool>("use_nnadapter");
std::string nnadapter_model_cache_dir =
Get<std::string>("nnadapter_model_cache_dir");
auto nnadapter_device_names =
Get<std::vector<std::string>>("nnadapter_device_names");
std::string nnadapter_context_properties =
Get<std::string>("nnadapter_context_properties");
std::string nnadapter_subgraph_partition_config_buffer =
Get<std::string>("nnadapter_subgraph_partition_config_buffer");
std::string nnadapter_subgraph_partition_config_path =
Get<std::string>("nnadapter_subgraph_partition_config_path");
auto nnadapter_model_cache_buffer =
Get<std::vector<std::vector<char>>>("nnadapter_model_cache_buffer");
auto nnadapter_model_cache_token =
Get<std::vector<std::string>>("nnadapter_model_cache_token");
lite_api::TargetType target_type;
if (use_gpu) {
target_type = TARGET(kCUDA);
} else if (use_xpu) {
target_type = TARGET(kXPU);
} else if (use_nnadapter) {
target_type = TARGET(kNNAdapter);
} else {
#ifdef PADDLE_WITH_ARM
target_type = TARGET(kARM);
@@ -292,6 +310,17 @@ void LiteSubgraphPass::SetUpEngine(
config.autotune_file = autotune_file;
config.precision = precision;
config.adaptive_seqlen = adaptive_seqlen;
// NNAdapter Related
config.nnadapter_model_cache_dir = nnadapter_model_cache_dir;
config.nnadapter_device_names = nnadapter_device_names;
config.nnadapter_context_properties = nnadapter_context_properties;
config.nnadapter_subgraph_partition_config_buffer =
nnadapter_subgraph_partition_config_buffer;
config.nnadapter_subgraph_partition_config_path =
nnadapter_subgraph_partition_config_path;
config.nnadapter_model_cache_buffer = nnadapter_model_cache_buffer;
config.nnadapter_model_cache_token = nnadapter_model_cache_token;
if (dump_model) {
lite::StrToBinaryFile("./model.bin", config.model);
lite::StrToBinaryFile("./param.bin", config.param);
......
@@ -207,6 +207,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
// NPU related.
CP_MEMBER(use_npu_);
CP_MEMBER(npu_device_id_);
CP_MEMBER(nnadapter_config_);
// profile related.
CP_MEMBER(with_profile_);
@@ -554,7 +555,7 @@ void AnalysisConfig::Update() {
}
if (use_npu_) {
- #ifdef PADDLE_WITH_ASCEND_CL
+ #if defined(PADDLE_WITH_ASCEND_CL) || defined(LITE_SUBGRAPH_WITH_NPU)
PADDLE_ENFORCE_EQ(use_gpu_, false,
platform::errors::Unavailable(
"Currently, NPU and GPU cannot be enabled in the "
@@ -833,6 +834,61 @@ std::string AnalysisConfig::Summary() {
return os.PrintTable();
}
LiteNNAdapterConfig &LiteNNAdapterConfig::SetDeviceNames(
const std::vector<std::string> &names) {
nnadapter_device_names = names;
return *this;
}
LiteNNAdapterConfig &LiteNNAdapterConfig::SetContextProperties(
const std::string &properties) {
nnadapter_context_properties = properties;
return *this;
}
LiteNNAdapterConfig &LiteNNAdapterConfig::SetModelCacheDir(
const std::string &dir) {
nnadapter_model_cache_dir = dir;
return *this;
}
LiteNNAdapterConfig &LiteNNAdapterConfig::SetModelCacheBuffers(
const std::string &model_cache_token,
const std::vector<char> &model_cache_buffer) {
PADDLE_ENFORCE_EQ(model_cache_token.empty(), false,
platform::errors::InvalidArgument(
"model_cache_token should not be empty."));
PADDLE_ENFORCE_EQ(model_cache_buffer.empty(), false,
platform::errors::InvalidArgument(
"model_cache_buffer should not be empty."));
PADDLE_ENFORCE_EQ(nnadapter_model_cache_buffers.count(model_cache_token),
false, platform::errors::InvalidArgument(
"model_cache_token has already been set."));
nnadapter_model_cache_buffers[model_cache_token] = model_cache_buffer;
return *this;
}
LiteNNAdapterConfig &LiteNNAdapterConfig::SetSubgraphPartitionConfigPath(
const std::string &path) {
nnadapter_subgraph_partition_config_path = path;
return *this;
}
LiteNNAdapterConfig &LiteNNAdapterConfig::SetSubgraphPartitionConfigBuffer(
const std::string &buffer) {
nnadapter_subgraph_partition_config_buffer = buffer;
return *this;
}
LiteNNAdapterConfig &LiteNNAdapterConfig::Enable() {
use_nnadapter = true;
return *this;
}
LiteNNAdapterConfig &LiteNNAdapterConfig::Disable() {
use_nnadapter = false;
return *this;
}
void AnalysisConfig::CollectShapeRangeInfo(
const std::string &shape_range_info_path) {
LOG(INFO) << "In CollectShapeInfo mode, we will disable optimizations and "
......
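For orientation, here is a minimal, hedged C++ sketch of how an application might drive the LiteNNAdapterConfig API implemented above; it mirrors the lite_nn_adapter_npu unit test added later in this commit. The public header name, model directory, cache directory, and device id are placeholders, not part of this change.

```cpp
#include <string>
#include <vector>

#include "paddle_inference_api.h"  // assumed public header exposing AnalysisConfig

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("./mobilenet_v1");  // placeholder model directory
  config.EnableLiteEngine();          // NNAdapter runs through the Lite subgraph engine

  // Fluent setters introduced by this commit; each returns *this, so they chain.
  config.NNAdapter()
      .Enable()
      .SetDeviceNames({"huawei_ascend_npu"})
      .SetContextProperties("HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS=0")
      .SetModelCacheDir("./nnadapter_cache");  // placeholder cache directory

  // If Paddle was not built with LITE_SUBGRAPH_WITH_NNADAPTER, predictor
  // creation throws, as the unit test below asserts.
  auto predictor = paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(config);
  // ... set inputs and call predictor->Run() as usual.
  return 0;
}
```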
@@ -276,6 +276,22 @@ bool AnalysisPredictor::CreateExecutor() {
"You tried to use NPU forward propagation, but Paddle was not compiled "
"with WITH_ASCEND_CL."));
#endif
} else if (config_.NNAdapter().use_nnadapter) {
if (config_.lite_engine_enabled()) {
place_ = paddle::platform::CPUPlace();
#ifndef LITE_SUBGRAPH_WITH_NNADAPTER
PADDLE_THROW(
platform::errors::Unavailable("You tried to use an NNAdapter lite "
"engine, but Paddle was not compiled "
"with it."));
#endif // LITE_SUBGRAPH_WITH_NNADAPTER
} else {
PADDLE_THROW(
platform::errors::Unavailable("You tried to use NNadapter forward "
"propagation (inference without lite "
"engine), but Paddle was not compiled "
"with LITE_WITH_NNADAPTER."));
}
} else {
place_ = paddle::platform::CPUPlace();
}
@@ -601,6 +617,26 @@ void AnalysisPredictor::PrepareArgument() {
argument_.SetXpuAutotuneFile(config_.xpu_autotune_file_);
argument_.SetXpuPrecision(config_.xpu_precision_);
argument_.SetXpuAdaptiveSeqlen(config_.xpu_adaptive_seqlen_);
// NNAdapter related
argument_.SetUseNNAdapter(config_.NNAdapter().use_nnadapter);
argument_.SetNNAdapterDeviceNames(
config_.NNAdapter().nnadapter_device_names);
argument_.SetNNAdapterContextProperties(
config_.NNAdapter().nnadapter_context_properties);
argument_.SetNNAdapterModelCacheDir(
config_.NNAdapter().nnadapter_model_cache_dir);
argument_.SetNNAdapterSubgraphPartitionConfigBuffer(
config_.NNAdapter().nnadapter_subgraph_partition_config_buffer);
argument_.SetNNAdapterSubgraphPartitionConfigPath(
config_.NNAdapter().nnadapter_subgraph_partition_config_path);
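// Flatten the token -> buffer map into two parallel vectors, since the
// Argument stores cache tokens and cache buffers as separate, index-aligned
// fields (NNAdapterModelCacheToken / NNAdapterModelCacheBuffer).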
std::vector<std::string> buffer_keys;
std::vector<std::vector<char>> buffer_vals;
for (auto it : config_.NNAdapter().nnadapter_model_cache_buffers) {
buffer_keys.emplace_back(it.first);
buffer_vals.emplace_back(it.second);
}
argument_.SetNNAdapterModelCacheToken(buffer_keys);
argument_.SetNNAdapterModelCacheBuffer(buffer_vals);
LOG(INFO) << "Lite subgraph engine is enabled";
}
......
@@ -61,6 +61,26 @@ TEST(AnalysisPredictor, analysis_off) {
ASSERT_TRUE(predictor->Run(inputs, &outputs));
}
#ifndef WIN32
TEST(AnalysisPredictor, lite_nn_adapter_npu) {
AnalysisConfig config;
config.SetModel(FLAGS_dirname);
config.EnableLiteEngine();
config.NNAdapter()
.Disable()
.Enable()
.SetDeviceNames({"huawei_ascend_npu"})
.SetContextProperties("HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS=0")
.SetModelCacheDir("cache_dirr")
.SetSubgraphPartitionConfigPath("")
.SetModelCacheBuffers("c1", {'c'});
#ifndef LITE_SUBGRAPH_WITH_NNADAPTER
EXPECT_THROW(CreatePaddlePredictor<AnalysisConfig>(config),
paddle::platform::EnforceNotMet);
#endif
}
#endif
TEST(AnalysisPredictor, analysis_on) {
AnalysisConfig config;
config.SetModel(FLAGS_dirname);
......
@@ -48,6 +48,34 @@ namespace paddle {
class AnalysisPredictor;
struct MkldnnQuantizerConfig;
struct LiteNNAdapterConfig {
bool use_nnadapter{false};
std::string nnadapter_model_cache_dir;
std::map<std::string, std::vector<char>> nnadapter_model_cache_buffers;
std::vector<std::string> nnadapter_device_names;
std::string nnadapter_context_properties;
std::string nnadapter_subgraph_partition_config_path;
std::string nnadapter_subgraph_partition_config_buffer;
LiteNNAdapterConfig& SetDeviceNames(const std::vector<std::string>& names);
LiteNNAdapterConfig& SetContextProperties(const std::string& properties);
LiteNNAdapterConfig& SetModelCacheDir(const std::string& dir);
LiteNNAdapterConfig& SetModelCacheBuffers(
const std::string& model_cache_token,
const std::vector<char>& model_cache_buffer);
LiteNNAdapterConfig& SetSubgraphPartitionConfigPath(const std::string& path);
LiteNNAdapterConfig& SetSubgraphPartitionConfigBuffer(
const std::string& buffer);
LiteNNAdapterConfig& Enable();
LiteNNAdapterConfig& Disable();
};
///
/// \brief configuration manager for AnalysisPredictor.
/// \since 1.7.0
@@ -692,6 +720,8 @@ struct PD_INFER_DECL AnalysisConfig {
///
std::string Summary();
LiteNNAdapterConfig& NNAdapter() { return nnadapter_config_; }
protected:
// Update the config.
void Update();
@@ -800,6 +830,9 @@ struct PD_INFER_DECL AnalysisConfig {
std::string xpu_precision_;
bool xpu_adaptive_seqlen_;
// NNAdapter related
LiteNNAdapterConfig nnadapter_config_;
// mkldnn related.
int mkldnn_cache_capacity_{10};
bool use_mkldnn_quantizer_{false};
......
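A note on the two subgraph-partition setters declared above: SetSubgraphPartitionConfigPath() lets the engine read the file itself, while SetSubgraphPartitionConfigBuffer() takes the file contents directly, which is useful when the config ships inside the application. A hedged sketch follows, with a hypothetical ReadFileToString helper and placeholder paths.

```cpp
#include <fstream>
#include <sstream>
#include <string>

#include "paddle_inference_api.h"  // assumed public header exposing AnalysisConfig

// Hypothetical helper, not part of this commit: read a text file into a string.
static std::string ReadFileToString(const std::string& path) {
  std::ifstream fin(path);
  std::stringstream buffer;
  buffer << fin.rdbuf();
  return buffer.str();
}

void ConfigureSubgraphPartition(paddle::AnalysisConfig* config) {
  // Option A: hand the engine a path and let it load the file.
  config->NNAdapter().SetSubgraphPartitionConfigPath(
      "./subgraph_partition_config.txt");  // placeholder path

  // Option B: load the contents yourself and pass them as a buffer.
  config->NNAdapter().SetSubgraphPartitionConfigBuffer(
      ReadFileToString("./subgraph_partition_config.txt"));

  // In practice pick one of the two; each is forwarded to Paddle-Lite only
  // when non-empty (see the engine changes below).
}
```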
@@ -2,8 +2,8 @@ if(XPU_SDK_ROOT)
set(XPU_DEPS xpuapi xpurt)
endif()
- cc_library(lite_op_teller SRCS op_teller.cc DEPS lite_full_static framework_proto device_context boost xxhash)
+ cc_library(lite_op_teller SRCS op_teller.cc DEPS ${LITE_DEPS} framework_proto device_context boost xxhash)
- cc_library(lite_engine SRCS engine.cc DEPS lite_full_static framework_proto ${XPU_DEPS})
+ cc_library(lite_engine SRCS engine.cc DEPS ${LITE_DEPS} framework_proto ${XPU_DEPS})
- cc_library(lite_tensor_utils SRCS tensor_utils.cc DEPS memcpy lite_full_static framework_proto boost device_context ${XPU_DEPS})
+ cc_library(lite_tensor_utils SRCS tensor_utils.cc DEPS memcpy ${LITE_DEPS} framework_proto boost device_context ${XPU_DEPS})
cc_test(test_lite_engine SRCS test_engine_lite.cc DEPS lite_engine protobuf framework_proto glog gtest analysis)
cc_test(test_lite_tensor_utils SRCS test_tensor_utils.cc DEPS lite_engine lite_tensor_utils)
@@ -69,6 +69,25 @@ paddle::lite_api::PaddlePredictor* EngineManager::Create(
cfg.adaptive_seqlen);
#endif
#ifdef LITE_SUBGRAPH_WITH_NPU
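// Forward the NNAdapter settings collected in EngineConfig to the Paddle-Lite
// CxxConfig; the optional subgraph partition config (path or buffer) is only
// passed through when non-empty.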
lite_cxx_config.set_nnadapter_device_names(cfg.nnadapter_device_names);
lite_cxx_config.set_nnadapter_context_properties(
cfg.nnadapter_context_properties);
lite_cxx_config.set_nnadapter_model_cache_dir(cfg.nnadapter_model_cache_dir);
if (!cfg.nnadapter_subgraph_partition_config_path.empty()) {
lite_cxx_config.set_nnadapter_subgraph_partition_config_path(
cfg.nnadapter_subgraph_partition_config_path);
}
if (!cfg.nnadapter_subgraph_partition_config_buffer.empty()) {
lite_cxx_config.set_nnadapter_subgraph_partition_config_buffer(
cfg.nnadapter_subgraph_partition_config_buffer);
}
for (size_t i = 0; i < cfg.nnadapter_model_cache_token.size(); ++i) {
lite_cxx_config.set_nnadapter_model_cache_buffers(
cfg.nnadapter_model_cache_token[i],
cfg.nnadapter_model_cache_buffer[i]);
}
#endif
// create predictor
std::shared_ptr<paddle::lite_api::PaddlePredictor> p =
paddle::lite_api::CreatePaddlePredictor(lite_cxx_config);
......
@@ -53,6 +53,15 @@ struct EngineConfig {
// for cuda
bool use_multi_stream{false};
// for nnadapter or npu.
std::string nnadapter_model_cache_dir;
std::vector<std::string> nnadapter_device_names;
std::string nnadapter_context_properties;
std::string nnadapter_subgraph_partition_config_buffer;
std::string nnadapter_subgraph_partition_config_path;
std::vector<std::string> nnadapter_model_cache_token;
std::vector<std::vector<char>> nnadapter_model_cache_buffer;
};
class EngineManager {
......
@@ -30,6 +30,8 @@ using paddle::inference::lite::CreateTensor;
using paddle::inference::lite::serialize_params;
namespace paddle {
namespace operators {
#if defined(PADDLE_WITH_CUDA)
TEST(LiteEngineOp, engine_op) {
framework::ProgramDesc program;
auto* block_ = program.Proto()->mutable_blocks(0);
@@ -75,8 +77,8 @@ TEST(LiteEngineOp, engine_op) {
platform::CPUDeviceContext ctx(place);
#endif
// Prepare variables.
- CreateTensor(&scope, "x", std::vector<int64_t>({2, 4}), false);
+ CreateTensor(&scope, "x", std::vector<int64_t>({2, 4}), true);
- CreateTensor(&scope, "y", std::vector<int64_t>({2, 4}), false);
+ CreateTensor(&scope, "y", std::vector<int64_t>({2, 4}), true);
CreateTensor(&scope, "out", std::vector<int64_t>({2, 4}), false);
ASSERT_EQ(block_->ops_size(), 4);
@@ -113,5 +115,7 @@ TEST(LiteEngineOp, engine_op) {
engine_op->Run(scope, place);
LOG(INFO) << "done";
}
#endif
} // namespace operators
} // namespace paddle
@@ -87,6 +87,7 @@ void BindPaddlePlace(py::module *m);
void BindPaddlePredictor(py::module *m);
void BindNativeConfig(py::module *m);
void BindNativePredictor(py::module *m);
void BindLiteNNAdapterConfig(py::module *m);
void BindAnalysisConfig(py::module *m);
void BindAnalysisPredictor(py::module *m);
void BindZeroCopyTensor(py::module *m);
@@ -303,6 +304,7 @@ void BindInferenceApi(py::module *m) {
BindPaddlePredictor(m);
BindNativeConfig(m);
BindNativePredictor(m);
BindLiteNNAdapterConfig(m);
BindAnalysisConfig(m);
BindAnalysisPredictor(m);
BindPaddleInferPredictor(m);
@@ -624,7 +626,26 @@ void BindAnalysisConfig(py::module *m) {
[](AnalysisConfig &self) {
return dynamic_cast<PaddlePassBuilder *>(self.pass_builder());
},
- py::return_value_policy::reference);
+ py::return_value_policy::reference)
.def("nnadapter", &AnalysisConfig::NNAdapter);
}
void BindLiteNNAdapterConfig(py::module *m) {
py::class_<LiteNNAdapterConfig> lite_nnadapter_config(*m,
"LiteNNAdapterConfig");
lite_nnadapter_config
.def("set_device_names", &LiteNNAdapterConfig::SetDeviceNames)
.def("set_context_properties", &LiteNNAdapterConfig::SetContextProperties)
.def("set_model_cache_dir", &LiteNNAdapterConfig::SetModelCacheDir)
.def("set_model_cache_buffers",
&LiteNNAdapterConfig::SetModelCacheBuffers)
.def("set_subgraph_partition_config_path",
&LiteNNAdapterConfig::SetSubgraphPartitionConfigPath)
.def("set_subgraph_partition_config_buffer",
&LiteNNAdapterConfig::SetSubgraphPartitionConfigBuffer)
.def("enable", &LiteNNAdapterConfig::Enable)
.def("disable", &LiteNNAdapterConfig::Disable);
}
#ifdef PADDLE_WITH_MKLDNN
......
@@ -223,7 +223,7 @@ function cmake_base() {
-DWITH_GLOO=${gloo_flag}
-DWITH_LITE=${WITH_LITE:-OFF}
-DWITH_XPU=${WITH_XPU:-OFF}
- -DLITE_GIT_TAG=release/v2.8
+ -DLITE_GIT_TAG=_release/v2.10
-DWITH_UNITY_BUILD=${WITH_UNITY_BUILD:-OFF}
-DWITH_XPU_BKCL=${WITH_XPU_BKCL:-OFF}
-DWITH_ARM=${WITH_ARM:-OFF}
@@ -266,7 +266,7 @@ EOF
-DWITH_PSCORE=${distibuted_flag} \
-DWITH_PSLIB=${WITH_PSLIB:-OFF} \
-DWITH_GLOO=${gloo_flag} \
- -DLITE_GIT_TAG=release/v2.8 \
+ -DLITE_GIT_TAG=_release/v2.10 \
-DWITH_XPU=${WITH_XPU:-OFF} \
-DXPU_SDK_ROOT=${XPU_SDK_ROOT:-""} \
-DWITH_LITE=${WITH_LITE:-OFF} \
......
@@ -338,6 +338,12 @@ else:
if '${WITH_LITE}' == 'ON':
shutil.copy('${LITE_SHARED_LIB}', libs_path)
package_data['paddle.libs']+=['libpaddle_full_api_shared' + ext_name]
if '${LITE_WITH_NNADAPTER}' == 'ON':
shutil.copy('${LITE_NNADAPTER_LIB}', libs_path)
package_data['paddle.libs']+=['libnnadapter' + ext_name]
if '${NNADAPTER_WITH_HUAWEI_ASCEND_NPU}' == 'ON':
shutil.copy('${LITE_NNADAPTER_NPU_LIB}', libs_path)
package_data['paddle.libs']+=['libnnadapter_driver_huawei_ascend_npu' + ext_name]
if '${WITH_PSLIB}' == 'ON':
shutil.copy('${PSLIB_LIB}', libs_path)
......