diff --git a/cmake/external/lite.cmake b/cmake/external/lite.cmake
index e213068377b1409595cac9b6169fe7605cff059c..ffadf7bea93768090ee8824a2b6df7e7fc3a2592 100644
--- a/cmake/external/lite.cmake
+++ b/cmake/external/lite.cmake
@@ -35,6 +35,14 @@ if (LITE_WITH_XPU)
   ENDIF()
 endif()
 
+if (LITE_WITH_NNADAPTER)
+  add_definitions(-DLITE_SUBGRAPH_WITH_NNADAPTER)
+  if (NNADAPTER_WITH_HUAWEI_ASCEND_NPU)
+    add_definitions(-DLITE_SUBGRAPH_WITH_NPU)
+    set(NPU_SDK_ROOT "/usr/local/Ascend/ascend-toolkit/latest" CACHE STRING "default NPU SDK ROOT")
+  endif()
+endif()
+
 if (NOT LITE_SOURCE_DIR OR NOT LITE_BINARY_DIR)
   include(ExternalProject)
   set(LITE_PROJECT extern_lite)
@@ -67,6 +75,9 @@
             -DLITE_WITH_XPU=${LITE_WITH_XPU}
             -DXPU_SDK_URL=${XPU_BASE_URL}
             -DXPU_SDK_ENV=${XPU_SDK_ENV}
+            -DLITE_WITH_NNADAPTER=${LITE_WITH_NNADAPTER}
+            -DNNADAPTER_WITH_HUAWEI_ASCEND_NPU=${NNADAPTER_WITH_HUAWEI_ASCEND_NPU}
+            -DNNADAPTER_HUAWEI_ASCEND_NPU_SDK_ROOT=${NPU_SDK_ROOT}
             -DLITE_WITH_CODE_META_INFO=OFF
             -DLITE_WITH_ARM=ON)
     ExternalProject_Add(
@@ -110,6 +121,9 @@
           -DLITE_WITH_XPU=${LITE_WITH_XPU}
           -DXPU_SDK_URL=${XPU_BASE_URL}
           -DXPU_SDK_ENV=${XPU_SDK_ENV}
+          -DLITE_WITH_NNADAPTER=${LITE_WITH_NNADAPTER}
+          -DNNADAPTER_WITH_HUAWEI_ASCEND_NPU=${NNADAPTER_WITH_HUAWEI_ASCEND_NPU}
+          -DNNADAPTER_HUAWEI_ASCEND_NPU_SDK_ROOT=${NPU_SDK_ROOT}
          -DLITE_WITH_CODE_META_INFO=OFF
           -DLITE_WITH_ARM=OFF)
 
@@ -120,6 +134,7 @@
       GIT_TAG             ${LITE_GIT_TAG}
       PREFIX              ${LITE_SOURCES_DIR}
       UPDATE_COMMAND      ""
+      PATCH_COMMAND sed -i "s?NNadapter_bridges_path = os.path.abspath('..')+\"\/lite\/kernels\/nnadapter\/bridges\/paddle_use_bridges.h\"?NNadapter_bridges_path = os.path.abspath(\'..\')+\"\/extern_lite\/lite\/kernels\/nnadapter\/bridges\/paddle_use_bridges.h\"?" ${LITE_SOURCES_DIR}/src/extern_lite//lite/tools/cmake_tools/record_supported_kernel_op.py && sed -i "/general::ssa::ConvertToSSA(cpp_prog)$/d" ${LITE_SOURCES_DIR}/src/extern_lite/lite/model_parser/model_parser.cc
      BUILD_COMMAND       ${LITE_BUILD_COMMAND}
      INSTALL_COMMAND     ""
      CMAKE_ARGS          -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER}
@@ -146,6 +161,11 @@ endif()
 if (WITH_ARM)
   if(LITE_WITH_XPU)
     set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8.xpu)
+  elseif(LITE_WITH_NNADAPTER)
+    message("Enable LITE_WITH_NNADAPTER")
+    if (NNADAPTER_WITH_HUAWEI_ASCEND_NPU)
+      set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8.nnadapter)
+    endif()
   else()
     set(LITE_OUTPUT_BIN_DIR inference_lite_lib.armlinux.armv8)
   endif()
@@ -174,5 +194,16 @@ endfunction()
 external_lite_libs(lite_full_static ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libpaddle_full_api_shared.so)
 set(LITE_SHARED_LIB ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libpaddle_full_api_shared.so)
 
+if (LITE_WITH_NNADAPTER)
+  set(LITE_NNADAPTER_LIB ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter.so)
+  if (NNADAPTER_WITH_HUAWEI_ASCEND_NPU)
+    external_lite_libs(lite_nnadapter ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter.so ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter_driver_huawei_ascend_npu.so)
+    set(LITE_DEPS lite_full_static lite_nnadapter)
+    set(LITE_NNADAPTER_NPU_LIB ${LITE_BINARY_DIR}/${LITE_OUTPUT_BIN_DIR}/cxx/lib/libnnadapter_driver_huawei_ascend_npu.so)
+  endif()
+else()
+  set(LITE_DEPS lite_full_static)
+endif()
+
 add_definitions(-DPADDLE_WITH_LITE)
 add_definitions(-DLITE_WITH_LOG)
diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h
index b24005cb6d9acc757fd7166f2502fc81935b8ffe..cda6dc31126d9c6faaaf557f4a433632369d941f 100644
--- a/paddle/fluid/inference/analysis/argument.h
+++ b/paddle/fluid/inference/analysis/argument.h
@@ -239,6 +239,22 @@ struct Argument {
   DECL_ARGUMENT_FIELD(xpu_precision, XpuPrecision, std::string);
   DECL_ARGUMENT_FIELD(xpu_adaptive_seqlen, XpuAdaptiveSeqlen, bool);
 
+  DECL_ARGUMENT_FIELD(use_nnadapter, UseNNAdapter, bool);
+  DECL_ARGUMENT_FIELD(nnadapter_model_cache_dir, NNAdapterModelCacheDir,
+                      std::string);
+  DECL_ARGUMENT_FIELD(nnadapter_device_names, NNAdapterDeviceNames,
+                      std::vector<std::string>);
+  DECL_ARGUMENT_FIELD(nnadapter_context_properties, NNAdapterContextProperties,
+                      std::string);
+  DECL_ARGUMENT_FIELD(nnadapter_subgraph_partition_config_buffer,
+                      NNAdapterSubgraphPartitionConfigBuffer, std::string);
+  DECL_ARGUMENT_FIELD(nnadapter_subgraph_partition_config_path,
+                      NNAdapterSubgraphPartitionConfigPath, std::string);
+  DECL_ARGUMENT_FIELD(nnadapter_model_cache_token, NNAdapterModelCacheToken,
+                      std::vector<std::string>);
+  DECL_ARGUMENT_FIELD(nnadapter_model_cache_buffer, NNAdapterModelCacheBuffer,
+                      std::vector<std::vector<char>>);
+
   // Memory optimized related.
   DECL_ARGUMENT_FIELD(enable_memory_optim, EnableMemoryOptim, bool);
diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc
index 8eb7e8d13886f4b14f22ba674d9eab063b15d354..4fdd963b6abff98f052175950e20b6999472569e 100644
--- a/paddle/fluid/inference/analysis/ir_pass_manager.cc
+++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -202,6 +202,27 @@ void IRPassManager::CreatePasses(Argument *argument,
                 new std::string(argument->xpu_autotune_file()));
       pass->Set("precision", new std::string(argument->xpu_precision()));
       pass->Set("adaptive_seqlen", new bool(argument->xpu_adaptive_seqlen()));
+      // NNAdapter Related
+      pass->Set("use_nnadapter", new bool(argument->use_nnadapter()));
+      pass->Set("nnadapter_model_cache_dir",
+                new std::string(argument->nnadapter_model_cache_dir()));
+      pass->Set(
+          "nnadapter_device_names",
+          new std::vector<std::string>(argument->nnadapter_device_names()));
+      pass->Set("nnadapter_context_properties",
+                new std::string(argument->nnadapter_context_properties()));
+      pass->Set("nnadapter_subgraph_partition_config_buffer",
+                new std::string(
+                    argument->nnadapter_subgraph_partition_config_buffer()));
+      pass->Set("nnadapter_subgraph_partition_config_path",
+                new std::string(
+                    argument->nnadapter_subgraph_partition_config_path()));
+      pass->Set("nnadapter_model_cache_buffer",
+                new std::vector<std::vector<char>>(
+                    argument->nnadapter_model_cache_buffer()));
+      pass->Set("nnadapter_model_cache_token",
+                new std::vector<std::string>(
+                    argument->nnadapter_model_cache_token()));
     }
     disable_logs_ = argument->disable_logs();
     if (pass_name == "fc_fuse_pass") {
diff --git a/paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc
index b8cac8992f4eed36b653b08febe48630c3977652..c04342f837e3f9cb3f29eca3fcb7ca9e503af0a8 100644
--- a/paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc
+++ b/paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc
@@ -250,12 +250,30 @@ void LiteSubgraphPass::SetUpEngine(
   std::string autotune_file = Get<std::string>("autotune_file");
   std::string precision = Get<std::string>("precision");
   bool adaptive_seqlen = Get<bool>("adaptive_seqlen");
+  // NNAdapter Related
+  bool use_nnadapter = Get<bool>("use_nnadapter");
+  std::string nnadapter_model_cache_dir =
+      Get<std::string>("nnadapter_model_cache_dir");
+  auto nnadapter_device_names =
+      Get<std::vector<std::string>>("nnadapter_device_names");
+  std::string nnadapter_context_properties =
+      Get<std::string>("nnadapter_context_properties");
+  std::string nnadapter_subgraph_partition_config_buffer =
+      Get<std::string>("nnadapter_subgraph_partition_config_buffer");
+  std::string nnadapter_subgraph_partition_config_path =
+      Get<std::string>("nnadapter_subgraph_partition_config_path");
+  auto nnadapter_model_cache_buffer =
+      Get<std::vector<std::vector<char>>>("nnadapter_model_cache_buffer");
+  auto nnadapter_model_cache_token =
+      Get<std::vector<std::string>>("nnadapter_model_cache_token");
   lite_api::TargetType target_type;
   if (use_gpu) {
     target_type = TARGET(kCUDA);
   } else if (use_xpu) {
     target_type = TARGET(kXPU);
+  } else if (use_nnadapter) {
+    target_type = TARGET(kNNAdapter);
   } else {
 #ifdef PADDLE_WITH_ARM
     target_type = TARGET(kARM);
@@ -292,6 +310,17 @@ void LiteSubgraphPass::SetUpEngine(
   config.autotune_file = autotune_file;
   config.precision = precision;
   config.adaptive_seqlen = adaptive_seqlen;
+  // NNAdapter Related
+  config.nnadapter_model_cache_dir = nnadapter_model_cache_dir;
+  config.nnadapter_device_names = nnadapter_device_names;
+  config.nnadapter_context_properties = nnadapter_context_properties;
+  config.nnadapter_subgraph_partition_config_buffer =
+      nnadapter_subgraph_partition_config_buffer;
+  config.nnadapter_subgraph_partition_config_path =
+      nnadapter_subgraph_partition_config_path;
+  config.nnadapter_model_cache_buffer = nnadapter_model_cache_buffer;
+  config.nnadapter_model_cache_token = nnadapter_model_cache_token;
+
   if (dump_model) {
     lite::StrToBinaryFile("./model.bin", config.model);
     lite::StrToBinaryFile("./param.bin", config.param);
diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc
index ac540c75511ef29c840e258ada2171cb9ee0b262..5d056e054f51c5889374024512dcbbe5af1586ca 100644
--- a/paddle/fluid/inference/api/analysis_config.cc
+++ b/paddle/fluid/inference/api/analysis_config.cc
@@ -207,6 +207,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   // NPU related.
   CP_MEMBER(use_npu_);
   CP_MEMBER(npu_device_id_);
+  CP_MEMBER(nnadapter_config_);
 
   // profile related.
   CP_MEMBER(with_profile_);
@@ -554,7 +555,7 @@ void AnalysisConfig::Update() {
   }
 
   if (use_npu_) {
-#ifdef PADDLE_WITH_ASCEND_CL
+#if defined(PADDLE_WITH_ASCEND_CL) || defined(LITE_SUBGRAPH_WITH_NPU)
     PADDLE_ENFORCE_EQ(use_gpu_, false,
                       platform::errors::Unavailable(
                           "Currently, NPU and GPU cannot be enabled in the "
@@ -833,6 +834,61 @@ std::string AnalysisConfig::Summary() {
   return os.PrintTable();
 }
 
+LiteNNAdapterConfig &LiteNNAdapterConfig::SetDeviceNames(
+    const std::vector<std::string> &names) {
+  nnadapter_device_names = names;
+  return *this;
+}
+
+LiteNNAdapterConfig &LiteNNAdapterConfig::SetContextProperties(
+    const std::string &properties) {
+  nnadapter_context_properties = properties;
+  return *this;
+}
+
+LiteNNAdapterConfig &LiteNNAdapterConfig::SetModelCacheDir(
+    const std::string &dir) {
+  nnadapter_model_cache_dir = dir;
+  return *this;
+}
+
+LiteNNAdapterConfig &LiteNNAdapterConfig::SetModelCacheBuffers(
+    const std::string &model_cache_token,
+    const std::vector<char> &model_cache_buffer) {
+  PADDLE_ENFORCE_EQ(model_cache_token.empty(), false,
+                    platform::errors::InvalidArgument(
+                        "model_cache_token should not be empty."));
+  PADDLE_ENFORCE_EQ(model_cache_buffer.empty(), false,
+                    platform::errors::InvalidArgument(
+                        "model_cache_buffer should not be empty."));
+  PADDLE_ENFORCE_EQ(nnadapter_model_cache_buffers.count(model_cache_token),
+                    false, platform::errors::InvalidArgument(
+                               "model_cache_token has already been set."));
+
+  nnadapter_model_cache_buffers[model_cache_token] = model_cache_buffer;
+  return *this;
+}
+
+LiteNNAdapterConfig &LiteNNAdapterConfig::SetSubgraphPartitionConfigPath(
+    const std::string &path) {
+  nnadapter_subgraph_partition_config_path = path;
+  return *this;
+}
+
+LiteNNAdapterConfig &LiteNNAdapterConfig::SetSubgraphPartitionConfigBuffer(
+    const std::string &buffer) {
+  nnadapter_subgraph_partition_config_buffer = buffer;
+  return *this;
+}
+LiteNNAdapterConfig &LiteNNAdapterConfig::Enable() {
+  use_nnadapter = true;
+  return *this;
+}
+LiteNNAdapterConfig &LiteNNAdapterConfig::Disable() {
+  use_nnadapter = false;
+  return *this;
+}
+
 void AnalysisConfig::CollectShapeRangeInfo(
     const std::string &shape_range_info_path) {
   LOG(INFO) << "In CollectShapeInfo mode, we will disable optimizations and "
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index efeb0da27804c5db5b1c1680b356568b3e45ca7b..6ebb2193e21187af8e439ad735ab864355808966 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -276,6 +276,22 @@ bool AnalysisPredictor::CreateExecutor() {
"You tried to use NPU forward propagation, but Paddle was not compiled " "with WITH_ASCEND_CL.")); #endif + } else if (config_.NNAdapter().use_nnadapter) { + if (config_.lite_engine_enabled()) { + place_ = paddle::platform::CPUPlace(); +#ifndef LITE_SUBGRAPH_WITH_NNADAPTER + PADDLE_THROW( + platform::errors::Unavailable("You tried to use an NNAdapter lite " + "engine, but Paddle was not compiled " + "with it.")); +#endif // LITE_SUBGRAPH_WITH_NNADAPTER + } else { + PADDLE_THROW( + platform::errors::Unavailable("You tried to use NNadapter forward " + "propagation (inference without lite " + "engine), but Paddle was not compiled " + "with LITE_WITH_NNADAPTER.")); + } } else { place_ = paddle::platform::CPUPlace(); } @@ -601,6 +617,26 @@ void AnalysisPredictor::PrepareArgument() { argument_.SetXpuAutotuneFile(config_.xpu_autotune_file_); argument_.SetXpuPrecision(config_.xpu_precision_); argument_.SetXpuAdaptiveSeqlen(config_.xpu_adaptive_seqlen_); + // NNAdapter related + argument_.SetUseNNAdapter(config_.NNAdapter().use_nnadapter); + argument_.SetNNAdapterDeviceNames( + config_.NNAdapter().nnadapter_device_names); + argument_.SetNNAdapterContextProperties( + config_.NNAdapter().nnadapter_context_properties); + argument_.SetNNAdapterModelCacheDir( + config_.NNAdapter().nnadapter_model_cache_dir); + argument_.SetNNAdapterSubgraphPartitionConfigBuffer( + config_.NNAdapter().nnadapter_subgraph_partition_config_buffer); + argument_.SetNNAdapterSubgraphPartitionConfigPath( + config_.NNAdapter().nnadapter_subgraph_partition_config_path); + std::vector buffer_keys; + std::vector> buffer_vals; + for (auto it : config_.NNAdapter().nnadapter_model_cache_buffers) { + buffer_keys.emplace_back(it.first); + buffer_vals.emplace_back(it.second); + } + argument_.SetNNAdapterModelCacheToken(buffer_keys); + argument_.SetNNAdapterModelCacheBuffer(buffer_vals); LOG(INFO) << "Lite subgraph engine is enabled"; } diff --git a/paddle/fluid/inference/api/analysis_predictor_tester.cc b/paddle/fluid/inference/api/analysis_predictor_tester.cc index 513f3669a19ce9d760d53d5b1d2c94c4b0c55703..86fbde00075f0985ee14303c10a54cbf363aecf7 100644 --- a/paddle/fluid/inference/api/analysis_predictor_tester.cc +++ b/paddle/fluid/inference/api/analysis_predictor_tester.cc @@ -61,6 +61,26 @@ TEST(AnalysisPredictor, analysis_off) { ASSERT_TRUE(predictor->Run(inputs, &outputs)); } +#ifndef WIN32 +TEST(AnalysisPredictor, lite_nn_adapter_npu) { + AnalysisConfig config; + config.SetModel(FLAGS_dirname); + config.EnableLiteEngine(); + config.NNAdapter() + .Disable() + .Enable() + .SetDeviceNames({"huawei_ascend_npu"}) + .SetContextProperties("HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS=0") + .SetModelCacheDir("cache_dirr") + .SetSubgraphPartitionConfigPath("") + .SetModelCacheBuffers("c1", {'c'}); +#ifndef LITE_SUBGRAPH_WITH_NNADAPTER + EXPECT_THROW(CreatePaddlePredictor(config), + paddle::platform::EnforceNotMet); +#endif +} +#endif + TEST(AnalysisPredictor, analysis_on) { AnalysisConfig config; config.SetModel(FLAGS_dirname); diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h index a64377f80f8aad7db5c2a5c4b91160354dde81e7..d6a0b643c2aeee41d501034462aaf6a5f48b9f27 100644 --- a/paddle/fluid/inference/api/paddle_analysis_config.h +++ b/paddle/fluid/inference/api/paddle_analysis_config.h @@ -48,6 +48,34 @@ namespace paddle { class AnalysisPredictor; struct MkldnnQuantizerConfig; +struct LiteNNAdapterConfig { + bool use_nnadapter{false}; + std::string nnadapter_model_cache_dir; + 
+  std::map<std::string, std::vector<char>> nnadapter_model_cache_buffers;
+  std::vector<std::string> nnadapter_device_names;
+  std::string nnadapter_context_properties;
+  std::string nnadapter_subgraph_partition_config_path;
+  std::string nnadapter_subgraph_partition_config_buffer;
+
+  LiteNNAdapterConfig& SetDeviceNames(const std::vector<std::string>& names);
+
+  LiteNNAdapterConfig& SetContextProperties(const std::string& properties);
+
+  LiteNNAdapterConfig& SetModelCacheDir(const std::string& dir);
+
+  LiteNNAdapterConfig& SetModelCacheBuffers(
+      const std::string& model_cache_token,
+      const std::vector<char>& model_cache_buffer);
+
+  LiteNNAdapterConfig& SetSubgraphPartitionConfigPath(const std::string& path);
+
+  LiteNNAdapterConfig& SetSubgraphPartitionConfigBuffer(
+      const std::string& buffer);
+
+  LiteNNAdapterConfig& Enable();
+  LiteNNAdapterConfig& Disable();
+};
+
 ///
 /// \brief configuration manager for AnalysisPredictor.
 /// \since 1.7.0
@@ -692,6 +720,8 @@ struct PD_INFER_DECL AnalysisConfig {
   ///
   std::string Summary();
 
+  LiteNNAdapterConfig& NNAdapter() { return nnadapter_config_; }
+
 protected:
   // Update the config.
   void Update();
@@ -800,6 +830,9 @@ struct PD_INFER_DECL AnalysisConfig {
   std::string xpu_precision_;
   bool xpu_adaptive_seqlen_;
 
+  // NNAdapter related
+  LiteNNAdapterConfig nnadapter_config_;
+
   // mkldnn related.
   int mkldnn_cache_capacity_{10};
   bool use_mkldnn_quantizer_{false};
diff --git a/paddle/fluid/inference/lite/CMakeLists.txt b/paddle/fluid/inference/lite/CMakeLists.txt
index 2482a6917530bb69f0c2bcc7f2b2b9ca0a2d8f8b..6d981d007e73a633ed4e0021474dcd54e651c6a0 100644
--- a/paddle/fluid/inference/lite/CMakeLists.txt
+++ b/paddle/fluid/inference/lite/CMakeLists.txt
@@ -2,8 +2,8 @@ if(XPU_SDK_ROOT)
   set(XPU_DEPS xpuapi xpurt)
 endif()
 
-cc_library(lite_op_teller SRCS op_teller.cc DEPS lite_full_static framework_proto device_context boost xxhash)
-cc_library(lite_engine SRCS engine.cc DEPS lite_full_static framework_proto ${XPU_DEPS})
-cc_library(lite_tensor_utils SRCS tensor_utils.cc DEPS memcpy lite_full_static framework_proto boost device_context ${XPU_DEPS})
+cc_library(lite_op_teller SRCS op_teller.cc DEPS ${LITE_DEPS} framework_proto device_context boost xxhash)
+cc_library(lite_engine SRCS engine.cc DEPS ${LITE_DEPS} framework_proto ${XPU_DEPS})
+cc_library(lite_tensor_utils SRCS tensor_utils.cc DEPS memcpy ${LITE_DEPS} framework_proto boost device_context ${XPU_DEPS})
 cc_test(test_lite_engine SRCS test_engine_lite.cc DEPS lite_engine protobuf framework_proto glog gtest analysis)
 cc_test(test_lite_tensor_utils SRCS test_tensor_utils.cc DEPS lite_engine lite_tensor_utils)
diff --git a/paddle/fluid/inference/lite/engine.cc b/paddle/fluid/inference/lite/engine.cc
index 908e1ab990bb73b124158f66cd0413a4b6a20907..47b9d681b4754f7b05741522108a3e71bddc9527 100644
--- a/paddle/fluid/inference/lite/engine.cc
+++ b/paddle/fluid/inference/lite/engine.cc
@@ -69,6 +69,25 @@ paddle::lite_api::PaddlePredictor* EngineManager::Create(
       cfg.adaptive_seqlen);
 #endif
 
+#ifdef LITE_SUBGRAPH_WITH_NPU
+  lite_cxx_config.set_nnadapter_device_names(cfg.nnadapter_device_names);
+  lite_cxx_config.set_nnadapter_context_properties(
+      cfg.nnadapter_context_properties);
+  lite_cxx_config.set_nnadapter_model_cache_dir(cfg.nnadapter_model_cache_dir);
+  if (!cfg.nnadapter_subgraph_partition_config_path.empty()) {
+    lite_cxx_config.set_nnadapter_subgraph_partition_config_path(
+        cfg.nnadapter_subgraph_partition_config_path);
+  }
+  if (!cfg.nnadapter_subgraph_partition_config_buffer.empty()) {
+    lite_cxx_config.set_nnadapter_subgraph_partition_config_buffer(
+        cfg.nnadapter_subgraph_partition_config_buffer);
+  }
+  for (size_t i = 0; i < cfg.nnadapter_model_cache_token.size(); ++i) {
+    lite_cxx_config.set_nnadapter_model_cache_buffers(
+        cfg.nnadapter_model_cache_token[i],
+        cfg.nnadapter_model_cache_buffer[i]);
+  }
+#endif
   // create predictor
   std::shared_ptr<paddle::lite_api::PaddlePredictor> p =
       paddle::lite_api::CreatePaddlePredictor(lite_cxx_config);
diff --git a/paddle/fluid/inference/lite/engine.h b/paddle/fluid/inference/lite/engine.h
index a64ef1eda828bf2a5fc96c1cc8435c0a4b6912c6..48072656cb996683172210015fef8115de995bf0 100644
--- a/paddle/fluid/inference/lite/engine.h
+++ b/paddle/fluid/inference/lite/engine.h
@@ -53,6 +53,15 @@ struct EngineConfig {
 
   // for cuda
   bool use_multi_stream{false};
+
+  // for nnadapter or npu.
+  std::string nnadapter_model_cache_dir;
+  std::vector<std::string> nnadapter_device_names;
+  std::string nnadapter_context_properties;
+  std::string nnadapter_subgraph_partition_config_buffer;
+  std::string nnadapter_subgraph_partition_config_path;
+  std::vector<std::string> nnadapter_model_cache_token;
+  std::vector<std::vector<char>> nnadapter_model_cache_buffer;
 };
 
 class EngineManager {
diff --git a/paddle/fluid/operators/lite/lite_engine_op_test.cc b/paddle/fluid/operators/lite/lite_engine_op_test.cc
index 44ba1e4e497bf1caa737326e4918a4d19ea0044d..8b7f1268081343175a1b8bfdd4971f62c69a3a2f 100644
--- a/paddle/fluid/operators/lite/lite_engine_op_test.cc
+++ b/paddle/fluid/operators/lite/lite_engine_op_test.cc
@@ -30,6 +30,8 @@ using paddle::inference::lite::CreateTensor;
 using paddle::inference::lite::serialize_params;
 namespace paddle {
 namespace operators {
+
+#if defined(PADDLE_WITH_CUDA)
 TEST(LiteEngineOp, engine_op) {
   framework::ProgramDesc program;
   auto* block_ = program.Proto()->mutable_blocks(0);
@@ -75,8 +77,8 @@ TEST(LiteEngineOp, engine_op) {
   platform::CPUDeviceContext ctx(place);
 #endif
   // Prepare variables.
-  CreateTensor(&scope, "x", std::vector<int64_t>({2, 4}), false);
-  CreateTensor(&scope, "y", std::vector<int64_t>({2, 4}), false);
+  CreateTensor(&scope, "x", std::vector<int64_t>({2, 4}), true);
+  CreateTensor(&scope, "y", std::vector<int64_t>({2, 4}), true);
   CreateTensor(&scope, "out", std::vector<int64_t>({2, 4}), false);
 
   ASSERT_EQ(block_->ops_size(), 4);
@@ -113,5 +115,7 @@ TEST(LiteEngineOp, engine_op) {
   engine_op->Run(scope, place);
   LOG(INFO) << "done";
 }
+#endif
+
 }  // namespace operators
 }  // namespace paddle
diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc
index b1a91cd302187ed6c43d099c21277dc6b5d89214..a18cbfb86fe5c0e73c8209a494059dfcf5b35f4a 100644
--- a/paddle/fluid/pybind/inference_api.cc
+++ b/paddle/fluid/pybind/inference_api.cc
@@ -87,6 +87,7 @@ void BindPaddlePlace(py::module *m);
 void BindPaddlePredictor(py::module *m);
 void BindNativeConfig(py::module *m);
 void BindNativePredictor(py::module *m);
+void BindLiteNNAdapterConfig(py::module *m);
 void BindAnalysisConfig(py::module *m);
 void BindAnalysisPredictor(py::module *m);
 void BindZeroCopyTensor(py::module *m);
@@ -303,6 +304,7 @@ void BindInferenceApi(py::module *m) {
   BindPaddlePredictor(m);
   BindNativeConfig(m);
   BindNativePredictor(m);
+  BindLiteNNAdapterConfig(m);
   BindAnalysisConfig(m);
   BindAnalysisPredictor(m);
   BindPaddleInferPredictor(m);
@@ -624,7 +626,26 @@ void BindAnalysisConfig(py::module *m) {
            [](AnalysisConfig &self) {
              return dynamic_cast<PaddlePassBuilder *>(self.pass_builder());
            },
-           py::return_value_policy::reference);
+           py::return_value_policy::reference)
+      .def("nnadapter", &AnalysisConfig::NNAdapter);
+}
+
+void BindLiteNNAdapterConfig(py::module *m) {
+  py::class_<LiteNNAdapterConfig> lite_nnadapter_config(*m,
+                                                        "LiteNNAdapterConfig");
+
+  lite_nnadapter_config
+      .def("set_device_names", &LiteNNAdapterConfig::SetDeviceNames)
+      .def("set_context_properties", &LiteNNAdapterConfig::SetContextProperties)
+      .def("set_model_cache_dir", &LiteNNAdapterConfig::SetModelCacheDir)
+      .def("set_model_cache_buffers",
+           &LiteNNAdapterConfig::SetModelCacheBuffers)
+      .def("set_subgraph_partition_config_path",
+           &LiteNNAdapterConfig::SetSubgraphPartitionConfigPath)
+      .def("set_subgraph_partition_config_buffer",
+           &LiteNNAdapterConfig::SetSubgraphPartitionConfigBuffer)
+      .def("enable", &LiteNNAdapterConfig::Enable)
+      .def("disable", &LiteNNAdapterConfig::Disable);
 }
 
 #ifdef PADDLE_WITH_MKLDNN
diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh
index 8c62ccddbc99a88bd0707445813210e4a898cb84..7a0a74d6101a541eca8d59421755836702147c91 100755
--- a/paddle/scripts/paddle_build.sh
+++ b/paddle/scripts/paddle_build.sh
@@ -223,7 +223,7 @@ function cmake_base() {
         -DWITH_GLOO=${gloo_flag}
         -DWITH_LITE=${WITH_LITE:-OFF}
         -DWITH_XPU=${WITH_XPU:-OFF}
-        -DLITE_GIT_TAG=release/v2.8
+        -DLITE_GIT_TAG=_release/v2.10
         -DWITH_UNITY_BUILD=${WITH_UNITY_BUILD:-OFF}
         -DWITH_XPU_BKCL=${WITH_XPU_BKCL:-OFF}
         -DWITH_ARM=${WITH_ARM:-OFF}
@@ -266,7 +266,7 @@ EOF
         -DWITH_PSCORE=${distibuted_flag} \
         -DWITH_PSLIB=${WITH_PSLIB:-OFF} \
         -DWITH_GLOO=${gloo_flag} \
-        -DLITE_GIT_TAG=release/v2.8 \
+        -DLITE_GIT_TAG=_release/v2.10 \
         -DWITH_XPU=${WITH_XPU:-OFF} \
         -DXPU_SDK_ROOT=${XPU_SDK_ROOT:-""} \
         -DWITH_LITE=${WITH_LITE:-OFF} \
diff --git a/python/setup.py.in b/python/setup.py.in
index d78d91a1d412c6640bc063b2698e474dfd5a88d3..b10d5df541f2ff8527b06565cc2b297396d26867 100644
--- a/python/setup.py.in
+++ b/python/setup.py.in
@@ -338,6 +338,12 @@ else:
     if '${WITH_LITE}' == 'ON':
         shutil.copy('${LITE_SHARED_LIB}', libs_path)
         package_data['paddle.libs']+=['libpaddle_full_api_shared' + ext_name]
+        if '${LITE_WITH_NNADAPTER}' == 'ON':
+            shutil.copy('${LITE_NNADAPTER_LIB}', libs_path)
+            package_data['paddle.libs']+=['libnnadapter' + ext_name]
+            if '${NNADAPTER_WITH_HUAWEI_ASCEND_NPU}' == 'ON':
+                shutil.copy('${LITE_NNADAPTER_NPU_LIB}', libs_path)
+                package_data['paddle.libs']+=['libnnadapter_driver_huawei_ascend_npu' + ext_name]
 
     if '${WITH_PSLIB}' == 'ON':
         shutil.copy('${PSLIB_LIB}', libs_path)
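
Usage sketch: a minimal C++ example of how the LiteNNAdapterConfig API added in this patch can be driven from AnalysisConfig, mirroring the lite_nn_adapter_npu unit test above. The model directory and cache directory are hypothetical placeholders, and the sketch assumes a Paddle build with LITE_WITH_NNADAPTER and NNADAPTER_WITH_HUAWEI_ASCEND_NPU enabled.

#include "paddle/fluid/inference/api/paddle_inference_api.h"

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("./mobilenet_v1");  // hypothetical model directory
  config.EnableLiteEngine();          // NNAdapter runs through the Lite subgraph engine
  config.NNAdapter()
      .Enable()
      .SetDeviceNames({"huawei_ascend_npu"})
      .SetContextProperties("HUAWEI_ASCEND_NPU_SELECTED_DEVICE_IDS=0")
      .SetModelCacheDir("./nnadapter_cache");  // hypothetical cache directory
  auto predictor = paddle::CreatePaddlePredictor(config);
  return predictor != nullptr ? 0 : 1;
}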