diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h
index 05a8e8f1b5e3e33ae73047176b3b54536b77a22d..255c6ca75dfd74b3cf5984661ea931d36295f72a 100644
--- a/paddle/fluid/inference/analysis/argument.h
+++ b/paddle/fluid/inference/analysis/argument.h
@@ -227,6 +227,11 @@ struct Argument {
 
   DECL_ARGUMENT_FIELD(use_xpu, UseXpu, bool);
   DECL_ARGUMENT_FIELD(xpu_l3_workspace_size, XpuL3WorkspaceSize, int);
+  DECL_ARGUMENT_FIELD(xpu_locked, XpuLocked, bool);
+  DECL_ARGUMENT_FIELD(xpu_autotune, XpuAutotune, bool);
+  DECL_ARGUMENT_FIELD(xpu_autotune_file, XpuAutotuneFile, std::string);
+  DECL_ARGUMENT_FIELD(xpu_precision, XpuPrecision, std::string);
+  DECL_ARGUMENT_FIELD(xpu_adaptive_seqlen, XpuAdaptiveSeqlen, bool);
 
   // Memory optimized related.
   DECL_ARGUMENT_FIELD(enable_memory_optim, EnableMemoryOptim, bool);
diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc
index 06d48a536664486043c6615f16b442b76d818bb7..8407f98e6dfd9bb253558242fea052846d71eb7e 100644
--- a/paddle/fluid/inference/analysis/ir_pass_manager.cc
+++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -188,6 +188,12 @@ void IRPassManager::CreatePasses(Argument *argument,
                 new int(argument->xpu_l3_workspace_size()));
       pass->Set("cpu_math_library_num_threads",
                 new int(argument->cpu_math_library_num_threads()));
+      pass->Set("locked", new bool(argument->xpu_locked()));
+      pass->Set("autotune", new bool(argument->xpu_autotune()));
+      pass->Set("autotune_file",
+                new std::string(argument->xpu_autotune_file()));
+      pass->Set("precision", new std::string(argument->xpu_precision()));
+      pass->Set("adaptive_seqlen", new bool(argument->xpu_adaptive_seqlen()));
     }
     disable_logs_ = argument->disable_logs();
     if (pass_name == "fc_fuse_pass") {
diff --git a/paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc
index c697914904b3e949050d6e61a7edb521ad6dd0e5..b8cac8992f4eed36b653b08febe48630c3977652 100644
--- a/paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc
+++ b/paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc
@@ -245,6 +245,11 @@ void LiteSubgraphPass::SetUpEngine(
   bool use_xpu = Get<bool>("use_xpu");
   int xpu_l3_workspace_size = Get<int>("xpu_l3_workspace_size");
   int cpu_math_library_num_threads = Get<int>("cpu_math_library_num_threads");
+  bool locked = Get<bool>("locked");
+  bool autotune = Get<bool>("autotune");
+  std::string autotune_file = Get<std::string>("autotune_file");
+  std::string precision = Get<std::string>("precision");
+  bool adaptive_seqlen = Get<bool>("adaptive_seqlen");
 
   lite_api::TargetType target_type;
   if (use_gpu) {
@@ -282,6 +287,11 @@ void LiteSubgraphPass::SetUpEngine(
   };
   config.cpu_math_library_num_threads = cpu_math_library_num_threads;
   config.xpu_l3_workspace_size = xpu_l3_workspace_size;
+  config.locked = locked;
+  config.autotune = autotune;
+  config.autotune_file = autotune_file;
+  config.precision = precision;
+  config.adaptive_seqlen = adaptive_seqlen;
   if (dump_model) {
     lite::StrToBinaryFile("./model.bin", config.model);
     lite::StrToBinaryFile("./param.bin", config.param);
diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc
index 7e874b94decbf6053f0882d5d22825584c4fc496..853c1ac1da8742733e609c1dea098a208eadc015 100644
--- a/paddle/fluid/inference/api/analysis_config.cc
+++ b/paddle/fluid/inference/api/analysis_config.cc
@@ -96,9 +96,17 @@ void AnalysisConfig::DisableFCPadding() {
   Update();
 }
 
-void AnalysisConfig::EnableXpu(int l3_workspace_size) {
+void AnalysisConfig::EnableXpu(int l3_workspace_size, bool locked,
+                               bool autotune, const std::string &autotune_file,
+                               const std::string &precision,
+                               bool adaptive_seqlen) {
   use_xpu_ = true;
   xpu_l3_workspace_size_ = l3_workspace_size;
+  xpu_locked_ = locked;
+  xpu_autotune_ = autotune;
+  xpu_autotune_file_ = autotune_file;
+  xpu_precision_ = precision;
+  xpu_adaptive_seqlen_ = adaptive_seqlen;
   Update();
 }
 
@@ -161,6 +169,11 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
 
   CP_MEMBER(use_xpu_);
   CP_MEMBER(xpu_l3_workspace_size_);
+  CP_MEMBER(xpu_locked_);
+  CP_MEMBER(xpu_autotune_);
+  CP_MEMBER(xpu_autotune_file_);
+  CP_MEMBER(xpu_precision_);
+  CP_MEMBER(xpu_adaptive_seqlen_);
 
   // profile related.
   CP_MEMBER(with_profile_);
@@ -548,6 +561,11 @@ std::string AnalysisConfig::SerializeInfoCache() {
   ss << use_lite_;
   ss << use_xpu_;
   ss << xpu_l3_workspace_size_;
+  ss << xpu_locked_;
+  ss << xpu_autotune_;
+  ss << xpu_autotune_file_;
+  ss << xpu_precision_;
+  ss << xpu_adaptive_seqlen_;
 
   ss << thread_local_stream_;
 
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 698cbea5eb83b775abea3d84e03a77cd9b2a72c7..95b08318368438943b538774cbeb83e2d92a5103 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -552,6 +552,11 @@ void AnalysisPredictor::PrepareArgument() {
     argument_.SetLiteZeroCopy(config_.lite_zero_copy_);
     argument_.SetUseXpu(config_.use_xpu_);
     argument_.SetXpuL3WorkspaceSize(config_.xpu_l3_workspace_size_);
+    argument_.SetXpuLocked(config_.xpu_locked_);
+    argument_.SetXpuAutotune(config_.xpu_autotune_);
+    argument_.SetXpuAutotuneFile(config_.xpu_autotune_file_);
+    argument_.SetXpuPrecision(config_.xpu_precision_);
+    argument_.SetXpuAdaptiveSeqlen(config_.xpu_adaptive_seqlen_);
     LOG(INFO) << "Lite subgraph engine is enabled";
   }
 
diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h
index 446d6770f6399940754f176c1a0cc1af14ae72db..2bbd4bb837a22f672e5aa625f299424b6f0c5b88 100644
--- a/paddle/fluid/inference/api/paddle_analysis_config.h
+++ b/paddle/fluid/inference/api/paddle_analysis_config.h
@@ -177,7 +177,10 @@ struct PD_INFER_DECL AnalysisConfig {
   ///
   void DisableGpu();
 
-  void EnableXpu(int l3_workspace_size = 0xfffc00);
+  void EnableXpu(int l3_workspace_size = 0xfffc00, bool locked = false,
+                 bool autotune = true, const std::string& autotune_file = "",
+                 const std::string& precision = "int16",
+                 bool adaptive_seqlen = false);
   ///
   /// \brief A boolean state telling whether the GPU is turned on.
   ///
@@ -668,6 +671,11 @@ struct PD_INFER_DECL AnalysisConfig {
   bool thread_local_stream_{false};
   bool use_xpu_{false};
   int xpu_l3_workspace_size_;
+  bool xpu_locked_;
+  bool xpu_autotune_;
+  std::string xpu_autotune_file_;
+  std::string xpu_precision_;
+  bool xpu_adaptive_seqlen_;
 
   // mkldnn related.
   int mkldnn_cache_capacity_{0};
diff --git a/paddle/fluid/inference/lite/engine.cc b/paddle/fluid/inference/lite/engine.cc
index 59a786e46c98bf5972f23bd6148712eccc198aa6..908e1ab990bb73b124158f66cd0413a4b6a20907 100644
--- a/paddle/fluid/inference/lite/engine.cc
+++ b/paddle/fluid/inference/lite/engine.cc
@@ -59,8 +59,14 @@ paddle::lite_api::PaddlePredictor* EngineManager::Create(
 #endif
 
 #ifdef LITE_SUBGRAPH_WITH_XPU
+  // Deprecated in Paddle-Lite release/v2.8
   lite_cxx_config.set_xpu_workspace_l3_size_per_thread(
       cfg.xpu_l3_workspace_size);
+  lite_cxx_config.set_xpu_l3_cache_method(cfg.xpu_l3_workspace_size,
+                                          cfg.locked);
+  lite_cxx_config.set_xpu_conv_autotune(cfg.autotune, cfg.autotune_file);
+  lite_cxx_config.set_xpu_multi_encoder_method(cfg.precision,
+                                               cfg.adaptive_seqlen);
 #endif
 
   // create predictor
diff --git a/paddle/fluid/inference/lite/engine.h b/paddle/fluid/inference/lite/engine.h
index 5ba487cc24d7d58cd87853a58fc12f1a82c3610d..a64ef1eda828bf2a5fc96c1cc8435c0a4b6912c6 100644
--- a/paddle/fluid/inference/lite/engine.h
+++ b/paddle/fluid/inference/lite/engine.h
@@ -42,6 +42,11 @@ struct EngineConfig {
 
   // for xpu
   size_t xpu_l3_workspace_size;
+  bool locked = false;
+  bool autotune = true;
+  std::string autotune_file = "";
+  std::string precision = "int16";
+  bool adaptive_seqlen = false;
 
   // for x86 or arm
   int cpu_math_library_num_threads{1};
diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc
index 606af27f6baf2a06a3670e6d87065c68513a7241..8a5ad5852aedf5b157876c5d892d2ac4f42c022d 100644
--- a/paddle/fluid/pybind/inference_api.cc
+++ b/paddle/fluid/pybind/inference_api.cc
@@ -467,7 +467,10 @@ void BindAnalysisConfig(py::module *m) {
       .def("enable_use_gpu", &AnalysisConfig::EnableUseGpu,
           py::arg("memory_pool_init_size_mb"), py::arg("device_id") = 0)
      .def("enable_xpu", &AnalysisConfig::EnableXpu,
-           py::arg("l3_workspace_size"))
+           py::arg("l3_workspace_size") = 16 * 1024 * 1024,
+           py::arg("locked") = false, py::arg("autotune") = true,
+           py::arg("autotune_file") = "", py::arg("precision") = "int16",
+           py::arg("adaptive_seqlen") = false)
      .def("disable_gpu", &AnalysisConfig::DisableGpu)
      .def("use_gpu", &AnalysisConfig::use_gpu)
      .def("use_xpu", &AnalysisConfig::use_xpu)
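
Usage note (not part of the patch): a minimal C++ sketch of how the extended EnableXpu signature is meant to be called. The model directory, the autotune cache file name, and the explicit argument values are illustrative assumptions; the EnableLiteEngine call is included only because the XPU options above take effect through the Lite subgraph engine.

    // Sketch only: assumes a Paddle build with LITE_SUBGRAPH_WITH_XPU enabled.
    #include "paddle_inference_api.h"  // public inference API header

    int main() {
      paddle::AnalysisConfig config;
      config.SetModel("./model_dir");  // hypothetical model directory
      // XPU execution goes through the Lite subgraph engine, so enable it.
      config.EnableLiteEngine(paddle::AnalysisConfig::Precision::kFloat32);
      // The new parameters, spelled out: lock the L3 cache, autotune conv ops
      // into a cache file, run the multi-encoder in int16, and leave
      // adaptive_seqlen disabled.
      config.EnableXpu(/*l3_workspace_size=*/0xfffc00, /*locked=*/true,
                       /*autotune=*/true, /*autotune_file=*/"autotune.cache",
                       /*precision=*/"int16", /*adaptive_seqlen=*/false);
      auto predictor = paddle::CreatePaddlePredictor(config);
      return 0;
    }

The same parameters and defaults are exposed to Python through the enable_xpu binding above.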