diff --git a/paddle/fluid/inference/analysis/argument.h b/paddle/fluid/inference/analysis/argument.h
index c1f35517d8a3518d62fd4745857542ef41c15aea..bea74307b508071e2a9615b1fd522f4616504240 100644
--- a/paddle/fluid/inference/analysis/argument.h
+++ b/paddle/fluid/inference/analysis/argument.h
@@ -205,6 +205,11 @@ struct Argument {
 
   DECL_ARGUMENT_FIELD(use_xpu, UseXpu, bool);
   DECL_ARGUMENT_FIELD(xpu_l3_workspace_size, XpuL3WorkspaceSize, int);
+  DECL_ARGUMENT_FIELD(xpu_locked, XpuLocked, bool);
+  DECL_ARGUMENT_FIELD(xpu_autotune, XpuAutotune, bool);
+  DECL_ARGUMENT_FIELD(xpu_autotune_file, XpuAutotuneFile, std::string);
+  DECL_ARGUMENT_FIELD(xpu_precision, XpuPrecision, std::string);
+  DECL_ARGUMENT_FIELD(xpu_adaptive_seqlen, XpuAdaptiveSeqlen, bool);
 
   // Memory optimized related.
   DECL_ARGUMENT_FIELD(enable_memory_optim, EnableMemoryOptim, bool);
diff --git a/paddle/fluid/inference/analysis/ir_pass_manager.cc b/paddle/fluid/inference/analysis/ir_pass_manager.cc
index 94c42d1433f5247f5d96823edb7653ed39aec362..2afb93ce4016a5a7e7d89f506269afda98925ede 100644
--- a/paddle/fluid/inference/analysis/ir_pass_manager.cc
+++ b/paddle/fluid/inference/analysis/ir_pass_manager.cc
@@ -153,6 +153,12 @@ void IRPassManager::CreatePasses(Argument *argument,
                 new int(argument->xpu_l3_workspace_size()));
       pass->Set("cpu_math_library_num_threads",
                 new int(argument->cpu_math_library_num_threads()));
+      pass->Set("locked", new bool(argument->xpu_locked()));
+      pass->Set("autotune", new bool(argument->xpu_autotune()));
+      pass->Set("autotune_file",
+                new std::string(argument->xpu_autotune_file()));
+      pass->Set("precision", new std::string(argument->xpu_precision()));
+      pass->Set("adaptive_seqlen", new bool(argument->xpu_adaptive_seqlen()));
     }
     disable_logs_ = argument->disable_logs();
     if (pass_name == "fc_fuse_pass") {
diff --git a/paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc b/paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc
index 4402d5c595a2370d7d6c451c8f051d18d239d207..b56e4cbc91f3f93ff2560823fa3c43eb4c0d95c8 100644
--- a/paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc
+++ b/paddle/fluid/inference/analysis/ir_passes/lite_subgraph_pass.cc
@@ -245,6 +245,11 @@ void LiteSubgraphPass::SetUpEngine(
   bool use_xpu = Get<bool>("use_xpu");
   int xpu_l3_workspace_size = Get<int>("xpu_l3_workspace_size");
   int cpu_math_library_num_threads = Get<int>("cpu_math_library_num_threads");
+  bool locked = Get<bool>("locked");
+  bool autotune = Get<bool>("autotune");
+  std::string autotune_file = Get<std::string>("autotune_file");
+  std::string precision = Get<std::string>("precision");
+  bool adaptive_seqlen = Get<bool>("adaptive_seqlen");
 
   lite_api::TargetType target_type;
   if (use_gpu) {
@@ -277,6 +282,11 @@ void LiteSubgraphPass::SetUpEngine(
   };
   config.cpu_math_library_num_threads = cpu_math_library_num_threads;
   config.xpu_l3_workspace_size = xpu_l3_workspace_size;
+  config.locked = locked;
+  config.autotune = autotune;
+  config.autotune_file = autotune_file;
+  config.precision = precision;
+  config.adaptive_seqlen = adaptive_seqlen;
   if (dump_model) {
     lite::StrToBinaryFile("./model.bin", config.model);
     lite::StrToBinaryFile("./param.bin", config.param);
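The handoff between the two hunks above works through the IR pass attribute map: `CreatePasses` stores each new XPU knob as an owned, heap-allocated value, and `SetUpEngine` reads it back with a typed `Get<T>()`. Below is a minimal, self-contained sketch of that ownership contract; `AttrStore` is an illustrative stand-in, not Paddle's actual `framework::ir::Pass` attribute implementation.

```cpp
#include <iostream>
#include <map>
#include <memory>
#include <string>

// Illustrative stand-in for the pass attribute map: Set() takes ownership of
// a heap-allocated value; Get<T>() returns it with its original type.
class AttrStore {
 public:
  template <typename T>
  void Set(const std::string& name, T* value) {
    attrs_[name] = std::shared_ptr<void>(value);  // deleter remembers T
  }
  template <typename T>
  const T& Get(const std::string& name) const {
    return *static_cast<const T*>(attrs_.at(name).get());
  }

 private:
  std::map<std::string, std::shared_ptr<void>> attrs_;
};

int main() {
  AttrStore pass;
  pass.Set("autotune", new bool(true));
  pass.Set("precision", new std::string("int16"));
  std::cout << pass.Get<bool>("autotune") << " "
            << pass.Get<std::string>("precision") << "\n";  // prints: 1 int16
}
```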
diff --git a/paddle/fluid/inference/api/analysis_config.cc b/paddle/fluid/inference/api/analysis_config.cc
index 3691df73ef676a936430ac90b7108aed60ff1d66..c81cbac6db0ad9f14c3f056343f779d7e802b115 100644
--- a/paddle/fluid/inference/api/analysis_config.cc
+++ b/paddle/fluid/inference/api/analysis_config.cc
@@ -88,9 +88,17 @@ void AnalysisConfig::DisableFCPadding() {
   Update();
 }
 
-void AnalysisConfig::EnableXpu(int l3_workspace_size) {
+void AnalysisConfig::EnableXpu(int l3_workspace_size, bool locked,
+                               bool autotune, const std::string &autotune_file,
+                               const std::string &precision,
+                               bool adaptive_seqlen) {
   use_xpu_ = true;
   xpu_l3_workspace_size_ = l3_workspace_size;
+  xpu_locked_ = locked;
+  xpu_autotune_ = autotune;
+  xpu_autotune_file_ = autotune_file;
+  xpu_precision_ = precision;
+  xpu_adaptive_seqlen_ = adaptive_seqlen;
   Update();
 }
 
@@ -143,6 +151,11 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   CP_MEMBER(use_xpu_);
   CP_MEMBER(xpu_l3_workspace_size_);
+  CP_MEMBER(xpu_locked_);
+  CP_MEMBER(xpu_autotune_);
+  CP_MEMBER(xpu_autotune_file_);
+  CP_MEMBER(xpu_precision_);
+  CP_MEMBER(xpu_adaptive_seqlen_);
 
   // profile related.
   CP_MEMBER(with_profile_);
@@ -434,6 +447,11 @@ std::string AnalysisConfig::SerializeInfoCache() {
   ss << use_lite_;
   ss << use_xpu_;
   ss << xpu_l3_workspace_size_;
+  ss << xpu_locked_;
+  ss << xpu_autotune_;
+  ss << xpu_autotune_file_;
+  ss << xpu_precision_;
+  ss << xpu_adaptive_seqlen_;
 
   return ss.str();
 }
diff --git a/paddle/fluid/inference/api/analysis_predictor.cc b/paddle/fluid/inference/api/analysis_predictor.cc
index 360f797af71fde41213af0100452cec8fe113b43..eedf592c68f03243abe151171d9c24461c55ebb8 100644
--- a/paddle/fluid/inference/api/analysis_predictor.cc
+++ b/paddle/fluid/inference/api/analysis_predictor.cc
@@ -462,6 +462,11 @@ void AnalysisPredictor::PrepareArgument() {
     argument_.SetLiteZeroCopy(config_.lite_zero_copy_);
     argument_.SetUseXpu(config_.use_xpu_);
     argument_.SetXpuL3WorkspaceSize(config_.xpu_l3_workspace_size_);
+    argument_.SetXpuLocked(config_.xpu_locked_);
+    argument_.SetXpuAutotune(config_.xpu_autotune_);
+    argument_.SetXpuAutotuneFile(config_.xpu_autotune_file_);
+    argument_.SetXpuPrecision(config_.xpu_precision_);
+    argument_.SetXpuAdaptiveSeqlen(config_.xpu_adaptive_seqlen_);
     LOG(INFO) << "Lite subgraph engine is enabled";
   }
 
diff --git a/paddle/fluid/inference/api/paddle_analysis_config.h b/paddle/fluid/inference/api/paddle_analysis_config.h
index e1f787490f937c9b41acff25d50d3fc5a76d0a75..a03711cc2037c53b0e49d17241a47361dcfb0411 100644
--- a/paddle/fluid/inference/api/paddle_analysis_config.h
+++ b/paddle/fluid/inference/api/paddle_analysis_config.h
@@ -177,7 +177,10 @@ struct AnalysisConfig {
   ///
   void DisableGpu();
 
-  void EnableXpu(int l3_workspace_size = 0xfffc00);
+  void EnableXpu(int l3_workspace_size = 0xfffc00, bool locked = false,
+                 bool autotune = true, const std::string& autotune_file = "",
+                 const std::string& precision = "int16",
+                 bool adaptive_seqlen = false);
   ///
   /// \brief A boolean state telling whether the GPU is turned on.
   ///
@@ -587,6 +590,11 @@ struct AnalysisConfig {
   bool thread_local_stream_{false};
   bool use_xpu_{false};
   int xpu_l3_workspace_size_;
+  bool xpu_locked_;
+  bool xpu_autotune_;
+  std::string xpu_autotune_file_;
+  std::string xpu_precision_;
+  bool xpu_adaptive_seqlen_;
 
   // mkldnn related.
   int mkldnn_cache_capacity_{0};
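With the declaration above in place, a C++ caller can hand all five new knobs to the predictor in one call. A minimal sketch, assuming a placeholder model path; `EnableLiteEngine` and `CreatePaddlePredictor` are the pre-existing APIs, and everything after `l3_workspace_size` is new in this change:

```cpp
#include "paddle/fluid/inference/api/paddle_analysis_config.h"
#include "paddle/fluid/inference/api/paddle_inference_api.h"

auto MakeXpuPredictor() {
  paddle::AnalysisConfig config;
  config.SetModel("/path/to/model");  // placeholder path
  // The XPU knobs only take effect through the Lite subgraph engine.
  config.EnableLiteEngine(paddle::AnalysisConfig::Precision::kFloat32);
  config.EnableXpu(/*l3_workspace_size=*/0xfffc00, /*locked=*/false,
                   /*autotune=*/true, /*autotune_file=*/"",
                   /*precision=*/"int16", /*adaptive_seqlen=*/false);
  return paddle::CreatePaddlePredictor(config);
}
```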
diff --git a/paddle/fluid/inference/lite/engine.cc b/paddle/fluid/inference/lite/engine.cc
index e8ec67d6f0b8eb18ffebcb8065cd176bae102aa1..e3e12687cf0aaab447cd9e8de3596d3b6bc2be50 100644
--- a/paddle/fluid/inference/lite/engine.cc
+++ b/paddle/fluid/inference/lite/engine.cc
@@ -59,8 +59,15 @@ paddle::lite_api::PaddlePredictor* EngineManager::Create(
 #endif
 
 #ifdef LITE_SUBGRAPH_WITH_XPU
+  // Deprecated in Paddle-Lite release/v2.8
   lite_cxx_config.set_xpu_workspace_l3_size_per_thread(
       cfg.xpu_l3_workspace_size);
+
+  lite_cxx_config.set_xpu_l3_cache_method(cfg.xpu_l3_workspace_size,
+                                          cfg.locked);
+  lite_cxx_config.set_xpu_conv_autotune(cfg.autotune, cfg.autotune_file);
+  lite_cxx_config.set_xpu_multi_encoder_method(cfg.precision,
+                                               cfg.adaptive_seqlen);
 #endif
 
   // create predictor
diff --git a/paddle/fluid/inference/lite/engine.h b/paddle/fluid/inference/lite/engine.h
index 5ba487cc24d7d58cd87853a58fc12f1a82c3610d..a64ef1eda828bf2a5fc96c1cc8435c0a4b6912c6 100644
--- a/paddle/fluid/inference/lite/engine.h
+++ b/paddle/fluid/inference/lite/engine.h
@@ -42,6 +42,11 @@ struct EngineConfig {
 
   // for xpu
   size_t xpu_l3_workspace_size;
+  bool locked = false;
+  bool autotune = true;
+  std::string autotune_file = "";
+  std::string precision = "int16";
+  bool adaptive_seqlen = false;
 
   // for x86 or arm
   int cpu_math_library_num_threads{1};
diff --git a/paddle/fluid/pybind/inference_api.cc b/paddle/fluid/pybind/inference_api.cc
index eb9c166d444f374870022025491bdf577332a03f..e8f6aa9c7f78fd470c1df1ba4c928f91e199ef19 100644
--- a/paddle/fluid/pybind/inference_api.cc
+++ b/paddle/fluid/pybind/inference_api.cc
@@ -388,7 +388,10 @@ void BindAnalysisConfig(py::module *m) {
       .def("enable_use_gpu", &AnalysisConfig::EnableUseGpu,
            py::arg("memory_pool_init_size_mb"), py::arg("device_id") = 0)
       .def("enable_xpu", &AnalysisConfig::EnableXpu,
-           py::arg("l3_workspace_size"))
+           py::arg("l3_workspace_size") = 16 * 1024 * 1024,
+           py::arg("locked") = false, py::arg("autotune") = true,
+           py::arg("autotune_file") = "", py::arg("precision") = "int16",
+           py::arg("adaptive_seqlen") = false)
      .def("disable_gpu", &AnalysisConfig::DisableGpu)
      .def("use_gpu", &AnalysisConfig::use_gpu)
      .def("gpu_device_id", &AnalysisConfig::gpu_device_id)
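End to end, the three new Paddle-Lite setters supersede the deprecated per-thread L3 call, and the Python binding mirrors the C++ signature, e.g. `config.enable_xpu(autotune=True, precision="int16")`. A standalone sketch of the same `CxxConfig` sequence with the new defaults spelled out; the include path is assumed from the Lite subgraph build, and the setter signatures are as used in engine.cc above:

```cpp
#include "lite/api/paddle_api.h"  // paddle::lite_api::CxxConfig (assumed path)

void ConfigureXpu(paddle::lite_api::CxxConfig* conf) {
  // L3 cache size, plus whether the workspace is locked for sharing.
  conf->set_xpu_l3_cache_method(0xfffc00, /*locked=*/false);
  // Convolution autotune, optionally persisted to / restored from a file.
  conf->set_xpu_conv_autotune(/*autotune=*/true, /*autotune_file=*/"");
  // Multi-encoder precision and adaptive sequence-length optimization.
  conf->set_xpu_multi_encoder_method(/*precision=*/"int16",
                                     /*adaptive_seqlen=*/false);
}
```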