Unverified commit 92dc9b2b, authored by Wilber, committed by GitHub

update lite subgraph api. (#32513)

Parent 486946ae
@@ -227,6 +227,11 @@ struct Argument {
   DECL_ARGUMENT_FIELD(use_xpu, UseXpu, bool);
   DECL_ARGUMENT_FIELD(xpu_l3_workspace_size, XpuL3WorkspaceSize, int);
+  DECL_ARGUMENT_FIELD(xpu_locked, XpuLocked, bool);
+  DECL_ARGUMENT_FIELD(xpu_autotune, XpuAutotune, bool);
+  DECL_ARGUMENT_FIELD(xpu_autotune_file, XpuAutotuneFile, std::string);
+  DECL_ARGUMENT_FIELD(xpu_precision, XpuPrecision, std::string);
+  DECL_ARGUMENT_FIELD(xpu_adaptive_seqlen, XpuAdaptiveSeqlen, bool);

   // Memory optimized related.
   DECL_ARGUMENT_FIELD(enable_memory_optim, EnableMemoryOptim, bool);
......
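DECL_ARGUMENT_FIELD declares a typed field on Argument together with a field__() getter and a Set<Field> setter; those generated setters (SetXpuLocked and friends) are exactly what AnalysisPredictor::PrepareArgument calls later in this diff. As rough orientation only, a minimal sketch of such a macro, assuming a bare getter/setter pair and omitting the validity bookkeeping Paddle's real macro performs:

// Hypothetical simplification of DECL_ARGUMENT_FIELD, for illustration;
// the real macro also records which fields have been set so that getters
// can enforce access checks.
#define DECL_ARGUMENT_FIELD(field__, Field, type__)    \
 public:                                               \
  type__& field__() { return field__##_; }             \
  void Set##Field(const type__& x) { field__##_ = x; } \
                                                       \
 private:                                              \
  type__ field__##_;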
@@ -188,6 +188,12 @@ void IRPassManager::CreatePasses(Argument *argument,
                 new int(argument->xpu_l3_workspace_size()));
       pass->Set("cpu_math_library_num_threads",
                 new int(argument->cpu_math_library_num_threads()));
+      pass->Set("locked", new bool(argument->xpu_locked()));
+      pass->Set("autotune", new bool(argument->xpu_autotune()));
+      pass->Set("autotune_file",
+                new std::string(argument->xpu_autotune_file()));
+      pass->Set("precision", new std::string(argument->xpu_precision()));
+      pass->Set("adaptive_seqlen", new bool(argument->xpu_adaptive_seqlen()));
     }
     disable_logs_ = argument->disable_logs();
     if (pass_name == "fc_fuse_pass") {
......
@@ -245,6 +245,11 @@ void LiteSubgraphPass::SetUpEngine(
   bool use_xpu = Get<bool>("use_xpu");
   int xpu_l3_workspace_size = Get<int>("xpu_l3_workspace_size");
   int cpu_math_library_num_threads = Get<int>("cpu_math_library_num_threads");
+  bool locked = Get<bool>("locked");
+  bool autotune = Get<bool>("autotune");
+  std::string autotune_file = Get<std::string>("autotune_file");
+  std::string precision = Get<std::string>("precision");
+  bool adaptive_seqlen = Get<bool>("adaptive_seqlen");

   lite_api::TargetType target_type;
   if (use_gpu) {
@@ -282,6 +287,11 @@ void LiteSubgraphPass::SetUpEngine(
   };
   config.cpu_math_library_num_threads = cpu_math_library_num_threads;
   config.xpu_l3_workspace_size = xpu_l3_workspace_size;
+  config.locked = locked;
+  config.autotune = autotune;
+  config.autotune_file = autotune_file;
+  config.precision = precision;
+  config.adaptive_seqlen = adaptive_seqlen;

   if (dump_model) {
     lite::StrToBinaryFile("./model.bin", config.model);
     lite::StrToBinaryFile("./param.bin", config.param);
......
@@ -96,9 +96,17 @@ void AnalysisConfig::DisableFCPadding() {
   Update();
 }

-void AnalysisConfig::EnableXpu(int l3_workspace_size) {
+void AnalysisConfig::EnableXpu(int l3_workspace_size, bool locked,
+                               bool autotune, const std::string &autotune_file,
+                               const std::string &precision,
+                               bool adaptive_seqlen) {
   use_xpu_ = true;
   xpu_l3_workspace_size_ = l3_workspace_size;
+  xpu_locked_ = locked;
+  xpu_autotune_ = autotune;
+  xpu_autotune_file_ = autotune_file;
+  xpu_precision_ = precision;
+  xpu_adaptive_seqlen_ = adaptive_seqlen;
   Update();
 }
@@ -161,6 +169,11 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
   CP_MEMBER(use_xpu_);
   CP_MEMBER(xpu_l3_workspace_size_);
+  CP_MEMBER(xpu_locked_);
+  CP_MEMBER(xpu_autotune_);
+  CP_MEMBER(xpu_autotune_file_);
+  CP_MEMBER(xpu_precision_);
+  CP_MEMBER(xpu_adaptive_seqlen_);

   // profile related.
   CP_MEMBER(with_profile_);
@@ -548,6 +561,11 @@ std::string AnalysisConfig::SerializeInfoCache() {
   ss << use_lite_;
   ss << use_xpu_;
   ss << xpu_l3_workspace_size_;
+  ss << xpu_locked_;
+  ss << xpu_autotune_;
+  ss << xpu_autotune_file_;
+  ss << xpu_precision_;
+  ss << xpu_adaptive_seqlen_;

   ss << thread_local_stream_;
......
@@ -552,6 +552,11 @@ void AnalysisPredictor::PrepareArgument() {
     argument_.SetLiteZeroCopy(config_.lite_zero_copy_);
     argument_.SetUseXpu(config_.use_xpu_);
     argument_.SetXpuL3WorkspaceSize(config_.xpu_l3_workspace_size_);
+    argument_.SetXpuLocked(config_.xpu_locked_);
+    argument_.SetXpuAutotune(config_.xpu_autotune_);
+    argument_.SetXpuAutotuneFile(config_.xpu_autotune_file_);
+    argument_.SetXpuPrecision(config_.xpu_precision_);
+    argument_.SetXpuAdaptiveSeqlen(config_.xpu_adaptive_seqlen_);
     LOG(INFO) << "Lite subgraph engine is enabled";
   }
......
@@ -177,7 +177,10 @@ struct PD_INFER_DECL AnalysisConfig {
   ///
   void DisableGpu();
-  void EnableXpu(int l3_workspace_size = 0xfffc00);
+  void EnableXpu(int l3_workspace_size = 0xfffc00, bool locked = false,
+                 bool autotune = true, const std::string& autotune_file = "",
+                 const std::string& precision = "int16",
+                 bool adaptive_seqlen = false);
   ///
   /// \brief A boolean state telling whether the GPU is turned on.
   ///
@@ -668,6 +671,11 @@ struct PD_INFER_DECL AnalysisConfig {
   bool thread_local_stream_{false};
   bool use_xpu_{false};
   int xpu_l3_workspace_size_;
+  bool xpu_locked_;
+  bool xpu_autotune_;
+  std::string xpu_autotune_file_;
+  std::string xpu_precision_;
+  bool xpu_adaptive_seqlen_;

   // mkldnn related.
   int mkldnn_cache_capacity_{0};
......
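With the widened EnableXpu declaration and its defaults in place, a C++ call site only needs to override the knobs it cares about. A minimal usage sketch, not part of this patch (the model directory is hypothetical; note that this header defaults l3_workspace_size to 0xfffc00 bytes, whereas the Python binding below defaults it to 16 MiB):

#include "paddle_inference_api.h"

int main() {
  paddle::AnalysisConfig config;
  config.SetModel("./mobilenet_v1");  // hypothetical model directory
  // XPU execution runs through the Lite subgraph engine (see
  // AnalysisPredictor::PrepareArgument above).
  config.EnableLiteEngine();
  // The new knobs from this patch: shared (unlocked) L3 cache, conv
  // autotune on with no persisted result file, int16 multi-encoder
  // precision, adaptive sequence length off.
  config.EnableXpu(/*l3_workspace_size=*/0xfffc00, /*locked=*/false,
                   /*autotune=*/true, /*autotune_file=*/"",
                   /*precision=*/"int16", /*adaptive_seqlen=*/false);
  auto predictor = paddle::CreatePaddlePredictor(config);
  return 0;
}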
@@ -59,8 +59,14 @@ paddle::lite_api::PaddlePredictor* EngineManager::Create(
 #endif

 #ifdef LITE_SUBGRAPH_WITH_XPU
+  // Deprecated in Paddle-Lite release/v2.8
   lite_cxx_config.set_xpu_workspace_l3_size_per_thread(
       cfg.xpu_l3_workspace_size);
+  lite_cxx_config.set_xpu_l3_cache_method(cfg.xpu_l3_workspace_size,
+                                          cfg.locked);
+  lite_cxx_config.set_xpu_conv_autotune(cfg.autotune, cfg.autotune_file);
+  lite_cxx_config.set_xpu_multi_encoder_method(cfg.precision,
+                                               cfg.adaptive_seqlen);
 #endif

   // create predictor
......
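The three set_xpu_* calls added above are the Paddle-Lite release/v2.8 replacements for the deprecated per-thread workspace setter, splitting the configuration into L3 cache policy, conv autotune, and multi-encoder behavior. A standalone Paddle-Lite client could express the same settings roughly as follows (a sketch using only the methods named in this diff, with the EngineConfig defaults from the next hunk):

paddle::lite_api::CxxConfig cxx_config;
// L3 workspace size plus whether the workspace is locked, mirroring the
// (xpu_l3_workspace_size, locked) pair passed above.
cxx_config.set_xpu_l3_cache_method(0xfffc00, /*locked=*/false);
// Enable conv autotune; an empty path means no persisted autotune file.
cxx_config.set_xpu_conv_autotune(/*autotune=*/true, /*autotune_file=*/"");
// Multi-encoder precision and adaptive sequence length handling.
cxx_config.set_xpu_multi_encoder_method(/*precision=*/"int16",
                                        /*adaptive_seqlen=*/false);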
@@ -42,6 +42,11 @@ struct EngineConfig {
   // for xpu
   size_t xpu_l3_workspace_size;
+  bool locked = false;
+  bool autotune = true;
+  std::string autotune_file = "";
+  std::string precision = "int16";
+  bool adaptive_seqlen = false;

   // for x86 or arm
   int cpu_math_library_num_threads{1};
......
@@ -467,7 +467,10 @@ void BindAnalysisConfig(py::module *m) {
       .def("enable_use_gpu", &AnalysisConfig::EnableUseGpu,
           py::arg("memory_pool_init_size_mb"), py::arg("device_id") = 0)
       .def("enable_xpu", &AnalysisConfig::EnableXpu,
-           py::arg("l3_workspace_size"))
+           py::arg("l3_workspace_size") = 16 * 1024 * 1024,
+           py::arg("locked") = false, py::arg("autotune") = true,
+           py::arg("autotune_file") = "", py::arg("precision") = "int16",
+           py::arg("adaptive_seqlen") = false)
       .def("disable_gpu", &AnalysisConfig::DisableGpu)
       .def("use_gpu", &AnalysisConfig::use_gpu)
       .def("use_xpu", &AnalysisConfig::use_xpu)
......