未验证提交 92dc9b2b · 作者:W Wilber · 提交者:GitHub

update lite subgraph api. (#32513)

上级 486946ae
......@@ -227,6 +227,11 @@ struct Argument {
DECL_ARGUMENT_FIELD(use_xpu, UseXpu, bool);
DECL_ARGUMENT_FIELD(xpu_l3_workspace_size, XpuL3WorkspaceSize, int);
DECL_ARGUMENT_FIELD(xpu_locked, XpuLocked, bool);
DECL_ARGUMENT_FIELD(xpu_autotune, XpuAutotune, bool);
DECL_ARGUMENT_FIELD(xpu_autotune_file, XpuAutotuneFile, std::string);
DECL_ARGUMENT_FIELD(xpu_precision, XpuPrecision, std::string);
DECL_ARGUMENT_FIELD(xpu_adaptive_seqlen, XpuAdaptiveSeqlen, bool);
// Memory optimized related.
DECL_ARGUMENT_FIELD(enable_memory_optim, EnableMemoryOptim, bool);
......
......@@ -188,6 +188,12 @@ void IRPassManager::CreatePasses(Argument *argument,
new int(argument->xpu_l3_workspace_size()));
pass->Set("cpu_math_library_num_threads",
new int(argument->cpu_math_library_num_threads()));
pass->Set("locked", new bool(argument->xpu_locked()));
pass->Set("autotune", new bool(argument->xpu_autotune()));
pass->Set("autotune_file",
new std::string(argument->xpu_autotune_file()));
pass->Set("precision", new std::string(argument->xpu_precision()));
pass->Set("adaptive_seqlen", new bool(argument->xpu_adaptive_seqlen()));
}
disable_logs_ = argument->disable_logs();
if (pass_name == "fc_fuse_pass") {
......
......@@ -245,6 +245,11 @@ void LiteSubgraphPass::SetUpEngine(
bool use_xpu = Get<bool>("use_xpu");
int xpu_l3_workspace_size = Get<int>("xpu_l3_workspace_size");
int cpu_math_library_num_threads = Get<int>("cpu_math_library_num_threads");
bool locked = Get<bool>("locked");
bool autotune = Get<bool>("autotune");
std::string autotune_file = Get<std::string>("autotune_file");
std::string precision = Get<std::string>("precision");
bool adaptive_seqlen = Get<bool>("adaptive_seqlen");
lite_api::TargetType target_type;
if (use_gpu) {
......@@ -282,6 +287,11 @@ void LiteSubgraphPass::SetUpEngine(
};
config.cpu_math_library_num_threads = cpu_math_library_num_threads;
config.xpu_l3_workspace_size = xpu_l3_workspace_size;
config.locked = locked;
config.autotune = autotune;
config.autotune_file = autotune_file;
config.precision = precision;
config.adaptive_seqlen = adaptive_seqlen;
if (dump_model) {
lite::StrToBinaryFile("./model.bin", config.model);
lite::StrToBinaryFile("./param.bin", config.param);
......
......@@ -96,9 +96,17 @@ void AnalysisConfig::DisableFCPadding() {
Update();
}
void AnalysisConfig::EnableXpu(int l3_workspace_size) {
void AnalysisConfig::EnableXpu(int l3_workspace_size, bool locked,
bool autotune, const std::string &autotune_file,
const std::string &precision,
bool adaptive_seqlen) {
use_xpu_ = true;
xpu_l3_workspace_size_ = l3_workspace_size;
xpu_locked_ = locked;
xpu_autotune_ = autotune;
xpu_autotune_file_ = autotune_file;
xpu_precision_ = precision;
xpu_adaptive_seqlen_ = adaptive_seqlen;
Update();
}
......@@ -161,6 +169,11 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
CP_MEMBER(use_xpu_);
CP_MEMBER(xpu_l3_workspace_size_);
CP_MEMBER(xpu_locked_);
CP_MEMBER(xpu_autotune_);
CP_MEMBER(xpu_autotune_file_);
CP_MEMBER(xpu_precision_);
CP_MEMBER(xpu_adaptive_seqlen_);
// profile related.
CP_MEMBER(with_profile_);
......@@ -548,6 +561,11 @@ std::string AnalysisConfig::SerializeInfoCache() {
ss << use_lite_;
ss << use_xpu_;
ss << xpu_l3_workspace_size_;
ss << xpu_locked_;
ss << xpu_autotune_;
ss << xpu_autotune_file_;
ss << xpu_precision_;
ss << xpu_adaptive_seqlen_;
ss << thread_local_stream_;
......
......@@ -552,6 +552,11 @@ void AnalysisPredictor::PrepareArgument() {
argument_.SetLiteZeroCopy(config_.lite_zero_copy_);
argument_.SetUseXpu(config_.use_xpu_);
argument_.SetXpuL3WorkspaceSize(config_.xpu_l3_workspace_size_);
argument_.SetXpuLocked(config_.xpu_locked_);
argument_.SetXpuAutotune(config_.xpu_autotune_);
argument_.SetXpuAutotuneFile(config_.xpu_autotune_file_);
argument_.SetXpuPrecision(config_.xpu_precision_);
argument_.SetXpuAdaptiveSeqlen(config_.xpu_adaptive_seqlen_);
LOG(INFO) << "Lite subgraph engine is enabled";
}
......
......@@ -177,7 +177,10 @@ struct PD_INFER_DECL AnalysisConfig {
///
void DisableGpu();
void EnableXpu(int l3_workspace_size = 0xfffc00);
void EnableXpu(int l3_workspace_size = 0xfffc00, bool locked = false,
bool autotune = true, const std::string& autotune_file = "",
const std::string& precision = "int16",
bool adaptive_seqlen = false);
///
/// \brief A boolean state telling whether the GPU is turned on.
///
......@@ -668,6 +671,11 @@ struct PD_INFER_DECL AnalysisConfig {
bool thread_local_stream_{false};
bool use_xpu_{false};
int xpu_l3_workspace_size_;
bool xpu_locked_;
bool xpu_autotune_;
std::string xpu_autotune_file_;
std::string xpu_precision_;
bool xpu_adaptive_seqlen_;
// mkldnn related.
int mkldnn_cache_capacity_{0};
......
......@@ -59,8 +59,14 @@ paddle::lite_api::PaddlePredictor* EngineManager::Create(
#endif
#ifdef LITE_SUBGRAPH_WITH_XPU
// Deprecated in Paddle-Lite release/v2.8
lite_cxx_config.set_xpu_workspace_l3_size_per_thread(
cfg.xpu_l3_workspace_size);
lite_cxx_config.set_xpu_l3_cache_method(cfg.xpu_l3_workspace_size,
cfg.locked);
lite_cxx_config.set_xpu_conv_autotune(cfg.autotune, cfg.autotune_file);
lite_cxx_config.set_xpu_multi_encoder_method(cfg.precision,
cfg.adaptive_seqlen);
#endif
// create predictor
......
......@@ -42,6 +42,11 @@ struct EngineConfig {
// for xpu
size_t xpu_l3_workspace_size;
bool locked = false;
bool autotune = true;
std::string autotune_file = "";
std::string precision = "int16";
bool adaptive_seqlen = false;
// for x86 or arm
int cpu_math_library_num_threads{1};
......
......@@ -467,7 +467,10 @@ void BindAnalysisConfig(py::module *m) {
.def("enable_use_gpu", &AnalysisConfig::EnableUseGpu,
py::arg("memory_pool_init_size_mb"), py::arg("device_id") = 0)
.def("enable_xpu", &AnalysisConfig::EnableXpu,
py::arg("l3_workspace_size"))
py::arg("l3_workspace_size") = 16 * 1024 * 1024,
py::arg("locked") = false, py::arg("autotune") = true,
py::arg("autotune_file") = "", py::arg("precision") = "int16",
py::arg("adaptive_seqlen") = false)
.def("disable_gpu", &AnalysisConfig::DisableGpu)
.def("use_gpu", &AnalysisConfig::use_gpu)
.def("use_xpu", &AnalysisConfig::use_xpu)
......
Markdown is supported
0%（上传进度）
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册