未验证 提交 8a1124b1 编写于 作者: S shentanyue 提交者: GitHub

[Lite][XPU] Upgrade lite subgraph api of xpu (#47373)

上级 ad251cb5
......@@ -288,6 +288,7 @@ struct Argument {
DECL_ARGUMENT_FIELD(xpu_precision, XpuPrecision, std::string);
DECL_ARGUMENT_FIELD(xpu_adaptive_seqlen, XpuAdaptiveSeqlen, bool);
DECL_ARGUMENT_FIELD(xpu_device_id, XpuDeviceId, int);
DECL_ARGUMENT_FIELD(xpu_enable_multi_stream, XpuEnableMultiStream, bool);
DECL_ARGUMENT_FIELD(use_nnadapter, UseNNAdapter, bool);
DECL_ARGUMENT_FIELD(nnadapter_model_cache_dir,
......
......@@ -263,6 +263,8 @@ void IRPassManager::CreatePasses(Argument *argument,
pass->Set("precision", new std::string(argument->xpu_precision()));
pass->Set("adaptive_seqlen", new bool(argument->xpu_adaptive_seqlen()));
pass->Set("xpu_device_id", new int(argument->xpu_device_id()));
pass->Set("enable_multi_stream",
new bool(argument->xpu_enable_multi_stream()));
// NNAdapter Related
pass->Set("use_nnadapter", new bool(argument->use_nnadapter()));
pass->Set("nnadapter_model_cache_dir",
......
......@@ -258,6 +258,7 @@ void LiteSubgraphPass::SetUpEngine(
std::string autotune_file = Get<std::string>("autotune_file");
std::string precision = Get<std::string>("precision");
bool adaptive_seqlen = Get<bool>("adaptive_seqlen");
bool enable_multi_stream = Get<bool>("enable_multi_stream");
// NNAdapter Related
bool use_nnadapter = Get<bool>("use_nnadapter");
std::string nnadapter_model_cache_dir =
......@@ -302,7 +303,6 @@ void LiteSubgraphPass::SetUpEngine(
// input tensor of the Lite engine is located, and then affects
// whether tensor sharing is feasible.
paddle::lite_api::Place({target_type, precision_type}),
paddle::lite_api::Place({target_type, PRECISION(kInt64)}),
paddle::lite_api::Place({target_type, PRECISION(kFloat)}),
#ifdef PADDLE_WITH_ARM
paddle::lite_api::Place({TARGET(kARM), precision_type}),
......@@ -321,6 +321,7 @@ void LiteSubgraphPass::SetUpEngine(
config.autotune_file = autotune_file;
config.precision = precision;
config.adaptive_seqlen = adaptive_seqlen;
config.enable_multi_stream = enable_multi_stream;
// NNAdapter Related
config.nnadapter_model_cache_dir = nnadapter_model_cache_dir;
config.nnadapter_device_names = nnadapter_device_names;
......
......@@ -137,7 +137,8 @@ void AnalysisConfig::EnableXpu(int l3_workspace_size,
bool autotune,
const std::string &autotune_file,
const std::string &precision,
bool adaptive_seqlen) {
bool adaptive_seqlen,
bool enable_multi_stream) {
use_xpu_ = true;
xpu_l3_workspace_size_ = l3_workspace_size;
xpu_locked_ = locked;
......@@ -145,6 +146,7 @@ void AnalysisConfig::EnableXpu(int l3_workspace_size,
xpu_autotune_file_ = autotune_file;
xpu_precision_ = precision;
xpu_adaptive_seqlen_ = adaptive_seqlen;
xpu_enable_multi_stream_ = enable_multi_stream;
Update();
}
......@@ -439,6 +441,7 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
CP_MEMBER(xpu_autotune_file_);
CP_MEMBER(xpu_precision_);
CP_MEMBER(xpu_adaptive_seqlen_);
CP_MEMBER(xpu_enable_multi_stream_);
// NPU related.
CP_MEMBER(use_npu_);
......@@ -1020,6 +1023,7 @@ std::string AnalysisConfig::SerializeInfoCache() {
ss << xpu_autotune_file_;
ss << xpu_precision_;
ss << xpu_adaptive_seqlen_;
ss << xpu_enable_multi_stream_;
ss << use_npu_;
ss << npu_device_id_;
......
......@@ -1148,6 +1148,7 @@ void AnalysisPredictor::PrepareArgument() {
argument_.SetXpuPrecision(config_.xpu_precision_);
argument_.SetXpuAdaptiveSeqlen(config_.xpu_adaptive_seqlen_);
argument_.SetXpuDeviceId(config_.xpu_device_id_);
argument_.SetXpuEnableMultiStream(config_.xpu_enable_multi_stream_);
// NNAdapter related
argument_.SetUseNNAdapter(config_.NNAdapter().use_nnadapter);
argument_.SetNNAdapterDeviceNames(
......
......@@ -274,13 +274,15 @@ struct PD_INFER_DECL AnalysisConfig {
/// file will be used and autotune will not be performed again.
/// \param precision Calculation accuracy of multi_encoder
/// \param adaptive_seqlen Whether the input of multi_encoder is variable length.
/// \param enable_multi_stream Whether to enable multi-stream on the XPU.
///
void EnableXpu(int l3_workspace_size = 0xfffc00,
bool locked = false,
bool autotune = true,
const std::string& autotune_file = "",
const std::string& precision = "int16",
bool adaptive_seqlen = false);
bool adaptive_seqlen = false,
bool enable_multi_stream = false);
///
/// \brief configs of IPU
......@@ -1102,6 +1104,7 @@ struct PD_INFER_DECL AnalysisConfig {
std::string xpu_autotune_file_;
std::string xpu_precision_;
bool xpu_adaptive_seqlen_;
bool xpu_enable_multi_stream_;
// NNAdapter related
LiteNNAdapterConfig nnadapter_config_;
......
......@@ -155,14 +155,16 @@ void PD_ConfigEnableXpu(__pd_keep PD_Config* pd_config,
PD_Bool autotune,
const char* autotune_file,
const char* precision,
PD_Bool adaptive_seqlen) {
PD_Bool adaptive_seqlen,
PD_Bool enable_multi_stream) {
CHECK_AND_CONVERT_PD_CONFIG;
config->EnableXpu(l3_workspace_size,
locked,
autotune,
autotune_file,
precision,
adaptive_seqlen);
adaptive_seqlen,
enable_multi_stream);
}
void PD_ConfigEnableNpu(__pd_keep PD_Config* pd_config, int32_t device_id) {
......
......@@ -200,6 +200,7 @@ PADDLE_CAPI_EXPORT extern void PD_ConfigEnableORTOptimization(
/// file will be used and autotune will not be performed again.
/// \param precision Calculation accuracy of multi_encoder
/// \param adaptive_seqlen Whether the input of multi_encoder is variable length.
/// \param enable_multi_stream Whether to enable multi-stream on the XPU.
///
PADDLE_CAPI_EXPORT extern void PD_ConfigEnableXpu(
__pd_keep PD_Config* pd_config,
......@@ -208,7 +209,8 @@ PADDLE_CAPI_EXPORT extern void PD_ConfigEnableXpu(
PD_Bool autotune,
const char* autotune_file,
const char* precision,
PD_Bool adaptive_seqlen);
PD_Bool adaptive_seqlen,
PD_Bool enable_multi_stream);
///
/// \brief Turn on NPU.
///
......
......@@ -199,8 +199,9 @@ func (config *Config) EnableORTOptimization() {
/// \param autotune_file Specify the path of the autotune file. If autotune_file is specified, the algorithm specified in the file will be used and autotune will not be performed again.
/// \param precision Calculation accuracy of multi_encoder
/// \param adaptive_seqlen Whether the input of multi_encoder is variable length.
/// \param enable_multi_stream Whether to enable multi-stream on the XPU.
///
func (config *Config) EnableXpu(l3WorkspaceSize int32, locked bool, autotune bool, autotuneFile string, precision string, adaptiveSeqlen bool) {
func (config *Config) EnableXpu(l3WorkspaceSize int32, locked bool, autotune bool, autotuneFile string, precision string, adaptiveSeqlen bool, enableMultiStream bool) {
cAutotuneFile := C.CString(autotuneFile)
cPrecision := C.CString(precision)
defer func() {
......@@ -208,7 +209,7 @@ func (config *Config) EnableXpu(l3WorkspaceSize int32, locked bool, autotune boo
C.free(unsafe.Pointer(cPrecision))
}()
C.PD_ConfigEnableXpu(config.c, C.int32_t(l3WorkspaceSize), cvtGoBoolToPD(locked), cvtGoBoolToPD(autotune),
cAutotuneFile, cPrecision, cvtGoBoolToPD(adaptiveSeqlen))
cAutotuneFile, cPrecision, cvtGoBoolToPD(adaptiveSeqlen), cvtGoBoolToPD(enableMultiStream))
}
///
......@@ -332,9 +333,9 @@ func (config *Config) IrOptim() bool {
/// \param useCalibMode Use TRT int8 calibration(post training
/// quantization).
///
func (config *Config) EnableTensorRtEngine(workspaceSize int32, maxBatchSize int32, minSubgraphSize int32,
func (config *Config) EnableTensorRtEngine(workspaceSize int64, maxBatchSize int32, minSubgraphSize int32,
precision Precision, useStatic bool, useCalibMode bool) {
C.PD_ConfigEnableTensorRtEngine(config.c, C.int32_t(workspaceSize), C.int32_t(maxBatchSize), C.int32_t(minSubgraphSize), C.int32_t(precision), cvtGoBoolToPD(useStatic), cvtGoBoolToPD(useCalibMode))
C.PD_ConfigEnableTensorRtEngine(config.c, C.int64_t(workspaceSize), C.int32_t(maxBatchSize), C.int32_t(minSubgraphSize), C.int32_t(precision), cvtGoBoolToPD(useStatic), cvtGoBoolToPD(useCalibMode))
}
///
......
......@@ -65,6 +65,7 @@ paddle::lite_api::PaddlePredictor* EngineManager::Create(
lite_cxx_config.set_xpu_multi_encoder_method(cfg.precision,
cfg.adaptive_seqlen);
lite_cxx_config.set_xpu_dev_per_thread(cfg.device_id);
lite_cxx_config.enable_xpu_multi_stream(cfg.enable_multi_stream);
#endif
#ifdef LITE_SUBGRAPH_WITH_NPU
......
......@@ -50,6 +50,7 @@ struct EngineConfig {
std::string autotune_file = "";
std::string precision = "int16";
bool adaptive_seqlen = false;
bool enable_multi_stream = false;
// for x86 or arm
int cpu_math_library_num_threads{1};
......
......@@ -656,7 +656,8 @@ void BindAnalysisConfig(py::module *m) {
py::arg("autotune") = true,
py::arg("autotune_file") = "",
py::arg("precision") = "int16",
py::arg("adaptive_seqlen") = false)
py::arg("adaptive_seqlen") = false,
py::arg("enable_multi_stream") = false)
.def("set_xpu_device_id",
&AnalysisConfig::SetXpuDeviceId,
py::arg("device_id") = 0)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册