未验证 提交 798ab3f9 编写于 作者: E engineer1109 提交者: GitHub

add lite opencl support api (#47112)

上级 f781473e
......@@ -290,6 +290,8 @@ struct Argument {
DECL_ARGUMENT_FIELD(xpu_device_id, XpuDeviceId, int);
DECL_ARGUMENT_FIELD(xpu_enable_multi_stream, XpuEnableMultiStream, bool);
DECL_ARGUMENT_FIELD(use_opencl, UseOpenCL, bool);
DECL_ARGUMENT_FIELD(use_nnadapter, UseNNAdapter, bool);
DECL_ARGUMENT_FIELD(nnadapter_model_cache_dir,
NNAdapterModelCacheDir,
......
......@@ -254,6 +254,7 @@ void IRPassManager::CreatePasses(Argument *argument,
pass->Set("use_xpu", new bool(argument->use_xpu()));
pass->Set("xpu_l3_workspace_size",
new int(argument->xpu_l3_workspace_size()));
pass->Set("use_opencl", new bool(argument->use_opencl()));
pass->Set("cpu_math_library_num_threads",
new int(argument->cpu_math_library_num_threads()));
pass->Set("locked", new bool(argument->xpu_locked()));
......
......@@ -252,6 +252,7 @@ void LiteSubgraphPass::SetUpEngine(
bool use_xpu = Get<bool>("use_xpu");
int xpu_device_id = Get<int>("xpu_device_id");
int xpu_l3_workspace_size = Get<int>("xpu_l3_workspace_size");
bool use_opencl = Get<bool>("use_opencl");
int cpu_math_library_num_threads = Get<int>("cpu_math_library_num_threads");
bool locked = Get<bool>("locked");
bool autotune = Get<bool>("autotune");
......@@ -285,6 +286,8 @@ void LiteSubgraphPass::SetUpEngine(
#ifdef LITE_WITH_NNADAPTER
target_type = TARGET(kNNAdapter);
#endif
} else if (use_opencl) {
target_type = TARGET(kOpenCL);
} else {
#ifdef PADDLE_WITH_ARM
target_type = TARGET(kARM);
......@@ -313,6 +316,33 @@ void LiteSubgraphPass::SetUpEngine(
#endif
paddle::lite_api::Place({TARGET(kHost), PRECISION(kFloat)}),
};
// opencl has no int64, and has bugs with image io.
if (use_opencl) {
config.valid_places = {
paddle::lite_api::Place{
TARGET(kOpenCL), PRECISION(kFP16), DATALAYOUT(kImageDefault)},
paddle::lite_api::Place{
TARGET(kOpenCL), PRECISION(kFP16), DATALAYOUT(kImageFolder)},
paddle::lite_api::Place{
TARGET(kOpenCL), PRECISION(kFloat), DATALAYOUT(kNCHW)},
paddle::lite_api::Place{
TARGET(kOpenCL), PRECISION(kAny), DATALAYOUT(kImageDefault)},
paddle::lite_api::Place{
TARGET(kOpenCL), PRECISION(kAny), DATALAYOUT(kImageFolder)},
paddle::lite_api::Place{
TARGET(kOpenCL), PRECISION(kAny), DATALAYOUT(kNCHW)},
paddle::lite_api::Place{
TARGET(kOpenCL), PRECISION(kInt32), DATALAYOUT(kNCHW)},
#ifdef PADDLE_WITH_ARM
paddle::lite_api::Place{TARGET(kARM), PRECISION(kFloat)},
#else
paddle::lite_api::Place{TARGET(kX86), PRECISION(kFloat)},
#endif
paddle::lite_api::Place{TARGET(kHost), PRECISION(kFloat)},
};
}
config.cpu_math_library_num_threads = cpu_math_library_num_threads;
config.xpu_l3_workspace_size = xpu_l3_workspace_size;
config.device_id = xpu_device_id;
......
......@@ -446,6 +446,9 @@ AnalysisConfig::AnalysisConfig(const AnalysisConfig &other) {
CP_MEMBER(xpu_adaptive_seqlen_);
CP_MEMBER(xpu_enable_multi_stream_);
// Lite OpenCL Related
CP_MEMBER(use_opencl_);
// NPU related.
CP_MEMBER(use_npu_);
CP_MEMBER(npu_device_id_);
......@@ -1157,6 +1160,11 @@ void AnalysisConfig::EnableLiteEngine(
Update();
}
// Turn on the Lite sub-graph engine's OpenCL backend.
//
// Sets use_opencl_ (default false), which AnalysisPredictor::PrepareArgument()
// forwards via SetUseOpenCL() so the LiteSubgraphPass selects
// TARGET(kOpenCL) and the OpenCL valid_places set when building the engine.
// NOTE(review): only meaningful together with EnableLiteEngine() — the flag is
// consumed by the Lite subgraph path; confirm no effect on other backends.
void AnalysisConfig::EnableOpenCL() {
use_opencl_ = true;
// Re-run internal config update so dependent state/pass selection reflects
// the new flag (same pattern as the other Enable* setters in this file).
Update();
}
void AnalysisConfig::PartiallyRelease() {
prog_file_.clear();
prog_file_.shrink_to_fit();
......
......@@ -1150,6 +1150,7 @@ void AnalysisPredictor::PrepareArgument() {
argument_.SetXpuAdaptiveSeqlen(config_.xpu_adaptive_seqlen_);
argument_.SetXpuDeviceId(config_.xpu_device_id_);
argument_.SetXpuEnableMultiStream(config_.xpu_enable_multi_stream_);
argument_.SetUseOpenCL(config_.use_opencl_);
// NNAdapter related
argument_.SetUseNNAdapter(config_.NNAdapter().use_nnadapter);
argument_.SetNNAdapterDeviceNames(
......
......@@ -415,6 +415,12 @@ struct PD_INFER_DECL AnalysisConfig {
///
bool use_onnxruntime() const { return use_onnxruntime_; }
///
/// \brief A boolean state telling whether the Lite OpenCL is turned on.
///
/// \return bool Whether the Lite OpenCL is turned on.
///
bool use_opencl() const { return use_opencl_; }
///
/// \brief A boolean state telling whether the ONNXRuntime Optimization is
/// turned on.
///
......@@ -724,6 +730,11 @@ struct PD_INFER_DECL AnalysisConfig {
const std::vector<std::string>& passes_filter = {},
const std::vector<std::string>& ops_filter = {});
///
/// \brief Turn on the usage of Lite sub-graph engine with opencl.
///
void EnableOpenCL();
///
/// \brief A boolean state indicating whether the Lite sub-graph engine is
/// used.
......@@ -1118,6 +1129,9 @@ struct PD_INFER_DECL AnalysisConfig {
bool xpu_adaptive_seqlen_;
bool xpu_enable_multi_stream_;
// LITE OPENCL SETTINGS
bool use_opencl_{false};
// NNAdapter related
LiteNNAdapterConfig nnadapter_config_;
......
......@@ -89,6 +89,14 @@ paddle::lite_api::PaddlePredictor* EngineManager::Create(
cfg.nnadapter_model_cache_buffer[i]);
}
#endif
if (cfg.use_opencl) {
lite_cxx_config.set_opencl_binary_path_name(cfg.opencl_bin_path,
cfg.opencl_bin_name);
lite_cxx_config.set_opencl_tune(cfg.opencl_tune_mode);
lite_cxx_config.set_opencl_precision(cfg.opencl_precision_type);
}
// create predictor
std::shared_ptr<paddle::lite_api::PaddlePredictor> p =
paddle::lite_api::CreatePaddlePredictor(lite_cxx_config);
......
......@@ -66,6 +66,12 @@ struct EngineConfig {
std::string nnadapter_subgraph_partition_config_path;
std::vector<std::string> nnadapter_model_cache_token;
std::vector<std::vector<char>> nnadapter_model_cache_buffer;
bool use_opencl{};
std::string opencl_bin_path = "./";
std::string opencl_bin_name = "lite_opencl_kernel.bin";
paddle::lite_api::CLTuneMode opencl_tune_mode{};
paddle::lite_api::CLPrecisionType opencl_precision_type{};
};
class EngineManager {
......
......@@ -691,6 +691,7 @@ void BindAnalysisConfig(py::module *m) {
.def("enable_onnxruntime", &AnalysisConfig::EnableONNXRuntime)
.def("disable_onnxruntime", &AnalysisConfig::DisableONNXRuntime)
.def("onnxruntime_enabled", &AnalysisConfig::use_onnxruntime)
.def("use_opencl", &AnalysisConfig::use_opencl)
.def("enable_ort_optimization", &AnalysisConfig::EnableORTOptimization)
.def("use_gpu", &AnalysisConfig::use_gpu)
.def("use_xpu", &AnalysisConfig::use_xpu)
......@@ -783,6 +784,7 @@ void BindAnalysisConfig(py::module *m) {
py::arg("zero_copy") = false,
py::arg("passes_filter") = std::vector<std::string>(),
py::arg("ops_filter") = std::vector<std::string>())
.def("enable_opencl", &AnalysisConfig::EnableOpenCL)
.def("lite_engine_enabled", &AnalysisConfig::lite_engine_enabled)
.def("switch_ir_debug",
&AnalysisConfig::SwitchIrDebug,
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册