From 97fc888ccb45bda5992e74f7e382b3d1b661cc97 Mon Sep 17 00:00:00 2001
From: Yuan Shuai
Date: Wed, 29 Apr 2020 21:45:41 +0800
Subject: [PATCH] =?UTF-8?q?[LITE][OPENCL]=20add=20gpu=20perf=20mode,=20pri?=
 =?UTF-8?q?ority=20level=20for=20qcom=E2=80=A6=20(#3507)=20(#3522)?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* [LITE][OPENCL] add gpu perf mode, priority level for qcom adreno. test=develop
---
 README.md                          |  3 +-
 lite/backends/opencl/cl_runtime.cc | 69 ++++++++++++++++++++++++++++++
 lite/backends/opencl/cl_runtime.h  | 66 +++++++++++++++++++++++++++-
 3 files changed, 135 insertions(+), 3 deletions(-)

diff --git a/README.md b/README.md
index b72e4bc930..7094720b49 100644
--- a/README.md
+++ b/README.md
@@ -61,7 +61,8 @@ For demands of Apple's GPU Metal and web front end inference, please see `./meta
 Paddle Lite has referenced the following open-source projects:
 
 - [ARM compute library](http://agroup.baidu.com/paddle-infer/md/article/%28https://github.com/ARM-software/ComputeLibrary%29)
-- [Anakin](https://github.com/PaddlePaddle/Anakin). The optimizations under Anakin has been incorporated into Paddle Lite, and so there will not be any future updates of Anakin. As another high-performance inference project under PaddlePaddle, Anakin has been forward-looking and helpful to the making of Paddle Lite.
+- [Anakin](https://github.com/PaddlePaddle/Anakin). The optimizations under Anakin have been incorporated into Paddle Lite, so there will not be any future updates of Anakin. As another high-performance inference project under PaddlePaddle, Anakin has been forward-looking and helpful to the making of Paddle Lite.
+
 
 ## Feedback and Community Support
 
diff --git a/lite/backends/opencl/cl_runtime.cc b/lite/backends/opencl/cl_runtime.cc
index c074768a64..93ceb6f9c5 100644
--- a/lite/backends/opencl/cl_runtime.cc
+++ b/lite/backends/opencl/cl_runtime.cc
@@ -129,6 +129,26 @@ bool CLRuntime::InitializePlatform() {
   return true;
 }
 
+GpuType CLRuntime::ParseGpuTypeFromDeviceName(std::string device_name) {
+  const std::string kMALI_PATTERN_STR = "Mali";
+  const std::string kADRENO_PATTERN_STR = "QUALCOMM Adreno(TM)";
+  const std::string kPOWERVR_PATTERN_STR = "PowerVR";
+
+  if (device_name == kADRENO_PATTERN_STR) {
+    LOG(INFO) << "adreno gpu";
+    return GpuType::QUALCOMM_ADRENO;
+  } else if (device_name.find(kMALI_PATTERN_STR) != std::string::npos) {
+    LOG(INFO) << "mali gpu";
+    return GpuType::ARM_MALI;
+  } else if (device_name.find(kPOWERVR_PATTERN_STR) != std::string::npos) {
+    LOG(INFO) << "powerVR gpu";
+    return GpuType::IMAGINATION_POWERVR;
+  } else {
+    LOG(INFO) << "others gpu";
+    return GpuType::UNKNOWN;
+  }
+}
+
 bool CLRuntime::InitializeDevice() {
   // ===================== BASIC =====================
   // CL_DEVICE_TYPE_GPU
@@ -148,6 +168,7 @@ bool CLRuntime::InitializeDevice() {
 
   auto device_name = device_->getInfo<CL_DEVICE_NAME>();
   LOG(INFO) << "Using device: " << device_name;
+  gpu_type_ = ParseGpuTypeFromDeviceName(device_name);
 
   cl_device_type device_type = device_->getInfo<CL_DEVICE_TYPE>();
   auto device_type_to_str = [](cl_device_type t) -> std::string {
@@ -296,5 +317,53 @@ std::map<std::string, size_t>& CLRuntime::GetDeviceInfo() {
   return device_info_;
 }
 
+void CLRuntime::GetAdrenoContextProperties(
+    std::vector<cl_context_properties>* properties,
+    GPUPerfMode gpu_perf_mode,
+    GPUPriorityLevel gpu_priority_level) {
+  CHECK(properties) << "cl_context_properties is nullptr";
+  properties->reserve(5);
+  switch (gpu_perf_mode) {
+    case GPUPerfMode::PERF_LOW:
+      LOG(INFO) << "GPUPerfMode::PERF_LOW";
+      properties->push_back(CL_CONTEXT_PERF_MODE_QCOM);
+      properties->push_back(CL_PERF_MODE_LOW_QCOM);
+      break;
+    case GPUPerfMode::PERF_NORMAL:
+      LOG(INFO) << "GPUPerfMode::PERF_NORMAL";
+      properties->push_back(CL_CONTEXT_PERF_MODE_QCOM);
+      properties->push_back(CL_PERF_MODE_NORMAL_QCOM);
+      break;
+    case GPUPerfMode::PERF_HIGH:
+      LOG(INFO) << "GPUPerfMode::PERF_HIGH";
+      properties->push_back(CL_CONTEXT_PERF_MODE_QCOM);
+      properties->push_back(CL_PERF_MODE_HIGH_QCOM);
+      break;
+    default:
+      break;
+  }
+  switch (gpu_priority_level) {
+    case GPUPriorityLevel::PRIORITY_LOW:
+      LOG(INFO) << "GPUPriorityLevel::PRIORITY_LOW";
+      properties->push_back(CL_CONTEXT_PRIORITY_LEVEL_QCOM);
+      properties->push_back(CL_PRIORITY_HINT_LOW_QCOM);
+      break;
+    case GPUPriorityLevel::PRIORITY_NORMAL:
+      LOG(INFO) << "GPUPriorityLevel::PRIORITY_NORMAL";
+      properties->push_back(CL_CONTEXT_PRIORITY_LEVEL_QCOM);
+      properties->push_back(CL_PRIORITY_HINT_NORMAL_QCOM);
+      break;
+    case GPUPriorityLevel::PRIORITY_HIGH:
+      LOG(INFO) << "GPUPriorityLevel::PRIORITY_HIGH";
+      properties->push_back(CL_CONTEXT_PRIORITY_LEVEL_QCOM);
+      properties->push_back(CL_PRIORITY_HINT_HIGH_QCOM);
+      break;
+    default:
+      break;
+  }
+  // The properties list should be terminated with 0
+  properties->push_back(0);
+}
+
 }  // namespace lite
 }  // namespace paddle
diff --git a/lite/backends/opencl/cl_runtime.h b/lite/backends/opencl/cl_runtime.h
index 503b3a0116..122422c79b 100644
--- a/lite/backends/opencl/cl_runtime.h
+++ b/lite/backends/opencl/cl_runtime.h
@@ -19,6 +19,45 @@ limitations under the License. */
 #include "lite/backends/opencl/cl_include.h"
 #include "lite/backends/opencl/cl_utility.h"
 
+typedef enum {
+  UNKNOWN = 0,
+  QUALCOMM_ADRENO = 1,
+  ARM_MALI = 2,
+  IMAGINATION_POWERVR = 3,
+  OTHERS = 4,
+} GpuType;
+
+typedef enum {
+  PERF_DEFAULT = 0,
+  PERF_LOW = 1,
+  PERF_NORMAL = 2,
+  PERF_HIGH = 3
+} GPUPerfMode;
+
+typedef enum {
+  PRIORITY_DEFAULT = 0,
+  PRIORITY_LOW = 1,
+  PRIORITY_NORMAL = 2,
+  PRIORITY_HIGH = 3
+} GPUPriorityLevel;
+
+// Adreno extensions
+// Adreno performance hints
+typedef cl_uint cl_perf_hint;
+#define CL_CONTEXT_PERF_MODE_QCOM 0x40C2
+#define CL_PERF_MODE_HIGH_QCOM 0x40C3
+#define CL_PERF_MODE_NORMAL_QCOM 0x40C4
+#define CL_PERF_MODE_LOW_QCOM 0x40C5
+
+// Adreno priority hints
+typedef cl_uint cl_priority_hint;
+
+#define CL_PRIORITY_HINT_NONE_QCOM 0
+#define CL_CONTEXT_PRIORITY_LEVEL_QCOM 0x40C9
+#define CL_PRIORITY_HINT_HIGH_QCOM 0x40CA
+#define CL_PRIORITY_HINT_NORMAL_QCOM 0x40CB
+#define CL_PRIORITY_HINT_LOW_QCOM 0x40CC
+
 namespace paddle {
 namespace lite {
 
@@ -63,9 +102,28 @@ class CLRuntime {
 
   bool InitializeDevice();
 
+  void GetAdrenoContextProperties(
+      std::vector<cl_context_properties>* properties,
+      GPUPerfMode gpu_perf_mode,
+      GPUPriorityLevel gpu_priority_level);
+
   std::shared_ptr<cl::Context> CreateContext() {
-    auto context = std::make_shared<cl::Context>(
-        std::vector<cl::Device>{device()}, nullptr, nullptr, nullptr, &status_);
+    // note(ysh329): gpu perf mode and priority level of adreno gpu referred
+    // from xiaomi/mace.
+    // However, no performance gain after `PERF_HIGH` and `PRIORITY_HIGH` set.
+    auto perf_mode = GPUPerfMode::PERF_HIGH;
+    auto priority_level = GPUPriorityLevel::PRIORITY_HIGH;
+    std::vector<cl_context_properties> context_properties;
+    if (gpu_type_ == GpuType::QUALCOMM_ADRENO) {
+      GetAdrenoContextProperties(
+          &context_properties, perf_mode, priority_level);
+    }
+    auto context =
+        std::make_shared<cl::Context>(std::vector<cl::Device>{device()},
+                                      context_properties.data(),
+                                      nullptr,
+                                      nullptr,
+                                      &status_);
     CL_CHECK_FATAL(status_);
     return context;
   }
@@ -83,8 +141,12 @@ class CLRuntime {
     return queue;
   }
 
+  GpuType ParseGpuTypeFromDeviceName(std::string device_name);
+
   std::map<std::string, size_t> device_info_;
 
+  GpuType gpu_type_{GpuType::UNKNOWN};
+
   std::string cl_path_;
 
   std::shared_ptr<cl::Platform> platform_{nullptr};
-- 
GitLab
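
Note (editorial, not part of the patch above): the following is a minimal, self-contained sketch of the technique the patch wires into CLRuntime::CreateContext(), namely building a zero-terminated cl_context_properties list with the Qualcomm Adreno performance and priority hints and passing it at context-creation time. It is written against the plain OpenCL C API instead of the cl.hpp wrapper that Paddle Lite uses, so it compiles on its own. The QCOM token values are copied from the cl_runtime.h hunk above; the file name, the exact-match device-name check, and the error handling are illustrative assumptions, not Paddle Lite code.

// adreno_context_hints.cc (illustrative sketch; not part of Paddle Lite)
// Builds the same QCOM perf/priority property list that the patch adds,
// using the raw OpenCL C API so the example has no Paddle Lite dependency.
#include <CL/cl.h>

#include <cstdio>
#include <cstring>
#include <vector>

// Qualcomm Adreno extension tokens, mirroring the values defined in the
// cl_runtime.h hunk above.
#ifndef CL_CONTEXT_PERF_MODE_QCOM
#define CL_CONTEXT_PERF_MODE_QCOM 0x40C2
#define CL_PERF_MODE_HIGH_QCOM 0x40C3
#define CL_CONTEXT_PRIORITY_LEVEL_QCOM 0x40C9
#define CL_PRIORITY_HINT_HIGH_QCOM 0x40CA
#endif

int main() {
  cl_platform_id platform = nullptr;
  cl_device_id device = nullptr;
  if (clGetPlatformIDs(1, &platform, nullptr) != CL_SUCCESS ||
      clGetDeviceIDs(platform, CL_DEVICE_TYPE_GPU, 1, &device, nullptr) !=
          CL_SUCCESS) {
    std::fprintf(stderr, "no OpenCL GPU device found\n");
    return 1;
  }

  char device_name[256] = {0};
  clGetDeviceInfo(device, CL_DEVICE_NAME, sizeof(device_name), device_name,
                  nullptr);

  // Only Adreno understands the QCOM hints, so other GPUs keep an empty
  // property list. This mirrors the gpu_type_ check in CreateContext().
  std::vector<cl_context_properties> properties;
  if (std::strcmp(device_name, "QUALCOMM Adreno(TM)") == 0) {
    properties.push_back(CL_CONTEXT_PERF_MODE_QCOM);
    properties.push_back(CL_PERF_MODE_HIGH_QCOM);
    properties.push_back(CL_CONTEXT_PRIORITY_LEVEL_QCOM);
    properties.push_back(CL_PRIORITY_HINT_HIGH_QCOM);
    properties.push_back(0);  // the property list must be zero-terminated
  }

  cl_int status = CL_SUCCESS;
  cl_context context = clCreateContext(
      properties.empty() ? nullptr : properties.data(), 1, &device,
      nullptr, nullptr, &status);
  if (status != CL_SUCCESS) {
    std::fprintf(stderr, "clCreateContext failed with %d\n", status);
    return 1;
  }
  std::printf("created context on %s\n", device_name);
  clReleaseContext(context);
  return 0;
}

The empty()/nullptr guard when calling clCreateContext is a defensive choice in this sketch: the data() pointer of an empty std::vector is not guaranteed to point at a zero terminator, so when no hints apply it is safer to pass nullptr and fall back to the default context behavior.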