[LITE][OPENCL] add gpu perf mode, priority level for qcom… (#3507)

* [LITE][OPENCL] add gpu perf mode, priority level for qcom adreno. test=develop

[LITE][OPENCL] add gpu perf mode, priority level for qcom… (#3507)
* [LITE][OPENCL] add gpu perf mode, priority level for qcom adreno. test=develop
4e0835ca · Yuan Shuai · GitHub · 0145720a · 4e0835ca · 4e0835ca
Showing 3 changed files with 135 additions and 3 deletions

README.md README.md +2 -1

lite/backends/opencl/cl_runtime.cc lite/backends/opencl/cl_runtime.cc +69 -0

lite/backends/opencl/cl_runtime.h lite/backends/opencl/cl_runtime.h +64 -2

未找到文件。
--- a/README.md
+++ b/README.md
@@ -64,6 +64,7 @@ Paddle Lite has referenced the following open-source projects:
 - [Anakin](https://github.com/PaddlePaddle/Anakin). The optimizations under Anakin have been incorporated into Paddle Lite, and so there will not be any future updates of Anakin. As another high-performance inference project under PaddlePaddle, Anakin has been forward-looking and helpful to the making of Paddle Lite.  


+
 ## Feedback and Community Support

 - Questions, reports, and suggestions are welcome through Github Issues!

--- a/lite/backends/opencl/cl_runtime.cc
+++ b/lite/backends/opencl/cl_runtime.cc
@@ -129,6 +129,26 @@ bool CLRuntime::InitializePlatform() {
  return true;
 }

+// Classifies the GPU vendor by substring search in the OpenCL device name (an
+// exact `==` would miss decorated Adreno names); no match yields UNKNOWN.
+GpuType CLRuntime::ParseGpuTypeFromDeviceName(std::string device_name) {
+  const std::string kMALI_PATTERN_STR = "Mali";
+  const std::string kADRENO_PATTERN_STR = "QUALCOMM Adreno(TM)";
+  const std::string kPOWERVR_PATTERN_STR = "PowerVR";
+  if (device_name.find(kADRENO_PATTERN_STR) != std::string::npos) {
+    LOG(INFO) << "adreno gpu";
+    return GpuType::QUALCOMM_ADRENO;
+  } else if (device_name.find(kMALI_PATTERN_STR) != std::string::npos) {
+    LOG(INFO) << "mali gpu";
+    return GpuType::ARM_MALI;
+  } else if (device_name.find(kPOWERVR_PATTERN_STR) != std::string::npos) {
+    LOG(INFO) << "powerVR gpu";
+    return GpuType::IMAGINATION_POWERVR;
+  }
+  LOG(INFO) << "others gpu";
+  return GpuType::UNKNOWN;
+}
+
 bool CLRuntime::InitializeDevice() {
  // ===================== BASIC =====================
  // CL_DEVICE_TYPE_GPU
@@ -148,6 +168,7 @@ bool CLRuntime::InitializeDevice() {

  auto device_name = device_->getInfo<CL_DEVICE_NAME>();
  LOG(INFO) << "Using device: " << device_name;
+  gpu_type_ = ParseGpuTypeFromDeviceName(device_name);

  cl_device_type device_type = device_->getInfo<CL_DEVICE_TYPE>();
  auto device_type_to_str = [](cl_device_type t) -> std::string {
@@ -296,5 +317,53 @@ std::map<std::string, size_t>& CLRuntime::GetDeviceInfo() {
  return device_info_;
 }

+// Builds the context property list carrying the Qualcomm Adreno hints.
+// properties: output list, must be non-null; hint pairs are appended to it
+//     and a terminating 0 is always appended last.
+// gpu_perf_mode: LOW/NORMAL/HIGH append a CL_CONTEXT_PERF_MODE_QCOM pair;
+//     any other value appends nothing.
+// gpu_priority_level: LOW/NORMAL/HIGH append a CL_CONTEXT_PRIORITY_LEVEL_QCOM
+//     pair; any other value appends nothing.
+void CLRuntime::GetAdrenoContextProperties(
+    std::vector<cl_context_properties>* properties,
+    GPUPerfMode gpu_perf_mode,
+    GPUPriorityLevel gpu_priority_level) {
+  CHECK(properties) << "cl_context_properties is nullptr";
+  // Room for two key/value pairs plus the terminator.
+  properties->reserve(5);
+
+  // Appends one (key, value) hint pair to the output list.
+  auto append_pair = [properties](cl_context_properties key,
+                                  cl_context_properties value) {
+    properties->push_back(key);
+    properties->push_back(value);
+  };
+
+  if (gpu_perf_mode == GPUPerfMode::PERF_LOW) {
+    LOG(INFO) << "GPUPerfMode::PERF_LOW";
+    append_pair(CL_CONTEXT_PERF_MODE_QCOM, CL_PERF_MODE_LOW_QCOM);
+  } else if (gpu_perf_mode == GPUPerfMode::PERF_NORMAL) {
+    LOG(INFO) << "GPUPerfMode::PERF_NORMAL";
+    append_pair(CL_CONTEXT_PERF_MODE_QCOM, CL_PERF_MODE_NORMAL_QCOM);
+  } else if (gpu_perf_mode == GPUPerfMode::PERF_HIGH) {
+    LOG(INFO) << "GPUPerfMode::PERF_HIGH";
+    append_pair(CL_CONTEXT_PERF_MODE_QCOM, CL_PERF_MODE_HIGH_QCOM);
+  }
+
+  if (gpu_priority_level == GPUPriorityLevel::PRIORITY_LOW) {
+    LOG(INFO) << "GPUPriorityLevel::PRIORITY_LOW";
+    append_pair(CL_CONTEXT_PRIORITY_LEVEL_QCOM, CL_PRIORITY_HINT_LOW_QCOM);
+  } else if (gpu_priority_level == GPUPriorityLevel::PRIORITY_NORMAL) {
+    LOG(INFO) << "GPUPriorityLevel::PRIORITY_NORMAL";
+    append_pair(CL_CONTEXT_PRIORITY_LEVEL_QCOM, CL_PRIORITY_HINT_NORMAL_QCOM);
+  } else if (gpu_priority_level == GPUPriorityLevel::PRIORITY_HIGH) {
+    LOG(INFO) << "GPUPriorityLevel::PRIORITY_HIGH";
+    append_pair(CL_CONTEXT_PRIORITY_LEVEL_QCOM, CL_PRIORITY_HINT_HIGH_QCOM);
+  }
+
+  // The list handed to OpenCL must end with a 0 entry.
+  properties->push_back(0);
+}
+
 }  // namespace lite
 }  // namespace paddle
--- a/lite/backends/opencl/cl_runtime.h
+++ b/lite/backends/opencl/cl_runtime.h
@@ -19,6 +19,45 @@ limitations under the License. */
 #include "lite/backends/opencl/cl_include.h"
 #include "lite/backends/opencl/cl_utility.h"

+typedef enum {  // GPU vendor family parsed from the OpenCL device name
+  UNKNOWN = 0,  // no known vendor pattern matched
+  QUALCOMM_ADRENO = 1,
+  ARM_MALI = 2,
+  IMAGINATION_POWERVR = 3,
+  OTHERS = 4,  // NOTE(review): not returned by the parser in this change
+} GpuType;
+
+typedef enum {  // requested GPU performance mode (applied to Adreno contexts)
+  PERF_DEFAULT = 0,  // adds no perf-mode context property
+  PERF_LOW = 1,      // -> CL_PERF_MODE_LOW_QCOM
+  PERF_NORMAL = 2,   // -> CL_PERF_MODE_NORMAL_QCOM
+  PERF_HIGH = 3      // -> CL_PERF_MODE_HIGH_QCOM
+} GPUPerfMode;
+
+typedef enum {  // requested GPU priority level (applied to Adreno contexts)
+  PRIORITY_DEFAULT = 0,  // adds no priority-level context property
+  PRIORITY_LOW = 1,      // -> CL_PRIORITY_HINT_LOW_QCOM
+  PRIORITY_NORMAL = 2,   // -> CL_PRIORITY_HINT_NORMAL_QCOM
+  PRIORITY_HIGH = 3      // -> CL_PRIORITY_HINT_HIGH_QCOM
+} GPUPriorityLevel;
+
+// Adreno extensions: context-property constants declared locally in this header
+// Adreno performance hints: a (key, value) pair in the context property list
+typedef cl_uint cl_perf_hint;  // NOTE(review): unused in the code shown
+#define CL_CONTEXT_PERF_MODE_QCOM 0x40C2  // property key
+#define CL_PERF_MODE_HIGH_QCOM 0x40C3     // property values
+#define CL_PERF_MODE_NORMAL_QCOM 0x40C4
+#define CL_PERF_MODE_LOW_QCOM 0x40C5
+
+// Adreno priority hints: a (key, value) pair in the context property list
+typedef cl_uint cl_priority_hint;  // NOTE(review): unused in the code shown
+
+#define CL_PRIORITY_HINT_NONE_QCOM 0  // NOTE(review): not referenced in the code shown
+#define CL_CONTEXT_PRIORITY_LEVEL_QCOM 0x40C9  // property key
+#define CL_PRIORITY_HINT_HIGH_QCOM 0x40CA      // property values
+#define CL_PRIORITY_HINT_NORMAL_QCOM 0x40CB
+#define CL_PRIORITY_HINT_LOW_QCOM 0x40CC
+
 namespace paddle {
 namespace lite {

@@ -63,9 +102,28 @@ class CLRuntime {

  bool InitializeDevice();

+  void GetAdrenoContextProperties(  // appends Adreno hint pairs, then a 0
+      std::vector<cl_context_properties>* properties,  // out; must be non-null
+      GPUPerfMode gpu_perf_mode,  // PERF_DEFAULT adds no perf-mode pair
+      GPUPriorityLevel gpu_priority_level);  // PRIORITY_DEFAULT adds no pair
+
  std::shared_ptr<cl::Context> CreateContext() {
-    auto context = std::make_shared<cl::Context>(
-        std::vector<cl::Device>{device()}, nullptr, nullptr, nullptr, &status_);
+    // note(ysh329): gpu perf mode and priority level of adreno gpu referred
+    // from xiaomi/mace.
+    // However, no performance gain after `PERF_HIGH` and `PRIORITY_HIGH` set.
+    auto perf_mode = GPUPerfMode::PERF_HIGH;
+    auto priority_level = GPUPriorityLevel::PRIORITY_HIGH;
+    std::vector<cl_context_properties> context_properties;
+    if (gpu_type_ == GpuType::QUALCOMM_ADRENO) {
+      GetAdrenoContextProperties(
+          &context_properties, perf_mode, priority_level);
+    }
+    auto context =
+        std::make_shared<cl::Context>(std::vector<cl::Device>{device()},
+                                      context_properties.data(),
+                                      nullptr,
+                                      nullptr,
+                                      &status_);
    CL_CHECK_FATAL(status_);
    return context;
  }
@@ -83,8 +141,12 @@ class CLRuntime {
    return queue;
  }

+  GpuType ParseGpuTypeFromDeviceName(std::string device_name);  // UNKNOWN if unmatched
+
  std::map<std::string, size_t> device_info_;

+  GpuType gpu_type_{GpuType::UNKNOWN};  // set in InitializeDevice()
+
  std::string cl_path_;

  std::shared_ptr<cl::Platform> platform_{nullptr};