未验证 提交 4e0835ca 编写于 作者: Y Yuan Shuai 提交者: GitHub

[LITE][OPENCL] add gpu perf mode, priority level for qcom… (#3507)

* [LITE][OPENCL] add gpu perf mode, priority level for qcom adreno. test=develop
上级 0145720a
......@@ -64,6 +64,7 @@ Paddle Lite has referenced the following open-source projects:
- [Anakin](https://github.com/PaddlePaddle/Anakin). The optimizations under Anakin have been incorporated into Paddle Lite, so there will not be any future updates to Anakin. As another high-performance inference project under PaddlePaddle, Anakin has been forward-looking and helpful to the making of Paddle Lite.
## Feedback and Community Support
- Questions, reports, and suggestions are welcome through Github Issues!
......
......@@ -129,6 +129,26 @@ bool CLRuntime::InitializePlatform() {
return true;
}
// Classifies the GPU family from the OpenCL device name string.
//
// Adreno is recognised by an exact, whole-string match; Mali and PowerVR are
// recognised when their tag appears anywhere in the name. Any other name
// yields GpuType::UNKNOWN. The detected family is also logged at INFO level.
GpuType CLRuntime::ParseGpuTypeFromDeviceName(std::string device_name) {
  const std::string adreno_full_name = "QUALCOMM Adreno(TM)";
  const std::string mali_tag = "Mali";
  const std::string powervr_tag = "PowerVR";

  // True when `tag` occurs somewhere inside the device name.
  auto name_contains = [&device_name](const std::string& tag) {
    return device_name.find(tag) != std::string::npos;
  };

  // Checks run in the same order as before: Adreno, then Mali, then PowerVR.
  if (device_name == adreno_full_name) {
    LOG(INFO) << "adreno gpu";
    return GpuType::QUALCOMM_ADRENO;
  }
  if (name_contains(mali_tag)) {
    LOG(INFO) << "mali gpu";
    return GpuType::ARM_MALI;
  }
  if (name_contains(powervr_tag)) {
    LOG(INFO) << "powerVR gpu";
    return GpuType::IMAGINATION_POWERVR;
  }

  LOG(INFO) << "others gpu";
  return GpuType::UNKNOWN;
}
bool CLRuntime::InitializeDevice() {
// ===================== BASIC =====================
// CL_DEVICE_TYPE_GPU
......@@ -148,6 +168,7 @@ bool CLRuntime::InitializeDevice() {
auto device_name = device_->getInfo<CL_DEVICE_NAME>();
LOG(INFO) << "Using device: " << device_name;
gpu_type_ = ParseGpuTypeFromDeviceName(device_name);
cl_device_type device_type = device_->getInfo<CL_DEVICE_TYPE>();
auto device_type_to_str = [](cl_device_type t) -> std::string {
......@@ -296,5 +317,53 @@ std::map<std::string, size_t>& CLRuntime::GetDeviceInfo() {
return device_info_;
}
void CLRuntime::GetAdrenoContextProperties(
std::vector<cl_context_properties>* properties,
GPUPerfMode gpu_perf_mode,
GPUPriorityLevel gpu_priority_level) {
CHECK(properties) << "cl_context_properties is nullptr";
properties->reserve(5);
switch (gpu_perf_mode) {
case GPUPerfMode::PERF_LOW:
LOG(INFO) << "GPUPerfMode::PERF_LOW";
properties->push_back(CL_CONTEXT_PERF_MODE_QCOM);
properties->push_back(CL_PERF_MODE_LOW_QCOM);
break;
case GPUPerfMode::PERF_NORMAL:
LOG(INFO) << "GPUPerfMode::PERF_NORMAL";
properties->push_back(CL_CONTEXT_PERF_MODE_QCOM);
properties->push_back(CL_PERF_MODE_NORMAL_QCOM);
break;
case GPUPerfMode::PERF_HIGH:
LOG(INFO) << "GPUPerfMode::PERF_HIGH";
properties->push_back(CL_CONTEXT_PERF_MODE_QCOM);
properties->push_back(CL_PERF_MODE_HIGH_QCOM);
break;
default:
break;
}
switch (gpu_priority_level) {
case GPUPriorityLevel::PRIORITY_LOW:
LOG(INFO) << "GPUPriorityLevel::PRIORITY_LOW";
properties->push_back(CL_CONTEXT_PRIORITY_LEVEL_QCOM);
properties->push_back(CL_PRIORITY_HINT_LOW_QCOM);
break;
case GPUPriorityLevel::PRIORITY_NORMAL:
LOG(INFO) << "GPUPriorityLevel::PRIORITY_NORMAL";
properties->push_back(CL_CONTEXT_PRIORITY_LEVEL_QCOM);
properties->push_back(CL_PRIORITY_HINT_NORMAL_QCOM);
break;
case GPUPriorityLevel::PRIORITY_HIGH:
LOG(INFO) << "GPUPriorityLevel::PRIORITY_HIGH";
properties->push_back(CL_CONTEXT_PRIORITY_LEVEL_QCOM);
properties->push_back(CL_PRIORITY_HINT_HIGH_QCOM);
break;
default:
break;
}
// The properties list should be terminated with 0
properties->push_back(0);
}
} // namespace lite
} // namespace paddle
......@@ -19,6 +19,45 @@ limitations under the License. */
#include "lite/backends/opencl/cl_include.h"
#include "lite/backends/opencl/cl_utility.h"
// GPU family of the OpenCL device, as classified from its device name.
// Note: the visible device-name parser returns UNKNOWN for unrecognised
// names; OTHERS is declared but not produced by that parser.
enum GpuType {
  UNKNOWN = 0,
  QUALCOMM_ADRENO = 1,
  ARM_MALI = 2,
  IMAGINATION_POWERVR = 3,
  OTHERS = 4,
};
// Requested GPU performance mode. PERF_DEFAULT means "add no perf-mode
// property"; the other values select the matching Adreno perf-mode hint.
enum GPUPerfMode {
  PERF_DEFAULT = 0,
  PERF_LOW = 1,
  PERF_NORMAL = 2,
  PERF_HIGH = 3
};
// Requested GPU priority level. PRIORITY_DEFAULT means "add no priority
// property"; the other values select the matching Adreno priority hint.
enum GPUPriorityLevel {
  PRIORITY_DEFAULT = 0,
  PRIORITY_LOW = 1,
  PRIORITY_NORMAL = 2,
  PRIORITY_HIGH = 3
};
// Adreno extensions
// Context property keys and values for Qualcomm Adreno GPUs, defined locally
// as macros. NOTE(review): presumably mirrors Qualcomm's OpenCL extension
// header — confirm the numeric values against the vendor header.
// Adreno performance hints
typedef cl_uint cl_perf_hint;
// Property key; in the context property list it is followed by one of the
// CL_PERF_MODE_*_QCOM values below.
#define CL_CONTEXT_PERF_MODE_QCOM 0x40C2
#define CL_PERF_MODE_HIGH_QCOM 0x40C3
#define CL_PERF_MODE_NORMAL_QCOM 0x40C4
#define CL_PERF_MODE_LOW_QCOM 0x40C5
// Adreno priority hints
typedef cl_uint cl_priority_hint;
// Not referenced by the visible code in this file.
#define CL_PRIORITY_HINT_NONE_QCOM 0
// Property key; in the context property list it is followed by one of the
// CL_PRIORITY_HINT_{HIGH,NORMAL,LOW}_QCOM values below.
#define CL_CONTEXT_PRIORITY_LEVEL_QCOM 0x40C9
#define CL_PRIORITY_HINT_HIGH_QCOM 0x40CA
#define CL_PRIORITY_HINT_NORMAL_QCOM 0x40CB
#define CL_PRIORITY_HINT_LOW_QCOM 0x40CC
namespace paddle {
namespace lite {
......@@ -63,9 +102,28 @@ class CLRuntime {
bool InitializeDevice();
// Appends the Adreno context properties for `gpu_perf_mode` and
// `gpu_priority_level` to `properties`, then appends the terminating 0.
// The DEFAULT mode / level add no entry. `properties` must not be null.
void GetAdrenoContextProperties(
std::vector<cl_context_properties>* properties,
GPUPerfMode gpu_perf_mode,
GPUPriorityLevel gpu_priority_level);
std::shared_ptr<cl::Context> CreateContext() {
auto context = std::make_shared<cl::Context>(
std::vector<cl::Device>{device()}, nullptr, nullptr, nullptr, &status_);
// note(ysh329): gpu perf mode and priority level of adreno gpu referred
// from xiaomi/mace.
// However, no performance gain after `PERF_HIGH` and `PRIORITY_HIGH` set.
auto perf_mode = GPUPerfMode::PERF_HIGH;
auto priority_level = GPUPriorityLevel::PRIORITY_HIGH;
std::vector<cl_context_properties> context_properties;
if (gpu_type_ == GpuType::QUALCOMM_ADRENO) {
GetAdrenoContextProperties(
&context_properties, perf_mode, priority_level);
}
auto context =
std::make_shared<cl::Context>(std::vector<cl::Device>{device()},
context_properties.data(),
nullptr,
nullptr,
&status_);
CL_CHECK_FATAL(status_);
return context;
}
......@@ -83,8 +141,12 @@ class CLRuntime {
return queue;
}
// Classifies the GPU family from the OpenCL device name: exact match for
// Adreno, substring match for Mali / PowerVR, GpuType::UNKNOWN otherwise.
GpuType ParseGpuTypeFromDeviceName(std::string device_name);
std::map<std::string, size_t> device_info_;
// GPU family of the selected device; assigned in InitializeDevice() from the
// device name and UNKNOWN until then.
GpuType gpu_type_{GpuType::UNKNOWN};
std::string cl_path_;
std::shared_ptr<cl::Platform> platform_{nullptr};
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册