提交 595003f9 编写于 作者: 吴承辉

Merge branch 'fix-adreno-context-setting' into 'master'

Set Adreno settings only for Adreno GPU

See merge request !322
......@@ -212,7 +212,6 @@ DEFINE_bool(show_flops, true, "whether to estimate the model's FLOPs");
// Command-line flags for the benchmark tool. The gpu_* and omp_num_threads
// values are logged by Main(); for the OPENCL device the two hint flags are
// static_cast to GPUPerfHint / GPUPriorityHint and handed to
// mace::ConfigOpenCLRuntime.
DEFINE_int32(warmup_runs, 1, "how many runs to initialize model");
DEFINE_string(model_data_file, "",
"model data file name, used when EMBED_MODEL_DATA set to 0");
DEFINE_string(gpu_type, "ADRENO", "ADRENO/MALI");
DEFINE_int32(gpu_perf_hint, 2, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_priority_hint, 1, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(omp_num_threads, 8, "num of openmp threads");
......@@ -227,7 +226,6 @@ int Main(int argc, char **argv) {
LOG(INFO) << "Benchmark name: [" << FLAGS_benchmark_name << "]";
LOG(INFO) << "Device: [" << FLAGS_device << "]";
LOG(INFO) << "gpu_type: [" << FLAGS_gpu_type << "]";
LOG(INFO) << "gpu_perf_hint: [" << FLAGS_gpu_perf_hint << "]";
LOG(INFO) << "gpu_priority_hint: [" << FLAGS_gpu_priority_hint << "]";
LOG(INFO) << "omp_num_threads: [" << FLAGS_omp_num_threads << "]";
......@@ -270,10 +268,7 @@ int Main(int argc, char **argv) {
// config runtime
if (device_type == OPENCL) {
GPUType gpu_type = ADRENO;
if (FLAGS_gpu_type == "MALI") gpu_type = MALI;
mace::ConfigOpenCLRuntime(
gpu_type,
static_cast<GPUPerfHint>(FLAGS_gpu_perf_hint),
static_cast<GPUPriorityHint>(FLAGS_gpu_priority_hint));
} else if (device_type == CPU) {
......
......@@ -351,19 +351,17 @@ const OperatorDef &NetDef::op(const int idx) const {
return op_[idx];
}
// Configures the process-wide OpenCL runtime with the requested GPU hints:
// logs the hint values, then replaces the global runtime instance through
// OpenCLRuntime::CreateGlobal.
// NOTE(review): this span reads like a diff whose +/- markers were stripped:
// two signature openers, two log statements and two CreateGlobal calls (with
// and without gpu_type) appear back to back. Confirm which variant is current
// before touching the code.
void ConfigOpenCLRuntime(GPUType gpu_type,
GPUPerfHint gpu_perf_hint,
void ConfigOpenCLRuntime(GPUPerfHint gpu_perf_hint,
GPUPriorityHint gpu_priority_hint) {
LOG(INFO) << "Config OpenCL Runtime: gpu_type: " << gpu_type
<< ", gpu_perf_hint: " << gpu_perf_hint
<< ", gpu_priority_hint: " << gpu_priority_hint;
OpenCLRuntime::CreateGlobal(gpu_type, gpu_perf_hint, gpu_priority_hint);
VLOG(1) << "Set GPU configurations, gpu_perf_hint: " << gpu_perf_hint
<< ", gpu_priority_hint: " << gpu_priority_hint;
OpenCLRuntime::CreateGlobal(gpu_perf_hint, gpu_priority_hint);
}
// Applies the CPU runtime settings: reports the requested OpenMP thread count
// and CPU power option, then forwards both to SetOmpThreadsAndAffinity.
// NOTE(review): the same message is emitted once via LOG(INFO) and once via
// VLOG(1); this looks like the before/after lines of a diff shown together.
// Confirm which logging level is intended.
void ConfigOmpThreadsAndAffinity(int omp_num_threads,
CPUPowerOption power_option) {
LOG(INFO) << "Config CPU Runtime: omp_num_threads: " << omp_num_threads
<< ", cpu_power_option: " << power_option;
VLOG(1) << "Config CPU Runtime: omp_num_threads: " << omp_num_threads
<< ", cpu_power_option: " << power_option;
SetOmpThreadsAndAffinity(omp_num_threads, power_option);
}
......
......@@ -41,7 +41,6 @@ void *OpenCLAllocator::New(size_t nbytes) const {
CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR,
nbytes, nullptr, &error);
MACE_CHECK(error == CL_SUCCESS) << "error code: " << error;
MACE_CHECK_NOTNULL(buffer);
return static_cast<void *>(buffer);
}
......
......@@ -70,66 +70,61 @@ std::unique_ptr<OpenCLRuntime> OpenCLRuntime::runtime_instance_ = nullptr;
// Returns the global OpenCLRuntime. If no instance exists yet, one is created
// on the spot with the default perf/priority hints.
// NOTE(review): two `return CreateGlobal(` openers follow each other (one with
// GPUType::ADRENO, one without); this looks like old and new diff lines shown
// together. Confirm which call is current.
OpenCLRuntime *OpenCLRuntime::Global() {
// FIXME: not thread safe
// (the null check and the creation of runtime_instance_ are not synchronized)
if (runtime_instance_ == nullptr) {
return CreateGlobal(GPUType::ADRENO, GPUPerfHint::PERF_DEFAULT,
return CreateGlobal(GPUPerfHint::PERF_DEFAULT,
GPUPriorityHint::PRIORITY_DEFAULT);
}
return runtime_instance_.get();
}
// Builds a new OpenCLRuntime with the given hints, stores it in
// runtime_instance_ (replacing whatever instance was held before) and returns
// a non-owning pointer to it.
// NOTE(review): the signature and the constructor call each appear twice (with
// and without a GPUType argument), which looks like merged before/after diff
// lines. Confirm the current form.
OpenCLRuntime *OpenCLRuntime::CreateGlobal(GPUType gpu_type,
GPUPerfHint gpu_perf_hint,
OpenCLRuntime *OpenCLRuntime::CreateGlobal(GPUPerfHint gpu_perf_hint,
GPUPriorityHint gpu_priority_hint) {
runtime_instance_ =
std::unique_ptr<OpenCLRuntime>(new OpenCLRuntime(gpu_type, gpu_perf_hint,
std::unique_ptr<OpenCLRuntime>(new OpenCLRuntime(gpu_perf_hint,
gpu_priority_hint));
return runtime_instance_.get();
}
// Translates the perf/priority hints into Qualcomm (QCOM) context properties.
// For every hint other than the DEFAULT value a (property key, value) pair is
// appended to *properties; the list is then terminated with a 0 entry.
// `properties` must not be null (checked with MACE_CHECK_NOTNULL).
// NOTE(review): this span interleaves two versions of the function:
// ParseOpenCLRuntimeConfig, which takes a GPUType and only adds the properties
// when it is ADRENO, and GetAdrenoContextProperties, which adds them
// unconditionally. It looks like a diff with its +/- markers stripped, so the
// switch statements appear twice. Confirm which version is current.
void ParseOpenCLRuntimeConfig(std::vector<cl_context_properties> *properties,
GPUType gpu_type,
GPUPerfHint gpu_perf_hint,
GPUPriorityHint gpu_priority_hint) {
void GetAdrenoContextProperties(std::vector<cl_context_properties> *properties,
GPUPerfHint gpu_perf_hint,
GPUPriorityHint gpu_priority_hint) {
MACE_CHECK_NOTNULL(properties);
if (gpu_type == GPUType::ADRENO) {
// Perf hint -> CL_CONTEXT_PERF_HINT_QCOM pair; PERF_DEFAULT adds nothing.
switch (gpu_perf_hint) {
case GPUPerfHint::PERF_LOW:
properties->push_back(CL_CONTEXT_PERF_HINT_QCOM);
properties->push_back(CL_PERF_HINT_LOW_QCOM);
break;
case GPUPerfHint::PERF_NORMAL:
properties->push_back(CL_CONTEXT_PERF_HINT_QCOM);
properties->push_back(CL_PERF_HINT_NORMAL_QCOM);
break;
case GPUPerfHint::PERF_HIGH:
properties->push_back(CL_CONTEXT_PERF_HINT_QCOM);
properties->push_back(CL_PERF_HINT_HIGH_QCOM);
break;
default:break;
}
// Priority hint -> CL_CONTEXT_PRIORITY_HINT_QCOM pair; PRIORITY_DEFAULT adds
// nothing.
switch (gpu_priority_hint) {
case GPUPriorityHint::PRIORITY_LOW:
properties->push_back(CL_CONTEXT_PRIORITY_HINT_QCOM);
properties->push_back(CL_PRIORITY_HINT_LOW_QCOM);
break;
case GPUPriorityHint::PRIORITY_NORMAL:
properties->push_back(CL_CONTEXT_PRIORITY_HINT_QCOM);
properties->push_back(CL_PRIORITY_HINT_NORMAL_QCOM);
break;
case GPUPriorityHint::PRIORITY_HIGH:
properties->push_back(CL_CONTEXT_PRIORITY_HINT_QCOM);
properties->push_back(CL_PRIORITY_HINT_HIGH_QCOM);
break;
default:break;
}
} else {
LOG(WARNING) << "GPU options are only supported by Adreno GPU";
// Perf hint -> CL_CONTEXT_PERF_HINT_QCOM pair; PERF_DEFAULT adds nothing.
switch (gpu_perf_hint) {
case GPUPerfHint::PERF_LOW:
properties->push_back(CL_CONTEXT_PERF_HINT_QCOM);
properties->push_back(CL_PERF_HINT_LOW_QCOM);
break;
case GPUPerfHint::PERF_NORMAL:
properties->push_back(CL_CONTEXT_PERF_HINT_QCOM);
properties->push_back(CL_PERF_HINT_NORMAL_QCOM);
break;
case GPUPerfHint::PERF_HIGH:
properties->push_back(CL_CONTEXT_PERF_HINT_QCOM);
properties->push_back(CL_PERF_HINT_HIGH_QCOM);
break;
default:
break;
}
// Priority hint -> CL_CONTEXT_PRIORITY_HINT_QCOM pair; PRIORITY_DEFAULT adds
// nothing.
switch (gpu_priority_hint) {
case GPUPriorityHint::PRIORITY_LOW:
properties->push_back(CL_CONTEXT_PRIORITY_HINT_QCOM);
properties->push_back(CL_PRIORITY_HINT_LOW_QCOM);
break;
case GPUPriorityHint::PRIORITY_NORMAL:
properties->push_back(CL_CONTEXT_PRIORITY_HINT_QCOM);
properties->push_back(CL_PRIORITY_HINT_NORMAL_QCOM);
break;
case GPUPriorityHint::PRIORITY_HIGH:
properties->push_back(CL_CONTEXT_PRIORITY_HINT_QCOM);
properties->push_back(CL_PRIORITY_HINT_HIGH_QCOM);
break;
default:
break;
}
// The properties list should be terminated with 0
properties->push_back(0);
}
OpenCLRuntime::OpenCLRuntime(GPUType gpu_type, GPUPerfHint gpu_perf_hint,
OpenCLRuntime::OpenCLRuntime(GPUPerfHint gpu_perf_hint,
GPUPriorityHint gpu_priority_hint) {
LoadOpenCLLibrary();
......@@ -151,12 +146,19 @@ OpenCLRuntime::OpenCLRuntime(GPUType gpu_type, GPUPerfHint gpu_perf_hint,
}
bool gpu_detected = false;
cl::Device gpu_device;
bool is_adreno_gpu = false;
device_ = std::make_shared<cl::Device>();
for (auto device : all_devices) {
if (device.getInfo<CL_DEVICE_TYPE>() == CL_DEVICE_TYPE_GPU) {
gpu_device = device;
*device_ = device;
gpu_detected = true;
VLOG(1) << "Using device: " << device.getInfo<CL_DEVICE_NAME>();
const std::string device_name = device.getInfo<CL_DEVICE_NAME>();
constexpr const char *kQualcommAdrenoGPUStr = "QUALCOMM Adreno(TM)";
if (device_name == kQualcommAdrenoGPUStr) {
is_adreno_gpu = true;
}
VLOG(1) << "Using device: " << device_name;
break;
}
}
......@@ -172,28 +174,38 @@ OpenCLRuntime::OpenCLRuntime(GPUType gpu_type, GPUPerfHint gpu_perf_hint,
properties |= CL_QUEUE_PROFILING_ENABLE;
}
std::vector<cl_context_properties> context_properties;
context_properties.reserve(5);
ParseOpenCLRuntimeConfig(&context_properties, gpu_type, gpu_perf_hint,
gpu_priority_hint);
cl_int err;
if (is_adreno_gpu) {
std::vector<cl_context_properties> context_properties;
context_properties.reserve(5);
GetAdrenoContextProperties(&context_properties, gpu_perf_hint,
gpu_priority_hint);
context_ = std::shared_ptr<cl::Context>(
new cl::Context({*device_}, context_properties.data(),
nullptr, nullptr, &err));
} else {
context_ = std::shared_ptr<cl::Context>(
new cl::Context({*device_}, nullptr, nullptr, nullptr, &err));
}
MACE_CHECK(err == CL_SUCCESS) << "error code: " << err;
cl::Context context({gpu_device}, context_properties.data());
cl::CommandQueue command_queue(context, gpu_device, properties);
command_queue_ = std::make_shared<cl::CommandQueue>(*context_,
*device_,
properties,
&err);
MACE_CHECK(err == CL_SUCCESS) << "error code: " << err;
const char *kernel_path = getenv("MACE_KERNEL_PATH");
this->kernel_path_ =
std::string(kernel_path == nullptr ? "" : kernel_path) + "/";
this->device_ = new cl::Device(gpu_device);
this->context_ = new cl::Context(context);
this->command_queue_ = new cl::CommandQueue(command_queue);
}
// Tears the runtime down: drops the cached built programs, releases the
// command queue, context and device, and finally unloads the OpenCL library.
// NOTE(review): raw `delete` calls and `reset()` calls on the same three
// members both appear here, which looks like pre- and post-change diff lines
// merged together. Confirm which set is current.
OpenCLRuntime::~OpenCLRuntime() {
built_program_map_.clear();
delete command_queue_;
delete context_;
delete device_;
// We need to control the destruction order, which has dependencies:
// the command queue is released first, then the context, then the device.
command_queue_.reset();
context_.reset();
device_.reset();
UnloadOpenCLLibrary();
}
......
......@@ -39,7 +39,7 @@ class OpenCLProfilingTimer : public Timer {
class OpenCLRuntime {
public:
static OpenCLRuntime *Global();
static OpenCLRuntime *CreateGlobal(GPUType, GPUPerfHint, GPUPriorityHint);
static OpenCLRuntime *CreateGlobal(GPUPerfHint, GPUPriorityHint);
cl::Context &context();
cl::Device &device();
......@@ -55,7 +55,7 @@ class OpenCLRuntime {
~OpenCLRuntime();
private:
OpenCLRuntime(GPUType, GPUPerfHint, GPUPriorityHint);
OpenCLRuntime(GPUPerfHint, GPUPriorityHint);
OpenCLRuntime(const OpenCLRuntime &) = delete;
OpenCLRuntime &operator=(const OpenCLRuntime &) = delete;
......@@ -68,9 +68,9 @@ class OpenCLRuntime {
private:
// All OpenCL object must be a pointer and manually deleted before unloading
// OpenCL library.
cl::Context *context_;
cl::Device *device_;
cl::CommandQueue *command_queue_;
std::shared_ptr<cl::Context> context_;
std::shared_ptr<cl::Device> device_;
std::shared_ptr<cl::CommandQueue> command_queue_;
std::map<std::string, cl::Program> built_program_map_;
std::mutex program_build_mutex_;
std::string kernel_path_;
......
......@@ -9,31 +9,18 @@
#include "mace/public/mace.h"
// Command-line flags for the op benchmark binary; main() parses them with
// gflags::ParseCommandLineFlags. The two gpu_*_hint flags are static_cast to
// mace::GPUPerfHint / mace::GPUPriorityHint and passed to
// mace::ConfigOpenCLRuntime.
DEFINE_string(pattern, "all", "op benchmark pattern, eg:.*CONV.*");
DEFINE_string(gpu_type, "ADRENO", "ADRENO/MALI");
DEFINE_int32(gpu_perf_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_priority_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(omp_num_threads, 1, "num of openmp threads");
DEFINE_int32(cpu_power_option, 1,
"0:DEFAULT/1:HIGH_PERFORMANCE/2:BATTERY_SAVE");
// Converts the textual gpu_type flag value into a mace::GPUType.
// Only the exact string "MALI" yields MALI; "ADRENO" and any other
// (unrecognized) string yield ADRENO.
mace::GPUType ParseGPUType(const std::string &gpu_type_str) {
  const bool wants_mali = (gpu_type_str == "MALI");
  return wants_mali ? mace::GPUType::MALI : mace::GPUType::ADRENO;
}
int main(int argc, char **argv) {
gflags::SetUsageMessage("some usage message");
gflags::ParseCommandLineFlags(&argc, &argv, true);
// config runtime
mace::GPUType gpu_type = ParseGPUType(FLAGS_gpu_type);
mace::ConfigOpenCLRuntime(
gpu_type,
static_cast<mace::GPUPerfHint>(FLAGS_gpu_perf_hint),
static_cast<mace::GPUPriorityHint>(FLAGS_gpu_priority_hint));
mace::ConfigOmpThreadsAndAffinity(
......
......@@ -98,16 +98,6 @@ DeviceType ParseDeviceType(const std::string &device_str) {
}
}
// Maps a GPU type name to its GPUType enumerator.
// "MALI" selects GPUType::MALI. Everything else, including "ADRENO" and
// unknown names, falls back to GPUType::ADRENO.
GPUType ParseGPUType(const std::string &gpu_type_str) {
  if (gpu_type_str == "MALI") {
    return GPUType::MALI;
  }
  // "ADRENO" and the fallback case share the same result.
  return GPUType::ADRENO;
}
struct mallinfo LogMallinfoChange(struct mallinfo prev) {
struct mallinfo curr = mallinfo();
if (prev.arena != curr.arena) {
......@@ -173,7 +163,6 @@ DEFINE_string(device, "OPENCL", "CPU/NEON/OPENCL/HEXAGON");
// Command-line flags for this tool. Main() logs round, restart_round, the
// gpu_* values and omp_num_threads; for the OPENCL device the two hint flags
// are static_cast to GPUPerfHint / GPUPriorityHint and passed to
// mace::ConfigOpenCLRuntime.
DEFINE_int32(round, 1, "round");
DEFINE_int32(restart_round, 1, "restart round");
DEFINE_int32(malloc_check_cycle, -1, "malloc debug check cycle, -1 to disable");
DEFINE_string(gpu_type, "ADRENO", "ADRENO/MALI");
DEFINE_int32(gpu_perf_hint, 2, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(gpu_priority_hint, 1, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH");
DEFINE_int32(omp_num_threads, 8, "num of openmp threads");
......@@ -196,9 +185,7 @@ bool SingleInputAndOutput(const std::vector<int64_t> &input_shape,
// config runtime
if (device_type == DeviceType::OPENCL) {
GPUType gpu_type = ParseGPUType(FLAGS_gpu_type);
mace::ConfigOpenCLRuntime(
gpu_type,
static_cast<GPUPerfHint>(FLAGS_gpu_perf_hint),
static_cast<GPUPriorityHint>(FLAGS_gpu_priority_hint));
} else if (device_type == DeviceType::CPU) {
......@@ -302,9 +289,7 @@ bool MultipleInputOrOutput(
// config runtime
if (device_type == DeviceType::OPENCL) {
GPUType gpu_type = ParseGPUType(FLAGS_gpu_type);
mace::ConfigOpenCLRuntime(
gpu_type,
static_cast<GPUPerfHint>(FLAGS_gpu_perf_hint),
static_cast<GPUPriorityHint>(FLAGS_gpu_priority_hint));
} else if (device_type == DeviceType::CPU) {
......@@ -418,7 +403,6 @@ int Main(int argc, char **argv) {
LOG(INFO) << "device: " << FLAGS_device;
LOG(INFO) << "round: " << FLAGS_round;
LOG(INFO) << "restart_round: " << FLAGS_restart_round;
LOG(INFO) << "gpu_type: " << FLAGS_gpu_type;
LOG(INFO) << "gpu_perf_hint: " << FLAGS_gpu_perf_hint;
LOG(INFO) << "gpu_priority_hint: " << FLAGS_gpu_priority_hint;
LOG(INFO) << "omp_num_threads: " << FLAGS_omp_num_threads;
......
......@@ -61,13 +61,13 @@ enum DataType {
DT_UINT32 = 22
};
// GPU families that callers can name through the gpu_type flag
// ("ADRENO"/"MALI").
enum GPUType { ADRENO = 0, MALI = 1 };
// Performance hint levels for the GPU. The numeric values line up with the
// gpu_perf_hint command-line flag (0:DEFAULT/1:LOW/2:NORMAL/3:HIGH), whose
// integer value is static_cast to this enum.
enum GPUPerfHint {
// No perf-hint context property is added for this value.
PERF_DEFAULT = 0,
// LOW/NORMAL/HIGH map to CL_PERF_HINT_{LOW,NORMAL,HIGH}_QCOM.
PERF_LOW = 1,
PERF_NORMAL = 2,
PERF_HIGH = 3
};
enum GPUPriorityHint {
PRIORITY_DEFAULT = 0,
PRIORITY_LOW = 1,
......@@ -385,7 +385,7 @@ struct MaceInputInfo {
const float *data;
};
// Public runtime-configuration entry points.
// ConfigOpenCLRuntime: sets the GPU perf/priority hints used when the global
// OpenCL runtime is created.
// NOTE(review): two declarations are listed, with and without a leading
// GPUType parameter; this looks like before/after diff lines shown together.
// Confirm which one is current.
void ConfigOpenCLRuntime(GPUType, GPUPerfHint, GPUPriorityHint);
void ConfigOpenCLRuntime(GPUPerfHint, GPUPriorityHint);
// ConfigOmpThreadsAndAffinity: sets the OpenMP thread count and the CPU power
// option.
void ConfigOmpThreadsAndAffinity(int omp_num_threads,
CPUPowerOption power_option);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册