diff --git a/mace/benchmark/benchmark_model.cc b/mace/benchmark/benchmark_model.cc index 38db846b2a5c651e4dead8357ce1753649f6bd65..063e4f4c270c59db13314c12b0919e73bc84c395 100644 --- a/mace/benchmark/benchmark_model.cc +++ b/mace/benchmark/benchmark_model.cc @@ -212,7 +212,6 @@ DEFINE_bool(show_flops, true, "whether to estimate the model's FLOPs"); DEFINE_int32(warmup_runs, 1, "how many runs to initialize model"); DEFINE_string(model_data_file, "", "model data file name, used when EMBED_MODEL_DATA set to 0"); -DEFINE_string(gpu_type, "ADRENO", "ADRENO/MALI"); DEFINE_int32(gpu_perf_hint, 2, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"); DEFINE_int32(gpu_priority_hint, 1, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"); DEFINE_int32(omp_num_threads, 8, "num of openmp threads"); @@ -227,7 +226,6 @@ int Main(int argc, char **argv) { LOG(INFO) << "Benchmark name: [" << FLAGS_benchmark_name << "]"; LOG(INFO) << "Device: [" << FLAGS_device << "]"; - LOG(INFO) << "gpu_type: [" << FLAGS_gpu_type << "]"; LOG(INFO) << "gpu_perf_hint: [" << FLAGS_gpu_perf_hint << "]"; LOG(INFO) << "gpu_priority_hint: [" << FLAGS_gpu_priority_hint << "]"; LOG(INFO) << "omp_num_threads: [" << FLAGS_omp_num_threads << "]"; @@ -270,10 +268,7 @@ int Main(int argc, char **argv) { // config runtime if (device_type == OPENCL) { - GPUType gpu_type = ADRENO; - if (FLAGS_gpu_type == "MALI") gpu_type = MALI; mace::ConfigOpenCLRuntime( - gpu_type, static_cast(FLAGS_gpu_perf_hint), static_cast(FLAGS_gpu_priority_hint)); } else if (device_type == CPU) { diff --git a/mace/core/mace.cc b/mace/core/mace.cc index 48d29343c173b397c3a3956b5503b832d6835ccd..2444f66365682ac89567a0116250e759c280a7da 100644 --- a/mace/core/mace.cc +++ b/mace/core/mace.cc @@ -351,19 +351,17 @@ const OperatorDef &NetDef::op(const int idx) const { return op_[idx]; } -void ConfigOpenCLRuntime(GPUType gpu_type, - GPUPerfHint gpu_perf_hint, +void ConfigOpenCLRuntime(GPUPerfHint gpu_perf_hint, GPUPriorityHint gpu_priority_hint) { - LOG(INFO) << "Config OpenCL Runtime: gpu_type: " << gpu_type - << ", gpu_perf_hint: " << gpu_perf_hint - << ", gpu_priority_hint: " << gpu_priority_hint; - OpenCLRuntime::CreateGlobal(gpu_type, gpu_perf_hint, gpu_priority_hint); + VLOG(1) << "Set GPU configurations, gpu_perf_hint: " << gpu_perf_hint + << ", gpu_priority_hint: " << gpu_priority_hint; + OpenCLRuntime::CreateGlobal(gpu_perf_hint, gpu_priority_hint); } void ConfigOmpThreadsAndAffinity(int omp_num_threads, CPUPowerOption power_option) { - LOG(INFO) << "Config CPU Runtime: omp_num_threads: " << omp_num_threads - << ", cpu_power_option: " << power_option; + VLOG(1) << "Config CPU Runtime: omp_num_threads: " << omp_num_threads + << ", cpu_power_option: " << power_option; SetOmpThreadsAndAffinity(omp_num_threads, power_option); } diff --git a/mace/core/runtime/opencl/opencl_allocator.cc b/mace/core/runtime/opencl/opencl_allocator.cc index 76bc437db1595bca5db41aa0a2c698c02a3f523c..a9eb73e8dfbd00706ae569af48745abcdfd97d15 100644 --- a/mace/core/runtime/opencl/opencl_allocator.cc +++ b/mace/core/runtime/opencl/opencl_allocator.cc @@ -41,7 +41,6 @@ void *OpenCLAllocator::New(size_t nbytes) const { CL_MEM_READ_WRITE | CL_MEM_ALLOC_HOST_PTR, nbytes, nullptr, &error); MACE_CHECK(error == CL_SUCCESS) << "error code: " << error; - MACE_CHECK_NOTNULL(buffer); return static_cast(buffer); } diff --git a/mace/core/runtime/opencl/opencl_runtime.cc b/mace/core/runtime/opencl/opencl_runtime.cc index f2f5be1bba3bebf9e3d43b5563778e5e8ebf5179..c5b6557bd550d9c2d9f88a130f76aeb7e725ff22 100644 --- a/mace/core/runtime/opencl/opencl_runtime.cc +++ b/mace/core/runtime/opencl/opencl_runtime.cc @@ -70,66 +70,61 @@ std::unique_ptr OpenCLRuntime::runtime_instance_ = nullptr; OpenCLRuntime *OpenCLRuntime::Global() { // FIXME: not thread safe if (runtime_instance_ == nullptr) { - return CreateGlobal(GPUType::ADRENO, GPUPerfHint::PERF_DEFAULT, + return CreateGlobal(GPUPerfHint::PERF_DEFAULT, GPUPriorityHint::PRIORITY_DEFAULT); } return runtime_instance_.get(); } -OpenCLRuntime *OpenCLRuntime::CreateGlobal(GPUType gpu_type, - GPUPerfHint gpu_perf_hint, +OpenCLRuntime *OpenCLRuntime::CreateGlobal(GPUPerfHint gpu_perf_hint, GPUPriorityHint gpu_priority_hint) { runtime_instance_ = - std::unique_ptr(new OpenCLRuntime(gpu_type, gpu_perf_hint, + std::unique_ptr(new OpenCLRuntime(gpu_perf_hint, gpu_priority_hint)); return runtime_instance_.get(); } -void ParseOpenCLRuntimeConfig(std::vector *properties, - GPUType gpu_type, - GPUPerfHint gpu_perf_hint, - GPUPriorityHint gpu_priority_hint) { +void GetAdrenoContextProperties(std::vector *properties, + GPUPerfHint gpu_perf_hint, + GPUPriorityHint gpu_priority_hint) { MACE_CHECK_NOTNULL(properties); - if (gpu_type == GPUType::ADRENO) { - switch (gpu_perf_hint) { - case GPUPerfHint::PERF_LOW: - properties->push_back(CL_CONTEXT_PERF_HINT_QCOM); - properties->push_back(CL_PERF_HINT_LOW_QCOM); - break; - case GPUPerfHint::PERF_NORMAL: - properties->push_back(CL_CONTEXT_PERF_HINT_QCOM); - properties->push_back(CL_PERF_HINT_NORMAL_QCOM); - break; - case GPUPerfHint::PERF_HIGH: - properties->push_back(CL_CONTEXT_PERF_HINT_QCOM); - properties->push_back(CL_PERF_HINT_HIGH_QCOM); - break; - default:break; - } - switch (gpu_priority_hint) { - case GPUPriorityHint::PRIORITY_LOW: - properties->push_back(CL_CONTEXT_PRIORITY_HINT_QCOM); - properties->push_back(CL_PRIORITY_HINT_LOW_QCOM); - break; - case GPUPriorityHint::PRIORITY_NORMAL: - properties->push_back(CL_CONTEXT_PRIORITY_HINT_QCOM); - properties->push_back(CL_PRIORITY_HINT_NORMAL_QCOM); - break; - case GPUPriorityHint::PRIORITY_HIGH: - properties->push_back(CL_CONTEXT_PRIORITY_HINT_QCOM); - properties->push_back(CL_PRIORITY_HINT_HIGH_QCOM); - break; - default:break; - } - } else { - LOG(WARNING) << "GPU options are only supported by Adreno GPU"; + switch (gpu_perf_hint) { + case GPUPerfHint::PERF_LOW: + properties->push_back(CL_CONTEXT_PERF_HINT_QCOM); + properties->push_back(CL_PERF_HINT_LOW_QCOM); + break; + case GPUPerfHint::PERF_NORMAL: + properties->push_back(CL_CONTEXT_PERF_HINT_QCOM); + properties->push_back(CL_PERF_HINT_NORMAL_QCOM); + break; + case GPUPerfHint::PERF_HIGH: + properties->push_back(CL_CONTEXT_PERF_HINT_QCOM); + properties->push_back(CL_PERF_HINT_HIGH_QCOM); + break; + default: + break; + } + switch (gpu_priority_hint) { + case GPUPriorityHint::PRIORITY_LOW: + properties->push_back(CL_CONTEXT_PRIORITY_HINT_QCOM); + properties->push_back(CL_PRIORITY_HINT_LOW_QCOM); + break; + case GPUPriorityHint::PRIORITY_NORMAL: + properties->push_back(CL_CONTEXT_PRIORITY_HINT_QCOM); + properties->push_back(CL_PRIORITY_HINT_NORMAL_QCOM); + break; + case GPUPriorityHint::PRIORITY_HIGH: + properties->push_back(CL_CONTEXT_PRIORITY_HINT_QCOM); + properties->push_back(CL_PRIORITY_HINT_HIGH_QCOM); + break; + default: + break; } - // The properties list should be terminated with 0 properties->push_back(0); } -OpenCLRuntime::OpenCLRuntime(GPUType gpu_type, GPUPerfHint gpu_perf_hint, +OpenCLRuntime::OpenCLRuntime(GPUPerfHint gpu_perf_hint, GPUPriorityHint gpu_priority_hint) { LoadOpenCLLibrary(); @@ -151,12 +146,19 @@ OpenCLRuntime::OpenCLRuntime(GPUType gpu_type, GPUPerfHint gpu_perf_hint, } bool gpu_detected = false; - cl::Device gpu_device; + bool is_adreno_gpu = false; + device_ = std::make_shared(); for (auto device : all_devices) { if (device.getInfo() == CL_DEVICE_TYPE_GPU) { - gpu_device = device; + *device_ = device; gpu_detected = true; - VLOG(1) << "Using device: " << device.getInfo(); + const std::string device_name = device.getInfo(); + constexpr const char *kQualcommAdrenoGPUStr = "QUALCOMM Adreno(TM)"; + if (device_name == kQualcommAdrenoGPUStr) { + is_adreno_gpu = true; + } + + VLOG(1) << "Using device: " << device_name; break; } } @@ -172,28 +174,38 @@ OpenCLRuntime::OpenCLRuntime(GPUType gpu_type, GPUPerfHint gpu_perf_hint, properties |= CL_QUEUE_PROFILING_ENABLE; } - std::vector context_properties; - context_properties.reserve(5); - ParseOpenCLRuntimeConfig(&context_properties, gpu_type, gpu_perf_hint, - gpu_priority_hint); + cl_int err; + if (is_adreno_gpu) { + std::vector context_properties; + context_properties.reserve(5); + GetAdrenoContextProperties(&context_properties, gpu_perf_hint, + gpu_priority_hint); + context_ = std::shared_ptr( + new cl::Context({*device_}, context_properties.data(), + nullptr, nullptr, &err)); + } else { + context_ = std::shared_ptr( + new cl::Context({*device_}, nullptr, nullptr, nullptr, &err)); + } + MACE_CHECK(err == CL_SUCCESS) << "error code: " << err; - cl::Context context({gpu_device}, context_properties.data()); - cl::CommandQueue command_queue(context, gpu_device, properties); + command_queue_ = std::make_shared(*context_, + *device_, + properties, + &err); + MACE_CHECK(err == CL_SUCCESS) << "error code: " << err; const char *kernel_path = getenv("MACE_KERNEL_PATH"); this->kernel_path_ = std::string(kernel_path == nullptr ? "" : kernel_path) + "/"; - - this->device_ = new cl::Device(gpu_device); - this->context_ = new cl::Context(context); - this->command_queue_ = new cl::CommandQueue(command_queue); } OpenCLRuntime::~OpenCLRuntime() { built_program_map_.clear(); - delete command_queue_; - delete context_; - delete device_; + // We need to control the destruction order, which has dependencies + command_queue_.reset(); + context_.reset(); + device_.reset(); UnloadOpenCLLibrary(); } diff --git a/mace/core/runtime/opencl/opencl_runtime.h b/mace/core/runtime/opencl/opencl_runtime.h index f5e2c25bb5d0a62b0bc403791bb8f18df9b80938..801ee4711dcf07441ece9dcca4bc3ce13c3ff65f 100644 --- a/mace/core/runtime/opencl/opencl_runtime.h +++ b/mace/core/runtime/opencl/opencl_runtime.h @@ -39,7 +39,7 @@ class OpenCLProfilingTimer : public Timer { class OpenCLRuntime { public: static OpenCLRuntime *Global(); - static OpenCLRuntime *CreateGlobal(GPUType, GPUPerfHint, GPUPriorityHint); + static OpenCLRuntime *CreateGlobal(GPUPerfHint, GPUPriorityHint); cl::Context &context(); cl::Device &device(); @@ -55,7 +55,7 @@ class OpenCLRuntime { ~OpenCLRuntime(); private: - OpenCLRuntime(GPUType, GPUPerfHint, GPUPriorityHint); + OpenCLRuntime(GPUPerfHint, GPUPriorityHint); OpenCLRuntime(const OpenCLRuntime &) = delete; OpenCLRuntime &operator=(const OpenCLRuntime &) = delete; @@ -68,9 +68,9 @@ class OpenCLRuntime { private: // All OpenCL object must be a pointer and manually deleted before unloading // OpenCL library. - cl::Context *context_; - cl::Device *device_; - cl::CommandQueue *command_queue_; + std::shared_ptr context_; + std::shared_ptr device_; + std::shared_ptr command_queue_; std::map built_program_map_; std::mutex program_build_mutex_; std::string kernel_path_; diff --git a/mace/core/testing/test_benchmark_main.cc b/mace/core/testing/test_benchmark_main.cc index 143b5ef623dd139a7020989d7d97727018afac8e..9c2d1daa916513054796ec8ad22caca59869c772 100644 --- a/mace/core/testing/test_benchmark_main.cc +++ b/mace/core/testing/test_benchmark_main.cc @@ -9,31 +9,18 @@ #include "mace/public/mace.h" DEFINE_string(pattern, "all", "op benchmark pattern, eg:.*CONV.*"); -DEFINE_string(gpu_type, "ADRENO", "ADRENO/MALI"); DEFINE_int32(gpu_perf_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"); DEFINE_int32(gpu_priority_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"); DEFINE_int32(omp_num_threads, 1, "num of openmp threads"); DEFINE_int32(cpu_power_option, 1, "0:DEFAULT/1:HIGH_PERFORMANCE/2:BATTERY_SAVE"); -mace::GPUType ParseGPUType(const std::string &gpu_type_str) { - if (gpu_type_str.compare("ADRENO") == 0) { - return mace::GPUType::ADRENO; - } else if (gpu_type_str.compare("MALI") == 0) { - return mace::GPUType::MALI; - } else { - return mace::GPUType::ADRENO; - } -} - int main(int argc, char **argv) { gflags::SetUsageMessage("some usage message"); gflags::ParseCommandLineFlags(&argc, &argv, true); // config runtime - mace::GPUType gpu_type = ParseGPUType(FLAGS_gpu_type); mace::ConfigOpenCLRuntime( - gpu_type, static_cast(FLAGS_gpu_perf_hint), static_cast(FLAGS_gpu_priority_hint)); mace::ConfigOmpThreadsAndAffinity( diff --git a/mace/examples/mace_run.cc b/mace/examples/mace_run.cc index 373b2455a3cba8e70911c652d04d94fd270834fb..1bfb79273821884462fc529909521c3e42e590d6 100644 --- a/mace/examples/mace_run.cc +++ b/mace/examples/mace_run.cc @@ -98,16 +98,6 @@ DeviceType ParseDeviceType(const std::string &device_str) { } } -GPUType ParseGPUType(const std::string &gpu_type_str) { - if (gpu_type_str.compare("ADRENO") == 0) { - return GPUType::ADRENO; - } else if (gpu_type_str.compare("MALI") == 0) { - return GPUType::MALI; - } else { - return GPUType::ADRENO; - } -} - struct mallinfo LogMallinfoChange(struct mallinfo prev) { struct mallinfo curr = mallinfo(); if (prev.arena != curr.arena) { @@ -173,7 +163,6 @@ DEFINE_string(device, "OPENCL", "CPU/NEON/OPENCL/HEXAGON"); DEFINE_int32(round, 1, "round"); DEFINE_int32(restart_round, 1, "restart round"); DEFINE_int32(malloc_check_cycle, -1, "malloc debug check cycle, -1 to disable"); -DEFINE_string(gpu_type, "ADRENO", "ADRENO/MALI"); DEFINE_int32(gpu_perf_hint, 2, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"); DEFINE_int32(gpu_priority_hint, 1, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"); DEFINE_int32(omp_num_threads, 8, "num of openmp threads"); @@ -196,9 +185,7 @@ bool SingleInputAndOutput(const std::vector &input_shape, // config runtime if (device_type == DeviceType::OPENCL) { - GPUType gpu_type = ParseGPUType(FLAGS_gpu_type); mace::ConfigOpenCLRuntime( - gpu_type, static_cast(FLAGS_gpu_perf_hint), static_cast(FLAGS_gpu_priority_hint)); } else if (device_type == DeviceType::CPU) { @@ -302,9 +289,7 @@ bool MultipleInputOrOutput( // config runtime if (device_type == DeviceType::OPENCL) { - GPUType gpu_type = ParseGPUType(FLAGS_gpu_type); mace::ConfigOpenCLRuntime( - gpu_type, static_cast(FLAGS_gpu_perf_hint), static_cast(FLAGS_gpu_priority_hint)); } else if (device_type == DeviceType::CPU) { @@ -418,7 +403,6 @@ int Main(int argc, char **argv) { LOG(INFO) << "device: " << FLAGS_device; LOG(INFO) << "round: " << FLAGS_round; LOG(INFO) << "restart_round: " << FLAGS_restart_round; - LOG(INFO) << "gpu_type: " << FLAGS_gpu_type; LOG(INFO) << "gpu_perf_hint: " << FLAGS_gpu_perf_hint; LOG(INFO) << "gpu_priority_hint: " << FLAGS_gpu_priority_hint; LOG(INFO) << "omp_num_threads: " << FLAGS_omp_num_threads; diff --git a/mace/public/mace.h b/mace/public/mace.h index 725d262660f88d2a0ec046dc9d55ada2f836d05a..eb74e6749517b1c7cd616a0073bd0c3c7965f916 100644 --- a/mace/public/mace.h +++ b/mace/public/mace.h @@ -61,13 +61,13 @@ enum DataType { DT_UINT32 = 22 }; -enum GPUType { ADRENO = 0, MALI = 1 }; enum GPUPerfHint { PERF_DEFAULT = 0, PERF_LOW = 1, PERF_NORMAL = 2, PERF_HIGH = 3 }; + enum GPUPriorityHint { PRIORITY_DEFAULT = 0, PRIORITY_LOW = 1, @@ -385,7 +385,7 @@ struct MaceInputInfo { const float *data; }; -void ConfigOpenCLRuntime(GPUType, GPUPerfHint, GPUPriorityHint); +void ConfigOpenCLRuntime(GPUPerfHint, GPUPriorityHint); void ConfigOmpThreadsAndAffinity(int omp_num_threads, CPUPowerOption power_option);