diff --git a/docs/user_guide/advanced_usage.rst b/docs/user_guide/advanced_usage.rst index 738f4448b85a9c2b9ba4d264655ea83e81822f2f..e6284be9b03921cdeb9caf4e9f0767228a8af3a7 100644 --- a/docs/user_guide/advanced_usage.rst +++ b/docs/user_guide/advanced_usage.rst @@ -180,7 +180,6 @@ There are two common advanced use cases: // Include the headers #include "mace/public/mace.h" - #include "mace/public/mace_runtime.h" // If the model_graph_format is code #include "mace/public/${model_name}.h" #include "mace/public/mace_engine_factory.h" @@ -199,7 +198,7 @@ There are two common advanced use cases: device_type, &engine); if (create_engine_status != MaceStatus::MACE_SUCCESS) { - // Report error + // Report error or fall back } // ... Same with the code in basic usage @@ -271,13 +270,24 @@ There are two common advanced use cases: // Include the headers #include "mace/public/mace.h" - #include "mace/public/mace_runtime.h" - - // 0. Set pre-compiled OpenCL binary program file paths and OpenCL parameters file path when available - if (device_type == DeviceType::GPU) { - mace::SetOpenCLBinaryPaths(path/to/opencl_binary_paths); - mace::SetOpenCLParameterPath(path/to/opencl_parameter_file); - } + // 0. Declare the device type (must be the same as ``runtime`` in the configuration file) + DeviceType device_type = DeviceType::GPU; + + // 1. Configuration + MaceStatus status; + MaceEngineConfig config(device_type); + std::shared_ptr<GPUContext> gpu_context; + + const std::string storage_path ="path/to/storage"; + gpu_context = GPUContextBuilder() + .SetStoragePath(storage_path) + .SetOpenCLBinaryPaths(path/to/opencl_binary_paths) + .SetOpenCLParameterPath(path/to/opencl_parameter_file) + .Finalize(); + config.SetGPUContext(gpu_context); + config.SetGPUHints( + static_cast<GPUPerfHint>(GPUPerfHint::PERF_NORMAL), + static_cast<GPUPriorityHint>(GPUPriorityHint::PRIORITY_LOW)); // ... Same with the code in basic usage.
diff --git a/docs/user_guide/basic_usage.rst b/docs/user_guide/basic_usage.rst index 82748c8c64647cb1b698e389a72a1f08a1b23270..5b0379c6db2eb246b23b1adae0c6d6a5e595155a 100644 --- a/docs/user_guide/basic_usage.rst +++ b/docs/user_guide/basic_usage.rst @@ -332,21 +332,28 @@ Please refer to \ ``mace/examples/example.cc``\ for full usage. The following li // Include the headers #include "mace/public/mace.h" - #include "mace/public/mace_runtime.h" - - // 0. Set compiled OpenCL kernel cache, this is used to reduce the - // initialization time since the compiling is too slow. It's suggested - // to set this even when pre-compiled OpenCL program file is provided - // because the OpenCL version upgrade may also leads to kernel - // recompilations. - const std::string file_path ="path/to/opencl_cache_file"; - std::shared_ptr<KVStorageFactory> storage_factory( - new FileStorageFactory(file_path)); - ConfigKVStorageFactory(storage_factory); - - // 1. Declare the device type (must be same with ``runtime`` in configuration file) + + // 0. Declare the device type (must be the same as ``runtime`` in the configuration file) DeviceType device_type = DeviceType::GPU; + // 1. Configuration + MaceStatus status; + MaceEngineConfig config(device_type); + std::shared_ptr<GPUContext> gpu_context; + // Set the path to store compiled OpenCL kernel binaries. + // Please make sure your application has read/write permission on the directory. + // This is used to reduce the initialization time, since compiling the kernels is slow. + // It's suggested to set this even when a pre-compiled OpenCL program file is provided, + // because an OpenCL version upgrade may also lead to kernel recompilation. + const std::string storage_path ="path/to/storage"; + gpu_context = GPUContextBuilder() + .SetStoragePath(storage_path) + .Finalize(); + config.SetGPUContext(gpu_context); + config.SetGPUHints( + static_cast<GPUPerfHint>(GPUPerfHint::PERF_NORMAL), + static_cast<GPUPriorityHint>(GPUPriorityHint::PRIORITY_LOW)); + // 2. Define the input and output tensor names.
std::vector input_names = {...}; std::vector output_names = {...}; diff --git a/mace/core/net.cc b/mace/core/net.cc index 0c538b801bb1f9c8bcbbc109dc80fc0893255dfe..faa4e1d3728ec8783f554f7b50f65111d2e462ad 100644 --- a/mace/core/net.cc +++ b/mace/core/net.cc @@ -87,9 +87,6 @@ MaceStatus SerialNet::Run(RunMetadata *run_metadata) { } else { future.wait_fn(nullptr); } -#ifdef MACE_ENABLE_OPENCL - device_->opencl_runtime()->command_queue().finish(); -#endif } else if (run_metadata != nullptr) { call_stats.start_micros = NowMicros(); MACE_RETURN_IF_ERROR(op->Run(nullptr)); diff --git a/mace/core/runtime/opencl/opencl_runtime.cc b/mace/core/runtime/opencl/opencl_runtime.cc index 967a040f01395d1ea13b25d6c1a1c67650a95c3b..1fe145508fa22f65cd7444f5b7279c534c97a0bd 100644 --- a/mace/core/runtime/opencl/opencl_runtime.cc +++ b/mace/core/runtime/opencl/opencl_runtime.cc @@ -441,6 +441,7 @@ OpenCLRuntime::OpenCLRuntime( } OpenCLRuntime::~OpenCLRuntime() { + command_queue_->finish(); built_program_map_.clear(); // We need to control the destruction order, which has dependencies command_queue_.reset(); diff --git a/mace/examples/cli/example.cc b/mace/examples/cli/example.cc index 99436fa4876bcb1731dab73aac39f70ea8ef136a..d303c0c0c3a78acad16ecec5227e065562a54f73 100644 --- a/mace/examples/cli/example.cc +++ b/mace/examples/cli/example.cc @@ -120,8 +120,8 @@ DEFINE_string(device, "GPU", "CPU/GPU/HEXAGON"); DEFINE_int32(round, 1, "round"); DEFINE_int32(restart_round, 1, "restart round"); DEFINE_int32(malloc_check_cycle, -1, "malloc debug check cycle, -1 to disable"); -DEFINE_int32(gpu_perf_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"); -DEFINE_int32(gpu_priority_hint, 3, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"); +DEFINE_int32(gpu_perf_hint, 2, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"); +DEFINE_int32(gpu_priority_hint, 1, "0:DEFAULT/1:LOW/2:NORMAL/3:HIGH"); DEFINE_int32(omp_num_threads, -1, "num of openmp threads"); DEFINE_int32(cpu_affinity_policy, 1, 
"0:AFFINITY_NONE/1:AFFINITY_BIG_ONLY/2:AFFINITY_LITTLE_ONLY"); diff --git a/mace/ops/ops_test_util.h b/mace/ops/ops_test_util.h index 278c3515f575c2c72eaa9f9a9908db491fc0c3cd..3a248ac17249adadf14123a7b124eb93fc057594 100644 --- a/mace/ops/ops_test_util.h +++ b/mace/ops/ops_test_util.h @@ -131,10 +131,6 @@ class OpsTestNet { op_registry_(new OperatorRegistry()) { } - ~OpsTestNet() { - Sync(); - } - template void AddInputFromArray(const std::string &name, const std::vector &shape,