diff --git a/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp b/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp index e0ce77e27a28dd10089d926dbb2cd77d2b8bf2ea..f3a26a3e6d69b9fe9285c7fdc6120d0cc71be5b4 100644 --- a/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp +++ b/modules/dnn/src/ocl4dnn/include/ocl4dnn.hpp @@ -306,6 +306,7 @@ class OCL4DNNConvSpatial std::string kernel_name_; std::string cache_path_; bool use_cache_path_; // true if cache_path_ directory exists + bool run_auto_tuning_; bool force_auto_tuning_; int32_t kernel_index_; std::vector< cv::Ptr > kernelQueue; diff --git a/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp b/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp index 159319425ef2b3158e7784a7b2e36c10d0edb05f..b5699b253570e10c7b2193ef348546d6264654e8 100644 --- a/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp +++ b/modules/dnn/src/ocl4dnn/src/ocl4dnn_conv_spatial.cpp @@ -67,6 +67,46 @@ typedef std::map kernel_hash_t; static kernel_hash_t kernelConfigMap; static bool defaultConfigLoaded = false; +static std::string sanitize(const std::string& s) +{ + std::string s_ = s; + for (size_t i = 0; i < s_.size(); i++) + { + char c = s_[i]; + if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_')) + { + s_[i] = '_'; + } + } + // TODO add hash? + // s_ = s_ + cv::format("_%08llx", crc64((uchar*)s.c_str(), s.size())); + return s_; +} + +static void initializeGlobalBuiltinConfigurations(const std::string& cache_path) +{ + CV_Assert(defaultConfigLoaded == false); + CV_Assert(kernelConfigMap.empty()); + const size_t numConfigs = sizeof(default_kernel_config_intel)/sizeof(default_kernel_config_intel[0])/2; + for (size_t i = 0; i < numConfigs; i++) + { + std::string key = std::string("Intel(R) Corporation_") + default_kernel_config_intel[2 * i]; + if (!cache_path.empty()) + { + std::string cacheFile = cache_path + sanitize(key); + std::ifstream cachedKernel(cacheFile.c_str()); + if (cachedKernel) + continue; // external configuration found, skip builtin + } + std::pair entry( + key, + default_kernel_config_intel[2 * i + 1]); + kernelConfigMap.insert(entry); + } + defaultConfigLoaded = true; +} + + template OCL4DNNConvSpatial::OCL4DNNConvSpatial(OCL4DNNConvConfig config) { @@ -139,9 +179,8 @@ OCL4DNNConvSpatial::OCL4DNNConvSpatial(OCL4DNNConvConfig config) } } - force_auto_tuning_ = - (use_cache_path_ && !utils::getConfigurationParameterBool("OPENCV_OCL4DNN_DISABLE_AUTO_TUNING", false)) - || utils::getConfigurationParameterBool("OPENCV_OCL4DNN_FORCE_AUTO_TUNING", false); + run_auto_tuning_ = use_cache_path_ && !utils::getConfigurationParameterBool("OPENCV_OCL4DNN_DISABLE_AUTO_TUNING", false); + force_auto_tuning_ = utils::getConfigurationParameterBool("OPENCV_OCL4DNN_FORCE_AUTO_TUNING", false); } template @@ -562,17 +601,7 @@ void OCL4DNNConvSpatial::generateKey() key_ = ocl::Device::getDefault().vendorName() + "_EU" + cv::format("%d", ocl::Device::getDefault().maxComputeUnits()) + "_" + keyBuilder.str(); - key_sanitized_ = key_; - for (size_t i = 0; i < key_sanitized_.size(); i++) - { - char c = key_sanitized_[i]; - if (!((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z') || c == '_')) - { - key_sanitized_[i] = '_'; - } - } - // TODO add hash? - // key_sanitized_ = key_sanitized_ + cv::format("_%08llx", crc64((uchar*)key_.c_str(), key_.size())); + key_sanitized_ = sanitize(key_); short_key_ = keyBuilder.str(); } @@ -1805,7 +1834,7 @@ void OCL4DNNConvSpatial::prepareKernel(const UMat &bottom, UMat &top, calculateBenchmark(bottom, benchData, (use_half_) ? weights_half : weight, bias, numImages); - if (force_auto_tuning_) + if (run_auto_tuning_ || force_auto_tuning_) { setupConvolution(bottom, top, weight, bias, numImages, benchData); } @@ -1820,18 +1849,8 @@ template bool OCL4DNNConvSpatial::loadCachedConfig() { cv::AutoLock lock(kernelConfigMutex); - if (!defaultConfigLoaded) - { - const size_t numConfigs = sizeof(default_kernel_config_intel)/sizeof(default_kernel_config_intel[0])/2; - for (size_t i = 0; i < numConfigs; i++) - { - std::pair entry( - std::string("Intel(R) Corporation_") + default_kernel_config_intel[2 * i], - default_kernel_config_intel[2 * i + 1]); - kernelConfigMap.insert(entry); - } - defaultConfigLoaded = true; - } + if (!defaultConfigLoaded && !force_auto_tuning_) + initializeGlobalBuiltinConfigurations((use_cache_path_ && !cache_path_.empty()) ? (cache_path_ + '/') : std::string()); kernel_hash_t::iterator it = kernelConfigMap.find(key_); if (it != kernelConfigMap.end()) @@ -1904,9 +1923,12 @@ bool OCL4DNNConvSpatial::setupKernelByConfig(int x, int y, int z, int typ template bool OCL4DNNConvSpatial::loadTunedConfig() { + if (force_auto_tuning_) + return false; // don't load results from external storage + if (!use_cache_path_) { - if (cache_path_.empty() && !force_auto_tuning_) + if (cache_path_.empty()) { static int warn_ = 0; if (!warn_)