提交 88e6daa3 编写于 作者: W Wu Zhiwen

dnn(ocl4dnn): Fix wrong measurement for tuning time

The convolution kernels run on the default queue, so the ocl::Timer used
to measure kernel run time must use the default queue too.
Also remove the now-unused queue parameter from convolve().
Signed-off-by: Wu Zhiwen <zhiwen.wu@intel.com>
上级 1fbdca83
...@@ -217,8 +217,7 @@ class OCL4DNNConvSpatial ...@@ -217,8 +217,7 @@ class OCL4DNNConvSpatial
bool convolve(const UMat &bottom, UMat &top, bool convolve(const UMat &bottom, UMat &top,
const UMat &weight, const UMat &bias, const UMat &weight, const UMat &bias,
int32_t numImages, int32_t numImages,
kernelConfig* config, kernelConfig* config);
const cv::ocl::Queue& queue);
float timedConvolve(const UMat &bottom, UMat &top, float timedConvolve(const UMat &bottom, UMat &top,
const UMat &weight, const UMat &bias, const UMat &weight, const UMat &bias,
int32_t numImages, kernelConfig* config); int32_t numImages, kernelConfig* config);
......
...@@ -381,7 +381,7 @@ bool OCL4DNNConvSpatial<Dtype>::Forward(const UMat& bottom, ...@@ -381,7 +381,7 @@ bool OCL4DNNConvSpatial<Dtype>::Forward(const UMat& bottom,
prepareKernel(bottom, top, weight, bias, numImages); prepareKernel(bottom, top, weight, bias, numImages);
if (bestKernelConfig.empty()) if (bestKernelConfig.empty())
return false; return false;
return convolve(bottom, top, weight, bias, numImages, bestKernelConfig, cv::ocl::Queue::getDefault()); return convolve(bottom, top, weight, bias, numImages, bestKernelConfig);
} }
template<typename Dtype> template<typename Dtype>
...@@ -392,7 +392,7 @@ void OCL4DNNConvSpatial<Dtype>::calculateBenchmark(const UMat &bottom, UMat &ver ...@@ -392,7 +392,7 @@ void OCL4DNNConvSpatial<Dtype>::calculateBenchmark(const UMat &bottom, UMat &ver
options_.str(""); options_.clear(); // clear contents and state flags options_.str(""); options_.clear(); // clear contents and state flags
createBasicKernel(1, 1, 1); createBasicKernel(1, 1, 1);
kernel_index_ = kernelQueue.size() - 1; kernel_index_ = kernelQueue.size() - 1;
convolve(bottom, verifyTop, weight, bias, numImages, kernelQueue[kernel_index_], cv::ocl::Queue::getDefault()); convolve(bottom, verifyTop, weight, bias, numImages, kernelQueue[kernel_index_]);
CV_Assert(phash.find(kernelQueue[kernel_index_]->kernelName) != phash.end()); CV_Assert(phash.find(kernelQueue[kernel_index_]->kernelName) != phash.end());
//unloadProgram(kernelQueue[kernel_index_]->kernelName); //unloadProgram(kernelQueue[kernel_index_]->kernelName);
kernelQueue.pop_back(); kernelQueue.pop_back();
...@@ -649,8 +649,7 @@ void OCL4DNNConvSpatial<float>::CreateSubBuffer(const UMat& buffer, UMat& sub_bu ...@@ -649,8 +649,7 @@ void OCL4DNNConvSpatial<float>::CreateSubBuffer(const UMat& buffer, UMat& sub_bu
template<> template<>
bool OCL4DNNConvSpatial<float>::convolve(const UMat &bottom, UMat &top, bool OCL4DNNConvSpatial<float>::convolve(const UMat &bottom, UMat &top,
const UMat &weight, const UMat &bias, const UMat &weight, const UMat &bias,
int32_t numImages, kernelConfig* config, int32_t numImages, kernelConfig* config)
const cv::ocl::Queue& queue)
{ {
ocl::Program program; ocl::Program program;
phash_t::iterator it = phash.find(config->kernelName); phash_t::iterator it = phash.find(config->kernelName);
...@@ -926,17 +925,17 @@ float OCL4DNNConvSpatial<float>::timedConvolve(const UMat &bottom, UMat &top, ...@@ -926,17 +925,17 @@ float OCL4DNNConvSpatial<float>::timedConvolve(const UMat &bottom, UMat &top,
const UMat &weight, const UMat &bias, const UMat &weight, const UMat &bias,
int32_t numImages, kernelConfig* config) int32_t numImages, kernelConfig* config)
{ {
cv::ocl::Queue profilingQueue; cv::ocl::Queue queue;
try try
{ {
profilingQueue = cv::ocl::Queue::getDefault().getProfilingQueue(); queue = cv::ocl::Queue::getDefault();
} }
catch (const cv::Exception&) catch (const cv::Exception&)
{ {
static int warn_ = 0; static int warn_ = 0;
if (!warn_) if (!warn_)
{ {
std::cout << "OpenCV(ocl4dnn): Can't create OpenCL profiling queue for auto-tuning." << std::endl; std::cout << "OpenCV(ocl4dnn): Can't get OpenCL default queue for auto-tuning." << std::endl;
warn_ = true; warn_ = true;
} }
return 1e6; return 1e6;
...@@ -945,16 +944,16 @@ float OCL4DNNConvSpatial<float>::timedConvolve(const UMat &bottom, UMat &top, ...@@ -945,16 +944,16 @@ float OCL4DNNConvSpatial<float>::timedConvolve(const UMat &bottom, UMat &top,
// warm up. // warm up.
bool saved_tuned = tuned_; bool saved_tuned = tuned_;
tuned_ = false; tuned_ = false;
convolve(bottom, top, weight, bias, numImages, config, profilingQueue); convolve(bottom, top, weight, bias, numImages, config);
cv::ocl::Timer timer(profilingQueue); cv::ocl::Timer timer(queue);
timer.start(); timer.start();
bool res = true;; bool res = true;;
dbgPrint(std::cout << "Benchmarking kernel: " << config->kernelName << std::endl); dbgPrint(std::cout << "Benchmarking kernel: " << config->kernelName << std::endl);
tuned_ = true; tuned_ = true;
int loop_cnt = 4; int loop_cnt = 4;
for (int i = 0; i < loop_cnt; i++) { for (int i = 0; i < loop_cnt; i++) {
res = convolve(bottom, top, weight, bias, numImages, config, profilingQueue); res = convolve(bottom, top, weight, bias, numImages, config);
if (!res) if (!res)
break; break;
} }
...@@ -1009,7 +1008,7 @@ bool OCL4DNNConvSpatial<float>::verifyResult(const UMat &bottom, ...@@ -1009,7 +1008,7 @@ bool OCL4DNNConvSpatial<float>::verifyResult(const UMat &bottom,
top.zeros(4, sz, CV_32FC1); top.zeros(4, sz, CV_32FC1);
bool saved_tuned = tuned_; bool saved_tuned = tuned_;
tuned_ = false; tuned_ = false;
convolve(bottom, top, weight, bias, numImages, config, cv::ocl::Queue::getDefault()); convolve(bottom, top, weight, bias, numImages, config);
tuned_ = saved_tuned; tuned_ = saved_tuned;
float *data = (float *)top.getMat(ACCESS_READ).ptr<float>(); float *data = (float *)top.getMat(ACCESS_READ).ptr<float>();
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册