提交 b37aaa83 编写于 作者: V Vadim Pisarevsky

significantly improved parallel non-local means by using granularity parameter...

Significantly improved parallel non-local means by using the granularity parameter in the parallel_for_ loop. Because the algorithm deals with sliding sums, it's essential that each thread has enough work to do; otherwise the algorithm gets a higher theoretical complexity, and thus there is, at best, no speedup compared to single-threaded code.
上级 feb5b6aa
......@@ -50,42 +50,50 @@ static void fastNlMeansDenoising_( const Mat& src, Mat& dst, const std::vector<f
int templateWindowSize, int searchWindowSize)
{
int hn = (int)h.size();
double granularity = (double)std::max(1., (double)dst.total()/(1 << 17));
switch (CV_MAT_CN(src.type())) {
case 1:
parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<ST, IT, UIT, D, int>(
src, dst, templateWindowSize, searchWindowSize, &h[0]));
src, dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
break;
case 2:
if (hn == 1)
parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, int>(
src, dst, templateWindowSize, searchWindowSize, &h[0]));
src, dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
else
parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, Vec2i>(
src, dst, templateWindowSize, searchWindowSize, &h[0]));
src, dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
break;
case 3:
if (hn == 1)
parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, int>(
src, dst, templateWindowSize, searchWindowSize, &h[0]));
src, dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
else
parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, Vec3i>(
src, dst, templateWindowSize, searchWindowSize, &h[0]));
src, dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
break;
case 4:
if (hn == 1)
parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, int>(
src, dst, templateWindowSize, searchWindowSize, &h[0]));
src, dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
else
parallel_for_(cv::Range(0, src.rows),
FastNlMeansDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, Vec4i>(
src, dst, templateWindowSize, searchWindowSize, &h[0]));
src, dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
break;
default:
CV_Error(Error::StsBadArg,
......@@ -237,6 +245,7 @@ static void fastNlMeansDenoisingMulti_( const std::vector<Mat>& srcImgs, Mat& ds
int templateWindowSize, int searchWindowSize)
{
int hn = (int)h.size();
double granularity = (double)std::max(1., (double)dst.total()/(1 << 16));
switch (srcImgs[0].type())
{
......@@ -244,43 +253,50 @@ static void fastNlMeansDenoisingMulti_( const std::vector<Mat>& srcImgs, Mat& ds
parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<uchar, IT, UIT, D, int>(
srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, &h[0]));
dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
break;
case CV_8UC2:
if (hn == 1)
parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, int>(
srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, &h[0]));
dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
else
parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<Vec<ST, 2>, IT, UIT, D, Vec2i>(
srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, &h[0]));
dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
break;
case CV_8UC3:
if (hn == 1)
parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, int>(
srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, &h[0]));
dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
else
parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<Vec<ST, 3>, IT, UIT, D, Vec3i>(
srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, &h[0]));
dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
break;
case CV_8UC4:
if (hn == 1)
parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, int>(
srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, &h[0]));
dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
else
parallel_for_(cv::Range(0, srcImgs[0].rows),
FastNlMeansMultiDenoisingInvoker<Vec<ST, 4>, IT, UIT, D, Vec4i>(
srcImgs, imgToDenoiseIndex, temporalWindowSize,
dst, templateWindowSize, searchWindowSize, &h[0]));
dst, templateWindowSize, searchWindowSize, &h[0]),
granularity);
break;
default:
CV_Error(Error::StsBadArg,
......
......@@ -156,3 +156,14 @@ TEST(Photo_White, issue_2646)
ASSERT_EQ(0, nonWhitePixelsCount);
}
// Timing smoke test: runs fastNlMeansDenoising once on the shared 5MP test
// image and prints the elapsed wall-clock time. It makes no assertion about
// the output or the measured time; it only reports the number.
TEST(Photo_Denoising, speed)
{
    string imgname = string(cvtest::TS::ptr()->get_data_path()) + "shared/5MP.png";
    // Flag 0 requests a single-channel (grayscale) load.
    Mat src = imread(imgname, 0), dst;
    // imread reports failure by returning an empty Mat rather than throwing.
    // Stop here with a clear message instead of handing an empty image to the
    // denoiser and getting an unrelated failure.
    ASSERT_FALSE(src.empty()) << "Can't read test image: " << imgname;

    double t = (double)getTickCount();
    fastNlMeansDenoising(src, dst, 5, 7, 21);
    t = (double)getTickCount() - t;
    // Convert ticks to milliseconds using the tick frequency (ticks per second).
    printf("execution time: %gms\n", t*1000./getTickFrequency());
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册