From da017fbeb9b3b27930dfce7cbbf943dd14c4d0ec Mon Sep 17 00:00:00 2001 From: Vladislav Vinogradov Date: Mon, 17 Dec 2012 10:39:19 +0400 Subject: [PATCH] fast optical flow bm implementation --- modules/gpu/app/nv_perf_test/main.cpp | 59 ++++- modules/gpu/include/opencv2/gpu/gpu.hpp | 11 + modules/gpu/perf/perf_video.cpp | 49 ++++ modules/gpu/src/cuda/optflowbm.cu | 247 ++++++++++++++++++ modules/gpu/src/optflowbm.cpp | 38 +++ modules/gpu/test/test_video.cpp | 115 ++++++++ ...tvl1_optical_flow.cpp => optical_flow.cpp} | 101 ++++++- 7 files changed, 607 insertions(+), 13 deletions(-) rename samples/gpu/{tvl1_optical_flow.cpp => optical_flow.cpp} (61%) diff --git a/modules/gpu/app/nv_perf_test/main.cpp b/modules/gpu/app/nv_perf_test/main.cpp index 3d457e492b..928b30a19e 100644 --- a/modules/gpu/app/nv_perf_test/main.cpp +++ b/modules/gpu/app/nv_perf_test/main.cpp @@ -94,13 +94,13 @@ PERF_TEST_P(Image, HoughLinesP, { cv::gpu::GpuMat d_image(image); cv::gpu::GpuMat d_lines; - cv::gpu::CannyBuf d_buf; + cv::gpu::HoughLinesBuf d_buf; - cv::gpu::HoughLinesP(d_image, d_lines, d_buf, minLineLenght, maxLineGap); + cv::gpu::HoughLinesP(d_image, d_lines, d_buf, rho, theta, minLineLenght, maxLineGap); TEST_CYCLE() { - cv::gpu::HoughLinesP(d_image, d_lines, d_buf, minLineLenght, maxLineGap); + cv::gpu::HoughLinesP(d_image, d_lines, d_buf, rho, theta, minLineLenght, maxLineGap); } } else @@ -434,3 +434,56 @@ PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, OpticalFlowBM, SANITY_CHECK(0); } + +PERF_TEST_P(ImagePair_BlockSize_ShiftSize_MaxRange, FastOpticalFlowBM, + testing::Combine( + testing::Values(string_pair("im1_1280x800.jpg", "im2_1280x800.jpg")), + testing::Values(cv::Size(16, 16)), + testing::Values(cv::Size(1, 1)), + testing::Values(cv::Size(16, 16)) + )) +{ + declare.time(1000); + + const string_pair fileNames = std::tr1::get<0>(GetParam()); + const cv::Size block_size = std::tr1::get<1>(GetParam()); + const cv::Size shift_size = std::tr1::get<2>(GetParam()); + const cv::Size max_range = std::tr1::get<3>(GetParam()); + + cv::Mat src1 = cv::imread(fileNames.first, cv::IMREAD_GRAYSCALE); + if (src1.empty()) + FAIL() << "Unable to load source image [" << fileNames.first << "]"; + + cv::Mat src2 = cv::imread(fileNames.second, cv::IMREAD_GRAYSCALE); + if (src2.empty()) + FAIL() << "Unable to load source image [" << fileNames.second << "]"; + + if (PERF_RUN_GPU()) + { + cv::gpu::GpuMat d_src1(src1); + cv::gpu::GpuMat d_src2(src2); + cv::gpu::GpuMat d_velx, d_vely; + + cv::gpu::FastOpticalFlowBM fastBM; + + fastBM(d_src1, d_src2, d_velx, d_vely, max_range.width, block_size.width); + + TEST_CYCLE_N(10) + { + fastBM(d_src1, d_src2, d_velx, d_vely, max_range.width, block_size.width); + } + } + else + { + cv::Mat velx, vely; + + calcOpticalFlowBM(src1, src2, block_size, shift_size, max_range, false, velx, vely); + + TEST_CYCLE_N(10) + { + calcOpticalFlowBM(src1, src2, block_size, shift_size, max_range, false, velx, vely); + } + } + + SANITY_CHECK(0); +} diff --git a/modules/gpu/include/opencv2/gpu/gpu.hpp b/modules/gpu/include/opencv2/gpu/gpu.hpp index 298bf918a3..895afb274e 100644 --- a/modules/gpu/include/opencv2/gpu/gpu.hpp +++ b/modules/gpu/include/opencv2/gpu/gpu.hpp @@ -2129,6 +2129,17 @@ CV_EXPORTS void calcOpticalFlowBM(const GpuMat& prev, const GpuMat& curr, GpuMat& velx, GpuMat& vely, GpuMat& buf, Stream& stream = Stream::Null()); +class CV_EXPORTS FastOpticalFlowBM +{ +public: + void operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy, int search_window = 21, int block_window = 7, Stream& s = Stream::Null()); + +private: + GpuMat buffer; + GpuMat extended_I0; + GpuMat extended_I1; +}; + //! Interpolate frames (images) using provided optical flow (displacement field). //! frame0 - frame 0 (32-bit floating point images, single channel) diff --git a/modules/gpu/perf/perf_video.cpp b/modules/gpu/perf/perf_video.cpp index edbe2aacd6..5afe0d15f6 100644 --- a/modules/gpu/perf/perf_video.cpp +++ b/modules/gpu/perf/perf_video.cpp @@ -512,6 +512,55 @@ PERF_TEST_P(ImagePair, Video_OpticalFlowBM, } } +PERF_TEST_P(ImagePair, Video_FastOpticalFlowBM, + Values(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png"))) +{ + declare.time(400); + + cv::Mat frame0 = readImage(GetParam().first, cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(frame0.empty()); + + cv::Mat frame1 = readImage(GetParam().second, cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(frame1.empty()); + + cv::Size block_size(16, 16); + cv::Size shift_size(1, 1); + cv::Size max_range(16, 16); + + if (PERF_RUN_GPU()) + { + cv::gpu::GpuMat d_frame0(frame0); + cv::gpu::GpuMat d_frame1(frame1); + cv::gpu::GpuMat d_velx, d_vely; + + cv::gpu::FastOpticalFlowBM fastBM; + + fastBM(d_frame0, d_frame1, d_velx, d_vely, max_range.width, block_size.width); + + TEST_CYCLE() + { + fastBM(d_frame0, d_frame1, d_velx, d_vely, max_range.width, block_size.width); + } + + GPU_SANITY_CHECK(d_velx); + GPU_SANITY_CHECK(d_vely); + } + else + { + cv::Mat velx, vely; + + calcOpticalFlowBM(frame0, frame1, block_size, shift_size, max_range, false, velx, vely); + + TEST_CYCLE() + { + calcOpticalFlowBM(frame0, frame1, block_size, shift_size, max_range, false, velx, vely); + } + + CPU_SANITY_CHECK(velx); + CPU_SANITY_CHECK(vely); + } +} + ////////////////////////////////////////////////////// // FGDStatModel diff --git a/modules/gpu/src/cuda/optflowbm.cu b/modules/gpu/src/cuda/optflowbm.cu index 7e4acd9003..f9090abdc0 100644 --- a/modules/gpu/src/cuda/optflowbm.cu +++ b/modules/gpu/src/cuda/optflowbm.cu @@ -44,6 +44,8 @@ #include "opencv2/gpu/device/common.hpp" #include "opencv2/gpu/device/limits.hpp" +#include "opencv2/gpu/device/functional.hpp" +#include "opencv2/gpu/device/reduce.hpp" using namespace cv::gpu; using namespace cv::gpu::device; @@ -164,4 +166,249 @@ namespace optflowbm } } +///////////////////////////////////////////////////////// +// Fast approximate version + +namespace optflowbm_fast +{ + enum + { + CTA_SIZE = 128, + + TILE_COLS = 128, + TILE_ROWS = 32, + + STRIDE = CTA_SIZE + }; + + template __device__ __forceinline__ int calcDist(T a, T b) + { + return ::abs(a - b); + } + + template struct FastOptFlowBM + { + + int search_radius; + int block_radius; + + int search_window; + int block_window; + + PtrStepSz I0; + PtrStep I1; + + mutable PtrStepi buffer; + + FastOptFlowBM(int search_window_, int block_window_, + PtrStepSz I0_, PtrStepSz I1_, + PtrStepi buffer_) : + search_radius(search_window_ / 2), block_radius(block_window_ / 2), + search_window(search_window_), block_window(block_window_), + I0(I0_), I1(I1_), + buffer(buffer_) + { + } + + __device__ void initSums_BruteForce(int i, int j, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const + { + for (int index = threadIdx.x; index < search_window * search_window; index += STRIDE) + { + dist_sums[index] = 0; + + for (int tx = 0; tx < block_window; ++tx) + col_sums(tx, index) = 0; + + int y = index / search_window; + int x = index - y * search_window; + + int ay = i; + int ax = j; + + int by = i + y - search_radius; + int bx = j + x - search_radius; + + for (int tx = -block_radius; tx <= block_radius; ++tx) + { + int col_sum = 0; + for (int ty = -block_radius; ty <= block_radius; ++ty) + { + int dist = calcDist(I0(ay + ty, ax + tx), I1(by + ty, bx + tx)); + + dist_sums[index] += dist; + col_sum += dist; + } + + col_sums(tx + block_radius, index) = col_sum; + } + + up_col_sums(j, index) = col_sums(block_window - 1, index); + } + } + + __device__ void shiftRight_FirstRow(int i, int j, int first, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const + { + for (int index = threadIdx.x; index < search_window * search_window; index += STRIDE) + { + int y = index / search_window; + int x = index - y * search_window; + + int ay = i; + int ax = j + block_radius; + + int by = i + y - search_radius; + int bx = j + x - search_radius + block_radius; + + int col_sum = 0; + + for (int ty = -block_radius; ty <= block_radius; ++ty) + col_sum += calcDist(I0(ay + ty, ax), I1(by + ty, bx)); + + dist_sums[index] += col_sum - col_sums(first, index); + + col_sums(first, index) = col_sum; + up_col_sums(j, index) = col_sum; + } + } + + __device__ void shiftRight_UpSums(int i, int j, int first, int* dist_sums, PtrStepi& col_sums, PtrStepi& up_col_sums) const + { + int ay = i; + int ax = j + block_radius; + + T a_up = I0(ay - block_radius - 1, ax); + T a_down = I0(ay + block_radius, ax); + + for(int index = threadIdx.x; index < search_window * search_window; index += STRIDE) + { + int y = index / search_window; + int x = index - y * search_window; + + int by = i + y - search_radius; + int bx = j + x - search_radius + block_radius; + + T b_up = I1(by - block_radius - 1, bx); + T b_down = I1(by + block_radius, bx); + + int col_sum = up_col_sums(j, index) + calcDist(a_down, b_down) - calcDist(a_up, b_up); + + dist_sums[index] += col_sum - col_sums(first, index); + col_sums(first, index) = col_sum; + up_col_sums(j, index) = col_sum; + } + } + + __device__ void convolve_window(int i, int j, const int* dist_sums, float& velx, float& vely) const + { + int bestDist = numeric_limits::max(); + int bestInd = -1; + + for (int index = threadIdx.x; index < search_window * search_window; index += STRIDE) + { + int curDist = dist_sums[index]; + if (curDist < bestDist) + { + bestDist = curDist; + bestInd = index; + } + } + + __shared__ int cta_dist_buffer[CTA_SIZE]; + __shared__ int cta_ind_buffer[CTA_SIZE]; + + reduceKeyVal(cta_dist_buffer, bestDist, cta_ind_buffer, bestInd, threadIdx.x, less()); + + if (threadIdx.x == 0) + { + int y = bestInd / search_window; + int x = bestInd - y * search_window; + + velx = x - search_radius; + vely = y - search_radius; + } + } + + __device__ void operator()(PtrStepf velx, PtrStepf vely) const + { + int tbx = blockIdx.x * TILE_COLS; + int tby = blockIdx.y * TILE_ROWS; + + int tex = ::min(tbx + TILE_COLS, I0.cols); + int tey = ::min(tby + TILE_ROWS, I0.rows); + + PtrStepi col_sums; + col_sums.data = buffer.ptr(I0.cols + blockIdx.x * block_window) + blockIdx.y * search_window * search_window; + col_sums.step = buffer.step; + + PtrStepi up_col_sums; + up_col_sums.data = buffer.data + blockIdx.y * search_window * search_window; + up_col_sums.step = buffer.step; + + extern __shared__ int dist_sums[]; //search_window * search_window + + int first = 0; + + for (int i = tby; i < tey; ++i) + { + for (int j = tbx; j < tex; ++j) + { + __syncthreads(); + + if (j == tbx) + { + initSums_BruteForce(i, j, dist_sums, col_sums, up_col_sums); + first = 0; + } + else + { + if (i == tby) + shiftRight_FirstRow(i, j, first, dist_sums, col_sums, up_col_sums); + else + shiftRight_UpSums(i, j, first, dist_sums, col_sums, up_col_sums); + + first = (first + 1) % block_window; + } + + __syncthreads(); + + convolve_window(i, j, dist_sums, velx(i, j), vely(i, j)); + } + } + } + + }; + + template __global__ void optflowbm_fast_kernel(const FastOptFlowBM fbm, PtrStepf velx, PtrStepf vely) + { + fbm(velx, vely); + } + + void get_buffer_size(int src_cols, int src_rows, int search_window, int block_window, int& buffer_cols, int& buffer_rows) + { + dim3 grid(divUp(src_cols, TILE_COLS), divUp(src_rows, TILE_ROWS)); + + buffer_cols = search_window * search_window * grid.y; + buffer_rows = src_cols + block_window * grid.x; + } + + template + void calc(PtrStepSzb I0, PtrStepSzb I1, PtrStepSzf velx, PtrStepSzf vely, PtrStepi buffer, int search_window, int block_window, cudaStream_t stream) + { + FastOptFlowBM fbm(search_window, block_window, I0, I1, buffer); + + dim3 block(CTA_SIZE, 1); + dim3 grid(divUp(I0.cols, TILE_COLS), divUp(I0.rows, TILE_ROWS)); + + size_t smem = search_window * search_window * sizeof(int); + + optflowbm_fast_kernel<<>>(fbm, velx, vely); + cudaSafeCall ( cudaGetLastError () ); + + if (stream == 0) + cudaSafeCall( cudaDeviceSynchronize() ); + } + + template void calc(PtrStepSzb I0, PtrStepSzb I1, PtrStepSzf velx, PtrStepSzf vely, PtrStepi buffer, int search_window, int block_window, cudaStream_t stream); +} + #endif // !defined CUDA_DISABLER diff --git a/modules/gpu/src/optflowbm.cpp b/modules/gpu/src/optflowbm.cpp index ec2f0f9f92..a4321c89cc 100644 --- a/modules/gpu/src/optflowbm.cpp +++ b/modules/gpu/src/optflowbm.cpp @@ -50,6 +50,8 @@ using namespace cv::gpu; void cv::gpu::calcOpticalFlowBM(const GpuMat&, const GpuMat&, Size, Size, Size, bool, GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_nogpu(); } +void cv::gpu::FastOpticalFlowBM::operator ()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, int, int, Stream&) { throw_nogpu(); } + #else // HAVE_CUDA namespace optflowbm @@ -202,4 +204,40 @@ void cv::gpu::calcOpticalFlowBM(const GpuMat& prev, const GpuMat& curr, Size blo maxX, maxY, acceptLevel, escapeLevel, buf.ptr(), ssCount, stream); } +namespace optflowbm_fast +{ + void get_buffer_size(int src_cols, int src_rows, int search_window, int block_window, int& buffer_cols, int& buffer_rows); + + template + void calc(PtrStepSzb I0, PtrStepSzb I1, PtrStepSzf velx, PtrStepSzf vely, PtrStepi buffer, int search_window, int block_window, cudaStream_t stream); +} + +void cv::gpu::FastOpticalFlowBM::operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy, int search_window, int block_window, Stream& stream) +{ + CV_Assert( I0.type() == CV_8UC1 ); + CV_Assert( I1.size() == I0.size() && I1.type() == I0.type() ); + + int border_size = search_window / 2 + block_window / 2; + Size esize = I0.size() + Size(border_size, border_size) * 2; + + ensureSizeIsEnough(esize, I0.type(), extended_I0); + ensureSizeIsEnough(esize, I0.type(), extended_I1); + + copyMakeBorder(I0, extended_I0, border_size, border_size, border_size, border_size, cv::BORDER_DEFAULT, Scalar(), stream); + copyMakeBorder(I1, extended_I1, border_size, border_size, border_size, border_size, cv::BORDER_DEFAULT, Scalar(), stream); + + GpuMat I0_hdr = extended_I0(Rect(Point2i(border_size, border_size), I0.size())); + GpuMat I1_hdr = extended_I1(Rect(Point2i(border_size, border_size), I0.size())); + + int bcols, brows; + optflowbm_fast::get_buffer_size(I0.cols, I0.rows, search_window, block_window, bcols, brows); + + ensureSizeIsEnough(brows, bcols, CV_32SC1, buffer); + + flowx.create(I0.size(), CV_32FC1); + flowy.create(I0.size(), CV_32FC1); + + optflowbm_fast::calc(I0_hdr, I1_hdr, flowx, flowy, buffer, search_window, block_window, StreamAccessor::getStream(stream)); +} + #endif // HAVE_CUDA diff --git a/modules/gpu/test/test_video.cpp b/modules/gpu/test/test_video.cpp index a4b98bb3d2..97e792c7fe 100644 --- a/modules/gpu/test/test_video.cpp +++ b/modules/gpu/test/test_video.cpp @@ -513,6 +513,121 @@ TEST_P(OpticalFlowBM, Accuracy) INSTANTIATE_TEST_CASE_P(GPU_Video, OpticalFlowBM, ALL_DEVICES); +////////////////////////////////////////////////////// +// FastOpticalFlowBM + +static void FastOpticalFlowBM_gold(const cv::Mat_& I0, const cv::Mat_& I1, cv::Mat_& velx, cv::Mat_& vely, int search_window, int block_window) +{ + velx.create(I0.size()); + vely.create(I0.size()); + + int search_radius = search_window / 2; + int block_radius = block_window / 2; + + for (int y = 0; y < I0.rows; ++y) + { + for (int x = 0; x < I0.cols; ++x) + { + int bestDist = std::numeric_limits::max(); + int bestDx = 0; + int bestDy = 0; + + for (int dy = -search_radius; dy <= search_radius; ++dy) + { + for (int dx = -search_radius; dx <= search_radius; ++dx) + { + int dist = 0; + + for (int by = -block_radius; by <= block_radius; ++by) + { + for (int bx = -block_radius; bx <= block_radius; ++bx) + { + int I0_val = I0(cv::borderInterpolate(y + by, I0.rows, cv::BORDER_DEFAULT), cv::borderInterpolate(x + bx, I0.cols, cv::BORDER_DEFAULT)); + int I1_val = I1(cv::borderInterpolate(y + dy + by, I0.rows, cv::BORDER_DEFAULT), cv::borderInterpolate(x + dx + bx, I0.cols, cv::BORDER_DEFAULT)); + + dist += std::abs(I0_val - I1_val); + } + } + + if (dist < bestDist) + { + bestDist = dist; + bestDx = dx; + bestDy = dy; + } + } + } + + velx(y, x) = bestDx; + vely(y, x) = bestDy; + } + } +} + +static double calc_rmse(const cv::Mat_& flow1, const cv::Mat_& flow2) +{ + double sum = 0.0; + + for (int y = 0; y < flow1.rows; ++y) + { + for (int x = 0; x < flow1.cols; ++x) + { + double diff = flow1(y, x) - flow2(y, x); + sum += diff * diff; + } + } + + return std::sqrt(sum / flow1.size().area()); +} + +struct FastOpticalFlowBM : testing::TestWithParam +{ +}; + +TEST_P(FastOpticalFlowBM, Accuracy) +{ + const double MAX_RMSE = 0.6; + + int search_window = 15; + int block_window = 5; + + cv::gpu::DeviceInfo devInfo = GetParam(); + cv::gpu::setDevice(devInfo.deviceID()); + + cv::Mat frame0 = readImage("opticalflow/rubberwhale1.png", cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(frame0.empty()); + + cv::Mat frame1 = readImage("opticalflow/rubberwhale2.png", cv::IMREAD_GRAYSCALE); + ASSERT_FALSE(frame1.empty()); + + cv::Size smallSize(320, 240); + cv::Mat frame0_small; + cv::Mat frame1_small; + + cv::resize(frame0, frame0_small, smallSize); + cv::resize(frame1, frame1_small, smallSize); + + cv::gpu::GpuMat d_flowx; + cv::gpu::GpuMat d_flowy; + cv::gpu::FastOpticalFlowBM fastBM; + + fastBM(loadMat(frame0_small), loadMat(frame1_small), d_flowx, d_flowy, search_window, block_window); + + cv::Mat_ flowx; + cv::Mat_ flowy; + FastOpticalFlowBM_gold(frame0_small, frame1_small, flowx, flowy, search_window, block_window); + + double err; + + err = calc_rmse(flowx, cv::Mat(d_flowx)); + EXPECT_LE(err, MAX_RMSE); + + err = calc_rmse(flowy, cv::Mat(d_flowy)); + EXPECT_LE(err, MAX_RMSE); +} + +INSTANTIATE_TEST_CASE_P(GPU_Video, FastOpticalFlowBM, ALL_DEVICES); + ////////////////////////////////////////////////////// // FGDStatModel diff --git a/samples/gpu/tvl1_optical_flow.cpp b/samples/gpu/optical_flow.cpp similarity index 61% rename from samples/gpu/tvl1_optical_flow.cpp rename to samples/gpu/optical_flow.cpp index c13afc1c79..8afff89ead 100644 --- a/samples/gpu/tvl1_optical_flow.cpp +++ b/samples/gpu/optical_flow.cpp @@ -121,6 +121,17 @@ static void drawOpticalFlow(const Mat_& flowx, const Mat_& flowy, } } +static void showFlow(const char* name, const GpuMat& d_flowx, const GpuMat& d_flowy) +{ + Mat flowx(d_flowx); + Mat flowy(d_flowy); + + Mat out; + drawOpticalFlow(flowx, flowy, out, 10); + + imshow(name, out); +} + int main(int argc, const char* argv[]) { if (argc < 3) @@ -152,20 +163,90 @@ int main(int argc, const char* argv[]) GpuMat d_frame0(frame0); GpuMat d_frame1(frame1); - GpuMat d_flowx, d_flowy; + GpuMat d_flowx(frame0.size(), CV_32FC1); + GpuMat d_flowy(frame0.size(), CV_32FC1); + + BroxOpticalFlow brox(0.197, 50.0, 0.8, 10, 77, 10); + PyrLKOpticalFlow lk; lk.winSize = Size(7, 7); + FarnebackOpticalFlow farn; OpticalFlowDual_TVL1_GPU tvl1; + FastOpticalFlowBM fastBM; - const double start = getTickCount(); - tvl1(d_frame0, d_frame1, d_flowx, d_flowy); - const double timeSec = (getTickCount() - start) / getTickFrequency(); - cout << "Time : " << timeSec << " sec" << endl; + { + GpuMat d_frame0f; + GpuMat d_frame1f; - Mat flowx(d_flowx); - Mat flowy(d_flowy); - Mat out; - drawOpticalFlow(flowx, flowy, out); + d_frame0.convertTo(d_frame0f, CV_32F, 1.0 / 255.0); + d_frame1.convertTo(d_frame1f, CV_32F, 1.0 / 255.0); + + const double start = getTickCount(); + + brox(d_frame0f, d_frame1f, d_flowx, d_flowy); + + const double timeSec = (getTickCount() - start) / getTickFrequency(); + cout << "Brox : " << timeSec << " sec" << endl; + + showFlow("Brox", d_flowx, d_flowy); + } + + { + const double start = getTickCount(); + + lk.dense(d_frame0, d_frame1, d_flowx, d_flowy); + + const double timeSec = (getTickCount() - start) / getTickFrequency(); + cout << "LK : " << timeSec << " sec" << endl; + + showFlow("LK", d_flowx, d_flowy); + } + + { + const double start = getTickCount(); + + farn(d_frame0, d_frame1, d_flowx, d_flowy); + + const double timeSec = (getTickCount() - start) / getTickFrequency(); + cout << "Farn : " << timeSec << " sec" << endl; + + showFlow("Farn", d_flowx, d_flowy); + } + + { + const double start = getTickCount(); + + tvl1(d_frame0, d_frame1, d_flowx, d_flowy); + + const double timeSec = (getTickCount() - start) / getTickFrequency(); + cout << "TVL1 : " << timeSec << " sec" << endl; + + showFlow("TVL1", d_flowx, d_flowy); + } + + { + const double start = getTickCount(); + + GpuMat buf; + calcOpticalFlowBM(d_frame0, d_frame1, Size(7, 7), Size(1, 1), Size(21, 21), false, d_flowx, d_flowy, buf); + + const double timeSec = (getTickCount() - start) / getTickFrequency(); + cout << "BM : " << timeSec << " sec" << endl; + + showFlow("BM", d_flowx, d_flowy); + } + + { + const double start = getTickCount(); + + fastBM(d_frame0, d_frame1, d_flowx, d_flowy); + + const double timeSec = (getTickCount() - start) / getTickFrequency(); + cout << "Fast BM : " << timeSec << " sec" << endl; + + showFlow("Fast BM", d_flowx, d_flowy); + } - imshow("Flow", out); + imshow("Frame 0", frame0); + imshow("Frame 1", frame1); waitKey(); return 0; -- GitLab