提交 1c957bea 编写于 作者: A Alexander Alekhin

Merge pull request #2807 from vchiluka5:NVIDIA_OPTICAL_FLOW_SDK_2.0_INTEGRATION

......@@ -8,21 +8,21 @@ ocv_warnings_disable(CMAKE_CXX_FLAGS /wd4127 /wd4324 /wd4512 -Wundef -Wmissing-d
ocv_define_module(cudaoptflow opencv_video opencv_optflow opencv_cudaarithm opencv_cudawarping opencv_cudaimgproc OPTIONAL opencv_cudalegacy WRAP python)
set(NVIDIA_OPTICAL_FLOW_1_0_HEADERS_COMMIT "79c6cee80a2df9a196f20afd6b598a9810964c32")
set(NVIDIA_OPTICAL_FLOW_1_0_HEADERS_MD5 "ca5acedee6cb45d0ec610a6732de5c15")
set(NVIDIA_OPTICAL_FLOW_1_0_HEADERS_PATH "${OpenCV_BINARY_DIR}/3rdparty/NVIDIAOpticalFlowSDK_1_0_Headers")
ocv_download(FILENAME "${NVIDIA_OPTICAL_FLOW_1_0_HEADERS_COMMIT}.zip"
HASH ${NVIDIA_OPTICAL_FLOW_1_0_HEADERS_MD5}
set(NVIDIA_OPTICAL_FLOW_2_0_HEADERS_COMMIT "edb50da3cf849840d680249aa6dbef248ebce2ca")
set(NVIDIA_OPTICAL_FLOW_2_0_HEADERS_MD5 "a73cd48b18dcc0cc8933b30796074191")
set(NVIDIA_OPTICAL_FLOW_2_0_HEADERS_PATH "${OpenCV_BINARY_DIR}/3rdparty/NVIDIAOpticalFlowSDK_2_0_Headers")
ocv_download(FILENAME "${NVIDIA_OPTICAL_FLOW_2_0_HEADERS_COMMIT}.zip"
HASH ${NVIDIA_OPTICAL_FLOW_2_0_HEADERS_MD5}
URL
"https://github.com/NVIDIA/NVIDIAOpticalFlowSDK/archive/"
DESTINATION_DIR "${NVIDIA_OPTICAL_FLOW_1_0_HEADERS_PATH}"
STATUS NVIDIA_OPTICAL_FLOW_1_0_HEADERS_DOWNLOAD_SUCCESS
DESTINATION_DIR "${NVIDIA_OPTICAL_FLOW_2_0_HEADERS_PATH}"
STATUS NVIDIA_OPTICAL_FLOW_2_0_HEADERS_DOWNLOAD_SUCCESS
ID "NVIDIA_OPTICAL_FLOW"
UNPACK RELATIVE_URL)
if(NOT NVIDIA_OPTICAL_FLOW_1_0_HEADERS_DOWNLOAD_SUCCESS)
message(STATUS "Failed to download NVIDIA_Optical_Flow_1_0 Headers")
if(NOT NVIDIA_OPTICAL_FLOW_2_0_HEADERS_DOWNLOAD_SUCCESS)
message(STATUS "Failed to download NVIDIA_Optical_Flow_2_0 Headers")
else()
add_definitions(-DHAVE_NVIDIA_OPTFLOW=1)
ocv_include_directories(SYSTEM "${NVIDIA_OPTICAL_FLOW_1_0_HEADERS_PATH}/NVIDIAOpticalFlowSDK-${NVIDIA_OPTICAL_FLOW_1_0_HEADERS_COMMIT}")
ocv_include_directories(SYSTEM "${NVIDIA_OPTICAL_FLOW_2_0_HEADERS_PATH}/NVIDIAOpticalFlowSDK-${NVIDIA_OPTICAL_FLOW_2_0_HEADERS_COMMIT}")
endif()
\ No newline at end of file
......@@ -392,9 +392,9 @@ public:
/** @brief Class for computing the optical flow vectors between two images using NVIDIA Optical Flow hardware and Optical Flow SDK 1.0.
@note
- A sample application demonstrating the use of NVIDIA Optical Flow can be found at
opencv_source_code/samples/gpu/nvidia_optical_flow.cpp
opencv_contrib_source_code/modules/cudaoptflow/samples/nvidia_optical_flow.cpp
- An example application comparing accuracy and performance of NVIDIA Optical Flow with other optical flow algorithms in OpenCV can be found at
opencv_source_code/samples/gpu/optical_flow.cpp
opencv_contrib_source_code/modules/cudaoptflow/samples/optical_flow.cpp
*/
class CV_EXPORTS_W NvidiaOpticalFlow_1_0 : public NvidiaHWOpticalFlow
......@@ -417,18 +417,16 @@ public:
* using nearest neighbour upsampling method.
@param flow Buffer of type CV_16FC2 containing flow vectors generated by calc().
@param width Width of the input image in pixels for which these flow vectors were generated.
@param height Height of the input image in pixels for which these flow vectors were generated.
@param imageSize Size of the input image in pixels for which these flow vectors were generated.
@param gridSize Granularity of the optical flow vectors returned by calc() function. Can be queried using getGridSize().
@param upsampledFlow Buffer of type CV_32FC2, containing upsampled flow vectors, each flow vector for 1 pixel, in the pitch-linear layout.
*/
CV_WRAP virtual void upSampler(InputArray flow, int width, int height,
CV_WRAP virtual void upSampler(InputArray flow, cv::Size imageSize,
int gridSize, InputOutputArray upsampledFlow) = 0;
/** @brief Instantiate NVIDIA Optical Flow
@param width Width of input image in pixels.
@param height Height of input image in pixels.
@param imageSize Size of input image in pixels.
@param perfPreset Optional parameter. Refer [NV OF SDK documentation](https://developer.nvidia.com/opticalflow-sdk) for details about presets.
Defaults to NV_OF_PERF_LEVEL_SLOW.
@param enableTemporalHints Optional parameter. Flag to enable temporal hints. When set to true, the hardware uses the flow vectors
......@@ -445,10 +443,142 @@ public:
If output stream is not set, the execute function will use default stream which is NULL stream;
*/
CV_WRAP static Ptr<NvidiaOpticalFlow_1_0> create(
int width,
int height,
cv::Size imageSize,
cv::cuda::NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL perfPreset
= cv::cuda::NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_SLOW,
= cv::cuda::NvidiaOpticalFlow_1_0::NV_OF_PERF_LEVEL_SLOW,
bool enableTemporalHints = false,
bool enableExternalHints = false,
bool enableCostBuffer = false,
int gpuId = 0,
Stream& inputStream = Stream::Null(),
Stream& outputStream = Stream::Null());
};
/** @brief Class for computing the optical flow vectors between two images using NVIDIA Optical Flow hardware and Optical Flow SDK 2.0.
@note
- A sample application demonstrating the use of NVIDIA Optical Flow can be found at
opencv_contrib_source_code/modules/cudaoptflow/samples/nvidia_optical_flow.cpp
- An example application comparing accuracy and performance of NVIDIA Optical Flow with other optical flow algorithms in OpenCV can be found at
opencv_contrib_source_code/modules/cudaoptflow/samples/optical_flow.cpp
*/
class CV_EXPORTS_W NvidiaOpticalFlow_2_0 : public NvidiaHWOpticalFlow
{
public:
/**
* Supported optical flow performance levels.
*/
enum NVIDIA_OF_PERF_LEVEL
{
NV_OF_PERF_LEVEL_UNDEFINED,
NV_OF_PERF_LEVEL_SLOW = 5, /**< Slow perf level results in lowest performance and best quality */
NV_OF_PERF_LEVEL_MEDIUM = 10, /**< Medium perf level results in low performance and medium quality */
NV_OF_PERF_LEVEL_FAST = 20, /**< Fast perf level results in high performance and low quality */
NV_OF_PERF_LEVEL_MAX
};
/**
* Supported grid size for output buffer.
*/
enum NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE
{
NV_OF_OUTPUT_VECTOR_GRID_SIZE_UNDEFINED,
NV_OF_OUTPUT_VECTOR_GRID_SIZE_1 = 1, /**< Output buffer grid size is 1x1 */
NV_OF_OUTPUT_VECTOR_GRID_SIZE_2 = 2, /**< Output buffer grid size is 2x2 */
NV_OF_OUTPUT_VECTOR_GRID_SIZE_4 = 4, /**< Output buffer grid size is 4x4 */
NV_OF_OUTPUT_VECTOR_GRID_SIZE_MAX
};
/**
* Supported grid size for hint buffer.
*/
enum NVIDIA_OF_HINT_VECTOR_GRID_SIZE
{
NV_OF_HINT_VECTOR_GRID_SIZE_UNDEFINED,
NV_OF_HINT_VECTOR_GRID_SIZE_1 = 1, /**< Hint buffer grid size is 1x1.*/
NV_OF_HINT_VECTOR_GRID_SIZE_2 = 2, /**< Hint buffer grid size is 2x2.*/
NV_OF_HINT_VECTOR_GRID_SIZE_4 = 4, /**< Hint buffer grid size is 4x4.*/
NV_OF_HINT_VECTOR_GRID_SIZE_8 = 8, /**< Hint buffer grid size is 8x8.*/
NV_OF_HINT_VECTOR_GRID_SIZE_MAX
};
/** @brief convertToFloat() helper function converts the hardware-generated flow vectors to floating point representation (1 flow vector for gridSize).
* gridSize can be queried via function getGridSize().
@param flow Buffer of type CV_16FC2 containing flow vectors generated by calc().
@param floatFlow Buffer of type CV_32FC2, containing flow vectors in floating point representation, each flow vector for 1 pixel per gridSize, in the pitch-linear layout.
*/
CV_WRAP virtual void convertToFloat(InputArray flow, InputOutputArray floatFlow) = 0;
/** @brief Instantiate NVIDIA Optical Flow
@param imageSize Size of input image in pixels.
@param perfPreset Optional parameter. Refer [NV OF SDK documentation](https://developer.nvidia.com/opticalflow-sdk) for details about presets.
Defaults to NV_OF_PERF_LEVEL_SLOW.
@param outputGridSize Optional parameter. Refer [NV OF SDK documentation](https://developer.nvidia.com/opticalflow-sdk) for details about output grid sizes.
Defaults to NV_OF_OUTPUT_VECTOR_GRID_SIZE_1.
@param hintGridSize Optional parameter. Refer [NV OF SDK documentation](https://developer.nvidia.com/opticalflow-sdk) for details about hint grid sizes.
Defaults to NV_OF_HINT_VECTOR_GRID_SIZE_1.
@param enableTemporalHints Optional parameter. Flag to enable temporal hints. When set to true, the hardware uses the flow vectors
generated in previous call to calc() as internal hints for the current call to calc().
Useful when computing flow vectors between successive video frames. Defaults to false.
@param enableExternalHints Optional Parameter. Flag to enable passing external hints buffer to calc(). Defaults to false.
@param enableCostBuffer Optional Parameter. Flag to enable cost buffer output from calc(). Defaults to false.
@param gpuId Optional parameter to select the GPU ID on which the optical flow should be computed. Useful in multi-GPU systems. Defaults to 0.
@param inputStream Optical flow algorithm may optionally involve cuda preprocessing on the input buffers.
The input cuda stream can be used to pipeline and synchronize the cuda preprocessing tasks with OF HW engine.
If input stream is not set, the execute function will use default stream which is NULL stream;
@param outputStream Optical flow algorithm may optionally involve cuda post processing on the output flow vectors.
The output cuda stream can be used to pipeline and synchronize the cuda post processing tasks with OF HW engine.
If output stream is not set, the execute function will use default stream which is NULL stream;
*/
CV_WRAP static Ptr<NvidiaOpticalFlow_2_0> create(
cv::Size imageSize,
cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_PERF_LEVEL perfPreset
= cv::cuda::NvidiaOpticalFlow_2_0::NV_OF_PERF_LEVEL_SLOW,
cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE outputGridSize
= cv::cuda::NvidiaOpticalFlow_2_0::NV_OF_OUTPUT_VECTOR_GRID_SIZE_1,
cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_HINT_VECTOR_GRID_SIZE hintGridSize
= cv::cuda::NvidiaOpticalFlow_2_0::NV_OF_HINT_VECTOR_GRID_SIZE_1,
bool enableTemporalHints = false,
bool enableExternalHints = false,
bool enableCostBuffer = false,
int gpuId = 0,
Stream& inputStream = Stream::Null(),
Stream& outputStream = Stream::Null());
/** @brief Instantiate NVIDIA Optical Flow with ROI Feature
@param imageSize Size of input image in pixels.
@param roiData Pointer to ROI data.
@param perfPreset Optional parameter. Refer [NV OF SDK documentation](https://developer.nvidia.com/opticalflow-sdk) for details about presets.
Defaults to NV_OF_PERF_LEVEL_SLOW.
@param outputGridSize Optional parameter. Refer [NV OF SDK documentation](https://developer.nvidia.com/opticalflow-sdk) for details about output grid sizes.
Defaults to NV_OF_OUTPUT_VECTOR_GRID_SIZE_1.
@param hintGridSize Optional parameter. Refer [NV OF SDK documentation](https://developer.nvidia.com/opticalflow-sdk) for details about hint grid sizes.
Defaults to NV_OF_HINT_VECTOR_GRID_SIZE_1.
@param enableTemporalHints Optional parameter. Flag to enable temporal hints. When set to true, the hardware uses the flow vectors
generated in previous call to calc() as internal hints for the current call to calc().
Useful when computing flow vectors between successive video frames. Defaults to false.
@param enableExternalHints Optional Parameter. Flag to enable passing external hints buffer to calc(). Defaults to false.
@param enableCostBuffer Optional Parameter. Flag to enable cost buffer output from calc(). Defaults to false.
@param gpuId Optional parameter to select the GPU ID on which the optical flow should be computed. Useful in multi-GPU systems. Defaults to 0.
@param inputStream Optical flow algorithm may optionally involve cuda preprocessing on the input buffers.
The input cuda stream can be used to pipeline and synchronize the cuda preprocessing tasks with OF HW engine.
If input stream is not set, the execute function will use default stream which is NULL stream;
@param outputStream Optical flow algorithm may optionally involve cuda post processing on the output flow vectors.
The output cuda stream can be used to pipeline and synchronize the cuda post processing tasks with OF HW engine.
If output stream is not set, the execute function will use default stream which is NULL stream;
*/
CV_WRAP static Ptr<NvidiaOpticalFlow_2_0> create(
cv::Size imageSize,
std::vector<Rect> roiData,
cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_PERF_LEVEL perfPreset
= cv::cuda::NvidiaOpticalFlow_2_0::NV_OF_PERF_LEVEL_SLOW,
cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE outputGridSize
= cv::cuda::NvidiaOpticalFlow_2_0::NV_OF_OUTPUT_VECTOR_GRID_SIZE_1,
cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_HINT_VECTOR_GRID_SIZE hintGridSize
= cv::cuda::NvidiaOpticalFlow_2_0::NV_OF_HINT_VECTOR_GRID_SIZE_1,
bool enableTemporalHints = false,
bool enableExternalHints = false,
bool enableCostBuffer = false,
......
import os
import cv2 as cv
import numpy as np
from tests_common import NewOpenCVTests, unittest
class nvidiaopticalflow_test(NewOpenCVTests):
def setUp(self):
super(nvidiaopticalflow_test, self).setUp()
if not cv.cuda.getCudaEnabledDeviceCount():
self.skipTest("No CUDA-capable device is detected")
@unittest.skipIf('OPENCV_TEST_DATA_PATH' not in os.environ,
"OPENCV_TEST_DATA_PATH is not defined")
def test_calc(self):
frame1 = os.environ['OPENCV_TEST_DATA_PATH'] + '/gpu/opticalflow/frame0.png'
frame2 = os.environ['OPENCV_TEST_DATA_PATH'] + '/gpu/opticalflow/frame1.png'
npMat1 = cv.cvtColor(cv.imread(frame1),cv.COLOR_BGR2GRAY)
npMat2 = cv.cvtColor(cv.imread(frame2),cv.COLOR_BGR2GRAY)
cuMat1 = cv.cuda_GpuMat(npMat1)
cuMat2 = cv.cuda_GpuMat(npMat2)
try:
nvof = cv.cuda_NvidiaOpticalFlow_1_0.create(cuMat1.shape[1], cuMat1.shape[0], 5, False, False, False, 0)
flow = nvof.calc(cuMat1, cuMat2, None)
self.assertTrue(flow.shape[1] > 0 and flow.shape[0] > 0)
flowUpSampled = nvof.upSampler(flow[0], cuMat1.shape[1], cuMat1.shape[0], nvof.getGridSize(), None)
nvof.collectGarbage()
except cv.error as e:
if e.code == cv.Error.StsBadFunc or e.code == cv.Error.StsBadArg or e.code == cv.Error.StsNullPtr:
self.skipTest("Algorithm is not supported in the current environment")
self.assertTrue(flowUpSampled.shape[1] > 0 and flowUpSampled.shape[0] > 0)
if __name__ == '__main__':
NewOpenCVTests.bootstrap()
\ No newline at end of file
......@@ -339,13 +339,8 @@ PERF_TEST_P(ImagePair, NvidiaOpticalFlow_1_0,
const cv::Mat frame1 = readImage(GetParam().second, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame1.empty());
const int width = frame0.size().width;
const int height = frame0.size().height;
const bool enableTemporalHints = false;
const bool enableExternalHints = false;
const bool enableCostBuffer = false;
const int gpuid = 0;
Stream inputStream;
Stream outputStream;
if (PERF_RUN_CUDA())
{
......@@ -355,9 +350,9 @@ PERF_TEST_P(ImagePair, NvidiaOpticalFlow_1_0,
cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0> d_nvof;
try
{
d_nvof = cv::cuda::NvidiaOpticalFlow_1_0::create(width, height,
d_nvof = cv::cuda::NvidiaOpticalFlow_1_0::create(frame0.size(),
cv::cuda::NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_FAST,
enableTemporalHints, enableExternalHints, enableCostBuffer, gpuid);
false, false, false, 0, inputStream, outputStream);
}
catch (const cv::Exception& e)
{
......@@ -376,6 +371,63 @@ PERF_TEST_P(ImagePair, NvidiaOpticalFlow_1_0,
CUDA_SANITY_CHECK(u, 1e-10);
CUDA_SANITY_CHECK(v, 1e-10);
d_nvof->collectGarbage();
}
}
//////////////////////////////////////////////////////
// NvidiaOpticalFlow_2_0
PERF_TEST_P(ImagePair, NvidiaOpticalFlow_2_0,
Values<pair_string>(make_pair("gpu/opticalflow/frame0.png", "gpu/opticalflow/frame1.png")))
{
declare.time(10);
const cv::Mat frame0 = readImage(GetParam().first, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame0.empty());
const cv::Mat frame1 = readImage(GetParam().second, cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame1.empty());
const cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE outGridSize
= cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE::NV_OF_OUTPUT_VECTOR_GRID_SIZE_1;
const cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_HINT_VECTOR_GRID_SIZE hintGridSize
= cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_HINT_VECTOR_GRID_SIZE::NV_OF_HINT_VECTOR_GRID_SIZE_1;
Stream inputStream;
Stream outputStream;
if (PERF_RUN_CUDA())
{
const cv::cuda::GpuMat d_frame0(frame0);
const cv::cuda::GpuMat d_frame1(frame1);
cv::cuda::GpuMat d_flow;
cv::Ptr<cv::cuda::NvidiaOpticalFlow_2_0> d_nvof;
try
{
d_nvof = cv::cuda::NvidiaOpticalFlow_2_0::create(frame0.size(),
cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_FAST, outGridSize, hintGridSize,
false, false, false, 0, inputStream, outputStream);
}
catch (const cv::Exception& e)
{
if (e.code == Error::StsBadFunc || e.code == Error::StsBadArg || e.code == Error::StsNullPtr)
throw SkipTestException("Current configuration is not supported");
throw;
}
TEST_CYCLE() d_nvof->calc(d_frame0, d_frame1, d_flow);
cv::cuda::GpuMat flow[2];
cv::cuda::split(d_flow, flow);
cv::cuda::GpuMat u = flow[0];
cv::cuda::GpuMat v = flow[1];
CUDA_SANITY_CHECK(u, 1e-10);
CUDA_SANITY_CHECK(v, 1e-10);
d_nvof->collectGarbage();
}
}
......
......@@ -2,6 +2,7 @@
#include <iostream>
#include <fstream>
#include <iomanip>
#include <iterator>
#include "opencv2/core.hpp"
#include "opencv2/core/utility.hpp"
......@@ -11,7 +12,6 @@
#include "opencv2/cudaarithm.hpp"
#include "opencv2/video/tracking.hpp"
using namespace std;
using namespace cv;
using namespace cv::cuda;
......@@ -131,12 +131,88 @@ static void drawOpticalFlow(const Mat_<float>& flowx, const Mat_<float>& flowy
}
}
/*
ROI config file format.
numrois 3
roi0 640 96 1152 192
roi1 640 64 896 864
roi2 640 960 256 32
*/
bool parseROI(std::string ROIFileName, std::vector<Rect>& roiData)
{
std::string str;
uint32_t nRois = 0;
std::ifstream hRoiFile;
hRoiFile.open(ROIFileName, std::ios::in);
if (hRoiFile.is_open())
{
while (std::getline(hRoiFile, str))
{
std::istringstream iss(str);
std::vector<std::string> tokens{ std::istream_iterator<std::string>{iss},
std::istream_iterator<std::string>{} };
if (tokens.size() == 0) continue; // if empty line, coninue
transform(tokens[0].begin(), tokens[0].end(), tokens[0].begin(), ::tolower);
if (tokens[0] == "numrois")
{
nRois = atoi(tokens[1].data());
}
else if (tokens[0].rfind("roi", 0) == 0)
{
cv::Rect roi;
roi.x = atoi(tokens[1].data());
roi.y = atoi(tokens[2].data());
roi.width = atoi(tokens[3].data());
roi.height = atoi(tokens[4].data());
roiData.push_back(roi);
}
else if (tokens[0].rfind("#", 0) == 0)
{
continue;
}
else
{
std::cout << "Unidentified keyword in roi config file " << tokens[0] << std::endl;
hRoiFile.close();
return false;
}
}
}
else
{
std::cout << "Unable to open ROI file " << std::endl;
return false;
}
if (nRois != roiData.size())
{
std::cout << "NumRois(" << nRois << ")and specified roi rects (" << roiData.size() << ")are not matching " << std::endl;
hRoiFile.close();
return false;
}
hRoiFile.close();
return true;
}
int main(int argc, char **argv)
{
std::unordered_map<std::string, NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL> presetMap = {
{ "slow", NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_SLOW },
{ "medium", NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_MEDIUM },
{ "fast", NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_FAST } };
std::unordered_map<std::string, NvidiaOpticalFlow_2_0::NVIDIA_OF_PERF_LEVEL> presetMap = {
{ "slow", NvidiaOpticalFlow_2_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_SLOW },
{ "medium", NvidiaOpticalFlow_2_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_MEDIUM },
{ "fast", NvidiaOpticalFlow_2_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_FAST } };
std::unordered_map<int, NvidiaOpticalFlow_2_0::NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE> outputGridSize = {
{ 1, NvidiaOpticalFlow_2_0::NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE::NV_OF_OUTPUT_VECTOR_GRID_SIZE_1 },
{ 2, NvidiaOpticalFlow_2_0::NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE::NV_OF_OUTPUT_VECTOR_GRID_SIZE_2 },
{ 4, NvidiaOpticalFlow_2_0::NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE::NV_OF_OUTPUT_VECTOR_GRID_SIZE_4 } };
std::unordered_map<int, NvidiaOpticalFlow_2_0::NVIDIA_OF_HINT_VECTOR_GRID_SIZE> hintGridSize = {
{ 1, NvidiaOpticalFlow_2_0::NVIDIA_OF_HINT_VECTOR_GRID_SIZE::NV_OF_HINT_VECTOR_GRID_SIZE_1 },
{ 2, NvidiaOpticalFlow_2_0::NVIDIA_OF_HINT_VECTOR_GRID_SIZE::NV_OF_HINT_VECTOR_GRID_SIZE_2 },
{ 4, NvidiaOpticalFlow_2_0::NVIDIA_OF_HINT_VECTOR_GRID_SIZE::NV_OF_HINT_VECTOR_GRID_SIZE_4 },
{ 8, NvidiaOpticalFlow_2_0::NVIDIA_OF_HINT_VECTOR_GRID_SIZE::NV_OF_HINT_VECTOR_GRID_SIZE_8 } };
try
{
......@@ -145,7 +221,10 @@ int main(int argc, char **argv)
"{ r right | ../data/basketball2.png | specify right image }"
"{ g gpuid | 0 | cuda device index}"
"{ p preset | slow | perf preset for OF algo [ options : slow, medium, fast ]}"
"{ og outputGridSize | 1 | Output grid size of OF vector [ options : 1, 2, 4 ]}"
"{ hg hintGridSize | 1 | Hint grid size of OF vector [ options : 1, 2, 4, 8 ]}"
"{ o output | OpenCVNvOF.flo | output flow vector file in middlebury format}"
"{ rc roiConfigFile | | Region of Interest config file }"
"{ th enableTemporalHints | false | Enable temporal hints}"
"{ eh enableExternalHints | false | Enable external hints}"
"{ cb enableCostBuffer | false | Enable output cost buffer}"
......@@ -159,60 +238,93 @@ int main(int argc, char **argv)
return 0;
}
string pathL = cmd.get<string>("left");
string pathR = cmd.get<string>("right");
string preset = cmd.get<string>("preset");
string output = cmd.get<string>("output");
std::string pathL = cmd.get<std::string>("left");
std::string pathR = cmd.get<std::string>("right");
std::string preset = cmd.get<std::string>("preset");
std::string output = cmd.get<std::string>("output");
std::string roiConfiFile = cmd.get<std::string>("roiConfigFile");
bool enableExternalHints = cmd.get<bool>("enableExternalHints");
bool enableTemporalHints = cmd.get<bool>("enableTemporalHints");
bool enableCostBuffer = cmd.get<bool>("enableCostBuffer");
int gpuId = cmd.get<int>("gpuid");
int outputBufferGridSize = cmd.get<int>("outputGridSize");
int hintBufferGridSize = cmd.get<int>("hintGridSize");
if (pathL.empty()) cout << "Specify left image path\n";
if (pathR.empty()) cout << "Specify right image path\n";
if (preset.empty()) cout << "Specify perf preset for OpticalFlow algo\n";
if (pathL.empty()) std::cout << "Specify left image path" << std::endl;
if (pathR.empty()) std::cout << "Specify right image path" << std::endl;
if (preset.empty()) std::cout << "Specify perf preset for OpticalFlow algo" << std::endl;
if (pathL.empty() || pathR.empty()) return 0;
auto search = presetMap.find(preset);
if (search == presetMap.end())
auto p = presetMap.find(preset);
if (p == presetMap.end())
{
std::cout << "Invalid preset level : " << preset << std::endl;
return 0;
}
NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL perfPreset = search->second;
NvidiaOpticalFlow_2_0::NVIDIA_OF_PERF_LEVEL perfPreset = p->second;
auto o = outputGridSize.find(outputBufferGridSize);
if (o == outputGridSize.end())
{
std::cout << "Invalid output grid size: " << outputBufferGridSize << std::endl;
return 0;
}
NvidiaOpticalFlow_2_0::NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE outBufGridSize = o->second;
NvidiaOpticalFlow_2_0::NVIDIA_OF_HINT_VECTOR_GRID_SIZE hintBufGridSize =
NvidiaOpticalFlow_2_0::NV_OF_HINT_VECTOR_GRID_SIZE_UNDEFINED;
if (enableExternalHints)
{
auto h = hintGridSize.find(hintBufferGridSize);
if (h == hintGridSize.end())
{
std::cout << "Invalid hint grid size: " << hintBufferGridSize << std::endl;
return 0;
}
hintBufGridSize = h->second;
}
std::vector<Rect> roiData;
if (!roiConfiFile.empty())
{
if (!parseROI(roiConfiFile, roiData))
{
std::cout << "Wrong Region of Interest config file, proceeding without ROI" << std::endl;
}
}
Mat frameL = imread(pathL, IMREAD_GRAYSCALE);
Mat frameR = imread(pathR, IMREAD_GRAYSCALE);
if (frameL.empty()) cout << "Can't open '" << pathL << "'\n";
if (frameR.empty()) cout << "Can't open '" << pathR << "'\n";
if (frameL.empty()) std::cout << "Can't open '" << pathL << "'" << std::endl;
if (frameR.empty()) std::cout << "Can't open '" << pathR << "'" << std::endl;
if (frameL.empty() || frameR.empty()) return -1;
Ptr<NvidiaOpticalFlow_1_0> nvof = NvidiaOpticalFlow_1_0::create(
frameL.size().width, frameL.size().height, perfPreset,
Ptr<NvidiaOpticalFlow_2_0> nvof = NvidiaOpticalFlow_2_0::create(
frameL.size(), roiData, perfPreset, outBufGridSize, hintBufGridSize,
enableTemporalHints, enableExternalHints, enableCostBuffer, gpuId);
Mat flowx, flowy, flowxy, upsampledFlowXY, image;
Mat flowx, flowy, flowxy, floatFlow, image;
nvof->calc(frameL, frameR, flowxy);
nvof->upSampler(flowxy, frameL.size().width, frameL.size().height,
nvof->getGridSize(), upsampledFlowXY);
nvof->convertToFloat(flowxy, floatFlow);
if (output.size() != 0)
if (!output.empty())
{
if (!writeOpticalFlow(output, upsampledFlowXY))
cout << "Failed to save Flow Vector" << endl;
if (!writeOpticalFlow(output, floatFlow))
std::cout << "Failed to save Flow Vector" << std::endl;
else
cout << "Flow vector saved as '" << output << "'\n";
std::cout << "Flow vector saved as '" << output << "'" << std::endl;
}
Mat planes[] = { flowx, flowy };
split(upsampledFlowXY, planes);
split(floatFlow, planes);
flowx = planes[0]; flowy = planes[1];
drawOpticalFlow(flowx, flowy, image, 10);
imshow("Colorize image",image);
imshow("Colorize image", image);
waitKey(0);
nvof->collectGarbage();
}
......
......@@ -183,8 +183,11 @@ int main(int argc, const char* argv[])
Ptr<cuda::DensePyrLKOpticalFlow> lk = cuda::DensePyrLKOpticalFlow::create(Size(7, 7));
Ptr<cuda::FarnebackOpticalFlow> farn = cuda::FarnebackOpticalFlow::create();
Ptr<cuda::OpticalFlowDual_TVL1> tvl1 = cuda::OpticalFlowDual_TVL1::create();
Ptr<cuda::NvidiaOpticalFlow_1_0> nvof = cuda::NvidiaOpticalFlow_1_0::create(frame0.size().width, frame0.size().height,
Ptr<cuda::NvidiaOpticalFlow_1_0> nvof_1_0 = cuda::NvidiaOpticalFlow_1_0::create(frame0.size(),
NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_FAST, false, false, false, 0, inputStream, outputStream);
Ptr<cuda::NvidiaOpticalFlow_2_0> nvof_2_0 = cuda::NvidiaOpticalFlow_2_0::create(frame0.size(),
NvidiaOpticalFlow_2_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_FAST, NvidiaOpticalFlow_2_0::NVIDIA_OF_OUTPUT_VECTOR_GRID_SIZE::NV_OF_OUTPUT_VECTOR_GRID_SIZE_1,
NvidiaOpticalFlow_2_0::NVIDIA_OF_HINT_VECTOR_GRID_SIZE::NV_OF_HINT_VECTOR_GRID_SIZE_UNDEFINED, false, false, false, 0, inputStream, outputStream);
{
GpuMat d_frame0f;
......@@ -242,16 +245,32 @@ int main(int argc, const char* argv[])
//Hence it is expected to be more than what is displayed in the NVIDIA Optical Flow SDK documentation.
const int64 start = getTickCount();
nvof->calc(d_frame0, d_frame1, d_flowxy);
nvof_1_0->calc(d_frame0, d_frame1, d_flowxy);
const double timeSec = (getTickCount() - start) / getTickFrequency();
cout << "NVIDIAOpticalFlow : " << timeSec << " sec" << endl;
cout << "NVIDIAOpticalFlow_1_0 : " << timeSec << " sec" << endl;
nvof->upSampler(d_flowxy, frame0.size().width, frame0.size().height,
nvof->getGridSize(), d_flow);
nvof_1_0->upSampler(d_flowxy, frame0.size(), nvof_1_0->getGridSize(), d_flow);
showFlow("NVIDIAOpticalFlow", d_flow);
nvof->collectGarbage();
showFlow("NVIDIAOpticalFlow_1_0", d_flow);
nvof_1_0->collectGarbage();
}
{
//The timing displayed below includes the time taken to copy the input buffers to the OF CUDA input buffers
//and to copy the output buffers from the OF CUDA output buffer to the output buffer.
//Hence it is expected to be more than what is displayed in the NVIDIA Optical Flow SDK documentation.
const int64 start = getTickCount();
nvof_2_0->calc(d_frame0, d_frame1, d_flowxy);
const double timeSec = (getTickCount() - start) / getTickFrequency();
cout << "NVIDIAOpticalFlow_2_0 : " << timeSec << " sec" << endl;
nvof_2_0->convertToFloat(d_flowxy, d_flow);
showFlow("NVIDIAOpticalFlow_2_0", d_flow);
nvof_2_0->collectGarbage();
}
imshow("Frame 0", frame0);
......
//
// This file is part of OpenCV project.
// It is subject to the license terms in the LICENSE file found in the top-level directory
// of this distribution and at http://opencv.org/license.html.
//
//M*/
#if !defined CUDA_DISABLER
#include <cuda_runtime.h>
#include <stdio.h>
typedef unsigned char uint8_t;
typedef unsigned short uint16_t;
typedef unsigned int uint32_t;
typedef signed short int16_t;
typedef signed int int32_t;
#define BLOCKDIM_X 32
#define BLOCKDIM_Y 16
// data required to do 2x upsampling. Same can be used for 4x upsampling also
#define SMEM_COLS ((BLOCKDIM_X)/2)
#define SMEM_ROWS ((BLOCKDIM_Y)/2)
namespace cv { namespace cuda { namespace device { namespace optflow_nvidia
{
static const char *_cudaGetErrorEnum(cudaError_t error) { return cudaGetErrorName(error); }
template <typename T>
void check(T result, char const *const func, const char *const file,
int const line) {
if (result) {
fprintf(stderr, "CUDA error at %s:%d code=%d(%s) \"%s\" \n", file, line,
static_cast<uint32_t>(result), _cudaGetErrorEnum(result), func);
// Make sure we call CUDA Device Reset before exiting
exit(EXIT_FAILURE);
}
}
#define checkCudaErrors(val) check((val), #val, __FILE__, __LINE__)
template <class T>
static __device__ void ReadDevPtrData(void* devptr, uint32_t x0, uint32_t y0, uint32_t src_w, uint32_t src_h, uint32_t src_pitch,
T src[][SMEM_COLS], uint32_t i, uint32_t j)
{
uint32_t shift = (sizeof(T) == sizeof(int32_t)) ? 2 : 1;
src[j][i] = *(T*)((uint8_t*)devptr + y0 * src_pitch + (x0 << shift));
}
extern "C"
__global__ void NearestNeighborFlowKernel(cudaSurfaceObject_t srcSurf, void* srcDevPtr, uint32_t src_w, uint32_t src_pitch, uint32_t src_h,
cudaSurfaceObject_t dstSurf, void* dstDevPtr, uint32_t dst_w, uint32_t dst_pitch, uint32_t dst_h,
uint32_t nScaleFactor)
{
int x = blockDim.x * blockIdx.x + threadIdx.x;
int y = blockDim.y * blockIdx.y + threadIdx.y;
int x0 = x / nScaleFactor;
int y0 = y / nScaleFactor;
__shared__ short2 src[SMEM_ROWS][SMEM_COLS];
int i = threadIdx.x / nScaleFactor;
int j = threadIdx.y / nScaleFactor;
if ((x % nScaleFactor == 0) && (y % nScaleFactor == 0))
{
ReadDevPtrData<short2>(srcDevPtr, x0, y0, src_w, src_h, src_pitch, src, i, j);
}
__syncthreads();
if (x < dst_w && y < dst_h)
{
if (dstDevPtr == NULL)
{
surf2Dwrite<short2>(src[j][i], dstSurf, x * sizeof(short2), y, cudaBoundaryModeClamp);
}
else
{
*(short2*)((uint8_t*)dstDevPtr + y * dst_pitch + (x << 2)) = src[j][i];
}
}
}
void FlowUpsample(void* srcDevPtr, uint32_t nSrcWidth, uint32_t nSrcPitch, uint32_t nSrcHeight,
void* dstDevPtr, uint32_t nDstWidth, uint32_t nDstPitch, uint32_t nDstHeight,
uint32_t nScaleFactor)
{
dim3 blockDim(BLOCKDIM_X, BLOCKDIM_Y);
dim3 gridDim((nDstWidth + blockDim.x - 1) / blockDim.x, (nDstHeight + blockDim.y - 1) / blockDim.y);
NearestNeighborFlowKernel << <gridDim, blockDim >> > (0, srcDevPtr, nSrcWidth, nSrcPitch, nSrcHeight,
0, dstDevPtr, nDstWidth, nDstPitch, nDstHeight,
nScaleFactor);
checkCudaErrors(cudaGetLastError());
}}}}}
#endif
\ No newline at end of file
......@@ -495,19 +495,11 @@ CUDA_TEST_P(NvidiaOpticalFlow_1_0, Regression)
cv::Mat frame1 = readImage("opticalflow/frame1.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame1.empty());
const int width = frame0.size().width;
const int height = frame0.size().height;
const bool enableTemporalHints = false;
const bool enableExternalHints = false;
const bool enableCostBuffer = false;
const int gpuid = 0;
cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0> d_nvof;
try
{
d_nvof = cv::cuda::NvidiaOpticalFlow_1_0::create(width, height,
cv::cuda::NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_SLOW,
enableTemporalHints, enableExternalHints, enableCostBuffer, gpuid);
d_nvof = cv::cuda::NvidiaOpticalFlow_1_0::create(frame0.size(),
cv::cuda::NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_SLOW);
}
catch (const cv::Exception& e)
{
......@@ -519,7 +511,7 @@ CUDA_TEST_P(NvidiaOpticalFlow_1_0, Regression)
Mat flow, upsampledFlow;
d_nvof->calc(loadMat(frame0), loadMat(frame1), flow);
d_nvof->upSampler(flow, width, height, gridSize, upsampledFlow);
d_nvof->upSampler(flow, frame0.size(), gridSize, upsampledFlow);
std::string fname(cvtest::TS::ptr()->get_data_path());
fname += "opticalflow/nvofGolden.flo";
......@@ -527,6 +519,7 @@ CUDA_TEST_P(NvidiaOpticalFlow_1_0, Regression)
ASSERT_FALSE(golden.empty());
EXPECT_MAT_SIMILAR(golden, upsampledFlow, 1e-10);
d_nvof->collectGarbage();
}
CUDA_TEST_P(NvidiaOpticalFlow_1_0, OpticalFlowNan)
......@@ -539,19 +532,11 @@ CUDA_TEST_P(NvidiaOpticalFlow_1_0, OpticalFlowNan)
cv::Mat r_frame0, r_frame1;
const int width = frame0.size().width;
const int height = frame0.size().height;
const bool enableTemporalHints = false;
const bool enableExternalHints = false;
const bool enableCostBuffer = false;
const int gpuid = 0;
cv::Ptr<cv::cuda::NvidiaOpticalFlow_1_0> d_nvof;
try
{
d_nvof = cv::cuda::NvidiaOpticalFlow_1_0::create(width, height,
cv::cuda::NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_SLOW,
enableTemporalHints, enableExternalHints, enableCostBuffer, gpuid);
d_nvof = cv::cuda::NvidiaOpticalFlow_1_0::create(frame0.size(),
cv::cuda::NvidiaOpticalFlow_1_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_SLOW);
}
catch (const cv::Exception& e)
{
......@@ -569,9 +554,96 @@ CUDA_TEST_P(NvidiaOpticalFlow_1_0, OpticalFlowNan)
EXPECT_TRUE(cv::checkRange(flowx));
EXPECT_TRUE(cv::checkRange(flowy));
d_nvof->collectGarbage();
};
INSTANTIATE_TEST_CASE_P(CUDA_OptFlow, NvidiaOpticalFlow_1_0, ALL_DEVICES);
//////////////////////////////////////////////////////
// NvidiaOpticalFlow_2_0
struct NvidiaOpticalFlow_2_0 : testing::TestWithParam<cv::cuda::DeviceInfo>
{
cv::cuda::DeviceInfo devInfo;
virtual void SetUp()
{
devInfo = GetParam();
cv::cuda::setDevice(devInfo.deviceID());
}
};
CUDA_TEST_P(NvidiaOpticalFlow_2_0, Regression)
{
cv::Mat frame0 = readImage("opticalflow/frame0.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame0.empty());
cv::Mat frame1 = readImage("opticalflow/frame1.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame1.empty());
cv::Ptr<cv::cuda::NvidiaOpticalFlow_2_0> d_nvof;
try
{
d_nvof = cv::cuda::NvidiaOpticalFlow_2_0::create(frame0.size(),
cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_SLOW);
}
catch (const cv::Exception& e)
{
if (e.code == Error::StsBadFunc || e.code == Error::StsBadArg || e.code == Error::StsNullPtr)
throw SkipTestException("Current configuration is not supported");
throw;
}
Mat flow, upsampledFlow;
d_nvof->calc(loadMat(frame0), loadMat(frame1), flow);
d_nvof->convertToFloat(flow, upsampledFlow);
std::string fname(cvtest::TS::ptr()->get_data_path());
fname += "opticalflow/nvofGolden_2.flo";
cv::Mat golden = cv::readOpticalFlow(fname.c_str());
ASSERT_FALSE(golden.empty());
EXPECT_MAT_SIMILAR(golden, upsampledFlow, 1e-10);
d_nvof->collectGarbage();
}
CUDA_TEST_P(NvidiaOpticalFlow_2_0, OpticalFlowNan)
{
cv::Mat frame0 = readImage("opticalflow/rubberwhale1.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame0.empty());
cv::Mat frame1 = readImage("opticalflow/rubberwhale2.png", cv::IMREAD_GRAYSCALE);
ASSERT_FALSE(frame1.empty());
cv::Mat r_frame0, r_frame1;
cv::Ptr<cv::cuda::NvidiaOpticalFlow_2_0> d_nvof;
try
{
d_nvof = cv::cuda::NvidiaOpticalFlow_2_0::create(frame0.size(),
cv::cuda::NvidiaOpticalFlow_2_0::NVIDIA_OF_PERF_LEVEL::NV_OF_PERF_LEVEL_SLOW);
}
catch (const cv::Exception& e)
{
if (e.code == Error::StsBadFunc || e.code == Error::StsBadArg || e.code == Error::StsNullPtr)
throw SkipTestException("Current configuration is not supported");
throw;
}
Mat flow, flowx, flowy;
d_nvof->calc(loadMat(frame0), loadMat(frame1), flow);
Mat planes[] = { flowx, flowy };
split(flow, planes);
flowx = planes[0]; flowy = planes[1];
EXPECT_TRUE(cv::checkRange(flowx));
EXPECT_TRUE(cv::checkRange(flowy));
d_nvof->collectGarbage();
};
INSTANTIATE_TEST_CASE_P(CUDA_OptFlow, NvidiaOpticalFlow_2_0, ALL_DEVICES);
}} // namespace
#endif // HAVE_CUDA
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册