提交 381216aa 编写于 作者: V Vladislav Vinogradov

refactor cudaoptflow public API:

* use opaque algorithm interfaces
* add stream support
上级 19c6bbe7
......@@ -61,49 +61,94 @@ namespace cv { namespace cuda {
//! @addtogroup cudaoptflow
//! @{
/** @brief Class computing the optical flow for two images using Brox et al Optical Flow algorithm
(@cite Brox2004). :
//
// Interface
//
/** @brief Base interface for dense optical flow algorithms.
*/
class CV_EXPORTS BroxOpticalFlow
class CV_EXPORTS DenseOpticalFlow : public Algorithm
{
public:
BroxOpticalFlow(float alpha_, float gamma_, float scale_factor_, int inner_iterations_, int outer_iterations_, int solver_iterations_) :
alpha(alpha_), gamma(gamma_), scale_factor(scale_factor_),
inner_iterations(inner_iterations_), outer_iterations(outer_iterations_), solver_iterations(solver_iterations_)
{
}
/** @brief Calculates a dense optical flow.
//! Compute optical flow
//! frame0 - source frame (supports only CV_32FC1 type)
//! frame1 - frame to track (with the same size and type as frame0)
//! u - flow horizontal component (along x axis)
//! v - flow vertical component (along y axis)
void operator ()(const GpuMat& frame0, const GpuMat& frame1, GpuMat& u, GpuMat& v, Stream& stream = Stream::Null());
@param I0 first input image.
@param I1 second input image of the same size and the same type as I0.
@param flow computed flow image that has the same size as I0 and type CV_32FC2.
@param stream Stream for the asynchronous version.
*/
virtual void calc(InputArray I0, InputArray I1, InputOutputArray flow, Stream& stream = Stream::Null()) = 0;
};
//! flow smoothness
float alpha;
/** @brief Base interface for sparse optical flow algorithms.

Concrete algorithms derive from this class and implement calc().
*/
class CV_EXPORTS SparseOpticalFlow : public Algorithm
{
public:
/** @brief Calculates a sparse optical flow.

@param prevImg First input image.
@param nextImg Second input image of the same size and the same type as prevImg.
@param prevPts Vector of 2D points for which the flow needs to be found.
@param nextPts Output vector of 2D points containing the calculated new positions of the input
features in the second image. It is declared as InputOutputArray, so an implementation may also
read it (presumably as an initial estimate; confirm against the concrete algorithm).
@param status Output status vector. Each element of the vector is set to 1 if the
flow for the corresponding feature has been found. Otherwise, it is set to 0.
@param err Optional output vector that contains the error response for each point (inverse confidence).
Defaults to cv::noArray().
@param stream Stream for the asynchronous version. Defaults to Stream::Null().
*/
virtual void calc(InputArray prevImg, InputArray nextImg,
InputArray prevPts, InputOutputArray nextPts,
OutputArray status,
OutputArray err = cv::noArray(),
Stream& stream = Stream::Null()) = 0;
};
//! gradient constancy importance
float gamma;
//
// BroxOpticalFlow
//
//! pyramid scale factor
float scale_factor;
/** @brief Class computing the optical flow for two images using Brox et al Optical Flow algorithm (@cite Brox2004).
*/
class CV_EXPORTS BroxOpticalFlow : public DenseOpticalFlow
{
public:
virtual double getFlowSmoothness() const = 0;
virtual void setFlowSmoothness(double alpha) = 0;
virtual double getGradientConstancyImportance() const = 0;
virtual void setGradientConstancyImportance(double gamma) = 0;
virtual double getPyramidScaleFactor() const = 0;
virtual void setPyramidScaleFactor(double scale_factor) = 0;
//! number of lagged non-linearity iterations (inner loop)
int inner_iterations;
virtual int getInnerIterations() const = 0;
virtual void setInnerIterations(int inner_iterations) = 0;
//! number of warping iterations (number of pyramid levels)
int outer_iterations;
virtual int getOuterIterations() const = 0;
virtual void setOuterIterations(int outer_iterations) = 0;
//! number of linear system solver iterations
int solver_iterations;
GpuMat buf;
virtual int getSolverIterations() const = 0;
virtual void setSolverIterations(int solver_iterations) = 0;
static Ptr<BroxOpticalFlow> create(
double alpha = 0.197,
double gamma = 50.0,
double scale_factor = 0.8,
int inner_iterations = 5,
int outer_iterations = 150,
int solver_iterations = 10);
};
/** @brief Class used for calculating an optical flow.
//
// PyrLKOpticalFlow
//
/** @brief Class used for calculating a sparse optical flow.
The class can calculate an optical flow for a sparse feature set or dense optical flow using the
The class can calculate an optical flow for a sparse feature set using the
iterative Lucas-Kanade method with pyramids.
@sa calcOpticalFlowPyrLK
......@@ -112,158 +157,116 @@ iterative Lucas-Kanade method with pyramids.
- An example of the Lucas Kanade optical flow algorithm can be found at
opencv_source_code/samples/gpu/pyrlk_optical_flow.cpp
*/
class CV_EXPORTS PyrLKOpticalFlow
class CV_EXPORTS SparsePyrLKOpticalFlow : public SparseOpticalFlow
{
public:
PyrLKOpticalFlow();
/** @brief Calculate an optical flow for a sparse feature set.
@param prevImg First 8-bit input image (supports both grayscale and color images).
@param nextImg Second input image of the same size and the same type as prevImg .
@param prevPts Vector of 2D points for which the flow needs to be found. It must be one row matrix
with CV_32FC2 type.
@param nextPts Output vector of 2D points (with single-precision floating-point coordinates)
containing the calculated new positions of input features in the second image. When useInitialFlow
is true, the vector must have the same size as in the input.
@param status Output status vector (CV_8UC1 type). Each element of the vector is set to 1 if the
flow for the corresponding features has been found. Otherwise, it is set to 0.
@param err Output vector (CV_32FC1 type) that contains the difference between patches around the
original and moved points or min eigen value if getMinEigenVals is checked. It can be NULL, if not
needed.
@sa calcOpticalFlowPyrLK
*/
void sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts,
GpuMat& status, GpuMat* err = 0);
/** @brief Calculate dense optical flow.
@param prevImg First 8-bit grayscale input image.
@param nextImg Second input image of the same size and the same type as prevImg .
@param u Horizontal component of the optical flow of the same size as input images, 32-bit
floating-point, single-channel
@param v Vertical component of the optical flow of the same size as input images, 32-bit
floating-point, single-channel
@param err Output vector (CV_32FC1 type) that contains the difference between patches around the
original and moved points or min eigen value if getMinEigenVals is checked. It can be NULL, if not
needed.
*/
void dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, GpuMat* err = 0);
/** @brief Releases inner buffers memory.
*/
void releaseMemory();
virtual Size getWinSize() const = 0;
virtual void setWinSize(Size winSize) = 0;
Size winSize;
int maxLevel;
int iters;
bool useInitialFlow;
virtual int getMaxLevel() const = 0;
virtual void setMaxLevel(int maxLevel) = 0;
private:
std::vector<GpuMat> prevPyr_;
std::vector<GpuMat> nextPyr_;
virtual int getNumIters() const = 0;
virtual void setNumIters(int iters) = 0;
GpuMat buf_;
virtual bool getUseInitialFlow() const = 0;
virtual void setUseInitialFlow(bool useInitialFlow) = 0;
GpuMat uPyr_[2];
GpuMat vPyr_[2];
static Ptr<SparsePyrLKOpticalFlow> create(
Size winSize = Size(21, 21),
int maxLevel = 3,
int iters = 30,
bool useInitialFlow = false);
};
/** @brief Class computing a dense optical flow using the Gunnar Farneback’s algorithm. :
/** @brief Class used for calculating a dense optical flow.
The class can calculate a dense optical flow using the
iterative Lucas-Kanade method with pyramids.
*/
class CV_EXPORTS FarnebackOpticalFlow
class CV_EXPORTS DensePyrLKOpticalFlow : public DenseOpticalFlow
{
public:
FarnebackOpticalFlow()
{
numLevels = 5;
pyrScale = 0.5;
fastPyramids = false;
winSize = 13;
numIters = 10;
polyN = 5;
polySigma = 1.1;
flags = 0;
}
int numLevels;
double pyrScale;
bool fastPyramids;
int winSize;
int numIters;
int polyN;
double polySigma;
int flags;
/** @brief Computes a dense optical flow using the Gunnar Farneback’s algorithm.
@param frame0 First 8-bit gray-scale input image
@param frame1 Second 8-bit gray-scale input image
@param flowx Flow horizontal component
@param flowy Flow vertical component
@param s Stream
@sa calcOpticalFlowFarneback
*/
void operator ()(const GpuMat &frame0, const GpuMat &frame1, GpuMat &flowx, GpuMat &flowy, Stream &s = Stream::Null());
virtual Size getWinSize() const = 0;
virtual void setWinSize(Size winSize) = 0;
/** @brief Releases unused auxiliary memory buffers.
*/
void releaseMemory()
{
frames_[0].release();
frames_[1].release();
pyrLevel_[0].release();
pyrLevel_[1].release();
M_.release();
bufM_.release();
R_[0].release();
R_[1].release();
blurredFrame_[0].release();
blurredFrame_[1].release();
pyramid0_.clear();
pyramid1_.clear();
}
private:
void prepareGaussian(
int n, double sigma, float *g, float *xg, float *xxg,
double &ig11, double &ig03, double &ig33, double &ig55);
void setPolynomialExpansionConsts(int n, double sigma);
void updateFlow_boxFilter(
const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat &flowy,
GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]);
void updateFlow_gaussianBlur(
const GpuMat& R0, const GpuMat& R1, GpuMat& flowx, GpuMat& flowy,
GpuMat& M, GpuMat &bufM, int blockSize, bool updateMatrices, Stream streams[]);
GpuMat frames_[2];
GpuMat pyrLevel_[2], M_, bufM_, R_[2], blurredFrame_[2];
std::vector<GpuMat> pyramid0_, pyramid1_;
virtual int getMaxLevel() const = 0;
virtual void setMaxLevel(int maxLevel) = 0;
virtual int getNumIters() const = 0;
virtual void setNumIters(int iters) = 0;
virtual bool getUseInitialFlow() const = 0;
virtual void setUseInitialFlow(bool useInitialFlow) = 0;
static Ptr<DensePyrLKOpticalFlow> create(
Size winSize = Size(13, 13),
int maxLevel = 3,
int iters = 30,
bool useInitialFlow = false);
};
// Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method
//
// see reference:
// [1] C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow".
// [2] Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation".
class CV_EXPORTS OpticalFlowDual_TVL1_CUDA
// FarnebackOpticalFlow
//
/** @brief Class computing a dense optical flow using Gunnar Farneback’s algorithm.
*/
class CV_EXPORTS FarnebackOpticalFlow : public DenseOpticalFlow
{
public:
OpticalFlowDual_TVL1_CUDA();
virtual int getNumLevels() const = 0;
virtual void setNumLevels(int numLevels) = 0;
virtual double getPyrScale() const = 0;
virtual void setPyrScale(double pyrScale) = 0;
virtual bool getFastPyramids() const = 0;
virtual void setFastPyramids(bool fastPyramids) = 0;
void operator ()(const GpuMat& I0, const GpuMat& I1, GpuMat& flowx, GpuMat& flowy);
virtual int getWinSize() const = 0;
virtual void setWinSize(int winSize) = 0;
void collectGarbage();
virtual int getNumIters() const = 0;
virtual void setNumIters(int numIters) = 0;
virtual int getPolyN() const = 0;
virtual void setPolyN(int polyN) = 0;
virtual double getPolySigma() const = 0;
virtual void setPolySigma(double polySigma) = 0;
virtual int getFlags() const = 0;
virtual void setFlags(int flags) = 0;
static Ptr<FarnebackOpticalFlow> create(
int numLevels = 5,
double pyrScale = 0.5,
bool fastPyramids = false,
int winSize = 13,
int numIters = 10,
int polyN = 5,
double polySigma = 1.1,
int flags = 0);
};
//
// OpticalFlowDual_TVL1
//
/** @brief Implementation of the Zach, Pock and Bischof Dual TV-L1 Optical Flow method.
*
* @sa C. Zach, T. Pock and H. Bischof, "A Duality Based Approach for Realtime TV-L1 Optical Flow".
* @sa Javier Sanchez, Enric Meinhardt-Llopis and Gabriele Facciolo. "TV-L1 Optical Flow Estimation".
*/
class CV_EXPORTS OpticalFlowDual_TVL1 : public DenseOpticalFlow
{
public:
/**
* Time step of the numerical scheme.
*/
double tau;
virtual double getTau() const = 0;
virtual void setTau(double tau) = 0;
/**
* Weight parameter for the data term, attachment parameter.
......@@ -271,7 +274,8 @@ public:
* The smaller this parameter is, the smoother the solutions we obtain.
* It depends on the range of motions of the images, so its value should be adapted to each image sequence.
*/
double lambda;
virtual double getLambda() const = 0;
virtual void setLambda(double lambda) = 0;
/**
* Weight parameter for (u - v)^2, tightness parameter.
......@@ -279,20 +283,23 @@ public:
* In theory, it should have a small value in order to maintain both parts in correspondence.
* The method is stable for a large range of values of this parameter.
*/
virtual double getGamma() const = 0;
virtual void setGamma(double gamma) = 0;
double gamma;
/**
* parameter used for motion estimation. It adds a variable allowing for illumination variations
* Set this parameter to 1. if you have varying illumination.
* See: Chambolle et al, A First-Order Primal-Dual Algorithm for Convex Problems with Applications to Imaging
* Journal of Mathematical imaging and vision, may 2011 Vol 40 issue 1, pp 120-145
*/
double theta;
* parameter used for motion estimation. It adds a variable allowing for illumination variations
* Set this parameter to 1. if you have varying illumination.
* See: Chambolle et al, A First-Order Primal-Dual Algorithm for Convex Problems with Applications to Imaging
* Journal of Mathematical imaging and vision, may 2011 Vol 40 issue 1, pp 120-145
*/
virtual double getTheta() const = 0;
virtual void setTheta(double theta) = 0;
/**
* Number of scales used to create the pyramid of images.
*/
int nscales;
virtual int getNumScales() const = 0;
virtual void setNumScales(int nscales) = 0;
/**
* Number of warpings per scale.
......@@ -300,51 +307,39 @@ public:
* This is a parameter that assures the stability of the method.
* It also affects the running time, so it is a compromise between speed and accuracy.
*/
int warps;
virtual int getNumWarps() const = 0;
virtual void setNumWarps(int warps) = 0;
/**
* Stopping criterion threshold used in the numerical scheme, which is a trade-off between precision and running time.
* A small value will yield more accurate solutions at the expense of a slower convergence.
*/
double epsilon;
virtual double getEpsilon() const = 0;
virtual void setEpsilon(double epsilon) = 0;
/**
* Stopping criterion iterations number used in the numerical scheme.
*/
int iterations;
double scaleStep;
bool useInitialFlow;
private:
void procOneScale(const GpuMat& I0, const GpuMat& I1, GpuMat& u1, GpuMat& u2, GpuMat& u3);
std::vector<GpuMat> I0s;
std::vector<GpuMat> I1s;
std::vector<GpuMat> u1s;
std::vector<GpuMat> u2s;
std::vector<GpuMat> u3s;
GpuMat I1x_buf;
GpuMat I1y_buf;
GpuMat I1w_buf;
GpuMat I1wx_buf;
GpuMat I1wy_buf;
GpuMat grad_buf;
GpuMat rho_c_buf;
GpuMat p11_buf;
GpuMat p12_buf;
GpuMat p21_buf;
GpuMat p22_buf;
GpuMat p31_buf;
GpuMat p32_buf;
GpuMat diff_buf;
GpuMat norm_buf;
virtual int getNumIterations() const = 0;
virtual void setNumIterations(int iterations) = 0;
virtual double getScaleStep() const = 0;
virtual void setScaleStep(double scaleStep) = 0;
virtual bool getUseInitialFlow() const = 0;
virtual void setUseInitialFlow(bool useInitialFlow) = 0;
static Ptr<OpticalFlowDual_TVL1> create(
double tau = 0.25,
double lambda = 0.15,
double theta = 0.3,
int nscales = 5,
int warps = 5,
double epsilon = 0.01,
int iterations = 300,
double scaleStep = 0.8,
double gamma = 0.0,
bool useInitialFlow = false);
};
//! @}
......
......@@ -47,84 +47,148 @@ using namespace cv::cuda;
#if !defined (HAVE_CUDA) || !defined (HAVE_OPENCV_CUDALEGACY) || defined (CUDA_DISABLER)
void cv::cuda::BroxOpticalFlow::operator ()(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, Stream&) { throw_no_cuda(); }
// Stub compiled when HAVE_CUDA or HAVE_OPENCV_CUDALEGACY is missing (or CUDA_DISABLER is set):
// calls throw_no_cuda(); the trailing empty Ptr presumably only satisfies the return type.
Ptr<BroxOpticalFlow> cv::cuda::BroxOpticalFlow::create(double, double, double, int, int, int) { throw_no_cuda(); return Ptr<BroxOpticalFlow>(); }
#else
namespace
{
size_t getBufSize(const NCVBroxOpticalFlowDescriptor& desc, const NCVMatrix<Ncv32f>& frame0, const NCVMatrix<Ncv32f>& frame1,
NCVMatrix<Ncv32f>& u, NCVMatrix<Ncv32f>& v, const cudaDeviceProp& devProp)
namespace {
class BroxOpticalFlowImpl : public BroxOpticalFlow
{
NCVMemStackAllocator gpuCounter(static_cast<Ncv32u>(devProp.textureAlignment));
public:
BroxOpticalFlowImpl(double alpha, double gamma, double scale_factor,
int inner_iterations, int outer_iterations, int solver_iterations) :
alpha_(alpha), gamma_(gamma), scale_factor_(scale_factor),
inner_iterations_(inner_iterations), outer_iterations_(outer_iterations),
solver_iterations_(solver_iterations)
{
}
virtual void calc(InputArray I0, InputArray I1, InputOutputArray flow, Stream& stream);
virtual double getFlowSmoothness() const { return alpha_; }
virtual void setFlowSmoothness(double alpha) { alpha_ = static_cast<float>(alpha); }
virtual double getGradientConstancyImportance() const { return gamma_; }
virtual void setGradientConstancyImportance(double gamma) { gamma_ = static_cast<float>(gamma); }
virtual double getPyramidScaleFactor() const { return scale_factor_; }
virtual void setPyramidScaleFactor(double scale_factor) { scale_factor_ = static_cast<float>(scale_factor); }
//! number of lagged non-linearity iterations (inner loop)
virtual int getInnerIterations() const { return inner_iterations_; }
virtual void setInnerIterations(int inner_iterations) { inner_iterations_ = inner_iterations; }
//! number of warping iterations (number of pyramid levels)
virtual int getOuterIterations() const { return outer_iterations_; }
virtual void setOuterIterations(int outer_iterations) { outer_iterations_ = outer_iterations; }
//! number of linear system solver iterations
virtual int getSolverIterations() const { return solver_iterations_; }
virtual void setSolverIterations(int solver_iterations) { solver_iterations_ = solver_iterations; }
private:
//! flow smoothness
float alpha_;
//! gradient constancy importance
float gamma_;
//! pyramid scale factor
float scale_factor_;
//! number of lagged non-linearity iterations (inner loop)
int inner_iterations_;
//! number of warping iterations (number of pyramid levels)
int outer_iterations_;
//! number of linear system solver iterations
int solver_iterations_;
};
// Returns the scratch-buffer size to reserve for NCVBroxOpticalFlow with the given
// descriptor and matrices.
//
// The solver is invoked once on stream 0 with an allocator that is constructed from the
// alignment only (no backing memory is supplied here); the allocator's maxSize() after
// that call is the result.
// NOTE(review): this looks like NCV's counting (dry-run) allocator mode -- confirm
// against NCVMemStackAllocator.
static size_t getBufSize(const NCVBroxOpticalFlowDescriptor& desc,
                         const NCVMatrix<Ncv32f>& frame0, const NCVMatrix<Ncv32f>& frame1,
                         NCVMatrix<Ncv32f>& u, NCVMatrix<Ncv32f>& v,
                         size_t textureAlignment)
{
    const Ncv32u alignment = static_cast<Ncv32u>(textureAlignment);
    NCVMemStackAllocator sizeCounter(alignment);

    ncvSafeCall( NCVBroxOpticalFlow(desc, sizeCounter, frame0, frame1, u, v, 0) );

    return sizeCounter.maxSize();
}
}
namespace
{
static void outputHandler(const String &msg) { CV_Error(cv::Error::GpuApiCallError, msg.c_str()); }
}
// Message callback intended for ncvSetDebugOutputHandler(): raises every message it
// receives as a cv::Error::GpuApiCallError through CV_Error.
static void outputHandler(const String &msg)
{
CV_Error(cv::Error::GpuApiCallError, msg.c_str());
}
void cv::cuda::BroxOpticalFlow::operator ()(const GpuMat& frame0, const GpuMat& frame1, GpuMat& u, GpuMat& v, Stream& s)
{
ncvSetDebugOutputHandler(outputHandler);
void BroxOpticalFlowImpl::calc(InputArray _I0, InputArray _I1, InputOutputArray _flow, Stream& stream)
{
const GpuMat frame0 = _I0.getGpuMat();
const GpuMat frame1 = _I1.getGpuMat();
CV_Assert(frame0.type() == CV_32FC1);
CV_Assert(frame1.size() == frame0.size() && frame1.type() == frame0.type());
CV_Assert( frame0.type() == CV_32FC1 );
CV_Assert( frame1.size() == frame0.size() && frame1.type() == frame0.type() );
u.create(frame0.size(), CV_32FC1);
v.create(frame0.size(), CV_32FC1);
ncvSetDebugOutputHandler(outputHandler);
cudaDeviceProp devProp;
cudaSafeCall( cudaGetDeviceProperties(&devProp, getDevice()) );
BufferPool pool(stream);
GpuMat u = pool.getBuffer(frame0.size(), CV_32FC1);
GpuMat v = pool.getBuffer(frame0.size(), CV_32FC1);
NCVBroxOpticalFlowDescriptor desc;
NCVBroxOpticalFlowDescriptor desc;
desc.alpha = alpha_;
desc.gamma = gamma_;
desc.scale_factor = scale_factor_;
desc.number_of_inner_iterations = inner_iterations_;
desc.number_of_outer_iterations = outer_iterations_;
desc.number_of_solver_iterations = solver_iterations_;
desc.alpha = alpha;
desc.gamma = gamma;
desc.scale_factor = scale_factor;
desc.number_of_inner_iterations = inner_iterations;
desc.number_of_outer_iterations = outer_iterations;
desc.number_of_solver_iterations = solver_iterations;
NCVMemSegment frame0MemSeg;
frame0MemSeg.begin.memtype = NCVMemoryTypeDevice;
frame0MemSeg.begin.ptr = const_cast<uchar*>(frame0.data);
frame0MemSeg.size = frame0.step * frame0.rows;
NCVMemSegment frame0MemSeg;
frame0MemSeg.begin.memtype = NCVMemoryTypeDevice;
frame0MemSeg.begin.ptr = const_cast<uchar*>(frame0.data);
frame0MemSeg.size = frame0.step * frame0.rows;
NCVMemSegment frame1MemSeg;
frame1MemSeg.begin.memtype = NCVMemoryTypeDevice;
frame1MemSeg.begin.ptr = const_cast<uchar*>(frame1.data);
frame1MemSeg.size = frame1.step * frame1.rows;
NCVMemSegment frame1MemSeg;
frame1MemSeg.begin.memtype = NCVMemoryTypeDevice;
frame1MemSeg.begin.ptr = const_cast<uchar*>(frame1.data);
frame1MemSeg.size = frame1.step * frame1.rows;
NCVMemSegment uMemSeg;
uMemSeg.begin.memtype = NCVMemoryTypeDevice;
uMemSeg.begin.ptr = u.ptr();
uMemSeg.size = u.step * u.rows;
NCVMemSegment uMemSeg;
uMemSeg.begin.memtype = NCVMemoryTypeDevice;
uMemSeg.begin.ptr = u.ptr();
uMemSeg.size = u.step * u.rows;
NCVMemSegment vMemSeg;
vMemSeg.begin.memtype = NCVMemoryTypeDevice;
vMemSeg.begin.ptr = v.ptr();
vMemSeg.size = v.step * v.rows;
NCVMemSegment vMemSeg;
vMemSeg.begin.memtype = NCVMemoryTypeDevice;
vMemSeg.begin.ptr = v.ptr();
vMemSeg.size = v.step * v.rows;
DeviceInfo devInfo;
size_t textureAlignment = devInfo.textureAlignment();
NCVMatrixReuse<Ncv32f> frame0Mat(frame0MemSeg, static_cast<Ncv32u>(devProp.textureAlignment), frame0.cols, frame0.rows, static_cast<Ncv32u>(frame0.step));
NCVMatrixReuse<Ncv32f> frame1Mat(frame1MemSeg, static_cast<Ncv32u>(devProp.textureAlignment), frame1.cols, frame1.rows, static_cast<Ncv32u>(frame1.step));
NCVMatrixReuse<Ncv32f> uMat(uMemSeg, static_cast<Ncv32u>(devProp.textureAlignment), u.cols, u.rows, static_cast<Ncv32u>(u.step));
NCVMatrixReuse<Ncv32f> vMat(vMemSeg, static_cast<Ncv32u>(devProp.textureAlignment), v.cols, v.rows, static_cast<Ncv32u>(v.step));
NCVMatrixReuse<Ncv32f> frame0Mat(frame0MemSeg, static_cast<Ncv32u>(textureAlignment), frame0.cols, frame0.rows, static_cast<Ncv32u>(frame0.step));
NCVMatrixReuse<Ncv32f> frame1Mat(frame1MemSeg, static_cast<Ncv32u>(textureAlignment), frame1.cols, frame1.rows, static_cast<Ncv32u>(frame1.step));
NCVMatrixReuse<Ncv32f> uMat(uMemSeg, static_cast<Ncv32u>(textureAlignment), u.cols, u.rows, static_cast<Ncv32u>(u.step));
NCVMatrixReuse<Ncv32f> vMat(vMemSeg, static_cast<Ncv32u>(textureAlignment), v.cols, v.rows, static_cast<Ncv32u>(v.step));
cudaStream_t stream = StreamAccessor::getStream(s);
size_t bufSize = getBufSize(desc, frame0Mat, frame1Mat, uMat, vMat, textureAlignment);
GpuMat buf = pool.getBuffer(1, static_cast<int>(bufSize), CV_8UC1);
size_t bufSize = getBufSize(desc, frame0Mat, frame1Mat, uMat, vMat, devProp);
NCVMemStackAllocator gpuAllocator(NCVMemoryTypeDevice, bufSize, static_cast<Ncv32u>(textureAlignment), buf.ptr());
ensureSizeIsEnough(1, static_cast<int>(bufSize), CV_8UC1, buf);
ncvSafeCall( NCVBroxOpticalFlow(desc, gpuAllocator, frame0Mat, frame1Mat, uMat, vMat, StreamAccessor::getStream(stream)) );
NCVMemStackAllocator gpuAllocator(NCVMemoryTypeDevice, bufSize, static_cast<Ncv32u>(devProp.textureAlignment), buf.ptr());
GpuMat flows[] = {u, v};
cuda::merge(flows, 2, _flow, stream);
}
}
ncvSafeCall( NCVBroxOpticalFlow(desc, gpuAllocator, frame0Mat, frame1Mat, uMat, vMat, stream) );
// Factory for the Brox optical flow algorithm: builds a BroxOpticalFlowImpl from the six
// tuning parameters and hands it back through the public BroxOpticalFlow interface.
Ptr<BroxOpticalFlow> cv::cuda::BroxOpticalFlow::create(double alpha, double gamma, double scale_factor,
                                                       int inner_iterations, int outer_iterations,
                                                       int solver_iterations)
{
    Ptr<BroxOpticalFlow> impl = makePtr<BroxOpticalFlowImpl>(
        alpha, gamma, scale_factor,
        inner_iterations, outer_iterations, solver_iterations);

    return impl;
}
#endif /* HAVE_CUDA */
......@@ -472,16 +472,16 @@ namespace pyrlk
}
}
void loadConstants(int2 winSize, int iters)
void loadConstants(int2 winSize, int iters, cudaStream_t stream)
{
cudaSafeCall( cudaMemcpyToSymbol(c_winSize_x, &winSize.x, sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbol(c_winSize_y, &winSize.y, sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbolAsync(c_winSize_x, &winSize.x, sizeof(int), 0, cudaMemcpyHostToDevice, stream) );
cudaSafeCall( cudaMemcpyToSymbolAsync(c_winSize_y, &winSize.y, sizeof(int), 0, cudaMemcpyHostToDevice, stream) );
int2 halfWin = make_int2((winSize.x - 1) / 2, (winSize.y - 1) / 2);
cudaSafeCall( cudaMemcpyToSymbol(c_halfWin_x, &halfWin.x, sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbol(c_halfWin_y, &halfWin.y, sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbolAsync(c_halfWin_x, &halfWin.x, sizeof(int), 0, cudaMemcpyHostToDevice, stream) );
cudaSafeCall( cudaMemcpyToSymbolAsync(c_halfWin_y, &halfWin.y, sizeof(int), 0, cudaMemcpyHostToDevice, stream) );
cudaSafeCall( cudaMemcpyToSymbol(c_iters, &iters, sizeof(int)) );
cudaSafeCall( cudaMemcpyToSymbolAsync(c_iters, &iters, sizeof(int), 0, cudaMemcpyHostToDevice, stream) );
}
void sparse1(PtrStepSzf I, PtrStepSzf J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
......
......@@ -66,15 +66,16 @@ namespace tvl1flow
dy(y, x) = 0.5f * (src(::min(y + 1, src.rows - 1), x) - src(::max(y - 1, 0), x));
}
void centeredGradient(PtrStepSzf src, PtrStepSzf dx, PtrStepSzf dy)
void centeredGradient(PtrStepSzf src, PtrStepSzf dx, PtrStepSzf dy, cudaStream_t stream)
{
const dim3 block(32, 8);
const dim3 grid(divUp(src.cols, block.x), divUp(src.rows, block.y));
centeredGradientKernel<<<grid, block>>>(src, dx, dy);
centeredGradientKernel<<<grid, block, 0, stream>>>(src, dx, dy);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
if (!stream)
cudaSafeCall( cudaDeviceSynchronize() );
}
}
......@@ -164,7 +165,10 @@ namespace tvl1flow
rho(y, x) = I1wVal - I1wxVal * u1Val - I1wyVal * u2Val - I0Val;
}
void warpBackward(PtrStepSzf I0, PtrStepSzf I1, PtrStepSzf I1x, PtrStepSzf I1y, PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf I1w, PtrStepSzf I1wx, PtrStepSzf I1wy, PtrStepSzf grad, PtrStepSzf rho)
void warpBackward(PtrStepSzf I0, PtrStepSzf I1, PtrStepSzf I1x, PtrStepSzf I1y,
PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf I1w, PtrStepSzf I1wx,
PtrStepSzf I1wy, PtrStepSzf grad, PtrStepSzf rho,
cudaStream_t stream)
{
const dim3 block(32, 8);
const dim3 grid(divUp(I0.cols, block.x), divUp(I0.rows, block.y));
......@@ -173,10 +177,11 @@ namespace tvl1flow
bindTexture(&tex_I1x, I1x);
bindTexture(&tex_I1y, I1y);
warpBackwardKernel<<<grid, block>>>(I0, u1, u2, I1w, I1wx, I1wy, grad, rho);
warpBackwardKernel<<<grid, block, 0, stream>>>(I0, u1, u2, I1w, I1wx, I1wy, grad, rho);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
if (!stream)
cudaSafeCall( cudaDeviceSynchronize() );
}
}
......@@ -292,15 +297,17 @@ namespace tvl1flow
PtrStepSzf grad, PtrStepSzf rho_c,
PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32,
PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3, PtrStepSzf error,
float l_t, float theta, float gamma, bool calcError)
float l_t, float theta, float gamma, bool calcError,
cudaStream_t stream)
{
const dim3 block(32, 8);
const dim3 grid(divUp(I1wx.cols, block.x), divUp(I1wx.rows, block.y));
estimateUKernel<<<grid, block>>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, p31, p32, u1, u2, u3, error, l_t, theta, gamma, calcError);
estimateUKernel<<<grid, block, 0, stream>>>(I1wx, I1wy, grad, rho_c, p11, p12, p21, p22, p31, p32, u1, u2, u3, error, l_t, theta, gamma, calcError);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
if (!stream)
cudaSafeCall( cudaDeviceSynchronize() );
}
}
......@@ -346,15 +353,19 @@ namespace tvl1flow
}
}
void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3, PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32, float taut, float gamma)
void estimateDualVariables(PtrStepSzf u1, PtrStepSzf u2, PtrStepSzf u3,
PtrStepSzf p11, PtrStepSzf p12, PtrStepSzf p21, PtrStepSzf p22, PtrStepSzf p31, PtrStepSzf p32,
float taut, float gamma,
cudaStream_t stream)
{
const dim3 block(32, 8);
const dim3 grid(divUp(u1.cols, block.x), divUp(u1.rows, block.y));
estimateDualVariablesKernel<<<grid, block>>>(u1, u2, u3, p11, p12, p21, p22, p31, p32, taut, gamma);
estimateDualVariablesKernel<<<grid, block, 0, stream>>>(u1, u2, u3, p11, p12, p21, p22, p31, p32, taut, gamma);
cudaSafeCall( cudaGetLastError() );
cudaSafeCall( cudaDeviceSynchronize() );
if (!stream)
cudaSafeCall( cudaDeviceSynchronize() );
}
}
......
......@@ -47,37 +47,54 @@ using namespace cv::cuda;
#if !defined (HAVE_CUDA) || defined (CUDA_DISABLER)
cv::cuda::PyrLKOpticalFlow::PyrLKOpticalFlow() { throw_no_cuda(); }
void cv::cuda::PyrLKOpticalFlow::sparse(const GpuMat&, const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat*) { throw_no_cuda(); }
void cv::cuda::PyrLKOpticalFlow::dense(const GpuMat&, const GpuMat&, GpuMat&, GpuMat&, GpuMat*) { throw_no_cuda(); }
void cv::cuda::PyrLKOpticalFlow::releaseMemory() {}
// Stub compiled when HAVE_CUDA is missing or CUDA_DISABLER is set: calls throw_no_cuda();
// the trailing empty Ptr presumably only satisfies the return type.
Ptr<SparsePyrLKOpticalFlow> cv::cuda::SparsePyrLKOpticalFlow::create(Size, int, int, bool) { throw_no_cuda(); return Ptr<SparsePyrLKOpticalFlow>(); }
// Stub compiled when HAVE_CUDA is missing or CUDA_DISABLER is set: calls throw_no_cuda().
// The empty Ptr must be of the declared return type; SparsePyrLKOpticalFlow is an unrelated
// sibling class, so returning Ptr<SparsePyrLKOpticalFlow> here would not convert.
Ptr<DensePyrLKOpticalFlow> cv::cuda::DensePyrLKOpticalFlow::create(Size, int, int, bool) { throw_no_cuda(); return Ptr<DensePyrLKOpticalFlow>(); }
#else /* !defined (HAVE_CUDA) */
namespace pyrlk
{
void loadConstants(int2 winSize, int iters);
void loadConstants(int2 winSize, int iters, cudaStream_t stream);
void sparse1(PtrStepSzf I, PtrStepSzf J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
int level, dim3 block, dim3 patch, cudaStream_t stream = 0);
int level, dim3 block, dim3 patch, cudaStream_t stream);
void sparse4(PtrStepSz<float4> I, PtrStepSz<float4> J, const float2* prevPts, float2* nextPts, uchar* status, float* err, int ptcount,
int level, dim3 block, dim3 patch, cudaStream_t stream = 0);
int level, dim3 block, dim3 patch, cudaStream_t stream);
void dense(PtrStepSzb I, PtrStepSzf J, PtrStepSzf u, PtrStepSzf v, PtrStepSzf prevU, PtrStepSzf prevV,
PtrStepSzf err, int2 winSize, cudaStream_t stream = 0);
}
cv::cuda::PyrLKOpticalFlow::PyrLKOpticalFlow()
{
winSize = Size(21, 21);
maxLevel = 3;
iters = 30;
useInitialFlow = false;
PtrStepSzf err, int2 winSize, cudaStream_t stream);
}
namespace
{
void calcPatchSize(cv::Size winSize, dim3& block, dim3& patch)
// Internal helper that stores the pyramidal Lucas-Kanade parameters and exposes
// GpuMat-based sparse() and dense() entry points that take an explicit Stream.
class PyrLKOpticalFlowBase
{
public:
// Stores the four parameters in the corresponding protected members.
PyrLKOpticalFlowBase(Size winSize, int maxLevel, int iters, bool useInitialFlow);
// Sparse tracking of prevPts from prevImg to nextImg. `err` may be null: the definition
// checks the pointer before using it. An empty prevPts releases the outputs and returns.
void sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts,
GpuMat& status, GpuMat* err, Stream& stream);
// Dense flow between prevImg and nextImg, written to u and v (definition not shown here).
void dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, Stream& stream);
protected:
// Search window size; sparse() asserts that both dimensions are greater than 2.
Size winSize_;
// Highest pyramid level; sparse() asserts it is >= 0 and pre-scales the points by 1 / (1 << maxLevel_) / 2.
int maxLevel_;
// Iteration count (presumably per pyramid level -- confirm in the kernels).
int iters_;
// When true, sparse() requires nextPts to match prevPts in size and type and starts from it
// instead of from prevPts.
bool useInitialFlow_;
private:
// Presumably the image pyramids of the previous and next frame, kept between calls -- confirm in sparse()/dense().
std::vector<GpuMat> prevPyr_;
std::vector<GpuMat> nextPyr_;
};
// Copies the four algorithm parameters into the matching members; does no other work.
PyrLKOpticalFlowBase::PyrLKOpticalFlowBase(Size winSize, int maxLevel, int iters, bool useInitialFlow)
    : winSize_(winSize)
    , maxLevel_(maxLevel)
    , iters_(iters)
    , useInitialFlow_(useInitialFlow)
{
}
void calcPatchSize(Size winSize, dim3& block, dim3& patch)
{
if (winSize.width > 32 && winSize.width > 2 * winSize.height)
{
......@@ -95,156 +112,239 @@ namespace
block.z = patch.z = 1;
}
}
void cv::cuda::PyrLKOpticalFlow::sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts, GpuMat& status, GpuMat* err)
{
if (prevPts.empty())
// Sparse pyramidal Lucas-Kanade tracking, with all work enqueued on `stream`.
//
// Steps visible in the stream-aware (underscore-suffixed) code below:
//   1. An empty prevPts releases nextPts/status/err and returns early.
//   2. Inputs are validated: 1, 3 or 4 channels, matching size/type of both
//      images, prevPts is a single-row CV_32FC2 matrix.
//   3. nextPts is seeded from prevPts (or from the caller's nextPts when
//      useInitialFlow_ is set), scaled by 1 / (2^maxLevel_ * 2).
//   4. status is sized to one byte per point and filled with 1.
//   5. Float pyramids with maxLevel_ + 1 levels are built for both images.
//   6. pyrlk::sparse1 (1 channel) or pyrlk::sparse4 (otherwise) is called for
//      every level from maxLevel_ down to 0.
//
// err may be NULL; when given it is sized to one float per point and is only
// handed to the kernel at level 0.
//
// NOTE(review): this span was captured from a rendered diff. Lines that use
// the un-suffixed names (winSize, maxLevel, useInitialFlow, buf_) or omit the
// stream argument appear to be the removed pre-refactor version interleaved
// with the new code - confirm against the repository before relying on them.
void PyrLKOpticalFlowBase::sparse(const GpuMat& prevImg, const GpuMat& nextImg, const GpuMat& prevPts, GpuMat& nextPts, GpuMat& status, GpuMat* err, Stream& stream)
{
nextPts.release();
status.release();
if (err) err->release();
return;
}
// Nothing to track: hand back empty outputs.
if (prevPts.empty())
{
nextPts.release();
status.release();
if (err) err->release();
return;
}
dim3 block, patch;
calcPatchSize(winSize, block, patch);
dim3 block, patch;
calcPatchSize(winSize_, block, patch);
CV_Assert(prevImg.channels() == 1 || prevImg.channels() == 3 || prevImg.channels() == 4);
CV_Assert(prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type());
CV_Assert(maxLevel >= 0);
CV_Assert(winSize.width > 2 && winSize.height > 2);
CV_Assert(patch.x > 0 && patch.x < 6 && patch.y > 0 && patch.y < 6);
CV_Assert(prevPts.rows == 1 && prevPts.type() == CV_32FC2);
CV_Assert( prevImg.channels() == 1 || prevImg.channels() == 3 || prevImg.channels() == 4 );
CV_Assert( prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type() );
CV_Assert( maxLevel_ >= 0 );
CV_Assert( winSize_.width > 2 && winSize_.height > 2 );
CV_Assert( patch.x > 0 && patch.x < 6 && patch.y > 0 && patch.y < 6 );
CV_Assert( prevPts.rows == 1 && prevPts.type() == CV_32FC2 );
if (useInitialFlow)
CV_Assert(nextPts.size() == prevPts.size() && nextPts.type() == CV_32FC2);
else
ensureSizeIsEnough(1, prevPts.cols, prevPts.type(), nextPts);
// With an initial flow the caller must supply nextPts matching prevPts;
// otherwise nextPts is (re)allocated here.
if (useInitialFlow_)
CV_Assert( nextPts.size() == prevPts.size() && nextPts.type() == prevPts.type() );
else
ensureSizeIsEnough(1, prevPts.cols, prevPts.type(), nextPts);
GpuMat temp1 = (useInitialFlow ? nextPts : prevPts).reshape(1);
GpuMat temp2 = nextPts.reshape(1);
cuda::multiply(temp1, Scalar::all(1.0 / (1 << maxLevel) / 2.0), temp2);
// Write the scaled seed coordinates into nextPts (viewed as single-channel).
GpuMat temp1 = (useInitialFlow_ ? nextPts : prevPts).reshape(1);
GpuMat temp2 = nextPts.reshape(1);
cuda::multiply(temp1, Scalar::all(1.0 / (1 << maxLevel_) / 2.0), temp2, 1, -1, stream);
ensureSizeIsEnough(1, prevPts.cols, CV_8UC1, status);
status.setTo(Scalar::all(1));
ensureSizeIsEnough(1, prevPts.cols, CV_8UC1, status);
status.setTo(Scalar::all(1), stream);
if (err)
ensureSizeIsEnough(1, prevPts.cols, CV_32FC1, *err);
if (err)
ensureSizeIsEnough(1, prevPts.cols, CV_32FC1, *err);
// build the image pyramids.
// build the image pyramids.
prevPyr_.resize(maxLevel + 1);
nextPyr_.resize(maxLevel + 1);
BufferPool pool(stream);
int cn = prevImg.channels();
prevPyr_.resize(maxLevel_ + 1);
nextPyr_.resize(maxLevel_ + 1);
if (cn == 1 || cn == 4)
{
prevImg.convertTo(prevPyr_[0], CV_32F);
nextImg.convertTo(nextPyr_[0], CV_32F);
}
else
{
cuda::cvtColor(prevImg, buf_, COLOR_BGR2BGRA);
buf_.convertTo(prevPyr_[0], CV_32F);
int cn = prevImg.channels();
// 1- and 4-channel inputs are converted to float directly; 3-channel inputs
// are first expanded to 4 channels through a temporary pool buffer.
if (cn == 1 || cn == 4)
{
prevImg.convertTo(prevPyr_[0], CV_32F, stream);
nextImg.convertTo(nextPyr_[0], CV_32F, stream);
}
else
{
GpuMat buf = pool.getBuffer(prevImg.size(), CV_MAKE_TYPE(prevImg.depth(), 4));
cuda::cvtColor(nextImg, buf_, COLOR_BGR2BGRA);
buf_.convertTo(nextPyr_[0], CV_32F);
// The same temporary is reused for both images.
cuda::cvtColor(prevImg, buf, COLOR_BGR2BGRA, 0, stream);
buf.convertTo(prevPyr_[0], CV_32F, stream);
cuda::cvtColor(nextImg, buf, COLOR_BGR2BGRA, 0, stream);
buf.convertTo(nextPyr_[0], CV_32F, stream);
}
// Each level is produced from the previous one with pyrDown.
for (int level = 1; level <= maxLevel_; ++level)
{
cuda::pyrDown(prevPyr_[level - 1], prevPyr_[level], stream);
cuda::pyrDown(nextPyr_[level - 1], nextPyr_[level], stream);
}
pyrlk::loadConstants(make_int2(winSize_.width, winSize_.height), iters_, StreamAccessor::getStream(stream));
// Walk the pyramid from the coarsest level (maxLevel_) down to level 0.
for (int level = maxLevel_; level >= 0; level--)
{
if (cn == 1)
{
pyrlk::sparse1(prevPyr_[level], nextPyr_[level],
prevPts.ptr<float2>(), nextPts.ptr<float2>(),
status.ptr(),
level == 0 && err ? err->ptr<float>() : 0, prevPts.cols,
level, block, patch,
StreamAccessor::getStream(stream));
}
else
{
pyrlk::sparse4(prevPyr_[level], nextPyr_[level],
prevPts.ptr<float2>(), nextPts.ptr<float2>(),
status.ptr(),
level == 0 && err ? err->ptr<float>() : 0, prevPts.cols,
level, block, patch,
StreamAccessor::getStream(stream));
}
}
}
for (int level = 1; level <= maxLevel; ++level)
// Dense pyramidal Lucas-Kanade flow, with all work enqueued on `stream`.
//
// Steps visible in the stream-aware (underscore-suffixed) code below:
//   1. Both images must be CV_8UC1 and of equal size.
//   2. prevPyr_[0] is assigned prevImg as-is, nextPyr_[0] is nextImg converted
//      to CV_32F, and the remaining levels are produced with pyrDown.
//   3. Two pairs of CV_32FC1 u/v buffers, each sized like prevImg, are taken
//      from a BufferPool and zero-filled.
//   4. pyrlk::dense is called for every level from maxLevel_ down to 0,
//      alternating between the two buffer pairs; an empty PtrStepSzf is passed
//      as the error output.
//   5. The buffer pair selected by `idx` after the loop is copied to u and v.
//
// useInitialFlow_ is not read by the new code in this function.
//
// NOTE(review): this span was captured from a rendered diff. Lines that use
// the un-suffixed names (winSize, maxLevel, iters, cn) or reference prevPts /
// sparse1 / sparse4 appear to be removed pre-refactor lines interleaved with
// the new code - confirm against the repository before relying on them.
void PyrLKOpticalFlowBase::dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, Stream& stream)
{
cuda::pyrDown(prevPyr_[level - 1], prevPyr_[level]);
cuda::pyrDown(nextPyr_[level - 1], nextPyr_[level]);
}
CV_Assert( prevImg.type() == CV_8UC1 );
CV_Assert( prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type() );
CV_Assert( maxLevel_ >= 0 );
CV_Assert( winSize_.width > 2 && winSize_.height > 2 );
pyrlk::loadConstants(make_int2(winSize.width, winSize.height), iters);
// build the image pyramids.
for (int level = maxLevel; level >= 0; level--)
{
if (cn == 1)
prevPyr_.resize(maxLevel_ + 1);
nextPyr_.resize(maxLevel_ + 1);
// Level 0: the previous image is used unconverted, the next image as float.
prevPyr_[0] = prevImg;
nextImg.convertTo(nextPyr_[0], CV_32F, stream);
for (int level = 1; level <= maxLevel_; ++level)
{
pyrlk::sparse1(prevPyr_[level], nextPyr_[level],
prevPts.ptr<float2>(), nextPts.ptr<float2>(), status.ptr(), level == 0 && err ? err->ptr<float>() : 0, prevPts.cols,
level, block, patch);
cuda::pyrDown(prevPyr_[level - 1], prevPyr_[level], stream);
cuda::pyrDown(nextPyr_[level - 1], nextPyr_[level], stream);
}
else
// Two u/v buffer pairs, all allocated at the size of prevImg.
BufferPool pool(stream);
GpuMat uPyr[] = {
pool.getBuffer(prevImg.size(), CV_32FC1),
pool.getBuffer(prevImg.size(), CV_32FC1),
};
GpuMat vPyr[] = {
pool.getBuffer(prevImg.size(), CV_32FC1),
pool.getBuffer(prevImg.size(), CV_32FC1),
};
uPyr[0].setTo(Scalar::all(0), stream);
vPyr[0].setTo(Scalar::all(0), stream);
uPyr[1].setTo(Scalar::all(0), stream);
vPyr[1].setTo(Scalar::all(0), stream);
int2 winSize2i = make_int2(winSize_.width, winSize_.height);
pyrlk::loadConstants(winSize2i, iters_, StreamAccessor::getStream(stream));
// idx selects the buffer pair passed first to pyrlk::dense; idx2 is the other pair.
int idx = 0;
for (int level = maxLevel_; level >= 0; level--)
{
pyrlk::sparse4(prevPyr_[level], nextPyr_[level],
prevPts.ptr<float2>(), nextPts.ptr<float2>(), status.ptr(), level == 0 && err ? err->ptr<float>() : 0, prevPts.cols,
level, block, patch);
int idx2 = (idx + 1) & 1;
pyrlk::dense(prevPyr_[level], nextPyr_[level],
uPyr[idx], vPyr[idx], uPyr[idx2], vPyr[idx2],
PtrStepSzf(), winSize2i,
StreamAccessor::getStream(stream));
// Swap the pairs for the next level; after level 0 idx is left unchanged.
if (level > 0)
idx = idx2;
}
uPyr[idx].copyTo(u, stream);
vPyr[idx].copyTo(v, stream);
}
}
void cv::cuda::PyrLKOpticalFlow::dense(const GpuMat& prevImg, const GpuMat& nextImg, GpuMat& u, GpuMat& v, GpuMat* err)
{
CV_Assert(prevImg.type() == CV_8UC1);
CV_Assert(prevImg.size() == nextImg.size() && prevImg.type() == nextImg.type());
CV_Assert(maxLevel >= 0);
CV_Assert(winSize.width > 2 && winSize.height > 2);
// Concrete SparsePyrLKOpticalFlow: exposes the parameters stored in the
// private PyrLKOpticalFlowBase through getters/setters and forwards calc() to
// PyrLKOpticalFlowBase::sparse().
//
// NOTE(review): this span was captured from a rendered diff. The bare
// statements between the member declarations (err->create, prevPyr_.resize,
// the `for` header, ...) appear to be removed pre-refactor lines, not class
// members - confirm against the repository.
class SparsePyrLKOpticalFlowImpl : public SparsePyrLKOpticalFlow, private PyrLKOpticalFlowBase
{
public:
SparsePyrLKOpticalFlowImpl(Size winSize, int maxLevel, int iters, bool useInitialFlow) :
PyrLKOpticalFlowBase(winSize, maxLevel, iters, useInitialFlow)
{
}
if (err)
err->create(prevImg.size(), CV_32FC1);
// The setters store the value as-is; sparse() asserts on it when it runs.
virtual Size getWinSize() const { return winSize_; }
virtual void setWinSize(Size winSize) { winSize_ = winSize; }
// build the image pyramids.
virtual int getMaxLevel() const { return maxLevel_; }
virtual void setMaxLevel(int maxLevel) { maxLevel_ = maxLevel; }
prevPyr_.resize(maxLevel + 1);
nextPyr_.resize(maxLevel + 1);
virtual int getNumIters() const { return iters_; }
virtual void setNumIters(int iters) { iters_ = iters; }
prevPyr_[0] = prevImg;
nextImg.convertTo(nextPyr_[0], CV_32F);
virtual bool getUseInitialFlow() const { return useInitialFlow_; }
virtual void setUseInitialFlow(bool useInitialFlow) { useInitialFlow_ = useInitialFlow; }
for (int level = 1; level <= maxLevel; ++level)
// Unwraps the array proxies to GpuMat and runs the sparse tracker.
// nextPts and status are taken by reference so sparse() can (re)allocate them.
virtual void calc(InputArray _prevImg, InputArray _nextImg,
InputArray _prevPts, InputOutputArray _nextPts,
OutputArray _status,
OutputArray _err,
Stream& stream)
{
const GpuMat prevImg = _prevImg.getGpuMat();
const GpuMat nextImg = _nextImg.getGpuMat();
const GpuMat prevPts = _prevPts.getGpuMat();
GpuMat& nextPts = _nextPts.getGpuMatRef();
GpuMat& status = _status.getGpuMatRef();
// The error output is optional: pass NULL when the caller did not request it.
GpuMat* err = _err.needed() ? &(_err.getGpuMatRef()) : NULL;
sparse(prevImg, nextImg, prevPts, nextPts, status, err, stream);
}
};
// Concrete DensePyrLKOpticalFlow: exposes the parameters stored in the private
// PyrLKOpticalFlowBase through getters/setters and implements calc() on top of
// PyrLKOpticalFlowBase::dense().
//
// NOTE(review): this span was captured from a rendered diff. The bare
// statements between the member declarations (pyrDown, ensureSizeIsEnough,
// setTo, loadConstants, the `for` loop, copyTo, ...) appear to be removed
// pre-refactor lines, not class members - confirm against the repository.
class DensePyrLKOpticalFlowImpl : public DensePyrLKOpticalFlow, private PyrLKOpticalFlowBase
{
cuda::pyrDown(prevPyr_[level - 1], prevPyr_[level]);
cuda::pyrDown(nextPyr_[level - 1], nextPyr_[level]);
}
public:
DensePyrLKOpticalFlowImpl(Size winSize, int maxLevel, int iters, bool useInitialFlow) :
PyrLKOpticalFlowBase(winSize, maxLevel, iters, useInitialFlow)
{
}
ensureSizeIsEnough(prevImg.size(), CV_32FC1, uPyr_[0]);
ensureSizeIsEnough(prevImg.size(), CV_32FC1, vPyr_[0]);
ensureSizeIsEnough(prevImg.size(), CV_32FC1, uPyr_[1]);
ensureSizeIsEnough(prevImg.size(), CV_32FC1, vPyr_[1]);
uPyr_[0].setTo(Scalar::all(0));
vPyr_[0].setTo(Scalar::all(0));
uPyr_[1].setTo(Scalar::all(0));
vPyr_[1].setTo(Scalar::all(0));
// The setters store the value as-is; dense() asserts on it when it runs.
virtual Size getWinSize() const { return winSize_; }
virtual void setWinSize(Size winSize) { winSize_ = winSize; }
int2 winSize2i = make_int2(winSize.width, winSize.height);
pyrlk::loadConstants(winSize2i, iters);
virtual int getMaxLevel() const { return maxLevel_; }
virtual void setMaxLevel(int maxLevel) { maxLevel_ = maxLevel; }
PtrStepSzf derr = err ? *err : PtrStepSzf();
virtual int getNumIters() const { return iters_; }
virtual void setNumIters(int iters) { iters_ = iters; }
int idx = 0;
virtual bool getUseInitialFlow() const { return useInitialFlow_; }
virtual void setUseInitialFlow(bool useInitialFlow) { useInitialFlow_ = useInitialFlow; }
for (int level = maxLevel; level >= 0; level--)
{
int idx2 = (idx + 1) & 1;
// Computes the flow as two CV_32FC1 planes in temporary pool buffers and then
// merges them into the caller's _flow output.
virtual void calc(InputArray _prevImg, InputArray _nextImg, InputOutputArray _flow, Stream& stream)
{
const GpuMat prevImg = _prevImg.getGpuMat();
const GpuMat nextImg = _nextImg.getGpuMat();
pyrlk::dense(prevPyr_[level], nextPyr_[level], uPyr_[idx], vPyr_[idx], uPyr_[idx2], vPyr_[idx2],
level == 0 ? derr : PtrStepSzf(), winSize2i);
BufferPool pool(stream);
GpuMat u = pool.getBuffer(prevImg.size(), CV_32FC1);
GpuMat v = pool.getBuffer(prevImg.size(), CV_32FC1);
if (level > 0)
idx = idx2;
}
dense(prevImg, nextImg, u, v, stream);
uPyr_[idx].copyTo(u);
vPyr_[idx].copyTo(v);
// Channel order in the merged output is {u, v}.
GpuMat flows[] = {u, v};
cuda::merge(flows, 2, _flow, stream);
}
};
}
void cv::cuda::PyrLKOpticalFlow::releaseMemory()
// Factory for the sparse pyramidal LK algorithm: wraps a new
// SparsePyrLKOpticalFlowImpl, built from the given parameters, in a Ptr.
//
// NOTE(review): this span was captured from a rendered diff. The clear() /
// release() statements inside the braces appear to be the body of the removed
// PyrLKOpticalFlow::releaseMemory(), not part of this factory - confirm
// against the repository.
Ptr<SparsePyrLKOpticalFlow> cv::cuda::SparsePyrLKOpticalFlow::create(Size winSize, int maxLevel, int iters, bool useInitialFlow)
{
prevPyr_.clear();
nextPyr_.clear();
buf_.release();
uPyr_[0].release();
vPyr_[0].release();
return makePtr<SparsePyrLKOpticalFlowImpl>(winSize, maxLevel, iters, useInitialFlow);
}
uPyr_[1].release();
vPyr_[1].release();
// Factory for the dense pyramidal LK algorithm: builds a
// DensePyrLKOpticalFlowImpl from the given parameters and returns it through
// the public DensePyrLKOpticalFlow interface.
Ptr<DensePyrLKOpticalFlow> cv::cuda::DensePyrLKOpticalFlow::create(Size winSize, int maxLevel, int iters, bool useInitialFlow)
{
    Ptr<DensePyrLKOpticalFlow> algorithm =
        makePtr<DensePyrLKOpticalFlowImpl>(winSize, maxLevel, iters, useInitialFlow);
    return algorithm;
}
#endif /* !defined (HAVE_CUDA) */
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册