提交 1a43ed98 编写于 作者: K Konstantin Matskevich

finalizing

上级 af1084eb
...@@ -65,10 +65,11 @@ OCL_PERF_TEST_P(StereoBMFixture, StereoBM, ::testing::Combine(OCL_PERF_ENUM(32, ...@@ -65,10 +65,11 @@ OCL_PERF_TEST_P(StereoBMFixture, StereoBM, ::testing::Combine(OCL_PERF_ENUM(32,
Ptr<StereoBM> bm = createStereoBM( n_disp, winSize ); Ptr<StereoBM> bm = createStereoBM( n_disp, winSize );
bm->setPreFilterType(bm->PREFILTER_XSOBEL); bm->setPreFilterType(bm->PREFILTER_XSOBEL);
bm->setTextureThreshold(0);
OCL_TEST_CYCLE() bm->compute(left, right, disp); OCL_TEST_CYCLE() bm->compute(left, right, disp);
SANITY_CHECK_NOTHING();//(disp, 1e-3, ERROR_RELATIVE); SANITY_CHECK(disp, 1e-3, ERROR_RELATIVE);
} }
}//ocl }//ocl
......
...@@ -40,8 +40,6 @@ ...@@ -40,8 +40,6 @@
// //
//M*/ //M*/
#pragma OPENCL EXTENSION cl_amd_printf : enable
////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////
////////////////////////////////////////// stereoBM ////////////////////////////////////////////// ////////////////////////////////////////// stereoBM //////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////
...@@ -50,28 +48,28 @@ ...@@ -50,28 +48,28 @@
#define MAX_VAL 32767 #define MAX_VAL 32767
void calcDisp(__local short * cost, __global short * disp, int uniquenessRatio/*, int textureTreshold, short textsum*/, void calcDisp(__local short * cost, __global short * disp, int uniquenessRatio, int mindisp, int ndisp, int w,
int mindisp, int ndisp, int w, __local int * bestDisp, __local int * bestCost, int d, int x, int y, int cols, int rows, int wsz2) __local int * bestDisp, __local int * bestCost, int d, int x, int y, int cols, int rows, int wsz2)
{ {
short FILTERED = (mindisp - 1)<<4; short FILTERED = (mindisp - 1)<<4;
int best_disp = *bestDisp, best_cost = *bestCost, best_disp_back = ndisp - best_disp - 1; int best_disp = *bestDisp, best_cost = *bestCost, best_disp_back = ndisp - best_disp - 1;
short c = cost[0];
int thresh = best_cost + (best_cost * uniquenessRatio/100); int thresh = best_cost + (best_cost * uniquenessRatio/100);
bool notUniq = ( (cost[0] <= thresh) && (d < (best_disp_back - 1) || d > (best_disp_back + 1) ) ); bool notUniq = ( (c <= thresh) && (d < (best_disp_back - 1) || d > (best_disp_back + 1) ) );
if(notUniq) if(notUniq)
*bestCost = FILTERED; *bestCost = FILTERED;
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
// best_disp = (textsum < textureTreshold) ? FILTERED : best_disp;
if( *bestCost != FILTERED && x < cols-wsz2-mindisp && y < rows-wsz2 && d == best_disp_back) if( *bestCost != FILTERED && x < cols-wsz2-mindisp && y < rows-wsz2 && d == best_disp_back)
{ {
int y3 = (best_disp_back > 0) ? cost[-w] : cost[w], int y3 = (best_disp_back > 0) ? cost[-w] : cost[w],
y2 = cost[0], y2 = c,
y1 = (best_disp_back < ndisp-1) ? cost[w] : cost[-w]; y1 = (best_disp_back < ndisp-1) ? cost[w] : cost[-w];
int d = y3+y1-2*y2 + abs(y3-y1); int d_aprox = y3+y1-2*y2 + abs(y3-y1);
disp[0] = (short)(((best_disp_back + mindisp)*256 + (d != 0 ? (y3-y1)*256/d : 0) + 15) >> 4); disp[0] = (short)(((best_disp_back + mindisp)*256 + (d_aprox != 0 ? (y3-y1)*256/d_aprox : 0) + 15) >> 4);
} }
} }
...@@ -111,23 +109,25 @@ short calcCostBorder(__global const uchar * leftptr, __global const uchar * righ ...@@ -111,23 +109,25 @@ short calcCostBorder(__global const uchar * leftptr, __global const uchar * righ
} }
short calcCostInside(__global const uchar * leftptr, __global const uchar * rightptr, int x, int y, short calcCostInside(__global const uchar * leftptr, __global const uchar * rightptr, int x, int y,
int wsz2, int cols, int d, short cost_up_left, short cost_up, short cost_left, int winsize) int wsz2, int cols, int d, short cost_up_left, short cost_up, short cost_left,
int winsize)
{ {
__global const uchar * left, * right; __global const uchar * left, * right;
int idx = mad24(y-wsz2-1, cols, x-wsz2-1); int idx = mad24(y-wsz2-1, cols, x-wsz2-1);
left = leftptr + idx; left = leftptr + idx;
right = rightptr + (idx - d); right = rightptr + (idx - d);
int idx2 = winsize*cols;
uchar corrner1 = abs(left[0] - right[0]), uchar corrner1 = abs(left[0] - right[0]),
corrner2 = abs(left[winsize] - right[winsize]), corrner2 = abs(left[winsize] - right[winsize]),
corrner3 = abs(left[(winsize)*cols] - right[(winsize)*cols]), corrner3 = abs(left[idx2] - right[idx2]),
corrner4 = abs(left[(winsize)*cols + winsize] - right[(winsize)*cols + winsize]); corrner4 = abs(left[idx2 + winsize] - right[idx2 + winsize]);
return cost_up + cost_left - cost_up_left + corrner1 - return cost_up + cost_left - cost_up_left + corrner1 -
corrner2 - corrner3 + corrner4; corrner2 - corrner3 + corrner4;
} }
__kernel void stereoBM_opt(__global const uchar * leftptr, __global const uchar * rightptr, __global uchar * dispptr, __kernel void stereoBM(__global const uchar * leftptr, __global const uchar * rightptr, __global uchar * dispptr,
int disp_step, int disp_offset, int rows, int cols, int mindisp, int ndisp, int disp_step, int disp_offset, int rows, int cols, int mindisp, int ndisp,
int preFilterCap, int textureTreshold, int uniquenessRatio, int sizeX, int sizeY, int winsize) int preFilterCap, int textureTreshold, int uniquenessRatio, int sizeX, int sizeY, int winsize)
{ {
...@@ -135,8 +135,8 @@ __kernel void stereoBM_opt(__global const uchar * leftptr, __global const uchar ...@@ -135,8 +135,8 @@ __kernel void stereoBM_opt(__global const uchar * leftptr, __global const uchar
int gy = get_global_id(1)*sizeY; int gy = get_global_id(1)*sizeY;
int lz = get_local_id(2); int lz = get_local_id(2);
int nthread = lz/ndisp;// only 0 or 1 int nthread = lz/ndisp;
int d = lz%ndisp;// 1 .. ndisp int d = lz%ndisp;
int wsz2 = wsz/2; int wsz2 = wsz/2;
__global short * disp; __global short * disp;
...@@ -169,7 +169,6 @@ __kernel void stereoBM_opt(__global const uchar * leftptr, __global const uchar ...@@ -169,7 +169,6 @@ __kernel void stereoBM_opt(__global const uchar * leftptr, __global const uchar
left = leftptr + idx; left = leftptr + idx;
right = rightptr + (idx - d); right = rightptr + (idx - d);
short costdiff = 0; short costdiff = 0;
for(int j = 0; j < winsize; j++) for(int j = 0; j < winsize; j++)
{ {
costdiff += abs( left[0] - right[0] ); costdiff += abs( left[0] - right[0] );
...@@ -197,8 +196,8 @@ __kernel void stereoBM_opt(__global const uchar * leftptr, __global const uchar ...@@ -197,8 +196,8 @@ __kernel void stereoBM_opt(__global const uchar * leftptr, __global const uchar
int dispIdx = mad24(gy, disp_step, disp_offset + gx*(int)sizeof(short)); int dispIdx = mad24(gy, disp_step, disp_offset + gx*(int)sizeof(short));
disp = (__global short *)(dispptr + dispIdx); disp = (__global short *)(dispptr + dispIdx);
calcDisp(cost, disp, uniquenessRatio, //textureTreshold, textsum, calcDisp(cost, disp, uniquenessRatio, mindisp, ndisp, 2*sizeY,
mindisp, ndisp, 2*sizeY, best_disp + 1, best_cost+1, d, x, y, cols, rows, wsz2); best_disp + 1, best_cost+1, d, x, y, cols, rows, wsz2);
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
lx = 1 - nthread; lx = 1 - nthread;
...@@ -222,9 +221,9 @@ __kernel void stereoBM_opt(__global const uchar * leftptr, __global const uchar ...@@ -222,9 +221,9 @@ __kernel void stereoBM_opt(__global const uchar * leftptr, __global const uchar
cost[2*nthread-1], winsize) : cost[2*nthread-1], winsize) :
calcCostInside(leftptr, rightptr, x, y, wsz2, cols, d, calcCostInside(leftptr, rightptr, x, y, wsz2, cols, d,
cost[0], cost[1], cost[-1], winsize); cost[0], cost[1], cost[-1], winsize);
}
cost[0] = tempcost; cost[0] = tempcost;
atomic_min(best_cost + nthread, tempcost); atomic_min(best_cost + nthread, tempcost);
}
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
if(best_cost[nthread] == tempcost) if(best_cost[nthread] == tempcost)
...@@ -233,8 +232,9 @@ __kernel void stereoBM_opt(__global const uchar * leftptr, __global const uchar ...@@ -233,8 +232,9 @@ __kernel void stereoBM_opt(__global const uchar * leftptr, __global const uchar
int dispIdx = mad24(gy+ly, disp_step, disp_offset + (gx+lx)*(int)sizeof(short)); int dispIdx = mad24(gy+ly, disp_step, disp_offset + (gx+lx)*(int)sizeof(short));
disp = (__global short *)(dispptr + dispIdx); disp = (__global short *)(dispptr + dispIdx);
calcDisp(cost, disp, uniquenessRatio, //textureTreshold, textsum,
mindisp, ndisp, 2*sizeY, best_disp + nthread, best_cost + nthread, d, x, y, cols, rows, wsz2); calcDisp(cost, disp, uniquenessRatio, mindisp, ndisp, 2*sizeY,
best_disp + nthread, best_cost + nthread, d, x, y, cols, rows, wsz2);
barrier(CLK_LOCAL_MEM_FENCE); barrier(CLK_LOCAL_MEM_FENCE);
calcNewCoordinates(&lx, &ly, nthread); calcNewCoordinates(&lx, &ly, nthread);
...@@ -243,68 +243,6 @@ __kernel void stereoBM_opt(__global const uchar * leftptr, __global const uchar ...@@ -243,68 +243,6 @@ __kernel void stereoBM_opt(__global const uchar * leftptr, __global const uchar
#endif #endif
#ifdef SIZE
__kernel void stereoBM_BF(__global const uchar * left, __global const uchar * right, __global uchar * dispptr,
int disp_step, int disp_offset, int rows, int cols, int mindisp, int ndisp,
int preFilterCap, int winsize, int textureTreshold, int uniquenessRatio)
{
int x = get_global_id(0);
int y = get_global_id(1);
int wsz2 = winsize/2;
short FILTERED = (mindisp - 1)<<4;
if(x < cols && y < rows )
{
int dispIdx = mad24(y, disp_step, disp_offset + x*(int)sizeof(short) );
__global short * disp = (__global short*)(dispptr + dispIdx);
disp[0] = FILTERED;
if( (x > mindisp+ndisp+wsz2-2) && (y > wsz2-1) && (x < cols-wsz2-mindisp) && (y < rows - wsz2))
{
int cost[SIZE];
int textsum = 0;
for(int d = mindisp; d < ndisp+mindisp; d++)
{
cost[(ndisp-1) - (d - mindisp)] = 0;
for(int i = -wsz2; i < wsz2+1; i++)
for(int j = -wsz2; j < wsz2+1; j++)
{
textsum += (d == mindisp) ? abs( left[ (y+i) * cols + x + j] - preFilterCap ) : 0;
cost[(ndisp-1) - (d - mindisp)] += abs(left[(y+i) * cols + x+j] - right[(y+i) * cols + x+j-d] );
}
}
int best_disp = -1, best_cost = INT_MAX;
for(int d = ndisp + mindisp - 1; d > mindisp-1; d--)
{
best_cost = (cost[d-mindisp] < best_cost) ? cost[d-mindisp] : best_cost;
best_disp = (best_cost == cost[d-mindisp]) ? (d) : best_disp;
}
int thresh = best_cost + (best_cost * uniquenessRatio/100);
for(int d = mindisp; (d < ndisp + mindisp) && (uniquenessRatio > 0); d++)
{
best_disp = ( (cost[d-mindisp] <= thresh) && (d < best_disp-1 || d > best_disp + 1) ) ? FILTERED : best_disp;
}
disp[0] = textsum < textureTreshold ? (FILTERED) : (best_disp == FILTERED) ? (short)(best_disp) : (short)(best_disp);
if( best_disp != FILTERED )
{
int y1 = (best_disp > mindisp) ? cost[best_disp-mindisp-1] : cost[best_disp-mindisp+1],
y2 = cost[best_disp-mindisp],
y3 = (best_disp < mindisp+ndisp-1) ? cost[best_disp-mindisp+1] : cost[best_disp-mindisp-1];
int _d = y3+y1-2*y2 + abs(y3-y1);
disp[0] = (short)(((ndisp - (best_disp-mindisp) - 1 + mindisp)*256 + (_d != 0 ? (y3-y1)*256/_d : 0) + 15) >> 4);
}
}
}
}
#endif
////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////
/////////////////////////////////////// Norm Prefiler //////////////////////////////////////////// /////////////////////////////////////// Norm Prefiler ////////////////////////////////////////////
////////////////////////////////////////////////////////////////////////////////////////////////// //////////////////////////////////////////////////////////////////////////////////////////////////
......
...@@ -735,9 +735,9 @@ struct PrefilterInvoker : public ParallelLoopBody ...@@ -735,9 +735,9 @@ struct PrefilterInvoker : public ParallelLoopBody
StereoBMParams* state; StereoBMParams* state;
}; };
static bool ocl_stereobm_opt( InputArray _left, InputArray _right, static bool ocl_stereobm( InputArray _left, InputArray _right,
OutputArray _disp, StereoBMParams* state) OutputArray _disp, StereoBMParams* state)
{//printf("opt\n"); {
int ndisp = state->numDisparities; int ndisp = state->numDisparities;
int mindisp = state->minDisparity; int mindisp = state->minDisparity;
int wsz = state->SADWindowSize; int wsz = state->SADWindowSize;
...@@ -745,7 +745,7 @@ static bool ocl_stereobm_opt( InputArray _left, InputArray _right, ...@@ -745,7 +745,7 @@ static bool ocl_stereobm_opt( InputArray _left, InputArray _right,
int sizeX = std::max(11, 27 - ocl::Device::getDefault().maxComputeUnits() ), sizeY = sizeX-1, N = ndisp*2; int sizeX = std::max(11, 27 - ocl::Device::getDefault().maxComputeUnits() ), sizeY = sizeX-1, N = ndisp*2;
ocl::Kernel k("stereoBM_opt", ocl::calib3d::stereobm_oclsrc, cv::format("-D csize=%d -D wsz=%d", (2*sizeY)*ndisp, wsz) ); ocl::Kernel k("stereoBM", ocl::calib3d::stereobm_oclsrc, cv::format("-D csize=%d -D wsz=%d", (2*sizeY)*ndisp, wsz) );
if(k.empty()) if(k.empty())
return false; return false;
...@@ -781,42 +781,6 @@ static bool ocl_stereobm_opt( InputArray _left, InputArray _right, ...@@ -781,42 +781,6 @@ static bool ocl_stereobm_opt( InputArray _left, InputArray _right,
return k.run(3, globalThreads, localThreads, false); return k.run(3, globalThreads, localThreads, false);
} }
static bool ocl_stereobm_bf(InputArray _left, InputArray _right,
OutputArray _disp, StereoBMParams* state)
{
ocl::Kernel k("stereoBM_BF", ocl::calib3d::stereobm_oclsrc, cv::format("-D SIZE=%d", state->numDisparities ) );
if(k.empty())
return false;
UMat left = _left.getUMat(), right = _right.getUMat();
_disp.create(_left.size(), CV_16S);
UMat disp = _disp.getUMat();
size_t globalThreads[3] = { left.cols, left.rows, 1 };
int idx = 0;
idx = k.set(idx, ocl::KernelArg::PtrReadOnly(left));
idx = k.set(idx, ocl::KernelArg::PtrReadOnly(right));
idx = k.set(idx, ocl::KernelArg::WriteOnly(disp));
idx = k.set(idx, state->minDisparity);
idx = k.set(idx, state->numDisparities);
idx = k.set(idx, state->preFilterCap);
idx = k.set(idx, state->SADWindowSize);
idx = k.set(idx, state->textureThreshold);
idx = k.set(idx, state->uniquenessRatio);
return k.run(2, globalThreads, NULL, false);
}
static bool ocl_stereo(InputArray _left, InputArray _right,
OutputArray _disp, StereoBMParams* state)
{
//if(ocl::Device::getDefault().localMemSize() > state->numDisparities * state->numDisparities * sizeof(short) )
return ocl_stereobm_opt(_left, _right, _disp, state);
//else
// return ocl_stereobm_bf(_left, _right, _disp, state);
}
struct FindStereoCorrespInvoker : public ParallelLoopBody struct FindStereoCorrespInvoker : public ParallelLoopBody
{ {
FindStereoCorrespInvoker( const Mat& _left, const Mat& _right, FindStereoCorrespInvoker( const Mat& _left, const Mat& _right,
...@@ -950,19 +914,21 @@ public: ...@@ -950,19 +914,21 @@ public:
int FILTERED = (params.minDisparity - 1) << DISPARITY_SHIFT; int FILTERED = (params.minDisparity - 1) << DISPARITY_SHIFT;
if(ocl::useOpenCL() && disparr.isUMat()) if(ocl::useOpenCL() && disparr.isUMat() && params.textureThreshold == 0)
{ {
UMat left, right; UMat left, right;
CV_Assert(ocl_prefiltering(leftarr, rightarr, left, right, &params)); if(ocl_prefiltering(leftarr, rightarr, left, right, &params))
CV_Assert(ocl_stereo(left, right, disparr, &params)); {
if(ocl_stereobm(left, right, disparr, &params))
{
if( params.speckleRange >= 0 && params.speckleWindowSize > 0 ) if( params.speckleRange >= 0 && params.speckleWindowSize > 0 )
filterSpeckles(disparr.getMat(), FILTERED, params.speckleWindowSize, params.speckleRange, slidingSumBuf); filterSpeckles(disparr.getMat(), FILTERED, params.speckleWindowSize, params.speckleRange, slidingSumBuf);
if (dtype == CV_32F) if (dtype == CV_32F)
disparr.getUMat().convertTo(disparr, CV_32FC1, 1./(1 << DISPARITY_SHIFT), 0); disparr.getUMat().convertTo(disparr, CV_32FC1, 1./(1 << DISPARITY_SHIFT), 0);
return; return;
} }
}
}
Mat left0 = leftarr.getMat(), right0 = rightarr.getMat(); Mat left0 = leftarr.getMat(), right0 = rightarr.getMat();
disparr.create(left0.size(), dtype); disparr.create(left0.size(), dtype);
......
...@@ -81,26 +81,11 @@ OCL_TEST_P(StereoBMFixture, StereoBM) ...@@ -81,26 +81,11 @@ OCL_TEST_P(StereoBMFixture, StereoBM)
{ {
Ptr<StereoBM> bm = createStereoBM( n_disp, winSize); Ptr<StereoBM> bm = createStereoBM( n_disp, winSize);
bm->setPreFilterType(bm->PREFILTER_XSOBEL); bm->setPreFilterType(bm->PREFILTER_XSOBEL);
// bm->setMinDisparity(15); bm->setTextureThreshold(0);
long t1 = clock();
OCL_OFF(bm->compute(left, right, disp)); OCL_OFF(bm->compute(left, right, disp));
long t2 = clock();
OCL_ON(bm->compute(uleft, uright, udisp)); OCL_ON(bm->compute(uleft, uright, udisp));
cv::ocl::finish();
long t3 = clock();
std::cout << (double)(t2-t1)/CLOCKS_PER_SEC << " " << (double)(t3-t2)/CLOCKS_PER_SEC << std::endl;
/*
Mat t; absdiff(disp, udisp, t);
for(int i = 0; i<t.rows; i++)
for(int j = 0; j< t.cols; j++)
// if(t.at<short>(i,j) > 0)
if(i == 5 && j == 36)
printf("%d %d cv: %d ocl: %d\n", i, j, disp.at<short>(i,j), udisp.getMat(ACCESS_READ).at<short>(i,j) );
/* imshow("diff.png", t*100);
imshow("cv.png", disp*100);
imshow("ocl.png", udisp.getMat(ACCESS_READ)*100);
waitKey(0);*/
Near(1e-3); Near(1e-3);
} }
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册