Commit 52644f06 authored by Alexander Alekhin

Merge pull request #14764 from alalek:core_intrin_drop_hasSIMD_checks

......@@ -204,10 +204,6 @@ prefilterXSobel( const Mat& src, Mat& dst, int ftzero )
tab[x] = (uchar)(x - OFS < -ftzero ? 0 : x - OFS > ftzero ? ftzero*2 : x - OFS + ftzero);
uchar val0 = tab[0 + OFS];
#if CV_SIMD128
bool useSIMD = hasSIMD128();
#endif
for( y = 0; y < size.height-1; y += 2 )
{
const uchar* srow1 = src.ptr<uchar>(y);
......@@ -221,7 +217,6 @@ prefilterXSobel( const Mat& src, Mat& dst, int ftzero )
x = 1;
#if CV_SIMD128
if( useSIMD )
{
v_int16x8 ftz = v_setall_s16((short) ftzero);
v_int16x8 ftz2 = v_setall_s16((short)(ftzero*2));
......@@ -268,7 +263,6 @@ prefilterXSobel( const Mat& src, Mat& dst, int ftzero )
uchar* dptr = dst.ptr<uchar>(y);
x = 0;
#if CV_SIMD128
if( useSIMD )
{
v_uint8x16 val0_16 = v_setall_u8(val0);
for(; x <= size.width-16; x+=16 )
......@@ -594,8 +588,6 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right,
mType FILTERED = (mType)((mindisp - 1) << disp_shift);
#if CV_SIMD128
bool useSIMD = hasSIMD128();
if( useSIMD )
{
CV_Assert (ndisp % 8 == 0);
}
......@@ -637,7 +629,6 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right,
int lval = lptr[0];
d = 0;
#if CV_SIMD128
if( useSIMD )
{
v_uint8x16 lv = v_setall_u8((uchar)lval);
......@@ -706,7 +697,6 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right,
int lval = lptr[0];
d = 0;
#if CV_SIMD128
if( useSIMD )
{
v_uint8x16 lv = v_setall_u8((uchar)lval);
for( ; d <= ndisp - 16; d += 16 )
......@@ -769,7 +759,6 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right,
{
d = 0;
#if CV_SIMD128
if( useSIMD )
{
for( d = 0; d <= ndisp-8; d += 8 )
{
......@@ -799,7 +788,6 @@ findStereoCorrespondenceBM( const Mat& left, const Mat& right,
hsad_sub = hsad0 + MAX(y - wsz2 - 1, -dy0)*ndisp;
d = 0;
#if CV_SIMD128
if( useSIMD )
{
v_int32x4 d0_4 = v_int32x4(0, 1, 2, 3);
v_int32x4 dd_4 = v_setall_s32(4);
......@@ -1003,9 +991,6 @@ struct FindStereoCorrespInvoker : public ParallelLoopBody
validDisparityRect = _validDisparityRect;
slidingSumBuf = &_slidingSumBuf;
cost = &_cost;
#if CV_SIMD128
useSIMD = hasSIMD128();
#endif
}
void operator()(const Range& range) const CV_OVERRIDE
......@@ -1043,7 +1028,7 @@ struct FindStereoCorrespInvoker : public ParallelLoopBody
Mat cost_i = state->disp12MaxDiff >= 0 ? cost->rowRange(row0, row1) : Mat();
#if CV_SIMD128
if( useSIMD && useShorts )
if (useShorts)
{
if( disp_i.type() == CV_16S)
findStereoCorrespondenceBM_SIMD<short>( left_i, right_i, disp_i, cost_i, *state, ptr, row0, rows - row1 );
......@@ -1083,7 +1068,6 @@ protected:
size_t stripeBufSize;
bool useShorts;
Rect validDisparityRect;
bool useSIMD;
};
class StereoBMImpl CV_FINAL : public StereoBM
......
......@@ -140,9 +140,6 @@ static void calcPixelCostBT( const Mat& img1, const Mat& img2, int y,
int width2 = maxX2 - minX2;
const PixType *row1 = img1.ptr<PixType>(y), *row2 = img2.ptr<PixType>(y);
PixType *prow1 = buffer + width2*2, *prow2 = prow1 + width*cn*2;
#if CV_SIMD128
bool useSIMD = hasSIMD128();
#endif
tab += tabOfs;
......@@ -224,7 +221,7 @@ static void calcPixelCostBT( const Mat& img1, const Mat& img2, int y,
int u1 = std::max(ul, ur); u1 = std::max(u1, u);
#if CV_SIMD128
if( useSIMD )
if (true)
{
v_uint8x16 _u = v_setall_u8((uchar)u), _u0 = v_setall_u8((uchar)u0);
v_uint8x16 _u1 = v_setall_u8((uchar)u1);
......@@ -304,8 +301,6 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
5, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0, 4, 0, 1, 0, 2, 0, 1, 0, 3, 0, 1, 0, 2, 0, 1, 0
};
static const v_uint16x8 v_LSB = v_uint16x8(0x1, 0x2, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80);
bool useSIMD = hasSIMD128();
#endif
const int ALIGN = 16;
......@@ -450,7 +445,7 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
const CostType* pixSub = pixDiff + std::max(x - (SW2+1)*D, 0);
#if CV_SIMD128
if( useSIMD )
if (true)
{
for( d = 0; d < D; d += 8 )
{
......@@ -547,7 +542,7 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
CostType* Sp = S + x*D;
#if CV_SIMD128
if( useSIMD )
if (true)
{
v_int16x8 _P1 = v_setall_s16((short)P1);
......@@ -681,7 +676,7 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
const CostType* Cp = C + x*D;
#if CV_SIMD128
if( useSIMD )
if (true)
{
v_int16x8 _P1 = v_setall_s16((short)P1);
v_int16x8 _delta0 = v_setall_s16((short)delta0);
......@@ -753,7 +748,7 @@ static void computeDisparitySGBM( const Mat& img1, const Mat& img2,
else
{
#if CV_SIMD128
if( useSIMD )
if (true)
{
v_int16x8 _minS = v_setall_s16(MAX_COST), _bestDisp = v_setall_s16(-1);
v_int16x8 _d8 = v_int16x8(0, 1, 2, 3, 4, 5, 6, 7), _8 = v_setall_s16(8);
......@@ -868,7 +863,6 @@ struct CalcVerticalSums: public ParallelLoopBody
Cbuf = alignedBuf;
Sbuf = Cbuf + CSBufSize;
hsumBuf = Sbuf + CSBufSize;
useSIMD = hasSIMD128();
}
void operator()(const Range& range) const CV_OVERRIDE
......@@ -957,7 +951,7 @@ struct CalcVerticalSums: public ParallelLoopBody
const CostType* pixSub = pixDiff + std::max(x - (SW2+1)*D, 0);
#if CV_SIMD128
if( useSIMD )
if (true)
{
for( d = 0; d < D; d += 8 )
{
......@@ -1034,7 +1028,7 @@ struct CalcVerticalSums: public ParallelLoopBody
CostType* Sp = S + x*D;
#if CV_SIMD128
if( useSIMD )
if (true)
{
v_int16x8 _P1 = v_setall_s16((short)P1);
......@@ -1121,7 +1115,6 @@ struct CalcVerticalSums: public ParallelLoopBody
size_t LrSize;
size_t hsumBufNRows;
int ftzero;
bool useSIMD;
};
struct CalcHorizontalSums: public ParallelLoopBody
......@@ -1149,7 +1142,6 @@ struct CalcHorizontalSums: public ParallelLoopBody
LrSize = 2 * D2;
Cbuf = alignedBuf;
Sbuf = Cbuf + CSBufSize;
useSIMD = hasSIMD128();
}
void operator()(const Range& range) const CV_OVERRIDE
......@@ -1204,7 +1196,7 @@ struct CalcHorizontalSums: public ParallelLoopBody
CostType* Sp = S + x*D;
#if CV_SIMD128
if( useSIMD )
if (true)
{
v_int16x8 _P1 = v_setall_s16((short)P1);
......@@ -1277,7 +1269,7 @@ struct CalcHorizontalSums: public ParallelLoopBody
minLr = MAX_COST;
#if CV_SIMD128
if( useSIMD )
if (true)
{
v_int16x8 _P1 = v_setall_s16((short)P1);
......@@ -1424,7 +1416,6 @@ struct CalcHorizontalSums: public ParallelLoopBody
int INVALID_DISP_SCALED;
int uniquenessRatio;
int disp12MaxDiff;
bool useSIMD;
};
/*
computes disparity for "roi" in img1 w.r.t. img2 and write it to disp1buf.
......@@ -1536,10 +1527,6 @@ struct SGBM3WayMainLoop : public ParallelLoopBody
int costBufSize, hsumBufNRows;
int TAB_OFS, ftzero;
#if CV_SIMD128
bool useSIMD;
#endif
PixType* clipTab;
SGBM3WayMainLoop(Mat *_buffers, const Mat& _img1, const Mat& _img2, Mat* _dst_disp, const StereoSGBMParams& params, PixType* _clipTab, int _nstripes, int _stripe_overlap);
......@@ -1569,10 +1556,6 @@ buffers(_buffers), img1(&_img1), img2(&_img2), dst_disp(_dst_disp), clipTab(_cli
hsumBufNRows = SH2*2 + 2;
TAB_OFS = 256*4;
ftzero = std::max(params.preFilterCap, 15) | 1;
#if CV_SIMD128
useSIMD = hasSIMD128();
#endif
}
void getBufferPointers(Mat& buffer, int width, int width1, int D, int num_ch, int SH2, int P2,
......@@ -1673,7 +1656,7 @@ void SGBM3WayMainLoop::getRawMatchingCost(CostType* C, // target cost-volume row
const CostType* pixSub = pixDiff + std::max(x - (SW2+1)*D, 0);
#if CV_SIMD128
if(useSIMD)
if (true)
{
v_int16x8 hv_reg;
for( d = 0; d < D; d+=8 )
......@@ -1734,7 +1717,7 @@ inline void accumulateCostsLeftTop(CostType* leftBuf, CostType* leftBuf_prev, Co
CostType& leftMinCost, CostType& topMinCost, int D, int P1, int P2)
{
#if CV_SIMD128
if(hasSIMD128())
if (true)
{
v_int16x8 P1_reg = v_setall_s16(cv::saturate_cast<CostType>(P1));
......@@ -1846,7 +1829,7 @@ inline void accumulateCostsRight(CostType* rightBuf, CostType* topBuf, CostType*
CostType& rightMinCost, int D, int P1, int P2, int& optimal_disp, CostType& min_cost)
{
#if CV_SIMD128
if(hasSIMD128())
if (true)
{
v_int16x8 P1_reg = v_setall_s16(cv::saturate_cast<CostType>(P1));
......@@ -2012,7 +1995,7 @@ void SGBM3WayMainLoop::operator () (const Range& range) const
if(uniquenessRatio>0)
{
#if CV_SIMD128
if(useSIMD)
if (true)
{
horPassCostVolume+=x;
int thresh = (100*min_cost)/(100-uniquenessRatio);
......
......@@ -2774,15 +2774,6 @@ inline void v_pack_store(float16_t* ptr, const v_float32x8& a)
inline void v256_cleanup() { _mm256_zeroall(); }
//! @name Check SIMD256 support
//! @{
//! @brief Check CPU capability of SIMD operation
//! @brief Check CPU capability of 256-bit SIMD (AVX2) operations.
//! @return true when the runtime CPU supports AVX2, false otherwise.
static inline bool hasSIMD256()
{
    // The macro already yields a boolean-convertible value; the
    // redundant "? true : false" ternary is dropped and the result
    // is normalized to bool with an explicit comparison.
    return (CV_CPU_HAS_SUPPORT_AVX2) != 0;
}
//! @}
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
//! @endcond
......
......@@ -2357,16 +2357,6 @@ inline void v_cleanup() {}
//! @}
//! @name Check SIMD support
//! @{
//! @brief Check CPU capability of SIMD operation
//! @brief Check CPU capability of 128-bit SIMD operations.
//! @return always false: this is the scalar fallback implementation,
//!         so no hardware SIMD128 acceleration is available here.
static inline bool hasSIMD128()
{
    const bool simdAvailable = false;
    return simdAvailable;
}
//! @}
#ifndef CV_DOXYGEN
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
#endif
......
......@@ -1910,16 +1910,6 @@ inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
inline void v_cleanup() {}
//! @name Check SIMD support
//! @{
//! @brief Check CPU capability of SIMD operation
//! @brief Check CPU capability of 128-bit SIMD operations (NEON backend).
//! @return true when the runtime CPU supports NEON, false otherwise.
static inline bool hasSIMD128()
{
    // The macro already yields a boolean-convertible value; the
    // redundant "? true : false" ternary is dropped in favor of an
    // explicit normalization to bool.
    return (CV_CPU_HAS_SUPPORT_NEON) != 0;
}
//! @}
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
//! @endcond
......
......@@ -3043,16 +3043,6 @@ inline void v_pack_store(float16_t* ptr, const v_float32x4& v)
inline void v_cleanup() {}
//! @name Check SIMD support
//! @{
//! @brief Check CPU capability of SIMD operation
//! @brief Check CPU capability of 128-bit SIMD operations (SSE2 backend).
//! @return true when the runtime CPU supports SSE2, false otherwise.
static inline bool hasSIMD128()
{
    // The macro already yields a boolean-convertible value; the
    // redundant "? true : false" ternary is dropped in favor of an
    // explicit normalization to bool.
    return (CV_CPU_HAS_SUPPORT_SSE2) != 0;
}
//! @}
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
//! @endcond
......
......@@ -1355,16 +1355,6 @@ OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(v_uint32x4, vec_uint4)
OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(v_int32x4, vec_int4)
OPENCV_HAL_IMPL_VSX_TRANSPOSE4x4(v_float32x4, vec_float4)
//! @name Check SIMD support
//! @{
//! @brief Check CPU capability of SIMD operation
//! @brief Check CPU capability of 128-bit SIMD operations (VSX backend).
//! @return true when the runtime CPU supports PowerPC VSX, false otherwise.
static inline bool hasSIMD128()
{
    // The macro already yields a boolean-convertible value; the
    // redundant "? true : false" ternary is dropped in favor of an
    // explicit normalization to bool.
    return (CV_CPU_HAS_SUPPORT_VSX) != 0;
}
//! @}
CV_CPU_OPTIMIZATION_HAL_NAMESPACE_END
//! @endcond
......
......@@ -64,7 +64,6 @@ void FAST_t(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, bo
#if CV_SIMD128
const int quarterPatternSize = patternSize/4;
v_uint8x16 delta = v_setall_u8(0x80), t = v_setall_u8((char)threshold), K16 = v_setall_u8((char)K);
bool hasSimd = hasSIMD128();
#if CV_TRY_AVX2
Ptr<opt_AVX2::FAST_t_patternSize16_AVX2> fast_t_impl_avx2;
if(CV_CPU_HAS_SUPPORT_AVX2)
......@@ -102,7 +101,6 @@ void FAST_t(InputArray _img, std::vector<KeyPoint>& keypoints, int threshold, bo
{
j = 3;
#if CV_SIMD128
if( hasSimd )
{
if( patternSize == 16 )
{
......
......@@ -126,7 +126,7 @@ int cornerScore<16>(const uchar* ptr, const int pixel[], int threshold)
d[k] = (short)(v - ptr[pixel[k]]);
#if CV_SIMD128
if (hasSIMD128())
if (true)
{
v_int16x8 q0 = v_setall_s16(-1000), q1 = v_setall_s16(1000);
for (k = 0; k < 16; k += 8)
......@@ -223,7 +223,7 @@ int cornerScore<12>(const uchar* ptr, const int pixel[], int threshold)
#endif
#if CV_SIMD128
if (hasSIMD128())
if (true)
{
v_int16x8 q0 = v_setall_s16(-1000), q1 = v_setall_s16(1000);
for (k = 0; k < 16; k += 8)
......@@ -304,7 +304,7 @@ int cornerScore<8>(const uchar* ptr, const int pixel[], int threshold)
d[k] = (short)(v - ptr[pixel[k]]);
#if CV_SIMD128
if (hasSIMD128())
if (true)
{
v_int16x8 v0 = v_load(d + 1);
v_int16x8 v1 = v_load(d + 2);
......
......@@ -318,8 +318,7 @@ public:
low(_low), high(_high), aperture_size(_aperture_size), L2gradient(_L2gradient)
{
#if CV_SIMD128
haveSIMD = hasSIMD128();
if(haveSIMD)
if (true)
_map.create(src.rows + 2, (int)alignSize((size_t)(src.cols + CV_MALLOC_SIMD128 + 1), CV_MALLOC_SIMD128), CV_8UC1);
else
#endif
......@@ -338,8 +337,7 @@ public:
low(_low), high(_high), aperture_size(0), L2gradient(_L2gradient)
{
#if CV_SIMD128
haveSIMD = hasSIMD128();
if(haveSIMD)
if (true)
_map.create(src.rows + 2, (int)alignSize((size_t)(src.cols + CV_MALLOC_SIMD128 + 1), CV_MALLOC_SIMD128), CV_8UC1);
else
#endif
......@@ -440,7 +438,6 @@ public:
{
int j = 0, width = src.cols * cn;
#if CV_SIMD128
if (haveSIMD)
{
for ( ; j <= width - 8; j += 8)
{
......@@ -464,7 +461,6 @@ public:
{
int j = 0, width = src.cols * cn;
#if CV_SIMD128
if (haveSIMD)
{
for(; j <= width - 8; j += 8)
{
......@@ -525,7 +521,7 @@ public:
// From here actual src row is (i - 1)
// Set left and right border to 1
#if CV_SIMD128
if(haveSIMD)
if (true)
_pmap = map.ptr<uchar>(i) + CV_MALLOC_SIMD128;
else
#endif
......@@ -547,7 +543,6 @@ public:
const int TG22 = 13573;
int j = 0;
#if CV_SIMD128
if (haveSIMD)
{
const v_int32x4 v_low = v_setall_s32(low);
const v_int8x16 v_one = v_setall_s8(1);
......@@ -806,9 +801,6 @@ private:
bool L2gradient, needGradient;
ptrdiff_t mapstep;
int cn;
#if CV_SIMD128
bool haveSIMD;
#endif
mutable Mutex mutex;
};
......@@ -820,9 +812,6 @@ public:
map(_map), dst(_dst)
{
dst = _dst;
#if CV_SIMD128
haveSIMD = hasSIMD128();
#endif
}
~finalPass() {}
......@@ -836,13 +825,13 @@ public:
uchar *pdst = dst.ptr<uchar>(i);
const uchar *pmap = map.ptr<uchar>(i + 1);
#if CV_SIMD128
if(haveSIMD)
if (true)
pmap += CV_MALLOC_SIMD128;
else
#endif
pmap += 1;
#if CV_SIMD128
if(haveSIMD) {
{
const v_uint8x16 v_zero = v_setzero_u8();
const v_uint8x16 v_ff = ~v_zero;
const v_uint8x16 v_two(2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2);
......@@ -873,9 +862,6 @@ public:
private:
const Mat &map;
Mat &dst;
#if CV_SIMD128
bool haveSIMD;
#endif
finalPass(const finalPass&); // = delete
finalPass& operator=(const finalPass&); // = delete
......
......@@ -56,9 +56,6 @@ static void calcMinEigenVal( const Mat& _cov, Mat& _dst )
#if CV_TRY_AVX
bool haveAvx = CV_CPU_HAS_SUPPORT_AVX;
#endif
#if CV_SIMD128
bool haveSimd = hasSIMD128();
#endif
if( _cov.isContinuous() && _dst.isContinuous() )
{
......@@ -78,7 +75,6 @@ static void calcMinEigenVal( const Mat& _cov, Mat& _dst )
j = 0;
#if CV_SIMD128
if( haveSimd )
{
v_float32x4 half = v_setall_f32(0.5f);
for( ; j <= size.width - v_float32x4::nlanes; j += v_float32x4::nlanes )
......@@ -112,9 +108,6 @@ static void calcHarris( const Mat& _cov, Mat& _dst, double k )
#if CV_TRY_AVX
bool haveAvx = CV_CPU_HAS_SUPPORT_AVX;
#endif
#if CV_SIMD128
bool haveSimd = hasSIMD128();
#endif
if( _cov.isContinuous() && _dst.isContinuous() )
{
......@@ -135,7 +128,6 @@ static void calcHarris( const Mat& _cov, Mat& _dst, double k )
j = 0;
#if CV_SIMD128
if( haveSimd )
{
v_float32x4 v_k = v_setall_f32((float)k);
......@@ -254,9 +246,6 @@ cornerEigenValsVecs( const Mat& src, Mat& eigenv, int block_size,
#if CV_TRY_AVX
bool haveAvx = CV_CPU_HAS_SUPPORT_AVX;
#endif
#if CV_SIMD128
bool haveSimd = hasSIMD128();
#endif
int depth = src.depth();
double scale = (double)(1 << ((aperture_size > 0 ? aperture_size : 3) - 1)) * block_size;
......@@ -298,7 +287,6 @@ cornerEigenValsVecs( const Mat& src, Mat& eigenv, int block_size,
j = 0;
#if CV_SIMD128
if( haveSimd )
{
for( ; j <= size.width - v_float32x4::nlanes; j += v_float32x4::nlanes )
{
......@@ -711,7 +699,6 @@ void cv::preCornerDetect( InputArray _src, OutputArray _dst, int ksize, int bord
factor = 1./(factor * factor * factor);
#if CV_SIMD128
float factor_f = (float)factor;
bool haveSimd = hasSIMD128();
v_float32x4 v_factor = v_setall_f32(factor_f), v_m2 = v_setall_f32(-2.0f);
#endif
......@@ -729,7 +716,6 @@ void cv::preCornerDetect( InputArray _src, OutputArray _dst, int ksize, int bord
j = 0;
#if CV_SIMD128
if (haveSimd)
{
for( ; j <= size.width - v_float32x4::nlanes; j += v_float32x4::nlanes )
{
......
......@@ -446,8 +446,7 @@ struct RemapVec_8u
{
int cn = _src.channels(), x = 0, sstep = (int)_src.step;
if( (cn != 1 && cn != 3 && cn != 4) || !hasSIMD128() ||
sstep > 0x8000 )
if( (cn != 1 && cn != 3 && cn != 4) || sstep > 0x8000 )
return 0;
const uchar *S0 = _src.ptr(), *S1 = _src.ptr(1);
......@@ -1098,9 +1097,6 @@ public:
int brows0 = std::min(128, dst->rows), map_depth = m1->depth();
int bcols0 = std::min(buf_size/brows0, dst->cols);
brows0 = std::min(buf_size/bcols0, dst->rows);
#if CV_SIMD128
bool useSIMD = hasSIMD128();
#endif
Mat _bufxy(brows0, bcols0, CV_16SC2), _bufa;
if( !nnfunc )
......@@ -1147,7 +1143,6 @@ public:
x1 = 0;
#if CV_SIMD128
if( useSIMD )
{
int span = v_float32x4::nlanes;
for( ; x1 <= bcols - span * 2; x1 += span * 2 )
......@@ -1189,7 +1184,6 @@ public:
x1 = 0;
#if CV_SIMD128
if (useSIMD)
{
v_uint16x8 v_scale = v_setall_u16(INTER_TAB_SIZE2 - 1);
int span = v_uint16x8::nlanes;
......@@ -1207,7 +1201,6 @@ public:
x1 = 0;
#if CV_SIMD128
if( useSIMD )
{
v_float32x4 v_scale = v_setall_f32((float)INTER_TAB_SIZE);
v_int32x4 v_scale2 = v_setall_s32(INTER_TAB_SIZE - 1);
......@@ -1245,7 +1238,6 @@ public:
x1 = 0;
#if CV_SIMD128
if( useSIMD )
{
v_float32x4 v_scale = v_setall_f32((float)INTER_TAB_SIZE);
v_int32x4 v_scale2 = v_setall_s32(INTER_TAB_SIZE - 1), v_scale3 = v_setall_s32(INTER_TAB_SIZE);
......@@ -1898,9 +1890,6 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
size.height = 1;
}
#if CV_SIMD128
bool useSIMD = hasSIMD128();
#endif
#if CV_TRY_SSE4_1
bool useSSE4_1 = CV_CPU_HAS_SUPPORT_SSE4_1;
#endif
......@@ -1931,7 +1920,6 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
#endif
{
#if CV_SIMD128
if( useSIMD )
{
int span = v_int16x8::nlanes;
for( ; x <= size.width - span; x += span )
......@@ -1961,7 +1949,6 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
#endif
{
#if CV_SIMD128
if( useSIMD )
{
v_float32x4 v_scale = v_setall_f32((float)INTER_TAB_SIZE);
v_int32x4 v_mask = v_setall_s32(INTER_TAB_SIZE - 1);
......@@ -2002,10 +1989,11 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
{
#if CV_SIMD128
int span = v_float32x4::nlanes;
if( useSIMD )
{
for( ; x <= (size.width << 1) - span * 2; x += span * 2 )
v_store(dst1 + x, v_pack(v_round(v_load(src1f + x)),
v_round(v_load(src1f + x + span))));
}
#endif
for( ; x < size.width; x++ )
{
......@@ -2022,7 +2010,6 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
#endif
{
#if CV_SIMD128
if( useSIMD )
{
v_float32x4 v_scale = v_setall_f32((float)INTER_TAB_SIZE);
v_int32x4 v_mask = v_setall_s32(INTER_TAB_SIZE - 1);
......@@ -2063,7 +2050,6 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
else if( m1type == CV_16SC2 && dstm1type == CV_32FC1 )
{
#if CV_SIMD128
if( useSIMD )
{
v_uint16x8 v_mask2 = v_setall_u16(INTER_TAB_SIZE2-1);
v_uint32x4 v_zero = v_setzero_u32(), v_mask = v_setall_u32(INTER_TAB_SIZE-1);
......@@ -2113,7 +2099,6 @@ void cv::convertMaps( InputArray _map1, InputArray _map2,
else if( m1type == CV_16SC2 && dstm1type == CV_32FC2 )
{
#if CV_SIMD128
if( useSIMD )
{
v_int16x8 v_mask2 = v_setall_s16(INTER_TAB_SIZE2-1);
v_int32x4 v_zero = v_setzero_s32(), v_mask = v_setall_s32(INTER_TAB_SIZE-1);
......@@ -2189,9 +2174,6 @@ public:
#if CV_TRY_AVX2
bool useAVX2 = CV_CPU_HAS_SUPPORT_AVX2;
#endif
#if CV_SIMD128
bool useSIMD = hasSIMD128();
#endif
#if CV_TRY_SSE4_1
bool useSSE4_1 = CV_CPU_HAS_SUPPORT_SSE4_1;
#endif
......@@ -2226,7 +2208,6 @@ public:
#endif
{
#if CV_SIMD128
if( useSIMD )
{
v_int32x4 v_X0 = v_setall_s32(X0), v_Y0 = v_setall_s32(Y0);
int span = v_uint16x8::nlanes;
......@@ -2260,7 +2241,6 @@ public:
x1 = opt_AVX2::warpAffineBlockline(adelta + x, bdelta + x, xy, alpha, X0, Y0, bw);
#endif
#if CV_SIMD128
if( useSIMD )
{
v_int32x4 v__X0 = v_setall_s32(X0), v__Y0 = v_setall_s32(Y0);
v_int32x4 v_mask = v_setall_s32(INTER_TAB_SIZE - 1);
......
......@@ -218,14 +218,13 @@ struct MomentsInTile_SIMD<uchar, int, int>
{
MomentsInTile_SIMD()
{
useSIMD = checkHardwareSupport(CV_CPU_SSE2);
// nothing
}
int operator() (const uchar * ptr, int len, int & x0, int & x1, int & x2, int & x3)
{
int x = 0;
if( useSIMD )
{
__m128i dx = _mm_set1_epi16(8);
__m128i z = _mm_setzero_si128(), qx0 = z, qx1 = z, qx2 = z, qx3 = z, qx = _mm_setr_epi16(0, 1, 2, 3, 4, 5, 6, 7);
......@@ -264,7 +263,6 @@ struct MomentsInTile_SIMD<uchar, int, int>
}
int CV_DECL_ALIGNED(16) buf[4];
bool useSIMD;
};
#elif CV_NEON
......@@ -342,14 +340,13 @@ struct MomentsInTile_SIMD<ushort, int, int64>
{
MomentsInTile_SIMD()
{
useSIMD = checkHardwareSupport(CV_CPU_SSE4_1);
// nothing
}
int operator() (const ushort * ptr, int len, int & x0, int & x1, int & x2, int64 & x3)
{
int x = 0;
if (useSIMD)
{
__m128i v_delta = _mm_set1_epi32(4), v_zero = _mm_setzero_si128(), v_x0 = v_zero,
v_x1 = v_zero, v_x2 = v_zero, v_x3 = v_zero, v_ix0 = _mm_setr_epi32(0, 1, 2, 3);
......@@ -395,7 +392,6 @@ struct MomentsInTile_SIMD<ushort, int, int64>
int CV_DECL_ALIGNED(16) buf[4];
int64 CV_DECL_ALIGNED(16) buf64[2];
bool useSIMD;
};
#endif
......
......@@ -71,7 +71,6 @@ static void calcSharrDeriv(const cv::Mat& src, cv::Mat& dst)
#if CV_SIMD128
v_int16x8 c3 = v_setall_s16(3), c10 = v_setall_s16(10);
bool haveSIMD = checkHardwareSupport(CV_CPU_SSE2) || checkHardwareSupport(CV_CPU_NEON);
#endif
for( y = 0; y < rows; y++ )
......@@ -84,7 +83,6 @@ static void calcSharrDeriv(const cv::Mat& src, cv::Mat& dst)
// do vertical convolution
x = 0;
#if CV_SIMD128
if(haveSIMD)
{
for( ; x <= colsn - 8; x += 8 )
{
......@@ -120,7 +118,6 @@ static void calcSharrDeriv(const cv::Mat& src, cv::Mat& dst)
// do horizontal convolution, interleave the results and store them to dst
x = 0;
#if CV_SIMD128
if(haveSIMD)
{
for( ; x <= colsn - 8; x += 8 )
{
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
Please finish editing this message first!
To comment, please register.