Merge pull request #2634 from ElenaGvozdeva:ippiDCT

0e1bf581 · Alexander Alekhin · OpenCV Buildbot · 01f31dc0 · 12279e26 · 0e1bf581
Showing 3 changed files with 169 additions and 23 deletions

modules/core/perf/perf_dft.cpp +32 -0

modules/core/src/dxt.cpp +136 -22

modules/imgproc/src/distransform.cpp +1 -1

No files found.
--- a/modules/core/perf/perf_dft.cpp
+++ b/modules/core/perf/perf_dft.cpp
@@ -6,6 +6,8 @@ using namespace perf;
 using std::tr1::make_tuple;
 using std::tr1::get;

+///////////////////////////////////////////////////////dft//////////////////////////////////////////////////////////////
+
 #define MAT_TYPES_DFT  CV_32FC1, CV_32FC2, CV_64FC1
 #define MAT_SIZES_DFT  cv::Size(320, 480), cv::Size(800, 600), cv::Size(1280, 1024), sz1080p, sz2K
 CV_ENUM(FlagsType, 0, DFT_INVERSE, DFT_SCALE, DFT_COMPLEX_OUTPUT, DFT_ROWS, DFT_INVERSE|DFT_COMPLEX_OUTPUT)
@@ -27,5 +29,35 @@ PERF_TEST_P(Size_MatType_FlagsType, dft, TEST_MATS_DFT)

    TEST_CYCLE() dft(src, dst, flags);

+    SANITY_CHECK(dst, 1e-5, ERROR_RELATIVE);
+}
+
+///////////////////////////////////////////////////////dct//////////////////////////////////////////////////////
+
+CV_ENUM(DCT_FlagsType, 0, DCT_INVERSE , DCT_ROWS, DCT_INVERSE|DCT_ROWS) // flag values passed to dct() by the perf test below
+
+typedef std::tr1::tuple<Size, MatType, DCT_FlagsType> Size_MatType_Flag_t; // one test parameter set: (size, matrix type, dct flags)
+typedef perf::TestBaseWithParam<Size_MatType_Flag_t> Size_MatType_Flag; // test fixture parameterized by that tuple
+
+PERF_TEST_P(Size_MatType_Flag, dct, testing::Combine( // perf test of dct() over every size x type x flag combination listed here
+                                    testing::Values(cv::Size(320, 240),cv::Size(800, 600),
+                                                    cv::Size(1024, 768), cv::Size(1280, 1024),
+                                                    sz1080p, sz2K),
+                                    testing::Values(CV_32FC1, CV_64FC1), DCT_FlagsType::all()))
+{
+    Size sz = get<0>(GetParam());
+    int type = get<1>(GetParam());
+    int flags = get<2>(GetParam());
+
+    Mat src(sz, type);
+    Mat dst(sz, type);
+
+    declare
+        .in(src, WARMUP_RNG) // presumably fills src with random data before timing -- TODO confirm
+        .out(dst)
+        .time(60); // presumably a time budget for this test; the unit is not visible here -- TODO confirm
+
+    TEST_CYCLE() dct(src, dst, flags); // the operation being measured
+
    SANITY_CHECK(dst, 1e-5, ERROR_RELATIVE);
 }
\ No newline at end of file
--- a/modules/core/src/dxt.cpp
+++ b/modules/core/src/dxt.cpp
@@ -2880,6 +2880,131 @@ static void IDCT_64f(const double* src, int src_step, double* dft_src, double* d

 }

+namespace cv
+{
+#if defined HAVE_IPP && IPP_VERSION_MAJOR >= 7 && !defined HAVE_IPP_ICV_ONLY
+
+typedef IppStatus (CV_STDCALL * ippiDCTFunc)(const Ipp32f*, int, Ipp32f*, int, const void*, Ipp8u*); // DCT call: (src, srcStep, dst, dstStep, spec, work buffer)
+typedef IppStatus (CV_STDCALL * ippiDCTInitAlloc)(void**, IppiSize, IppHintAlgorithm); // creates a DCT spec for a given ROI size
+typedef IppStatus (CV_STDCALL * ippiDCTFree)(void* pDCTSpec); // releases a DCT spec
+typedef IppStatus (CV_STDCALL * ippiDCTGetBufSize)(const void*, int*); // reports the work-buffer size required by a spec
+
+// parallel_for_ body that applies a single-row IPP DCT to every row of the given range.
+// Dct is a callable (src, srcStep, dst, dstStep, spec, buffer) returning true on success.
+template <typename Dct>
+class DctIPPLoop_Invoker : public ParallelLoopBody
+{
+public:
+    // Stores pointers to its arguments; *_ok is reset to true here and cleared by operator() on any failure.
+    DctIPPLoop_Invoker(const Mat& _src, Mat& _dst, const Dct* _ippidct, bool _inv, bool *_ok) :
+        ParallelLoopBody(), src(&_src), dst(&_dst), ippidct(_ippidct), inv(_inv), ok(_ok)
+    {
+        *ok = true;
+    }
+
+    virtual void operator()(const Range& range) const
+    {
+        void* pDCTSpec = 0; // must start null: it is tested below even when InitAlloc fails and may not have written it
+        AutoBuffer<uchar> buf;
+        uchar* pBuffer = 0;
+        int bufSize=0;
+        IppiSize srcRoiSize = {src->cols, 1}; // each row is transformed on its own, so the ROI is one row high
+
+        CV_SUPPRESS_DEPRECATED_START
+        ippiDCTInitAlloc ippInitAlloc   = inv ? (ippiDCTInitAlloc)ippiDCTInvInitAlloc_32f   : (ippiDCTInitAlloc)ippiDCTFwdInitAlloc_32f;
+        ippiDCTFree ippFree             = inv ? (ippiDCTFree)ippiDCTInvFree_32f             : (ippiDCTFree)ippiDCTFwdFree_32f;
+        ippiDCTGetBufSize ippGetBufSize = inv ? (ippiDCTGetBufSize)ippiDCTInvGetBufSize_32f : (ippiDCTGetBufSize)ippiDCTFwdGetBufSize_32f;
+        // The spec and work buffer are created once per range and reused for all of its rows.
+        if (ippInitAlloc(&pDCTSpec, srcRoiSize, ippAlgHintNone)>=0 && ippGetBufSize(pDCTSpec, &bufSize)>=0)
+        {
+            buf.allocate( bufSize );
+            pBuffer = (uchar*)buf;
+
+            for( int i = range.start; i < range.end; ++i)
+                if(!(*ippidct)((float*)(src->data+i*src->step), (int)src->step,(float*)(dst->data+i*dst->step), (int)dst->step, pDCTSpec, (Ipp8u*)pBuffer))
+                    *ok = false;
+        }
+        else
+            *ok = false;
+
+        if (pDCTSpec)
+            ippFree(pDCTSpec);
+
+        CV_SUPPRESS_DEPRECATED_END
+    }
+
+private:
+    const Mat* src;
+    Mat* dst;
+    const Dct* ippidct;
+    bool inv;
+    bool *ok; // shared result flag owned by the caller; only ever cleared inside operator()
+};
+
+template <typename Dct> // Runs ippidct over every row of src via parallel_for_; true only if no row reported failure.
+bool DctIPPLoop(const Mat& src, Mat& dst, const Dct& ippidct, bool inv)
+{
+    bool allRowsOk; // set to true by the invoker's constructor, cleared by its operator() on failure
+    parallel_for_(Range(0, src.rows), DctIPPLoop_Invoker<Dct>(src, dst, &ippidct, inv, &allRowsOk), src.rows/16.0);
+    return allRowsOk;
+}
+
+struct IPPDCTFunctor // wraps a raw IPP DCT function pointer and reduces its IppStatus to a bool
+{
+    IPPDCTFunctor(ippiDCTFunc _func) : func(_func){}
+
+    bool operator()(const Ipp32f* src, int srcStep, Ipp32f* dst, int dstStep, const void* pDCTSpec, Ipp8u* pBuffer) const
+    {
+        return func != 0 && func(src, srcStep, dst, dstStep, pDCTSpec, pBuffer) >= 0; // null pointer or negative status => failure
+    }
+private:
+    ippiDCTFunc func;
+};
+
+// Runs a 32-bit float DCT (inverse when inv is set) through IPP. With row == true every row is
+// transformed independently via DctIPPLoop; otherwise one 2-D transform covers the whole matrix.
+// Returns false if spec creation, buffer sizing or the transform itself reports an error.
+static bool ippi_DCT_32f(const Mat& src, Mat& dst, bool inv, bool row)
+{
+    ippiDCTFunc ippFunc = inv ? (ippiDCTFunc)ippiDCTInv_32f_C1R : (ippiDCTFunc)ippiDCTFwd_32f_C1R ;
+
+    if (row)
+        return(DctIPPLoop(src,dst,IPPDCTFunctor(ippFunc),inv));
+    else
+    {
+        IppStatus status = ippStsErr; // stays an error unless the transform below actually runs
+        void* pDCTSpec = 0; // must start null: it is tested below even when InitAlloc fails and may not have written it
+        AutoBuffer<uchar> buf;
+        uchar* pBuffer = 0;
+        int bufSize=0;
+        IppiSize srcRoiSize = {src.cols, src.rows};
+
+        CV_SUPPRESS_DEPRECATED_START
+
+        ippiDCTInitAlloc ippInitAlloc   = inv ? (ippiDCTInitAlloc)ippiDCTInvInitAlloc_32f   : (ippiDCTInitAlloc)ippiDCTFwdInitAlloc_32f;
+        ippiDCTFree ippFree             = inv ? (ippiDCTFree)ippiDCTInvFree_32f             : (ippiDCTFree)ippiDCTFwdFree_32f;
+        ippiDCTGetBufSize ippGetBufSize = inv ? (ippiDCTGetBufSize)ippiDCTInvGetBufSize_32f : (ippiDCTGetBufSize)ippiDCTFwdGetBufSize_32f;
+
+        if (ippInitAlloc(&pDCTSpec, srcRoiSize, ippAlgHintNone)>=0 && ippGetBufSize(pDCTSpec, &bufSize)>=0)
+        {
+            buf.allocate( bufSize );
+            pBuffer = (uchar*)buf;
+
+            status = ippFunc((float*)src.data, (int)src.step, (float*)dst.data, (int)dst.step, pDCTSpec, (Ipp8u*)pBuffer);
+        }
+
+        if (pDCTSpec)
+            ippFree(pDCTSpec);
+
+        CV_SUPPRESS_DEPRECATED_END
+
+        return status >= 0;
+    }
+}
+
+#endif
+}
+
 void cv::dct( InputArray _src0, OutputArray _dst, int flags )
 {
    static DCTFunc dct_tbl[4] =
@@ -2893,7 +3018,7 @@ void cv::dct( InputArray _src0, OutputArray _dst, int flags )
    bool inv = (flags & DCT_INVERSE) != 0;
    Mat src0 = _src0.getMat(), src = src0;
    int type = src.type(), depth = src.depth();
-    void /* *spec_dft = 0, */ *spec = 0;
+    void *spec = 0;

    double scale = 1.;
    int prev_len = 0, nf = 0, stage, end_stage;
@@ -2910,6 +3035,16 @@ void cv::dct( InputArray _src0, OutputArray _dst, int flags )
    _dst.create( src.rows, src.cols, type );
    Mat dst = _dst.getMat();

+#if defined (HAVE_IPP) && (IPP_VERSION_MAJOR >= 7) && !defined HAVE_IPP_ICV_ONLY
+    bool row = (flags & DCT_ROWS) != 0;
+    if (src.type() == CV_32F)
+    {
+        if(ippi_DCT_32f(src,dst,inv, row))
+            return;
+        setIppErrorStatus();
+    }
+#endif
+
    DCTFunc dct_func = dct_tbl[(int)inv + (depth == CV_64F)*2];

    if( (flags & DCT_ROWS) || src.rows == 1 ||
@@ -2962,27 +3097,6 @@ void cv::dct( InputArray _src0, OutputArray _dst, int flags )

            spec = 0;
            inplace_transform = 1;
-            /*if( len*count >= 64 && DFTInitAlloc_R_32f_p )
-            {
-                int ipp_sz = 0;
-                if( depth == CV_32F )
-                {
-                    if( spec_dft )
-                        IPPI_CALL( DFTFree_R_32f_p( spec_dft ));
-                    IPPI_CALL( DFTInitAlloc_R_32f_p( &spec_dft, len, 8, cvAlgHintNone ));
-                    IPPI_CALL( DFTGetBufSize_R_32f_p( spec_dft, &ipp_sz ));
-                }
-                else
-                {
-                    if( spec_dft )
-                        IPPI_CALL( DFTFree_R_64f_p( spec_dft ));
-                    IPPI_CALL( DFTInitAlloc_R_64f_p( &spec_dft, len, 8, cvAlgHintNone ));
-                    IPPI_CALL( DFTGetBufSize_R_64f_p( spec_dft, &ipp_sz ));
-                }
-                spec = spec_dft;
-                sz += ipp_sz;
-            }
-            else*/
            {
                sz += len*(complex_elem_size + sizeof(int)) + complex_elem_size;


--- a/modules/imgproc/src/distransform.cpp
+++ b/modules/imgproc/src/distransform.cpp
@@ -577,7 +577,7 @@ trueDistTrans( const Mat& src, Mat& dst )
    for( ; i <= m*3; i++ )
        sat_tab[i] = i - shift;

-    cv::parallel_for_(cv::Range(0, n), cv::DTColumnInvoker(&src, &dst, sat_tab, sqr_tab));
+    cv::parallel_for_(cv::Range(0, n), cv::DTColumnInvoker(&src, &dst, sat_tab, sqr_tab), src.total()/(double)(1<<16));

    // stage 2: compute modified distance transform for each row
    float* inv_tab = sqr_tab + n;