Merge pull request #895 from bitwangyaoyao:2.4_perf

b51a1a7d · Vadim Pisarevsky · OpenCV Buildbot · 389be676 · 04399a27 · b51a1a7d
23 changed file
--- a/modules/ocl/CMakeLists.txt
+++ b/modules/ocl/CMakeLists.txt
@@ -3,5 +3,5 @@ if(NOT HAVE_OPENCL)
 endif()

 set(the_description "OpenCL-accelerated Computer Vision")
-ocv_define_module(ocl opencv_core opencv_imgproc opencv_features2d opencv_objdetect opencv_video)
+ocv_define_module(ocl opencv_core opencv_imgproc opencv_features2d opencv_objdetect opencv_video opencv_calib3d)
 ocv_warnings_disable(CMAKE_CXX_FLAGS -Wshadow)
--- a/modules/ocl/perf/perf_arithm.cpp
+++ b/modules/ocl/perf/perf_arithm.cpp
--- a/modules/ocl/perf/perf_blend.cpp
+++ b/modules/ocl/perf/perf_blend.cpp
@@ -16,6 +16,7 @@
 //
 // @Authors
 //    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -68,7 +69,7 @@ void blendLinearGold(const cv::Mat &img1, const cv::Mat &img2, const cv::Mat &we
        }
    }
 }
-TEST(blend)
+PERFTEST(blend)
 {
    Mat src1, src2, weights1, weights2, dst;
    ocl::oclMat d_src1, d_src2, d_weights1, d_weights2, d_dst;
@@ -102,9 +103,12 @@ TEST(blend)
            ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst);
            WARMUP_OFF;

+            cv::Mat ocl_mat;
+            d_dst.download(ocl_mat);
+            TestSystem::instance().setAccurate(ExpectedMatNear(dst, ocl_mat, 1.f));
+
            GPU_ON;
            ocl::blendLinear(d_src1, d_src2, d_weights1, d_weights2, d_dst);
-             ;
            GPU_OFF;

            GPU_FULL_ON;

--- a/modules/ocl/perf/perf_brute_force_matcher.cpp
+++ b/modules/ocl/perf/perf_brute_force_matcher.cpp
@@ -16,6 +16,7 @@
 //
 // @Authors
 //    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -45,7 +46,7 @@
 #include "precomp.hpp"

 //////////////////// BruteForceMatch /////////////////
-TEST(BruteForceMatcher)
+PERFTEST(BruteForceMatcher)
 {
    Mat trainIdx_cpu;
    Mat distance_cpu;
@@ -66,6 +67,7 @@ TEST(BruteForceMatcher)
        gen(train, size, desc_len, CV_32F, 0, 1);
        // Output
        vector< vector<DMatch> > matches(2);
+        vector< vector<DMatch> > d_matches(2);
        // Init GPU matcher
        ocl::BruteForceMatcher_OCL_base d_matcher(ocl::BruteForceMatcher_OCL_base::L2Dist);

@@ -86,9 +88,11 @@ TEST(BruteForceMatcher)
        d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
        WARMUP_OFF;

+        d_matcher.match(d_query, d_train, d_matches[0]);
+        TestSystem::instance().setAccurate(AssertEQ<size_t>(d_matches[0].size(), matches[0].size()));
+
        GPU_ON;
        d_matcher.matchSingle(d_query, d_train, d_trainIdx, d_distance);
-         ;
        GPU_OFF;

        GPU_FULL_ON;
@@ -111,15 +115,16 @@ TEST(BruteForceMatcher)

        GPU_ON;
        d_matcher.knnMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_allDist, 2);
-         ;
        GPU_OFF;

        GPU_FULL_ON;
        d_query.upload(query);
        d_train.upload(train);
-        d_matcher.knnMatch(d_query, d_train, matches, 2);
+        d_matcher.knnMatch(d_query, d_train, d_matches, 2);
        GPU_FULL_OFF;

+        TestSystem::instance().setAccurate(AssertEQ<size_t>(d_matches[0].size(), matches[0].size()));
+
        SUBTEST << size << "; radiusMatch";

        float max_distance = 2.0f;
@@ -138,13 +143,14 @@ TEST(BruteForceMatcher)

        GPU_ON;
        d_matcher.radiusMatchSingle(d_query, d_train, d_trainIdx, d_distance, d_nMatches, max_distance);
-         ;
        GPU_OFF;

        GPU_FULL_ON;
        d_query.upload(query);
        d_train.upload(train);
-        d_matcher.radiusMatch(d_query, d_train, matches, max_distance);
+        d_matcher.radiusMatch(d_query, d_train, d_matches, max_distance);
        GPU_FULL_OFF;
+
+        TestSystem::instance().setAccurate(AssertEQ<size_t>(d_matches[0].size(), matches[0].size()));
    }
 }
\ No newline at end of file
--- a/modules/ocl/perf/perf_canny.cpp
+++ b/modules/ocl/perf/perf_canny.cpp
@@ -16,6 +16,7 @@
 //
 // @Authors
 //    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -45,7 +46,7 @@
 #include "precomp.hpp"

 ///////////// Canny ////////////////////////
-TEST(Canny)
+PERFTEST(Canny)
 {
    Mat img = imread(abspath("aloeL.jpg"), CV_LOAD_IMAGE_GRAYSCALE);

@@ -70,9 +71,10 @@ TEST(Canny)
    ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0);
    WARMUP_OFF;

+    TestSystem::instance().setAccurate(ExceptedMatSimilar(edges, d_edges, 2e-2));
+
    GPU_ON;
    ocl::Canny(d_img, d_buf, d_edges, 50.0, 100.0);
-     ;
    GPU_OFF;

    GPU_FULL_ON;

--- a/modules/ocl/perf/perf_color.cpp
+++ b/modules/ocl/perf/perf_color.cpp
@@ -16,6 +16,7 @@
 //
 // @Authors
 //    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -45,7 +46,7 @@
 #include "precomp.hpp"

 ///////////// cvtColor////////////////////////
-TEST(cvtColor)
+PERFTEST(cvtColor)
 {
    Mat src, dst;
    ocl::oclMat d_src, d_dst;
@@ -72,9 +73,12 @@ TEST(cvtColor)
            ocl::cvtColor(d_src, d_dst, CV_RGBA2GRAY, 4);
            WARMUP_OFF;

+            cv::Mat ocl_mat;
+            d_dst.download(ocl_mat);
+            TestSystem::instance().setAccurate(ExceptedMatSimilar(dst, ocl_mat, 1e-5));
+
            GPU_ON;
            ocl::cvtColor(d_src, d_dst, CV_RGBA2GRAY, 4);
-             ;
            GPU_OFF;

            GPU_FULL_ON;

--- a/modules/ocl/perf/perf_columnsum.cpp
+++ b/modules/ocl/perf/perf_columnsum.cpp
@@ -16,6 +16,7 @@
 //
 // @Authors
 //    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -45,7 +46,7 @@
 #include "precomp.hpp"

 ///////////// columnSum////////////////////////
-TEST(columnSum)
+PERFTEST(columnSum)
 {
    Mat src, dst;
    ocl::oclMat d_src, d_dst;
@@ -58,12 +59,13 @@ TEST(columnSum)

        CPU_ON;
        dst.create(src.size(), src.type());
+        for (int j = 0; j < src.cols; j++)
+            dst.at<float>(0, j) = src.at<float>(0, j);

        for (int i = 1; i < src.rows; ++i)
-        {
-            for (int j = 0; j < src.cols; ++j)
+        {for (int j = 0; j < src.cols; ++j)
            {
-                dst.at<float>(i, j) = src.at<float>(i, j) += src.at<float>(i - 1, j);
+                dst.at<float>(i, j) = dst.at<float>(i - 1 , j) + src.at<float>(i , j);
            }
        }

@@ -74,9 +76,12 @@ TEST(columnSum)
        ocl::columnSum(d_src, d_dst);
        WARMUP_OFF;

+        cv::Mat ocl_mat;
+        d_dst.download(ocl_mat);
+        TestSystem::instance().setAccurate(ExpectedMatNear(dst, ocl_mat, 5e-1));
+
        GPU_ON;
        ocl::columnSum(d_src, d_dst);
-         ;
        GPU_OFF;

        GPU_FULL_ON;

--- a/modules/ocl/perf/perf_fft.cpp
+++ b/modules/ocl/perf/perf_fft.cpp
@@ -16,6 +16,7 @@
 //
 // @Authors
 //    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -45,13 +46,13 @@
 #include "precomp.hpp"

 ///////////// dft ////////////////////////
-TEST(dft)
+PERFTEST(dft)
 {
    Mat src, dst;
    ocl::oclMat d_src, d_dst;

-    int all_type[] = {CV_32FC1, CV_32FC2};
-    std::string type_name[] = {"CV_32FC1", "CV_32FC2"};
+    int all_type[] = {CV_32FC2};
+    std::string type_name[] = {"CV_32FC2"};

    for (int size = Min_Size; size <= Max_Size; size *= Multiple)
    {
@@ -73,9 +74,10 @@ TEST(dft)
            ocl::dft(d_src, d_dst, Size(size, size));
            WARMUP_OFF;

+            TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), src.size().area() * 1e-4));
+
            GPU_ON;
            ocl::dft(d_src, d_dst, Size(size, size));
-             ;
            GPU_OFF;

            GPU_FULL_ON;

--- a/modules/ocl/perf/perf_filters.cpp
+++ b/modules/ocl/perf/perf_filters.cpp
@@ -16,6 +16,7 @@
 //
 // @Authors
 //    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -45,7 +46,7 @@
 #include "precomp.hpp"

 ///////////// Blur////////////////////////
-TEST(Blur)
+PERFTEST(Blur)
 {
    Mat src1, dst;
    ocl::oclMat d_src1, d_dst;
@@ -77,9 +78,10 @@ TEST(Blur)
            ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype);
            WARMUP_OFF;

+            TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, 1.0));
+
            GPU_ON;
            ocl::blur(d_src1, d_dst, ksize, Point(-1, -1), bordertype);
-             ;
            GPU_OFF;

            GPU_FULL_ON;
@@ -92,7 +94,7 @@ TEST(Blur)
    }
 }
 ///////////// Laplacian////////////////////////
-TEST(Laplacian)
+PERFTEST(Laplacian)
 {
    Mat src1, dst;
    ocl::oclMat d_src1, d_dst;
@@ -123,9 +125,10 @@ TEST(Laplacian)
            ocl::Laplacian(d_src1, d_dst, -1, ksize, 1);
            WARMUP_OFF;

+            TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, 1e-5));
+
            GPU_ON;
            ocl::Laplacian(d_src1, d_dst, -1, ksize, 1);
-             ;
            GPU_OFF;

            GPU_FULL_ON;
@@ -139,7 +142,7 @@ TEST(Laplacian)
 }

 ///////////// Erode ////////////////////
-TEST(Erode)
+PERFTEST(Erode)
 {
    Mat src, dst, ker;
    ocl::oclMat d_src, d_dst;
@@ -168,9 +171,10 @@ TEST(Erode)
            ocl::erode(d_src, d_dst, ker);
            WARMUP_OFF;

+            TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, 1e-5));            
+
            GPU_ON;
            ocl::erode(d_src, d_dst, ker);
-             ;
            GPU_OFF;

            GPU_FULL_ON;
@@ -184,7 +188,7 @@ TEST(Erode)
 }

 ///////////// Sobel ////////////////////////
-TEST(Sobel)
+PERFTEST(Sobel)
 {
    Mat src, dst;
    ocl::oclMat d_src, d_dst;
@@ -214,9 +218,10 @@ TEST(Sobel)
            ocl::Sobel(d_src, d_dst, -1, dx, dy);
            WARMUP_OFF;

+            TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, 1));
+
            GPU_ON;
            ocl::Sobel(d_src, d_dst, -1, dx, dy);
-             ;
            GPU_OFF;

            GPU_FULL_ON;
@@ -229,7 +234,7 @@ TEST(Sobel)
    }
 }
 ///////////// Scharr ////////////////////////
-TEST(Scharr)
+PERFTEST(Scharr)
 {
    Mat src, dst;
    ocl::oclMat d_src, d_dst;
@@ -259,9 +264,10 @@ TEST(Scharr)
            ocl::Scharr(d_src, d_dst, -1, dx, dy);
            WARMUP_OFF;

+            TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, 1));
+
            GPU_ON;
            ocl::Scharr(d_src, d_dst, -1, dx, dy);
-             ;
            GPU_OFF;

            GPU_FULL_ON;
@@ -275,7 +281,7 @@ TEST(Scharr)
 }

 ///////////// GaussianBlur ////////////////////////
-TEST(GaussianBlur)
+PERFTEST(GaussianBlur)
 {
    Mat src, dst;
    int all_type[] = {CV_8UC1, CV_8UC4, CV_32FC1, CV_32FC4};
@@ -288,6 +294,8 @@ TEST(GaussianBlur)
            SUBTEST << size << 'x' << size << "; " << type_name[j] ;

            gen(src, size, size, all_type[j], 0, 256);
+            dst = src;
+            dst.setTo(0);

            GaussianBlur(src, dst, Size(9, 9), 0);

@@ -303,9 +311,11 @@ TEST(GaussianBlur)
            ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0);
            WARMUP_OFF;

+            TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, 1.0));
+
+
            GPU_ON;
            ocl::GaussianBlur(d_src, d_dst, Size(9, 9), 0);
-             ;
            GPU_OFF;

            GPU_FULL_ON;
@@ -319,7 +329,7 @@ TEST(GaussianBlur)
 }

 ///////////// filter2D////////////////////////
-TEST(filter2D)
+PERFTEST(filter2D)
 {
    Mat src;

@@ -339,7 +349,8 @@ TEST(filter2D)
                Mat kernel;
                gen(kernel, ksize, ksize, CV_32FC1, 0.0, 1.0);

-                Mat dst;
+				Mat dst(src);
+				dst.setTo(0);
                cv::filter2D(src, dst, -1, kernel);

                CPU_ON;
@@ -347,15 +358,18 @@ TEST(filter2D)
                CPU_OFF;

                ocl::oclMat d_src(src);
-                ocl::oclMat d_dst;
+                ocl::oclMat d_dst(d_src);
+				d_dst.setTo(0);

                WARMUP_ON;
                ocl::filter2D(d_src, d_dst, -1, kernel);
                WARMUP_OFF;

+                TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, 1e-5));
+
+
                GPU_ON;
                ocl::filter2D(d_src, d_dst, -1, kernel);
-                 ;
                GPU_OFF;

                GPU_FULL_ON;

--- a/modules/ocl/perf/perf_gemm.cpp
+++ b/modules/ocl/perf/perf_gemm.cpp
@@ -16,6 +16,7 @@
 //
 // @Authors
 //    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -45,7 +46,7 @@
 #include "precomp.hpp"

 ///////////// gemm ////////////////////////
-TEST(gemm)
+PERFTEST(gemm)
 {
    Mat src1, src2, src3, dst;
    ocl::oclMat d_src1, d_src2, d_src3, d_dst;
@@ -71,10 +72,10 @@ TEST(gemm)
        WARMUP_ON;
        ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst);
        WARMUP_OFF;
+        TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(d_dst), dst, src1.cols * src1.rows * 1e-4));

        GPU_ON;
        ocl::gemm(d_src1, d_src2, 1.0, d_src3, 1.0, d_dst);
-         ;
        GPU_OFF;

        GPU_FULL_ON;

--- a/modules/ocl/perf/perf_haar.cpp
+++ b/modules/ocl/perf/perf_haar.cpp
@@ -16,6 +16,7 @@
 //
 // @Authors
 //    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -82,7 +83,7 @@ public:

 }
 }
-TEST(Haar)
+PERFTEST(Haar)
 {
    Mat img = imread(abspath("basketball1.png"), CV_LOAD_IMAGE_GRAYSCALE);

@@ -106,6 +107,8 @@ TEST(Haar)
                                    1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30));
    CPU_OFF;

+
+    vector<Rect> oclfaces;
    ocl::CascadeClassifier_GPU faceCascade;

    if (!faceCascade.load(abspath("haarcascade_frontalface_alt.xml")))
@@ -115,24 +118,24 @@ TEST(Haar)

    ocl::oclMat d_img(img);

-    faces.clear();
-
    WARMUP_ON;
-    faceCascade.detectMultiScale(d_img, faces,
+    faceCascade.detectMultiScale(d_img, oclfaces,
                                 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30));
    WARMUP_OFF;

+    //Testing whether the expected is equal to the actual.
+    TestSystem::instance().setAccurate(ExpectedEQ<vector<Rect>::size_type, vector<Rect>::size_type>(faces.size(), oclfaces.size()));
+
    faces.clear();

    GPU_ON;
-    faceCascade.detectMultiScale(d_img, faces,
+    faceCascade.detectMultiScale(d_img, oclfaces,
                                 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30));
-     ;
    GPU_OFF;

    GPU_FULL_ON;
    d_img.upload(img);
-    faceCascade.detectMultiScale(d_img, faces,
+    faceCascade.detectMultiScale(d_img, oclfaces,
                                 1.1, 2, 0 | CV_HAAR_SCALE_IMAGE, Size(30, 30));
    GPU_FULL_OFF;
 }
\ No newline at end of file
--- a/modules/ocl/perf/perf_hog.cpp
+++ b/modules/ocl/perf/perf_hog.cpp
@@ -16,6 +16,7 @@
 //
 // @Authors
 //    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -45,7 +46,13 @@
 #include "precomp.hpp"

 ///////////// HOG////////////////////////
-TEST(HOG)
+bool match_rect(cv::Rect r1, cv::Rect r2, int threshold)
+{   // Two rectangles match when x, y, width and height each differ by strictly less than threshold.
+    const bool originClose = abs(r1.x - r2.x) < threshold && abs(r1.y - r2.y) < threshold;
+    return originClose && abs(r1.width - r2.width) < threshold && abs(r1.height - r2.height) < threshold;
+}
+
+PERFTEST(HOG)
 {
    Mat src = imread(abspath("road.png"), cv::IMREAD_GRAYSCALE);

@@ -58,6 +65,7 @@ TEST(HOG)
    cv::HOGDescriptor hog;
    hog.setSVMDetector(hog.getDefaultPeopleDetector());
    std::vector<cv::Rect> found_locations;
+    std::vector<cv::Rect> d_found_locations;

    SUBTEST << 768 << 'x' << 576 << "; road.png";

@@ -73,12 +81,78 @@ TEST(HOG)
    d_src.upload(src);

    WARMUP_ON;
-    ocl_hog.detectMultiScale(d_src, found_locations);
+    ocl_hog.detectMultiScale(d_src, d_found_locations);
    WARMUP_OFF;
+    
+    // Ground-truth rectangular people window
+    cv::Rect win1_64x128(231, 190, 72, 144);
+    cv::Rect win2_64x128(621, 156, 97, 194);
+    cv::Rect win1_48x96(238, 198, 63, 126);
+    cv::Rect win2_48x96(619, 161, 92, 185);
+    cv::Rect win3_48x96(488, 136, 56, 112);
+
+    // Compare whether ground-truth windows are detected and compare the number of windows detected.
+    std::vector<int> d_comp(4);
+    std::vector<int> comp(4);
+    for(int i = 0; i < (int)d_comp.size(); i++)
+    {
+        d_comp[i] = 0;
+        comp[i] = 0;
+    }
+
+    int threshold = 10;
+    int val = 32;
+    d_comp[0] = (int)d_found_locations.size();
+    comp[0] = (int)found_locations.size();
+
+    cv::Size winSize = hog.winSize;
+
+    if (winSize == cv::Size(48, 96))
+    {
+        for(int i = 0; i < (int)d_found_locations.size(); i++)
+        {
+            if (match_rect(d_found_locations[i], win1_48x96, threshold))
+                d_comp[1] = val;
+            if (match_rect(d_found_locations[i], win2_48x96, threshold))
+                d_comp[2] = val;
+            if (match_rect(d_found_locations[i], win3_48x96, threshold))
+                d_comp[3] = val;
+        }
+        for(int i = 0; i < (int)found_locations.size(); i++)
+        {
+            if (match_rect(found_locations[i], win1_48x96, threshold))
+                comp[1] = val;
+            if (match_rect(found_locations[i], win2_48x96, threshold))
+                comp[2] = val;
+            if (match_rect(found_locations[i], win3_48x96, threshold))
+                comp[3] = val;
+        }
+    }
+    else if (winSize == cv::Size(64, 128))
+    {
+        for(int i = 0; i < (int)d_found_locations.size(); i++)
+        {
+            if (match_rect(d_found_locations[i], win1_64x128, threshold))
+                d_comp[1] = val;
+            if (match_rect(d_found_locations[i], win2_64x128, threshold))
+                d_comp[2] = val;
+        }
+        for(int i = 0; i < (int)found_locations.size(); i++)
+        {
+            if (match_rect(found_locations[i], win1_64x128, threshold))
+                comp[1] = val;
+            if (match_rect(found_locations[i], win2_64x128, threshold))
+                comp[2] = val;
+        }
+    }
+
+    cv::Mat ocl_mat;
+    ocl_mat = cv::Mat(d_comp);
+    ocl_mat.convertTo(ocl_mat, cv::Mat(comp).type());
+    TestSystem::instance().setAccurate(ExpectedMatNear(ocl_mat, cv::Mat(comp), 3));

    GPU_ON;
    ocl_hog.detectMultiScale(d_src, found_locations);
-     ;
    GPU_OFF;

    GPU_FULL_ON;

--- a/modules/ocl/perf/perf_imgproc.cpp
+++ b/modules/ocl/perf/perf_imgproc.cpp
@@ -16,6 +16,7 @@
 //
 // @Authors
 //    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -45,7 +46,7 @@
 #include "precomp.hpp"

 ///////////// equalizeHist ////////////////////////
-TEST(equalizeHist)
+PERFTEST(equalizeHist)
 {
    Mat src, dst;
    int all_type[] = {CV_8UC1};
@@ -74,9 +75,11 @@ TEST(equalizeHist)
            ocl::equalizeHist(d_src, d_dst);
            WARMUP_OFF;

+            TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.1));
+
+
            GPU_ON;
            ocl::equalizeHist(d_src, d_dst);
-             ;
            GPU_OFF;

            GPU_FULL_ON;
@@ -89,7 +92,7 @@ TEST(equalizeHist)
    }
 }
 /////////// CopyMakeBorder //////////////////////
-TEST(CopyMakeBorder)
+PERFTEST(CopyMakeBorder)
 {
    Mat src, dst;
    ocl::oclMat d_dst;
@@ -119,9 +122,11 @@ TEST(CopyMakeBorder)
            ocl::copyMakeBorder(d_src, d_dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0));
            WARMUP_OFF;

+            TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 0.0));
+
+
            GPU_ON;
            ocl::copyMakeBorder(d_src, d_dst, 7, 5, 5, 7, bordertype, cv::Scalar(1.0));
-             ;
            GPU_OFF;

            GPU_FULL_ON;
@@ -134,7 +139,7 @@ TEST(CopyMakeBorder)
    }
 }
 ///////////// cornerMinEigenVal ////////////////////////
-TEST(cornerMinEigenVal)
+PERFTEST(cornerMinEigenVal)
 {
    Mat src, dst;
    ocl::oclMat d_dst;
@@ -165,9 +170,11 @@ TEST(cornerMinEigenVal)
            ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType);
            WARMUP_OFF;

+            TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0));
+
+
            GPU_ON;
            ocl::cornerMinEigenVal(d_src, d_dst, blockSize, apertureSize, borderType);
-             ;
            GPU_OFF;

            GPU_FULL_ON;
@@ -180,7 +187,7 @@ TEST(cornerMinEigenVal)
    }
 }
 ///////////// cornerHarris ////////////////////////
-TEST(cornerHarris)
+PERFTEST(cornerHarris)
 {
    Mat src, dst;
    ocl::oclMat d_src, d_dst;
@@ -208,9 +215,10 @@ TEST(cornerHarris)
            ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT);
            WARMUP_OFF;

+            TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0));
+
            GPU_ON;
            ocl::cornerHarris(d_src, d_dst, 5, 7, 0.1, BORDER_REFLECT);
-             ;
            GPU_OFF;

            GPU_FULL_ON;
@@ -224,7 +232,7 @@ TEST(cornerHarris)
    }
 }
 ///////////// integral ////////////////////////
-TEST(integral)
+PERFTEST(integral)
 {
    Mat src, sum;
    ocl::oclMat d_src, d_sum, d_buf;
@@ -252,9 +260,14 @@ TEST(integral)
            ocl::integral(d_src, d_sum);
            WARMUP_OFF;

+            cv::Mat ocl_mat;
+            d_sum.download(ocl_mat);
+            if(sum.type() == ocl_mat.type()) //we won't test accuracy when the cpu function overflows
+                TestSystem::instance().setAccurate(ExpectedMatNear(sum, ocl_mat, 0.0));
+
+
            GPU_ON;
            ocl::integral(d_src, d_sum);
-             ;
            GPU_OFF;

            GPU_FULL_ON;
@@ -267,15 +280,15 @@ TEST(integral)
    }
 }
 ///////////// WarpAffine ////////////////////////
-TEST(WarpAffine)
+PERFTEST(WarpAffine)
 {
    Mat src, dst;
    ocl::oclMat d_src, d_dst;

    static const double coeffs[2][3] =
    {
-        {cos(3.14 / 6), -sin(3.14 / 6), 100.0},
-        {sin(3.14 / 6), cos(3.14 / 6), -100.0}
+        {cos(CV_PI / 6), -sin(CV_PI / 6), 100.0},
+        {sin(CV_PI / 6), cos(CV_PI / 6), -100.0}
    };
    Mat M(2, 3, CV_64F, (void *)coeffs);
    int interpolation = INTER_NEAREST;
@@ -306,9 +319,10 @@ TEST(WarpAffine)
            ocl::warpAffine(d_src, d_dst, M, size1, interpolation);
            WARMUP_OFF;

+            TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0));
+
            GPU_ON;
            ocl::warpAffine(d_src, d_dst, M, size1, interpolation);
-             ;
            GPU_OFF;

            GPU_FULL_ON;
@@ -321,19 +335,19 @@ TEST(WarpAffine)
    }
 }
 ///////////// WarpPerspective ////////////////////////
-TEST(WarpPerspective)
+PERFTEST(WarpPerspective)
 {
    Mat src, dst;
    ocl::oclMat d_src, d_dst;

    static const double coeffs[3][3] =
    {
-        {cos(3.14 / 6), -sin(3.14 / 6), 100.0},
-        {sin(3.14 / 6), cos(3.14 / 6), -100.0},
+        {cos(CV_PI / 6), -sin(CV_PI / 6), 100.0},
+        {sin(CV_PI / 6), cos(CV_PI / 6), -100.0},
        {0.0, 0.0, 1.0}
    };
    Mat M(3, 3, CV_64F, (void *)coeffs);
-    int interpolation = INTER_NEAREST;
+    int interpolation = INTER_LINEAR;

    int all_type[] = {CV_8UC1, CV_8UC4};
    std::string type_name[] = {"CV_8UC1", "CV_8UC4"};
@@ -360,9 +374,10 @@ TEST(WarpPerspective)
            ocl::warpPerspective(d_src, d_dst, M, size1, interpolation);
            WARMUP_OFF;

+            TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0));
+
            GPU_ON;
            ocl::warpPerspective(d_src, d_dst, M, size1, interpolation);
-             ;
            GPU_OFF;

            GPU_FULL_ON;
@@ -376,7 +391,7 @@ TEST(WarpPerspective)
 }

 ///////////// resize ////////////////////////
-TEST(resize)
+PERFTEST(resize)
 {
    Mat src, dst;
    ocl::oclMat d_src, d_dst;
@@ -405,9 +420,11 @@ TEST(resize)
            ocl::resize(d_src, d_dst, Size(), 2.0, 2.0);
            WARMUP_OFF;

+            TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0));
+
+
            GPU_ON;
            ocl::resize(d_src, d_dst, Size(), 2.0, 2.0);
-             ;
            GPU_OFF;

            GPU_FULL_ON;
@@ -439,9 +456,10 @@ TEST(resize)
            ocl::resize(d_src, d_dst, Size(), 0.5, 0.5);
            WARMUP_OFF;

+            TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0));
+
            GPU_ON;
            ocl::resize(d_src, d_dst, Size(), 0.5, 0.5);
-             ;
            GPU_OFF;

            GPU_FULL_ON;
@@ -454,7 +472,7 @@ TEST(resize)
    }
 }
 ///////////// threshold////////////////////////
-TEST(threshold)
+PERFTEST(threshold)
 {
    Mat src, dst;
    ocl::oclMat d_src, d_dst;
@@ -478,9 +496,11 @@ TEST(threshold)
        ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY);
        WARMUP_OFF;

+        TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0));
+
+
        GPU_ON;
        ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_BINARY);
-         ;
        GPU_OFF;

        GPU_FULL_ON;
@@ -509,9 +529,10 @@ TEST(threshold)
        ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC);
        WARMUP_OFF;

+        TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0));
+
        GPU_ON;
        ocl::threshold(d_src, d_dst, 50.0, 0.0, THRESH_TRUNC);
-         ;
        GPU_OFF;

        GPU_FULL_ON;
@@ -522,9 +543,189 @@ TEST(threshold)
    }
 }
 ///////////// meanShiftFiltering////////////////////////
-TEST(meanShiftFiltering)
+COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size size, int sp, int sr, int maxIter, float eps, int *tab)
 {
-    int sp = 10, sr = 10;
+    // Runs mean-shift for the 4-byte pixel at (x0, y0) (sptr: source pixel, dptr: destination pixel, sstep: source row stride in bytes), stores the resulting colour in dptr and returns the final window centre.
+    int isr2 = sr * sr; // squared colour radius; compared against the sum of tab[] lookups below
+    int c0, c1, c2, c3; // current colour estimate (c3 is only copied through to dptr[3])
+    int iter;
+    uchar *ptr = NULL;
+    uchar *pstart = NULL; // source pixel under the current window centre
+    int revx = 0, revy = 0; // centre displacement produced by the previous iteration
+    c0 = sptr[0];
+    c1 = sptr[1];
+    c2 = sptr[2];
+    c3 = sptr[3];
+    // iterate the mean-shift procedure, at most maxIter times
+    for(iter = 0; iter < maxIter; iter++ )
+    {
+        int count = 0;
+        int s0 = 0, s1 = 0, s2 = 0, sx = 0, sy = 0;
+
+        // spatial window: all pixels within sp of the current centre (x0, y0) in both directions
+        int minx = x0 - sp;
+        int miny = y0 - sp;
+        int maxx = x0 + sp;
+        int maxy = y0 + sp;
+
+        // clamp the window to the image boundary given by size
+        if(minx < 0) minx = 0;
+        if(miny < 0) miny = 0;
+        if(maxx >= size.width) maxx = size.width - 1;
+        if(maxy >= size.height) maxy = size.height - 1;
+        if(iter == 0)
+        {
+            pstart = sptr;
+        }
+        else
+        {
+            pstart = pstart + revy * sstep + (revx << 2); //point to the new position
+        }
+        ptr = pstart;
+        ptr = ptr + (miny - y0) * sstep + ((minx - x0) << 2); // move to the window's top-left pixel (minx, miny)
+        // scan the window row by row; the loop increment rewinds ptr to column minx of the next row
+        for( int y = miny; y <= maxy; y++, ptr += sstep - ((maxx - minx + 1) << 2))
+        {
+            int rowCount = 0;
+            int x = minx;
+#if CV_ENABLE_UNROLLED
+            for( ; x + 4 <= maxx; x += 4, ptr += 16) // handles pixels x .. x+3 per pass
+            {
+                int t0, t1, t2;
+                t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];
+                if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2) // accept pixel: with tab[d + 255] == d * d (as filled by meanShiftFiltering_) this is the squared colour distance
+                {
+                    s0 += t0;
+                    s1 += t1;
+                    s2 += t2;
+                    sx += x;
+                    rowCount++;
+                }
+                t0 = ptr[4], t1 = ptr[5], t2 = ptr[6];
+                if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
+                {
+                    s0 += t0;
+                    s1 += t1;
+                    s2 += t2;
+                    sx += x + 1;
+                    rowCount++;
+                }
+                t0 = ptr[8], t1 = ptr[9], t2 = ptr[10];
+                if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
+                {
+                    s0 += t0;
+                    s1 += t1;
+                    s2 += t2;
+                    sx += x + 2;
+                    rowCount++;
+                }
+                t0 = ptr[12], t1 = ptr[13], t2 = ptr[14];
+                if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
+                {
+                    s0 += t0;
+                    s1 += t1;
+                    s2 += t2;
+                    sx += x + 3;
+                    rowCount++;
+                }
+            }
+#endif
+            for(; x <= maxx; x++, ptr += 4) // remaining pixels of the row (all of them when unrolling is disabled)
+            {
+                int t0 = ptr[0], t1 = ptr[1], t2 = ptr[2];
+                if(tab[t0 - c0 + 255] + tab[t1 - c1 + 255] + tab[t2 - c2 + 255] <= isr2)
+                {
+                    s0 += t0;
+                    s1 += t1;
+                    s2 += t2;
+                    sx += x;
+                    rowCount++;
+                }
+            }
+            if(rowCount == 0)
+                continue;
+            count += rowCount;
+            sy += y * rowCount;
+        }
+        // no pixel in the window passed the colour test: keep the current estimate
+        if( count == 0 )
+            break;
+        // integer means of the accepted pixels' positions and colours
+        int x1 = sx / count;
+        int y1 = sy / count;
+        s0 = s0 / count;
+        s1 = s1 / count;
+        s2 = s2 / count;
+        // converged when the centre did not move, or when position shift plus tab[] colour shift is within eps
+        bool stopFlag = (x0 == x1 && y0 == y1) || (abs(x1 - x0) + abs(y1 - y0) +
+            tab[s0 - c0 + 255] + tab[s1 - c1 + 255] + tab[s2 - c2 + 255] <= eps);
+
+        // displacement used to re-base pstart onto the new (y0, x0) at the start of the next iteration
+        revx = x1 - x0;
+        revy = y1 - y0;
+
+        x0 = x1;
+        y0 = y1;
+        c0 = s0;
+        c1 = s1;
+        c2 = s2;
+
+        if( stopFlag )
+            break;
+    } //for iter
+    // store the final colour estimate in the destination pixel
+    dptr[0] = (uchar)c0;
+    dptr[1] = (uchar)c1;
+    dptr[2] = (uchar)c2;
+    dptr[3] = (uchar)c3;
+    // report the final window centre
+    COOR coor;
+    coor.x = static_cast<short>(x0);
+    coor.y = static_cast<short>(y0);
+    return coor;
+}
+// CPU reference mean-shift filter: runs do_meanShift on every pixel of an 8-bit, 4-channel image.
+// sp / sr are the spatial and colour window radii; crit supplies the iteration cap and epsilon.
+void meanShiftFiltering_(const Mat &src_roi, Mat &dst_roi, int sp, int sr, cv::TermCriteria crit);
+void meanShiftFiltering_(const Mat &src_roi, Mat &dst_roi, int sp, int sr, cv::TermCriteria crit)
+{
+    if( src_roi.empty() )
+        CV_Error( CV_StsBadArg, "The input image is empty" );
+
+    if( src_roi.depth() != CV_8U || src_roi.channels() != 4 )
+        CV_Error( CV_StsUnsupportedFormat, "Only 8-bit, 4-channel images are supported" );
+
+    // dst_roi must already be allocated with the same size and a 4-byte aligned row stride.
+    CV_Assert( (src_roi.cols == dst_roi.cols) && (src_roi.rows == dst_roi.rows) );
+    CV_Assert( !(dst_roi.step & 0x3) );
+
+    // Iteration cap: 5 unless crit requests MAX_ITER; always clamped to [1, 100].
+    const int requestedIter = (crit.type & cv::TermCriteria::MAX_ITER) ? crit.maxCount : 5;
+    const int maxIter = std::min(std::max(requestedIter, 1), 100);
+    // Epsilon: 1 unless crit requests EPS, in which case the requested value (clamped at 0) is used.
+    const float eps = (crit.type & cv::TermCriteria::EPS) ? (float)std::max(crit.epsilon, 0.0) : 1.f;
+
+    // tab[d + 255] = d * d for every difference d of two 8-bit values.
+    int tab[512];
+    for(int i = 0; i < 512; i++)
+        tab[i] = (i - 255) * (i - 255);
+    uchar *sptr = src_roi.data;
+    uchar *dptr = dst_roi.data;
+    const int sstep = (int)src_roi.step;
+    const int dstep = (int)dst_roi.step;
+    const cv::Size size = src_roi.size();
+
+    // Advance 4 bytes per pixel; the row increment skips each image's row padding.
+    for(int i = 0; i < size.height; i++, sptr += sstep - (size.width << 2), dptr += dstep - (size.width << 2))
+    {
+        for(int j = 0; j < size.width; j++, sptr += 4, dptr += 4)
+            do_meanShift(j, i, sptr, dptr, sstep, size, sp, sr, maxIter, eps, tab);
+    }
+}
+
+PERFTEST(meanShiftFiltering)
+{
+    int sp = 5, sr = 6; // spatial and color window radii, shared by the CPU reference and the OCL call
    Mat src, dst;

    ocl::oclMat d_src, d_dst;
@@ -533,25 +734,32 @@ TEST(meanShiftFiltering)
    {
        SUBTEST << size << 'x' << size << "; 8UC3 vs 8UC4";

-        gen(src, size, size, CV_8UC3, Scalar::all(0), Scalar::all(256));
+        gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256));
+        // dst must own its buffer: "dst = src" only shares src's data, so setTo(0) would also wipe the input.
+        dst.create(src.size(), src.type());
+        dst.setTo(0);
+
+        cv::TermCriteria crit(cv::TermCriteria::COUNT + cv::TermCriteria::EPS, 5, 1);

-        pyrMeanShiftFiltering(src, dst, sp, sr);
+        meanShiftFiltering_(src, dst, sp, sr, crit);

        CPU_ON;
-        pyrMeanShiftFiltering(src, dst, sp, sr);
+        meanShiftFiltering_(src, dst, sp, sr, crit);
        CPU_OFF;

-        gen(src, size, size, CV_8UC4, Scalar::all(0), Scalar::all(256));
-
        d_src.upload(src);

        WARMUP_ON;
-        ocl::meanShiftFiltering(d_src, d_dst, sp, sr);
+        ocl::meanShiftFiltering(d_src, d_dst, sp, sr, crit);
        WARMUP_OFF;

+        cv::Mat ocl_mat;
+        d_dst.download(ocl_mat);
+
+        TestSystem::instance().setAccurate(ExpectedMatNear(dst, ocl_mat, 0.0)); // eps 0.0: exact match required
+
        GPU_ON;
        ocl::meanShiftFiltering(d_src, d_dst, sp, sr);
-         ;
        GPU_OFF;

        GPU_FULL_ON;
@@ -562,6 +770,7 @@ TEST(meanShiftFiltering)
    }
 }
 ///////////// meanShiftProc////////////////////////
+#if 0
 COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size size, int sp, int sr, int maxIter, float eps, int *tab)
 {

@@ -740,6 +949,7 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep, cv::Size
    coor.y = static_cast<short>(y0);
    return coor;
 }
+#endif

 void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi, int sp, int sr, cv::TermCriteria crit)
 {
@@ -798,7 +1008,7 @@ void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi, int sp,
    }

 }
-TEST(meanShiftProc)
+PERFTEST(meanShiftProc)
 {
    Mat src, dst, dstCoor_roi;
    ocl::oclMat d_src, d_dst, d_dstCoor_roi;
@@ -825,9 +1035,11 @@ TEST(meanShiftProc)
        ocl::meanShiftProc(d_src, d_dst, d_dstCoor_roi, 5, 6, crit);
        WARMUP_OFF;

+        TestSystem::instance().setAccurate(ExpectedMatNear(dstCoor_roi, cv::Mat(d_dstCoor_roi), 0.0)
+            &&ExpectedMatNear(dst, cv::Mat(d_dst), 0.0));
+
        GPU_ON;
        ocl::meanShiftProc(d_src, d_dst, d_dstCoor_roi, 5, 6, crit);
-         ;
        GPU_OFF;

        GPU_FULL_ON;
@@ -841,7 +1053,7 @@ TEST(meanShiftProc)
 }

 ///////////// remap////////////////////////
-TEST(remap)
+PERFTEST(remap)
 {
    Mat src, dst, xmap, ymap;
    ocl::oclMat d_src, d_dst, d_xmap, d_ymap;
@@ -892,9 +1104,14 @@ TEST(remap)
            ocl::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode);
            WARMUP_OFF;

+            if(interpolation == 0)
+                TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 1.0));            
+            else
+                TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 2.0));
+
+
            GPU_ON;
            ocl::remap(d_src, d_dst, d_xmap, d_ymap, interpolation, borderMode);
-             ;
            GPU_OFF;

            GPU_FULL_ON;

--- a/modules/ocl/perf/perf_match_template.cpp
+++ b/modules/ocl/perf/perf_match_template.cpp
@@ -16,6 +16,7 @@
 //
 // @Authors
 //    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -52,7 +53,7 @@
 //	ocl::oclMat d_src(src), d_templ(templ), d_dst;
 //	ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR);
 //}
-TEST(matchTemplate)
+PERFTEST(matchTemplate)
 {
    //InitMatchTemplate();

@@ -89,9 +90,10 @@ TEST(matchTemplate)
                ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR);
                WARMUP_OFF;

+                TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), templ.rows * templ.cols * 1e-1));            
+
                GPU_ON;
                ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR);
-                 ;
                GPU_OFF;

                GPU_FULL_ON;
@@ -129,9 +131,10 @@ TEST(matchTemplate)
                ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR_NORMED);
                WARMUP_OFF;

+                TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), templ.rows * templ.cols * 1e-1));            
+
                GPU_ON;
                ocl::matchTemplate(d_src, d_templ, d_dst, CV_TM_CCORR_NORMED);
-                 ;
                GPU_OFF;

                GPU_FULL_ON;

--- a/modules/ocl/perf/perf_matrix_operation.cpp
+++ b/modules/ocl/perf/perf_matrix_operation.cpp
@@ -16,6 +16,7 @@
 //
 // @Authors
 //    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -45,7 +46,7 @@
 #include "precomp.hpp"

 ///////////// ConvertTo////////////////////////
-TEST(ConvertTo)
+PERFTEST(ConvertTo)
 {
    Mat src, dst;
    ocl::oclMat d_src, d_dst;
@@ -76,9 +77,11 @@ TEST(ConvertTo)
            d_src.convertTo(d_dst, CV_32FC1);
            WARMUP_OFF;

+            TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 0.0));            
+
+
            GPU_ON;
            d_src.convertTo(d_dst, CV_32FC1);
-             ;
            GPU_OFF;

            GPU_FULL_ON;
@@ -91,7 +94,7 @@ TEST(ConvertTo)
    }
 }
 ///////////// copyTo////////////////////////
-TEST(copyTo)
+PERFTEST(copyTo)
 {
    Mat src, dst;
    ocl::oclMat d_src, d_dst;
@@ -122,9 +125,11 @@ TEST(copyTo)
            d_src.copyTo(d_dst);
            WARMUP_OFF;

+            TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), 0.0));            
+
+
            GPU_ON;
            d_src.copyTo(d_dst);
-             ;
            GPU_OFF;

            GPU_FULL_ON;
@@ -137,7 +142,7 @@ TEST(copyTo)
    }
 }
 ///////////// setTo////////////////////////
-TEST(setTo)
+PERFTEST(setTo)
 {
    Mat src, dst;
    Scalar val(1, 2, 3, 4);
@@ -166,9 +171,11 @@ TEST(setTo)
            d_src.setTo(val);
            WARMUP_OFF;

+            TestSystem::instance().setAccurate(ExpectedMatNear(src, cv::Mat(d_src), 1.0));            
+
+
            GPU_ON;
            d_src.setTo(val);
-             ;
            GPU_OFF;

            GPU_FULL_ON;

--- a/modules/ocl/perf/perf_norm.cpp
+++ b/modules/ocl/perf/perf_norm.cpp
@@ -16,6 +16,7 @@
 //
 // @Authors
 //    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -45,7 +46,7 @@
 #include "precomp.hpp"

 ///////////// norm////////////////////////
-TEST(norm)
+PERFTEST(norm)
 {
    Mat src, buf;
    ocl::oclMat d_src, d_buf;
@@ -71,9 +72,10 @@ TEST(norm)
        ocl::norm(d_src, d_buf, NORM_INF);
        WARMUP_OFF;

+        TestSystem::instance().setAccurate(ExpectedMatNear(src, cv::Mat(d_buf), .5));                        
+
        GPU_ON;
        ocl::norm(d_src, d_buf, NORM_INF);
-         ;
        GPU_OFF;

        GPU_FULL_ON;

--- a/modules/ocl/perf/perf_pyrdown.cpp
+++ b/modules/ocl/perf/perf_pyrdown.cpp
@@ -16,6 +16,7 @@
 //
 // @Authors
 //    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -45,7 +46,7 @@
 #include "precomp.hpp"

 ///////////// pyrDown //////////////////////
-TEST(pyrDown)
+PERFTEST(pyrDown)
 {
    Mat src, dst;
    int all_type[] = {CV_8UC1, CV_8UC4};
@@ -72,9 +73,11 @@ TEST(pyrDown)
            ocl::pyrDown(d_src, d_dst);
            WARMUP_OFF;

+            TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), dst.depth() == CV_32F ? 1e-4f : 1.0f));                        
+
+
            GPU_ON;
            ocl::pyrDown(d_src, d_dst);
-             ;
            GPU_OFF;

            GPU_FULL_ON;

--- a/modules/ocl/perf/perf_pyrlk.cpp
+++ b/modules/ocl/perf/perf_pyrlk.cpp
@@ -16,6 +16,7 @@
 //
 // @Authors
 //    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -45,7 +46,7 @@
 #include "precomp.hpp"

 ///////////// PyrLKOpticalFlow ////////////////////////
-TEST(PyrLKOpticalFlow)
+PERFTEST(PyrLKOpticalFlow)
 {
    std::string images1[] = {"rubberwhale1.png", "aloeL.jpg"};
    std::string images2[] = {"rubberwhale2.png", "aloeR.jpg"};
@@ -115,9 +116,14 @@ TEST(PyrLKOpticalFlow)
            d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err);
            WARMUP_OFF;

+            std::vector<cv::Point2f> ocl_nextPts(d_nextPts.cols);
+            std::vector<unsigned char> ocl_status(d_status.cols);
+            TestSystem::instance().setAccurate(AssertEQ<size_t>(nextPts.size(), ocl_nextPts.size()));
+            TestSystem::instance().setAccurate(AssertEQ<size_t>(status.size(), ocl_status.size()));                        
+
+
            GPU_ON;
            d_pyrLK.sparse(d_frame0, d_frame1, d_pts, d_nextPts, d_status, &d_err);
-             ;
            GPU_OFF;

            GPU_FULL_ON;

--- a/modules/ocl/perf/perf_pyrup.cpp
+++ b/modules/ocl/perf/perf_pyrup.cpp
@@ -16,6 +16,7 @@
 //
 // @Authors
 //    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -45,7 +46,7 @@
 #include "precomp.hpp"

 ///////////// pyrUp ////////////////////////
-TEST(pyrUp)
+PERFTEST(pyrUp)
 {
    Mat src, dst;
    int all_type[] = {CV_8UC1, CV_8UC4};
@@ -72,9 +73,10 @@ TEST(pyrUp)
            ocl::pyrUp(d_src, d_dst);
            WARMUP_OFF;

+            TestSystem::instance().setAccurate(ExpectedMatNear(dst, cv::Mat(d_dst), (src.depth() == CV_32F ? 1e-4f : 1.0)));                     
+
            GPU_ON;
            ocl::pyrUp(d_src, d_dst);
-             ;
            GPU_OFF;

            GPU_FULL_ON;

--- a/modules/ocl/perf/perf_split_merge.cpp
+++ b/modules/ocl/perf/perf_split_merge.cpp
@@ -16,6 +16,7 @@
 //
 // @Authors
 //    Fangfang Bai, fangfang@multicorewareinc.com
+//    Jin Ma,       jin@multicorewareinc.com
 //
 // Redistribution and use in source and binary forms, with or without modification,
 // are permitted provided that the following conditions are met:
@@ -45,7 +46,7 @@
 #include "precomp.hpp"

 ///////////// Merge////////////////////////
-TEST(Merge)
+PERFTEST(Merge)
 {
    Mat dst;
    ocl::oclMat d_dst;
@@ -84,9 +85,10 @@ TEST(Merge)
            ocl::merge(d_src, d_dst);
            WARMUP_OFF;

+            TestSystem::instance().setAccurate(ExpectedMatNear(cv::Mat(dst), cv::Mat(d_dst), 0.0));                     
+
            GPU_ON;
            ocl::merge(d_src, d_dst);
-             ;
            GPU_OFF;

            GPU_FULL_ON;
@@ -105,7 +107,7 @@ TEST(Merge)
 }

 ///////////// Split////////////////////////
-TEST(Split)
+PERFTEST(Split)
 {
    //int channels = 4;
    int all_type[] = {CV_8UC1, CV_32FC1};
@@ -135,9 +137,23 @@ TEST(Split)
            ocl::split(d_src, d_dst);
            WARMUP_OFF;

+            if(d_dst.size() == dst.size())
+            {
+                TestSystem::instance().setAccurate(1);
+                for(size_t i = 0; i < dst.size(); i++)
+                {
+                    if(ExpectedMatNear(dst[i], cv::Mat(d_dst[i]), 0.0) == 0)
+                    {
+                        TestSystem::instance().setAccurate(0);
+                        break;
+                    }
+                }
+            }else
+                TestSystem::instance().setAccurate(0);
+                                
+
            GPU_ON;
            ocl::split(d_src, d_dst);
-             ;
            GPU_OFF;

            GPU_FULL_ON;

--- a/modules/ocl/perf/precomp.cpp
+++ b/modules/ocl/perf/precomp.cpp
@@ -41,6 +41,10 @@
 //M*/

 #include "precomp.hpp"
+#if GTEST_OS_WINDOWS
+#define NOMINMAX
+# include <windows.h>
+#endif

 // This program test most of the functions in ocl module and generate data metrix of x-factor in .csv files
 // All images needed in this test are in samples/gpu folder.
@@ -110,6 +114,7 @@ void TestSystem::finishCurrentSubtest()
        return;
    }

+    int is_accurate = is_accurate_;
    double cpu_time = cpu_elapsed_ / getTickFrequency() * 1000.0;
    double gpu_time = gpu_elapsed_ / getTickFrequency() * 1000.0;
    double gpu_full_time = gpu_full_elapsed_ / getTickFrequency() * 1000.0;
@@ -166,8 +171,8 @@ void TestSystem::finishCurrentSubtest()
        deviation = std::sqrt(sum / gpu_times_.size());
    }

-    printMetrics(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup);
-    writeMetrics(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup, gpu_min, gpu_max, deviation);
+    printMetrics(is_accurate, cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup);
+    writeMetrics(is_accurate, cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup, gpu_min, gpu_max, deviation);

    num_subtests_called_++;
    resetCurrentSubtest();
@@ -184,10 +189,19 @@ double TestSystem::meanTime(const vector<int64> &samples)
 void TestSystem::printHeading()
 {
    cout << endl;
-    cout << setiosflags(ios_base::left);
-    cout << TAB << setw(10) << "CPU, ms" << setw(10) << "GPU, ms"
-         << setw(14) << "SPEEDUP" << setw(14) << "GPUTOTAL, ms" << setw(14) << "TOTALSPEEDUP"
-         << "DESCRIPTION\n";
+    cout<< setiosflags(ios_base::left);
+
+#if 0
+    cout<<TAB<<setw(7)<< "Accu." << setw(10) << "CPU (ms)" << setw(10) << "GPU, ms"
+        << setw(8) << "Speedup"<< setw(10)<<"GPUTotal" << setw(10) << "Total"
+        << "Description\n";
+    cout<<TAB<<setw(7)<<""<<setw(10)<<""<<setw(10)<<""<<setw(8)<<""<<setw(10)<<"(ms)"<<setw(10)<<"Speedup\n";
+#endif
+
+    cout<<TAB<< setw(10) << "CPU (ms)" << setw(10) << "GPU, ms"
+        << setw(8) << "Speedup"<< setw(10)<<"GPUTotal" << setw(10) << "Total"
+        << "Description\n";
+    cout<<TAB<<setw(10)<<""<<setw(10)<<""<<setw(8)<<""<<setw(10)<<"(ms)"<<setw(10)<<"Speedup\n";

    cout << resetiosflags(ios_base::left);
 }
@@ -198,9 +212,14 @@ void TestSystem::writeHeading()
    {
        recordname_ += "_OCL.csv";
        record_ = fopen(recordname_.c_str(), "w");
+        if(record_ == NULL)
+        {
+            cout<<".csv file open failed.\n";
+            exit(0);
+        }
    }

-    fprintf(record_, "NAME,DESCRIPTION,CPU (ms),GPU (ms),SPEEDUP,GPUTOTAL (ms),TOTALSPEEDUP,GPU Min (ms),GPU Max (ms), Standard deviation (ms)\n");
+    fprintf(record_, "NAME,DESCRIPTION,ACCURACY,CPU (ms),GPU (ms),SPEEDUP,GPUTOTAL (ms),TOTALSPEEDUP,GPU Min (ms),GPU Max (ms), Standard deviation (ms)\n");

    fflush(record_);
 }
@@ -209,54 +228,82 @@ void TestSystem::printSummary()
 {
    cout << setiosflags(ios_base::fixed);
    cout << "\naverage GPU speedup: x"
-         << setprecision(3) << speedup_total_ / std::max(1, num_subtests_called_)
-         << endl;
+        << setprecision(3) << speedup_total_ / std::max(1, num_subtests_called_)
+        << endl;
    cout << "\nGPU exceeded: "
-         << setprecision(3) << speedup_faster_count_
-         << "\nGPU passed: "
-         << setprecision(3) << speedup_equal_count_
-         << "\nGPU failed: "
-         << setprecision(3) << speedup_slower_count_
-         << endl;
+        << setprecision(3) << speedup_faster_count_
+        << "\nGPU passed: "
+        << setprecision(3) << speedup_equal_count_
+        << "\nGPU failed: "
+        << setprecision(3) << speedup_slower_count_
+        << endl;
    cout << "\nGPU exceeded rate: "
-         << setprecision(3) << (float)speedup_faster_count_ / std::max(1, num_subtests_called_) * 100
-         << "%"
-         << "\nGPU passed rate: "
-         << setprecision(3) << (float)speedup_equal_count_ / std::max(1, num_subtests_called_) * 100
-         << "%"
-         << "\nGPU failed rate: "
-         << setprecision(3) << (float)speedup_slower_count_ / std::max(1, num_subtests_called_) * 100
-         << "%"
-         << endl;
+        << setprecision(3) << (float)speedup_faster_count_ / std::max(1, num_subtests_called_) * 100
+        << "%"
+        << "\nGPU passed rate: "
+        << setprecision(3) << (float)speedup_equal_count_ / std::max(1, num_subtests_called_) * 100
+        << "%"
+        << "\nGPU failed rate: "
+        << setprecision(3) << (float)speedup_slower_count_ / std::max(1, num_subtests_called_) * 100
+        << "%"
+        << endl;
    cout << "\naverage GPUTOTAL speedup: x"
-         << setprecision(3) << speedup_full_total_ / std::max(1, num_subtests_called_)
-         << endl;
+        << setprecision(3) << speedup_full_total_ / std::max(1, num_subtests_called_)
+        << endl;
    cout << "\nGPUTOTAL exceeded: "
-         << setprecision(3) << speedup_full_faster_count_
-         << "\nGPUTOTAL passed: "
-         << setprecision(3) << speedup_full_equal_count_
-         << "\nGPUTOTAL failed: "
-         << setprecision(3) << speedup_full_slower_count_
-         << endl;
+        << setprecision(3) << speedup_full_faster_count_
+        << "\nGPUTOTAL passed: "
+        << setprecision(3) << speedup_full_equal_count_
+        << "\nGPUTOTAL failed: "
+        << setprecision(3) << speedup_full_slower_count_
+        << endl;
    cout << "\nGPUTOTAL exceeded rate: "
-         << setprecision(3) << (float)speedup_full_faster_count_ / std::max(1, num_subtests_called_) * 100
-         << "%"
-         << "\nGPUTOTAL passed rate: "
-         << setprecision(3) << (float)speedup_full_equal_count_ / std::max(1, num_subtests_called_) * 100
-         << "%"
-         << "\nGPUTOTAL failed rate: "
-         << setprecision(3) << (float)speedup_full_slower_count_ / std::max(1, num_subtests_called_) * 100
-         << "%"
-         << endl;
+        << setprecision(3) << (float)speedup_full_faster_count_ / std::max(1, num_subtests_called_) * 100
+        << "%"
+        << "\nGPUTOTAL passed rate: "
+        << setprecision(3) << (float)speedup_full_equal_count_ / std::max(1, num_subtests_called_) * 100
+        << "%"
+        << "\nGPUTOTAL failed rate: "
+        << setprecision(3) << (float)speedup_full_slower_count_ / std::max(1, num_subtests_called_) * 100
+        << "%"
+        << endl;
    cout << resetiosflags(ios_base::fixed);
 }


-void TestSystem::printMetrics(double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup)
+// Console colors understood by the platform-specific helper that follows.
+enum GTestColor { COLOR_DEFAULT, COLOR_RED, COLOR_GREEN, COLOR_YELLOW };
+#if GTEST_OS_WINDOWS&&!GTEST_OS_WINDOWS_MOBILE
+// Maps a color to the Win32 console character attribute that renders it.
+// COLOR_DEFAULT (and any unlisted value) maps to 0, i.e. no foreground bits.
+// File-local like its ANSI counterpart, hence static.
+static WORD GetColorAttribute(GTestColor color) {
+    switch (color) {
+    case COLOR_RED:    return FOREGROUND_RED;
+    case COLOR_GREEN:  return FOREGROUND_GREEN;
+    case COLOR_YELLOW: return FOREGROUND_RED | FOREGROUND_GREEN;
+    default:           return 0;
+    }
+}
+#else
+// Maps a color to the digit that completes an ANSI "\033[0;3<digit>m" escape.
+// Returns NULL for COLOR_DEFAULT, so the result must not be printed blindly.
+static const char* GetAnsiColorCode(GTestColor color) {
+    switch (color) {
+    case COLOR_RED:     return "1";
+    case COLOR_GREEN:   return "2";
+    case COLOR_YELLOW:  return "3";
+    default:            return NULL;
+    }
+}
+#endif
+
+static void printMetricsUti(double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup, std::stringstream& stream, std::stringstream& cur_subtest_description)
 {
-    cout << TAB << setiosflags(ios_base::left);
-    stringstream stream;
+    //cout <<TAB<< setw(7) << stream.str(); 
+    cout <<TAB; 

+    stream.str("");
    stream << cpu_time;
    cout << setw(10) << stream.str();

@@ -266,21 +313,86 @@ void TestSystem::printMetrics(double cpu_time, double gpu_time, double gpu_full_

    stream.str("");
    stream << "x" << setprecision(3) << speedup;
-    cout << setw(14) << stream.str();
+    cout << setw(8) << stream.str();

    stream.str("");
    stream << gpu_full_time;
-    cout << setw(14) << stream.str();
+    cout << setw(10) << stream.str();

    stream.str("");
    stream << "x" << setprecision(3) << fullspeedup;
-    cout << setw(14) << stream.str();
+    cout << setw(10) << stream.str();

-    cout << cur_subtest_description_.str();
+    cout << cur_subtest_description.str();
    cout << resetiosflags(ios_base::left) << endl;
 }

-void TestSystem::writeMetrics(double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup, double gpu_min, double gpu_max, double std_dev)
+void TestSystem::printMetrics(int is_accurate, double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup)
+{
+    cout << setiosflags(ios_base::left);
+    stringstream stream;
+    // Prints one result row through printMetricsUti; any is_accurate other than 1 or -1 makes the row red.
+#if 0
+    if(is_accurate == 1)
+            stream << "Pass";
+    else if(is_accurate_ == 0) // NOTE(review): tests the member is_accurate_, not the parameter like its neighbours
+            stream << "Fail";
+    else if(is_accurate == -1)
+        stream << " ";
+    else
+    {
+        std::cout<<"is_accurate errer: "<<is_accurate<<"\n";
+        exit(-1);
+    }
+#endif
+
+    std::stringstream &cur_subtest_description = getCurSubtestDescription();
+   
+#if GTEST_OS_WINDOWS&&!GTEST_OS_WINDOWS_MOBILE
+    
+    WORD color;
+    const HANDLE stdout_handle = GetStdHandle(STD_OUTPUT_HANDLE);
+    // Gets the current text color.
+    CONSOLE_SCREEN_BUFFER_INFO buffer_info;
+    GetConsoleScreenBufferInfo(stdout_handle, &buffer_info);
+    const WORD old_color_attrs = buffer_info.wAttributes;
+    // We need to flush the stream buffers into the console before each
+    // SetConsoleTextAttribute call lest it affect the text that is already
+    // printed but has not yet reached the console.
+    fflush(stdout);
+
+    if(is_accurate == 1||is_accurate == -1)
+    {
+        color = old_color_attrs; // assigned but never read on this path
+        printMetricsUti(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup, stream, cur_subtest_description);
+
+    }else
+    {
+        color = GetColorAttribute(COLOR_RED);
+        SetConsoleTextAttribute(stdout_handle,
+            color| FOREGROUND_INTENSITY);
+
+        printMetricsUti(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup, stream, cur_subtest_description);
+        fflush(stdout);
+        // Restores the text color.
+        SetConsoleTextAttribute(stdout_handle, old_color_attrs);
+    }
+#else
+    GTestColor color = COLOR_RED;
+    if(is_accurate == 1|| is_accurate == -1)
+    {
+        printMetricsUti(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup, stream, cur_subtest_description);
+
+    }else
+    {
+        printf("\033[0;3%sm", GetAnsiColorCode(color)); // ANSI escape selecting the foreground color (red here)
+        printMetricsUti(cpu_time, gpu_time, gpu_full_time, speedup, fullspeedup, stream, cur_subtest_description);
+        printf("\033[m");  // Resets the terminal to default.
+    }
+#endif
+}
+
+void TestSystem::writeMetrics(int is_accurate, double cpu_time, double gpu_time, double gpu_full_time, double speedup, double fullspeedup, double gpu_min, double gpu_max, double std_dev)
 {
    if (!record_)
    {
@@ -288,10 +400,24 @@ void TestSystem::writeMetrics(double cpu_time, double gpu_time, double gpu_full_
        record_ = fopen(recordname_.c_str(), "w");
    }

-    fprintf(record_, "%s,%s,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f\n", itname_changed_ ? itname_.c_str() : "",
-            cur_subtest_description_.str().c_str(),
-            cpu_time, gpu_time, speedup, gpu_full_time, fullspeedup,
-            gpu_min, gpu_max, std_dev);
+    string _is_accurate_;
+
+    if(is_accurate == 1)
+        _is_accurate_ = "Pass";
+    else if(is_accurate == 0)
+        _is_accurate_ = "Fail";
+    else if(is_accurate == -1)
+        _is_accurate_ = " ";
+    else
+    {
+        std::cout<<"is_accurate errer: "<<is_accurate<<"\n";
+        exit(-1);
+    }
+
+    fprintf(record_, "%s,%s,%s,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f,%.3f\n", itname_changed_ ? itname_.c_str() : "",
+        cur_subtest_description_.str().c_str(),
+        _is_accurate_.c_str(), cpu_time, gpu_time, speedup, gpu_full_time, fullspeedup,
+        gpu_min, gpu_max, std_dev);

    if (itname_changed_)
    {
@@ -310,31 +436,31 @@ void TestSystem::writeSummary()
    }

    fprintf(record_, "\nAverage GPU speedup: %.3f\n"
-            "exceeded: %d (%.3f%%)\n"
-            "passed: %d (%.3f%%)\n"
-            "failed: %d (%.3f%%)\n"
-            "\nAverage GPUTOTAL speedup: %.3f\n"
-            "exceeded: %d (%.3f%%)\n"
-            "passed: %d (%.3f%%)\n"
-            "failed: %d (%.3f%%)\n",
-            speedup_total_ / std::max(1, num_subtests_called_),
-            speedup_faster_count_, (float)speedup_faster_count_ / std::max(1, num_subtests_called_) * 100,
-            speedup_equal_count_, (float)speedup_equal_count_ / std::max(1, num_subtests_called_) * 100,
-            speedup_slower_count_, (float)speedup_slower_count_ / std::max(1, num_subtests_called_) * 100,
-            speedup_full_total_ / std::max(1, num_subtests_called_),
-            speedup_full_faster_count_, (float)speedup_full_faster_count_ / std::max(1, num_subtests_called_) * 100,
-            speedup_full_equal_count_, (float)speedup_full_equal_count_ / std::max(1, num_subtests_called_) * 100,
-            speedup_full_slower_count_, (float)speedup_full_slower_count_ / std::max(1, num_subtests_called_) * 100
-           );
+        "exceeded: %d (%.3f%%)\n"
+        "passed: %d (%.3f%%)\n"
+        "failed: %d (%.3f%%)\n"
+        "\nAverage GPUTOTAL speedup: %.3f\n"
+        "exceeded: %d (%.3f%%)\n"
+        "passed: %d (%.3f%%)\n"
+        "failed: %d (%.3f%%)\n",
+        speedup_total_ / std::max(1, num_subtests_called_),
+        speedup_faster_count_, (float)speedup_faster_count_ / std::max(1, num_subtests_called_) * 100,
+        speedup_equal_count_, (float)speedup_equal_count_ / std::max(1, num_subtests_called_) * 100,
+        speedup_slower_count_, (float)speedup_slower_count_ / std::max(1, num_subtests_called_) * 100,
+        speedup_full_total_ / std::max(1, num_subtests_called_),
+        speedup_full_faster_count_, (float)speedup_full_faster_count_ / std::max(1, num_subtests_called_) * 100,
+        speedup_full_equal_count_, (float)speedup_full_equal_count_ / std::max(1, num_subtests_called_) * 100,
+        speedup_full_slower_count_, (float)speedup_full_slower_count_ / std::max(1, num_subtests_called_) * 100
+        );
    fflush(record_);
 }

 void TestSystem::printError(const std::string &msg)
 {
-	if(msg != "CL_INVALID_BUFFER_SIZE")
-	{
-		cout << TAB << "[error: " << msg << "] " << cur_subtest_description_.str() << endl;
-	}
+    if(msg != "CL_INVALID_BUFFER_SIZE")
+    {
+        cout << TAB << "[error: " << msg << "] " << cur_subtest_description_.str() << endl;
+    }
 }

 void gen(Mat &mat, int rows, int cols, int type, Scalar low, Scalar high)
@@ -343,7 +469,134 @@ void gen(Mat &mat, int rows, int cols, int type, Scalar low, Scalar high)
    RNG rng(0);
    rng.fill(mat, RNG::UNIFORM, low, high);
 }
+#if 0
+void gen(Mat &mat, int rows, int cols, int type, int low, int high, int n)
+{
+    assert(n > 0&&n <= cols * rows);
+    assert(type == CV_8UC1||type == CV_8UC3||type == CV_8UC4
+        ||type == CV_32FC1||type == CV_32FC3||type == CV_32FC4);
+
+    RNG rng;
+    //generate random position without duplication
+    std::vector<int> pos;
+    for(int i = 0; i < cols * rows; i++)
+    {
+        pos.push_back(i);
+    }
+
+    for(int i = 0; i < cols * rows; i++)
+    {
+        int temp = i + rng.uniform(0, cols * rows - 1 - i);
+        int temp1 = pos[temp];
+        pos[temp]= pos[i];
+        pos[i] = temp1;
+    }
+
+    std::vector<int> selected_pos;
+    for(int i = 0; i < n; i++)
+    {
+        selected_pos.push_back(pos[i]);
+    }
+
+    pos.clear();
+    //end of generating random y without duplication
+
+    if(type == CV_8UC1)
+    {
+        typedef struct coorStruct_
+        {
+            int x;
+            int y;
+            uchar xy;
+        }coorStruct;
+
+        coorStruct coor_struct;
+
+        std::vector<coorStruct> coor;
+
+        for(int i = 0; i < n; i++)
+        {
+            coor_struct.x = -1;
+            coor_struct.y = -1;
+            coor_struct.xy = (uchar)rng.uniform(low, high);
+            coor.push_back(coor_struct);
+        }

+        for(int i = 0; i < n; i++)
+        {
+            coor[i].y = selected_pos[i]/cols;
+            coor[i].x = selected_pos[i]%cols;
+        }
+        selected_pos.clear();
+
+        mat.create(rows, cols, type);
+        mat.setTo(0);
+
+        for(int i = 0; i < n; i++)
+        {
+            mat.at<unsigned char>(coor[i].y, coor[i].x) = coor[i].xy;
+        }
+    }
+
+    if(type == CV_8UC4 || type == CV_8UC3)
+    {
+        mat.create(rows, cols, type);
+        mat.setTo(0);
+
+        typedef struct Coor
+        {
+            int x;
+            int y;
+
+            uchar r;
+            uchar g;
+            uchar b;
+            uchar alpha;
+        }coor;
+
+        std::vector<coor> coor_vect;
+
+        coor xy_coor;
+
+        for(int i = 0; i < n; i++)
+        {
+            xy_coor.r = (uchar)rng.uniform(low, high);
+            xy_coor.g = (uchar)rng.uniform(low, high);
+            xy_coor.b = (uchar)rng.uniform(low, high);
+            if(type == CV_8UC4)
+                xy_coor.alpha = (uchar)rng.uniform(low, high);
+
+            coor_vect.push_back(xy_coor);
+        }
+
+        for(int i = 0; i < n; i++)
+        {
+            coor_vect[i].y = selected_pos[i]/((int)mat.step1()/mat.elemSize());
+            coor_vect[i].x = selected_pos[i]%((int)mat.step1()/mat.elemSize());
+            //printf("coor_vect[%d] = (%d, %d)\n", i, coor_vect[i].y, coor_vect[i].x);
+        }
+
+        if(type == CV_8UC4)
+        {
+            for(int i = 0; i < n; i++)
+            {
+                mat.at<unsigned char>(coor_vect[i].y, 4 * coor_vect[i].x) = coor_vect[i].r;
+                mat.at<unsigned char>(coor_vect[i].y, 4 * coor_vect[i].x + 1) = coor_vect[i].g;
+                mat.at<unsigned char>(coor_vect[i].y, 4 * coor_vect[i].x + 2) = coor_vect[i].b;
+                mat.at<unsigned char>(coor_vect[i].y, 4 * coor_vect[i].x + 3) = coor_vect[i].alpha;
+            }
+        }else if(type == CV_8UC3)
+        {
+            for(int i = 0; i < n; i++)
+            {
+                mat.at<unsigned char>(coor_vect[i].y, 3 * coor_vect[i].x) = coor_vect[i].r;
+                mat.at<unsigned char>(coor_vect[i].y, 3 * coor_vect[i].x + 1) = coor_vect[i].g;
+                mat.at<unsigned char>(coor_vect[i].y, 3 * coor_vect[i].x + 2) = coor_vect[i].b;
+            }
+        }
+    }
+}
+#endif

 string abspath(const string &relpath)
 {
@@ -352,11 +605,57 @@ string abspath(const string &relpath)


 int CV_CDECL cvErrorCallback(int /*status*/, const char * /*func_name*/,
-                             const char *err_msg, const char * /*file_name*/,
-                             int /*line*/, void * /*userdata*/)
+    const char *err_msg, const char * /*file_name*/,
+    int /*line*/, void * /*userdata*/)
 {
    TestSystem::instance().printError(err_msg);
    return 0;
 }

+// Infinity-norm helpers; the two-matrix form backs ExpectedMatNear.
+
+// checkNorm(m): NORM_INF of a single matrix, i.e. its largest
+// absolute element value.
+double checkNorm(const Mat &m) { return norm(m, NORM_INF); }
+
+// checkNorm(m1, m2): NORM_INF of the difference, i.e. the largest
+// absolute element-wise difference between m1 and m2.
+double checkNorm(const Mat &m1, const Mat &m2) { return norm(m1, m2, NORM_INF); }
+
+// Returns |c - 1| for the CV_TM_CCORR_NORMED response c at (0, 0) of matching m2 against m1 (0 when c is exactly 1).
+double checkSimilarity(const Mat &m1, const Mat &m2)
+{
+    Mat response;
+    matchTemplate(m1, m2, response, CV_TM_CCORR_NORMED);
+    return std::abs(response.at<float>(0, 0) - 1.f);
+}
+
+// Returns 1 if the largest absolute per-element difference between dst and cpu_dst is <= eps, else 0.
+int ExpectedMatNear(cv::Mat dst, cv::Mat cpu_dst, double eps)
+{
+    // Both matrices must agree in type and size (checked by assert only, so not in NDEBUG builds).
+    assert(dst.type() == cpu_dst.type());
+    assert(dst.size() == cpu_dst.size());
+    return checkNorm(dst, cpu_dst) <= eps ? 1 : 0; // one norm pass instead of the original two
+}
+
+// Absolute-tolerance comparison of two doubles.
+// Yields 1 when |val1 - val2| does not exceed abs_error, 0 otherwise
+// (a NaN difference therefore yields 0).
+int ExceptDoubleNear(double val1, double val2, double abs_error)
+{
+    const bool close_enough = fabs(val1 - val2) <= abs_error;
+    return close_enough ? 1 : 0;
+}
+
+// Returns 1 when checkSimilarity(cpu_dst, dst) is at most eps, 0 otherwise.
+int ExceptedMatSimilar(cv::Mat dst, cv::Mat cpu_dst, double eps)
+{
+    // Type and size must match; this is enforced with assert only.
+    assert(dst.type() == cpu_dst.type());
+    assert(dst.size() == cpu_dst.size());
+    return (checkSimilarity(cpu_dst, dst) <= eps) ? 1 : 0;
+}
+
+

--- a/modules/ocl/perf/precomp.hpp
+++ b/modules/ocl/perf/precomp.hpp
@@ -50,10 +50,15 @@
 #include "opencv2/core/core.hpp"
 #include "opencv2/imgproc/imgproc.hpp"
 #include "opencv2/highgui/highgui.hpp"
+#include "opencv2/calib3d/calib3d.hpp"
 #include "opencv2/video/video.hpp"
 #include "opencv2/objdetect/objdetect.hpp"
 #include "opencv2/features2d/features2d.hpp"
 #include "opencv2/ocl/ocl.hpp"
+#include "opencv2/ts/ts.hpp"
+#include "opencv2/ts/ts_perf.hpp"
+#include "opencv2/ts/ts_gtest.h"
+

 #define Min_Size 1000
 #define Max_Size 4000
@@ -64,6 +69,8 @@ using namespace std;
 using namespace cv;

 void gen(Mat &mat, int rows, int cols, int type, Scalar low, Scalar high);
+void gen(Mat &mat, int rows, int cols, int type, int low, int high, int n);
+
 string abspath(const string &relpath);
 int CV_CDECL cvErrorCallback(int, const char *, const char *, const char *, int, void *);
 typedef struct
@@ -76,6 +83,50 @@ COOR do_meanShift(int x0, int y0, uchar *sptr, uchar *dptr, int sstep,
 void meanShiftProc_(const Mat &src_roi, Mat &dst_roi, Mat &dstCoor_roi,
                    int sp, int sr, cv::TermCriteria crit);

+
+// Returns 1 when expected == actual, otherwise 0. The two arguments may be of
+// different types as long as operator== accepts the pair.
+template<class T1, class T2>
+int ExpectedEQ(T1 expected, T2 actual)
+{
+    return (expected == actual) ? 1 : 0;
+}
+
+// Wraps both values in testing::internal::Double and returns 1 when
+// AlmostEquals() reports them equal, otherwise 0.
+template<class T1>
+int EeceptDoubleEQ(T1 expected, T1 actual)
+{
+    testing::internal::Double wrapped_expected(expected);
+    testing::internal::Double wrapped_actual(actual);
+
+    return wrapped_expected.AlmostEquals(wrapped_actual) ? 1 : 0;
+}
+
+// Returns 1 when the two same-typed values compare equal with operator==,
+// otherwise 0.
+template<class T>
+int AssertEQ(T expected, T actual)
+{
+    const bool same = (expected == actual);
+    return same ? 1 : 0;
+}
+
+// Returns 1 when |val1 - val2| <= abs_error, otherwise 0.
+int ExceptDoubleNear(double val1, double val2, double abs_error);
+// NOTE(review): definition not visible in this hunk; presumably reports
+// whether r1 and r2 match within threshold - confirm against the .cpp.
+bool match_rect(cv::Rect r1, cv::Rect r2, int threshold);
+
+// NORM_INF norm of m, and NORM_INF distance between m1 and m2.
+double checkNorm(const cv::Mat &m);
+double checkNorm(const cv::Mat &m1, const cv::Mat &m2);
+// Absolute difference between the CV_TM_CCORR_NORMED matchTemplate() response
+// at (0, 0) and 1.
+double checkSimilarity(const cv::Mat &m1, const cv::Mat &m2);
+
+// Each returns 1 when the corresponding check (checkNorm / checkSimilarity)
+// of the pair is <= eps, otherwise 0.
+int ExpectedMatNear(cv::Mat dst, cv::Mat cpu_dst, double eps);
+int ExceptedMatSimilar(cv::Mat dst, cv::Mat cpu_dst, double eps);
+
 class Runnable
 {
 public:
@@ -171,6 +222,16 @@ public:
        return cur_iter_idx_ >= cpu_num_iters_;
    }

+    // Returns the current value of cur_iter_idx_.
+    // const-qualified: the accessor only reads state, so it is also usable
+    // through a const reference to the object.
+    int get_cur_iter_idx() const
+    {
+        return cur_iter_idx_;
+    }
+
+    // Returns the current value of cpu_num_iters_.
+    // const-qualified: the accessor only reads state, so it is also usable
+    // through a const reference to the object.
+    int get_cpu_num_iters() const
+    {
+        return cpu_num_iters_;
+    }
+
    bool warmupStop()
    {
        return cur_warmup_idx_++ >= gpu_warmup_iters_;
@@ -252,6 +313,16 @@ public:
        itname_changed_ = true;
    }

+    // Stores is_accurate in is_accurate_. Calling it without an argument
+    // stores -1, the same value the constructor initializes the member with.
+    void setAccurate(int is_accurate = -1)
+    {
+        this->is_accurate_ = is_accurate;
+    }
+
+    // Gives callers a mutable reference to the cur_subtest_description_
+    // stream so they can append to it directly.
+    std::stringstream &getCurSubtestDescription()
+    {
+        return this->cur_subtest_description_;
+    }
+
 private:
    TestSystem():
        cur_subtest_is_empty_(true), cpu_elapsed_(0),
@@ -261,7 +332,8 @@ private:
        speedup_full_faster_count_(0), speedup_full_slower_count_(0), speedup_full_equal_count_(0), is_list_mode_(false),
        num_iters_(10), cpu_num_iters_(2),
        gpu_warmup_iters_(1), cur_iter_idx_(0), cur_warmup_idx_(0),
-        record_(0), recordname_("performance"), itname_changed_(true)
+        record_(0), recordname_("performance"), itname_changed_(true), 
+        is_accurate_(-1)
    {
        cpu_times_.reserve(num_iters_);
        gpu_times_.reserve(num_iters_);
@@ -277,20 +349,22 @@ private:
        cur_subtest_description_.str("");
        cur_subtest_is_empty_ = true;
        cur_iter_idx_ = 0;
+        cur_warmup_idx_ = 0;
        cpu_times_.clear();
        gpu_times_.clear();
        gpu_full_times_.clear();
+        is_accurate_ = -1;
    }

    double meanTime(const std::vector<int64> &samples);

    void printHeading();
    void printSummary();
-    void printMetrics(double cpu_time, double gpu_time = 0.0f, double gpu_full_time = 0.0f, double speedup = 0.0f, double fullspeedup = 0.0f);
+    void printMetrics(int is_accurate, double cpu_time, double gpu_time = 0.0f, double gpu_full_time = 0.0f, double speedup = 0.0f, double fullspeedup = 0.0f);

    void writeHeading();
    void writeSummary();
-    void writeMetrics(double cpu_time, double gpu_time = 0.0f, double gpu_full_time = 0.0f,
+    void writeMetrics(int is_accurate, double cpu_time, double gpu_time = 0.0f, double gpu_full_time = 0.0f,
                      double speedup = 0.0f, double fullspeedup = 0.0f,
                      double gpu_min = 0.0f, double gpu_max = 0.0f, double std_dev = 0.0f);

@@ -340,6 +414,8 @@ private:
    std::string recordname_;
    std::string itname_;
    bool itname_changed_;
+
+    int is_accurate_;
 };


@@ -353,7 +429,7 @@ struct name##_init: Runnable { \
 	void name##_init::run()


-#define TEST(name) \
+#define PERFTEST(name) \
 struct name##_test: Runnable { \
 	name##_test(): Runnable(#name) { \
 	TestSystem::instance().addTest(this); \
@@ -375,7 +451,7 @@ struct name##_test: Runnable { \
 	while (!TestSystem::instance().stop()) { \
 	TestSystem::instance().gpuOn()
+// GPU_OFF: calls ocl::finish(), then TestSystem::gpuOff(), closes the brace of
+// the enclosing while loop, and finally calls TestSystem::gpuComplete().
+// NOTE(review): ocl::finish() presumably waits for queued OpenCL work so it
+// falls inside the measured interval - confirm.
 #define GPU_OFF \
-    ocl::finish(); \
+	ocl::finish();\
 	TestSystem::instance().gpuOff(); \
 	} TestSystem::instance().gpuComplete()

@@ -389,5 +465,5 @@ struct name##_test: Runnable { \
+// WARMUP_ON: opens a while loop that repeats until TestSystem::warmupStop()
+// returns true. The macro leaves the brace open, so a later macro or
+// statement must close it.
 #define WARMUP_ON \
 	while (!TestSystem::instance().warmupStop()) {
+// WARMUP_OFF: calls ocl::finish(), closes the brace of the enclosing loop, and
+// then calls TestSystem::warmupComplete().
 #define WARMUP_OFF \
-        ocl::finish(); \
+	ocl::finish();\
 	} TestSystem::instance().warmupComplete()
--- a/modules/ocl/test/test_gemm.cpp
+++ b/modules/ocl/test/test_gemm.cpp
@@ -74,7 +74,7 @@ TEST_P(Gemm, Accuracy)
    cv::gemm(a, b, 1.0, c, 1.0, dst, flags);
    cv::ocl::gemm(cv::ocl::oclMat(a), cv::ocl::oclMat(b), 1.0, cv::ocl::oclMat(c), 1.0, ocl_dst, flags);

-    EXPECT_MAT_NEAR(dst, ocl_dst, mat_size.area() * 1e-4, "");
+    EXPECT_MAT_NEAR(dst, ocl_dst, mat_size.area() * 1e-4);
 }

 INSTANTIATE_TEST_CASE_P(ocl_gemm, Gemm, testing::Combine(