optimization of cv::CLAHE (~3x)

6f05a250 · Ilya Lavrenov · af6a64d7 · 6f05a250
隐藏空白更改
内联并排

Showing with 41 addition and 24 deletion

modules/imgproc/src/clahe.cpp modules/imgproc/src/clahe.cpp +41 -24

未找到文件。
--- a/modules/imgproc/src/clahe.cpp
+++ b/modules/imgproc/src/clahe.cpp
@@ -233,6 +233,31 @@ namespace
        CLAHE_Interpolation_Body(const cv::Mat& src, const cv::Mat& dst, const cv::Mat& lut, const cv::Size& tileSize, const int& tilesX, const int& tilesY) :
            src_(src), dst_(dst), lut_(lut), tileSize_(tileSize), tilesX_(tilesX), tilesY_(tilesY)
        {
+            buf.allocate(src.cols << 2);
+            ind1_p = (int *)buf;
+            ind2_p = ind1_p + src.cols;
+            xa_p = (float *)(ind2_p + src.cols);
+            xa1_p = xa_p + src.cols;
+
+            int lut_step = static_cast<int>(lut_.step / sizeof(T));
+            float inv_tw = 1.0f / tileSize_.width;
+
+            for (int x = 0; x < src.cols; ++x)
+            {
+                float txf = x * inv_tw - 0.5f;
+
+                int tx1 = cvFloor(txf);
+                int tx2 = tx1 + 1;
+
+                xa_p[x] = txf - tx1;
+                xa1_p[x] = 1.0f - xa_p[x];
+
+                tx1 = std::max(tx1, 0);
+                tx2 = std::min(tx2, tilesX_ - 1);
+
+                ind1_p[x] = tx1 * lut_step;
+                ind2_p[x] = tx2 * lut_step;
+            }
        }

        void operator ()(const cv::Range& range) const;
@@ -245,24 +270,28 @@ namespace
        cv::Size tileSize_;
        int tilesX_;
        int tilesY_;
+
+        cv::AutoBuffer<int> buf;
+        int * ind1_p, * ind2_p;
+        float * xa_p, * xa1_p;
    };

    template <class T>
    void CLAHE_Interpolation_Body<T>::operator ()(const cv::Range& range) const
    {
-        const size_t lut_step = lut_.step / sizeof(T);
+        float inv_th = 1.0f / tileSize_.height;

        for (int y = range.start; y < range.end; ++y)
        {
            const T* srcRow = src_.ptr<T>(y);
            T* dstRow = dst_.ptr<T>(y);

-            const float tyf = (static_cast<float>(y) / tileSize_.height) - 0.5f;
+            float tyf = y * inv_th - 0.5f;

            int ty1 = cvFloor(tyf);
            int ty2 = ty1 + 1;

-            const float ya = tyf - ty1;
+            float ya = tyf - ty1, ya1 = 1.0f - ya;

            ty1 = std::max(ty1, 0);
            ty2 = std::min(ty2, tilesY_ - 1);
@@ -272,27 +301,13 @@ namespace

            for (int x = 0; x < src_.cols; ++x)
            {
-                const float txf = (static_cast<float>(x) / tileSize_.width) - 0.5f;
-
-                int tx1 = cvFloor(txf);
-                int tx2 = tx1 + 1;
+                int srcVal = srcRow[x];

-                const float xa = txf - tx1;
-
-                tx1 = std::max(tx1, 0);
-                tx2 = std::min(tx2, tilesX_ - 1);
+                int ind1 = ind1_p[x] + srcVal;
+                int ind2 = ind2_p[x] + srcVal;

-                const int srcVal = srcRow[x];
-
-                const size_t ind1 = tx1 * lut_step + srcVal;
-                const size_t ind2 = tx2 * lut_step + srcVal;
-
-                float res = 0;
-
-                res += lutPlane1[ind1] * ((1.0f - xa) * (1.0f - ya));
-                res += lutPlane1[ind2] * ((xa) * (1.0f - ya));
-                res += lutPlane2[ind1] * ((1.0f - xa) * (ya));
-                res += lutPlane2[ind2] * ((xa) * (ya));
+                float res = (lutPlane1[ind1] * xa1_p[x] + lutPlane1[ind2] * xa_p[x]) * ya1 +
+                            (lutPlane2[ind1] * xa1_p[x] + lutPlane2[ind2] * xa_p[x]) * ya;

                dstRow[x] = cv::saturate_cast<T>(res);
            }
@@ -403,7 +418,9 @@ namespace
            calcLutBody = cv::makePtr<CLAHE_CalcLut_Body<uchar, 256, 0> >(srcForLut, lut_, tileSize, tilesX_, clipLimit, lutScale);
        else if (_src.type() == CV_16UC1)
            calcLutBody = cv::makePtr<CLAHE_CalcLut_Body<ushort, 4096, 4> >(srcForLut, lut_, tileSize, tilesX_, clipLimit, lutScale);
-        CV_Assert(!calcLutBody.empty());
+        else
+            CV_Error( CV_StsBadArg, "Unsupported type" );
+
        cv::parallel_for_(cv::Range(0, tilesX_ * tilesY_), *calcLutBody);

        cv::Ptr<cv::ParallelLoopBody> interpolationBody;
@@ -411,7 +428,7 @@ namespace
            interpolationBody = cv::makePtr<CLAHE_Interpolation_Body<uchar> >(src, dst, lut_, tileSize, tilesX_, tilesY_);
        else if (_src.type() == CV_16UC1)
            interpolationBody = cv::makePtr<CLAHE_Interpolation_Body<ushort> >(src, dst, lut_, tileSize, tilesX_, tilesY_);
-        CV_Assert(!interpolationBody.empty());
+
        cv::parallel_for_(cv::Range(0, src.rows), *interpolationBody);
    }