Merge pull request #16138 from pmur:reg_16137

* imgproc: Prevent 1B overrun of 8C3 SIMD optimization. The fourth value read via v_load_q is essentially ignored, but can cause trouble if it happens to cross page boundaries. The final few iterations may attempt to read the most extreme elements of S, which will read 1B beyond the array in most alignment cases. Dynamically compute the stop. This could be hoisted from the loop, but will require a more extensive change. Likewise, clean up the iteration increment statements to make it more obvious they do channel count (3) elements per pass. This should resolve #16137 * imgproc(resize): extra check

Merge pull request #16138 from pmur:reg_16137
* imgproc: Prevent 1B overrun of 8C3 SIMD optimization. The fourth value read via v_load_q is essentially ignored, but can cause trouble if it happens to cross page boundaries. The final few iterations may attempt to read the most extreme elements of S, which will read 1B beyond the array in most alignment cases. Dynamically compute the stop. This could be hoisted from the loop, but will require a more extensive change. Likewise, clean up the iteration increment statements to make it more obvious they do channel count (3) elements per pass. This should resolve #16137 * imgproc(resize): extra check
1c4a64f0 · Paul Murphy · Alexander Alekhin · afa07257 · 1c4a64f0
隐藏空白更改
内联并排

Showing 1 changed file with 15 additions and 5 deletions

modules/imgproc/src/resize.cpp modules/imgproc/src/resize.cpp +15 -5

未找到文件。
--- a/modules/imgproc/src/resize.cpp
+++ b/modules/imgproc/src/resize.cpp
@@ -1547,7 +1547,7 @@ struct HResizeLinearVec_X4
 struct HResizeLinearVecU8_X4
 {
    int operator()(const uchar** src, uchar** _dst, int count, const int* xofs,
-        const uchar* _alpha, int, int, int cn, int, int xmax) const
+        const uchar* _alpha, int smax, int, int cn, int, int xmax) const
    {
        const short *alpha = (const short*)_alpha;
        int **dst = (int**)_dst;
@@ -1633,8 +1633,18 @@ struct HResizeLinearVecU8_X4
        }
        else if(cn == 3)
        {
-            const int step = 4;
-            const int len0 = xmax - step;
+            int len0 = xmax - cn;
+
+            /* This may need to trim 1 or more extra units depending on the amount of
+               scaling. Test until we find the first value which we know cannot overrun. */
+            while (len0 >= cn &&
+                xofs[len0 - cn] + cn >= smax - cn  // check access: v_load_expand_q(S+xofs[dx]+cn)
+            )
+            {
+                len0 -= cn;
+            }
+            CV_DbgAssert(len0 <= 0 || len0 >= cn);
+
            for( ; k <= (count - 2); k+=2 )
            {
                const uchar *S0 = src[k];
@@ -1642,7 +1652,7 @@ struct HResizeLinearVecU8_X4
                const uchar *S1 = src[k+1];
                int *D1 = dst[k+1];

-                for( dx = 0; dx < len0; dx += 3*step/4 )
+                for( dx = 0; dx < len0; dx += cn )
                {
                    v_int16x8 a = v_load(alpha+dx*2);
                    v_store(&D0[dx], v_dotprod(v_reinterpret_as_s16(v_load_expand_q(S0+xofs[dx]) | (v_load_expand_q(S0+xofs[dx]+cn)<<16)), a));
@@ -1653,7 +1663,7 @@ struct HResizeLinearVecU8_X4
            {
                const uchar *S = src[k];
                int *D = dst[k];
-                for( dx = 0; dx < len0; dx += 3*step/4 )
+                for( dx = 0; dx < len0; dx += cn )
                {
                    v_int16x8 a = v_load(alpha+dx*2);
                    v_store(&D[dx], v_dotprod(v_reinterpret_as_s16(v_load_expand_q(S+xofs[dx]) | (v_load_expand_q(S+xofs[dx]+cn)<<16)), a));