# Patch summary (modules/gapi/src/backends/fluid/): moves the vectorised part of
# the GFluidMerge4 kernel into a CPU-dispatched helper, merge4_simd().
#
# - gfluidcore.cpp: in GFluidMerge4 the inline `#if CV_SIMD128` loop (16 elements
#   per step: four v_load calls followed by v_store_interleave) is replaced by
#   `w = merge4_simd(in1, in2, in3, in4, out, width);` under `#if CV_SIMD`.
#   The existing `for (; w < width; w++)` loop still follows and starts from the
#   index the helper returned.
# - gfluidcore_func.dispatch.cpp: adds the merge4_simd() wrapper, which forwards
#   its arguments through CV_CPU_DISPATCH with CV_CPU_DISPATCH_MODES_ALL.
# - gfluidcore_func.hpp and gfluidcore_func.simd.hpp: add the merge4_simd()
#   declaration right after the merge3_simd() declaration.
# - gfluidcore_func.simd.hpp: adds the implementation.
#     * Returns 0 without writing to `out` when width < v_uint8::nlanes.
#     * Otherwise walks the row nlanes elements at a time: vx_load from
#       in1..in4 at offset x, then v_store_interleave to &out[4 * x].
#     * If elements remain after the last full step (x < width), x is reset to
#       width - nlanes and one more step runs, re-writing some elements that
#       were already stored; the function then returns x, which equals width.
#
# NOTE(review): the file headers, hunk headers and hunk bodies below are run
# together on two physical lines, and whitespace runs appear to have been
# collapsed. Unified diffs are line-oriented, so the original line breaks and
# indentation presumably have to be restored before this patch can be applied
# -- TODO confirm against the upstream change.
diff --git a/modules/gapi/src/backends/fluid/gfluidcore.cpp b/modules/gapi/src/backends/fluid/gfluidcore.cpp index 23d9822bcc351789cd1e4dc40717e07dd17571f8..22f73e553cd0aa6e44534e7a62afe6a174562dcf 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore.cpp +++ b/modules/gapi/src/backends/fluid/gfluidcore.cpp @@ -2686,16 +2686,8 @@ GAPI_FLUID_KERNEL(GFluidMerge4, cv::gapi::core::GMerge4, false) int w = 0; // cycle counter - #if CV_SIMD128 - for (; w <= width-16; w+=16) - { - v_uint8x16 a, b, c, d; - a = v_load(&in1[w]); - b = v_load(&in2[w]); - c = v_load(&in3[w]); - d = v_load(&in4[w]); - v_store_interleave(&out[4*w], a, b, c, d); - } + #if CV_SIMD + w = merge4_simd(in1, in2, in3, in4, out, width); #endif for (; w < width; w++) diff --git a/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp b/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp index eb7a2e91d3ccbb9f32904e55ba5878624f694cc3..9afac9ceb450f4507d88744ddd87b0db8a987485 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp +++ b/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp @@ -256,6 +256,13 @@ int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[], CV_CPU_DISPATCH_MODES_ALL); } +int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[], + const uchar in4[], uchar out[], const int width) +{ + CV_CPU_DISPATCH(merge4_simd, (in1, in2, in3, in4, out, width), + CV_CPU_DISPATCH_MODES_ALL); +} + } // namespace fluid } // namespace gapi } // namespace cv diff --git a/modules/gapi/src/backends/fluid/gfluidcore_func.hpp b/modules/gapi/src/backends/fluid/gfluidcore_func.hpp index f61d7d40b3088307989ff3f44cb4576e4bd57169..868923932d208a4eee37880619563d318865522f 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore_func.hpp +++ b/modules/gapi/src/backends/fluid/gfluidcore_func.hpp @@ -196,6 +196,9 @@ int split4_simd(const uchar in[], uchar out1[], uchar out2[], int merge3_simd(const uchar in1[], const uchar in2[], const uchar 
in3[], uchar out[], const int width); +int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[], + const uchar in4[], uchar out[], const int width); + } // namespace fluid } // namespace gapi } // namespace cv diff --git a/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp b/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp index b5c7cae4314815fbd4224677d6bc38802476d781..2424a57677804ba1f0513d5e7f78011fa6a81e92 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp +++ b/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp @@ -217,6 +217,9 @@ int split4_simd(const uchar in[], uchar out1[], uchar out2[], int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[], uchar out[], const int width); +int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[], + const uchar in4[], uchar out[], const int width); + #ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY struct scale_tag {}; @@ -2076,6 +2079,41 @@ int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[], return x; } +//------------------------- +// +// Fluid kernels: Merge4 +// +//------------------------- + +int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[], + const uchar in4[], uchar out[], const int width) +{ + constexpr int nlanes = v_uint8::nlanes; + if (width < nlanes) + return 0; + + int x = 0; + for (;;) + { + for (; x <= width - nlanes; x += nlanes) + { + v_uint8 a, b, c, d; + a = vx_load(&in1[x]); + b = vx_load(&in2[x]); + c = vx_load(&in3[x]); + d = vx_load(&in4[x]); + v_store_interleave(&out[4 * x], a, b, c, d); + } + if (x < width) + { + x = width - nlanes; + continue; + } + break; + } + return x; +} + #endif // CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY CV_CPU_OPTIMIZATION_NAMESPACE_END