diff --git a/modules/gapi/perf/common/gapi_core_perf_tests.hpp b/modules/gapi/perf/common/gapi_core_perf_tests.hpp index 60294d21930f4e5d64773089b5e42b4eda14c1f9..6104fea3bff9236a265a0abf125a56a9502b719c 100644 --- a/modules/gapi/perf/common/gapi_core_perf_tests.hpp +++ b/modules/gapi/perf/common/gapi_core_perf_tests.hpp @@ -62,7 +62,7 @@ namespace opencv_test class InRangePerfTest : public TestPerfParams> {}; class Split3PerfTest : public TestPerfParams> {}; class Split4PerfTest : public TestPerfParams> {}; - class Merge3PerfTest : public TestPerfParams> {}; + class Merge3PerfTest : public TestPerfParams> {}; class Merge4PerfTest : public TestPerfParams> {}; class RemapPerfTest : public TestPerfParams> {}; class FlipPerfTest : public TestPerfParams> {}; diff --git a/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp b/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp index 83ef13008c26ae9f8a18e441ca4845b095ce1641..3a777cff3da5127a673171a3d5aba750222a1e69 100644 --- a/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp +++ b/modules/gapi/perf/common/gapi_core_perf_tests_inl.hpp @@ -1577,11 +1577,12 @@ PERF_TEST_P_(Merge3PerfTest, TestPerformance) { compare_f cmpF; cv::Size sz; + MatType type = -1; cv::GCompileArgs compile_args; - std::tie(cmpF, sz, compile_args) = GetParam(); + std::tie(cmpF, sz, type, compile_args) = GetParam(); - initMatsRandU(CV_8UC1, sz, CV_8UC3); - cv::Mat in_mat3(sz, CV_8UC1); + initMatsRandU(type, sz, CV_MAKETYPE(type, 3)); + cv::Mat in_mat3(sz, type); cv::Scalar mean = cv::Scalar::all(127); cv::Scalar stddev = cv::Scalar::all(40.f); cv::randn(in_mat3, mean, stddev); diff --git a/modules/gapi/perf/cpu/gapi_core_perf_tests_cpu.cpp b/modules/gapi/perf/cpu/gapi_core_perf_tests_cpu.cpp index 2f91e07e52a43cbe3ec315123df4a1f1255ffb8d..f8e147973edcdd866eced5154e376cbc9f5f006a 100644 --- a/modules/gapi/perf/cpu/gapi_core_perf_tests_cpu.cpp +++ b/modules/gapi/perf/cpu/gapi_core_perf_tests_cpu.cpp @@ -252,6 +252,7 @@ 
INSTANTIATE_TEST_CASE_P(Split4PerfTestCPU, Split4PerfTest, INSTANTIATE_TEST_CASE_P(Merge3PerfTestCPU, Merge3PerfTest, Combine(Values(AbsExact().to_compare_f()), Values(szSmall128, szVGA, sz720p, sz1080p), + Values(CV_8U), Values(cv::compile_args(CORE_CPU)))); INSTANTIATE_TEST_CASE_P(Merge4PerfTestCPU, Merge4PerfTest, diff --git a/modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp b/modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp index 83de793a818502ce3daaefb957c6bad0ad36eaf9..8284896d6c4c077399a3809b9956c087d86a8fc2 100644 --- a/modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp +++ b/modules/gapi/perf/cpu/gapi_core_perf_tests_fluid.cpp @@ -253,6 +253,7 @@ INSTANTIATE_TEST_CASE_P(Split4PerfTestFluid, Split4PerfTest, INSTANTIATE_TEST_CASE_P(Merge3PerfTestFluid, Merge3PerfTest, Combine(Values(AbsExact().to_compare_f()), Values(szSmall128, szVGA, sz720p, sz1080p), + Values(CV_8U, CV_16S, CV_16U, CV_32F), Values(cv::compile_args(CORE_FLUID)))); INSTANTIATE_TEST_CASE_P(Merge4PerfTestFluid, Merge4PerfTest, diff --git a/modules/gapi/perf/gpu/gapi_core_perf_tests_gpu.cpp b/modules/gapi/perf/gpu/gapi_core_perf_tests_gpu.cpp index 8aaa304e589708fe40917b40fbb9f587898bbeda..bcc9894d46d117ea8578afb3319bc8bf920673b0 100644 --- a/modules/gapi/perf/gpu/gapi_core_perf_tests_gpu.cpp +++ b/modules/gapi/perf/gpu/gapi_core_perf_tests_gpu.cpp @@ -242,6 +242,7 @@ INSTANTIATE_TEST_CASE_P(Split4PerfTestGPU, Split4PerfTest, INSTANTIATE_TEST_CASE_P(Merge3PerfTestGPU, Merge3PerfTest, Combine(Values(AbsExact().to_compare_f()), Values( szSmall128, szVGA, sz720p, sz1080p ), + Values(CV_8U), Values(cv::compile_args(CORE_GPU)))); INSTANTIATE_TEST_CASE_P(Merge4PerfTestGPU, Merge4PerfTest, diff --git a/modules/gapi/src/backends/fluid/gfluidcore.cpp b/modules/gapi/src/backends/fluid/gfluidcore.cpp index 7a8f1f5ed8b5fb185c6a1101c5c20c707e7ba2c4..c2686c7bd34a18fffff4aa5ceeb8e98a57200eeb 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore.cpp +++ 
b/modules/gapi/src/backends/fluid/gfluidcore.cpp @@ -2320,12 +2320,15 @@ GAPI_FLUID_KERNEL(GFluidSplit3, cv::gapi::core::GSplit3, false) static void run(const View &src, Buffer &dst1, Buffer &dst2, Buffer &dst3) { + GAPI_Assert((src.meta().depth == CV_8U) && (dst1.meta().depth == CV_8U) && + (dst2.meta().depth == CV_8U) && (dst3.meta().depth == CV_8U) && + (3 == src.meta().chan)); + const auto *in = src.InLine(0); auto *out1 = dst1.OutLine(); auto *out2 = dst2.OutLine(); auto *out3 = dst3.OutLine(); - GAPI_Assert(3 == src.meta().chan); int width = src.length(); int w = 0; @@ -2348,13 +2351,16 @@ GAPI_FLUID_KERNEL(GFluidSplit4, cv::gapi::core::GSplit4, false) static void run(const View &src, Buffer &dst1, Buffer &dst2, Buffer &dst3, Buffer &dst4) { + GAPI_Assert((src.meta().depth == CV_8U) && (dst1.meta().depth == CV_8U) && + (dst2.meta().depth == CV_8U) && (dst3.meta().depth == CV_8U) && + (dst4.meta().depth == CV_8U) && (4 == src.meta().chan)); + const auto *in = src.InLine(0); auto *out1 = dst1.OutLine(); auto *out2 = dst2.OutLine(); auto *out3 = dst3.OutLine(); auto *out4 = dst4.OutLine(); - GAPI_Assert(4 == src.meta().chan); int width = src.length(); int w = 0; @@ -2372,31 +2378,46 @@ GAPI_FLUID_KERNEL(GFluidSplit4, cv::gapi::core::GSplit4, false) } }; +template +CV_ALWAYS_INLINE void run_merge3(Buffer& dst, const View& src1, const View& src2, const View& src3) +{ + const auto* in1 = src1.InLine(0); + const auto* in2 = src2.InLine(0); + const auto* in3 = src3.InLine(0); + auto* out = dst.OutLine(); + + int width = dst.length(); + int w = 0; + +#if CV_SIMD + w = merge3_simd(in1, in2, in3, out, width); +#endif + + for (; w < width; w++) + { + out[3 * w] = in1[w]; + out[3 * w + 1] = in2[w]; + out[3 * w + 2] = in3[w]; + } +} + GAPI_FLUID_KERNEL(GFluidMerge3, cv::gapi::core::GMerge3, false) { static const int Window = 1; - static void run(const View &src1, const View &src2, const View &src3, Buffer &dst) + static void run(const View& src1, const View& src2, const 
View& src3, Buffer& dst) { - const auto *in1 = src1.InLine(0); - const auto *in2 = src2.InLine(0); - const auto *in3 = src3.InLine(0); - auto *out = dst.OutLine(); - - GAPI_Assert(3 == dst.meta().chan); - int width = dst.length(); - int w = 0; + GAPI_Assert((src1.meta().depth == dst.meta().depth) && + (src1.meta().depth == src2.meta().depth) && + (src1.meta().depth == src3.meta().depth)); - #if CV_SIMD - w = merge3_simd(in1, in2, in3, out, width); - #endif + // SRC/DST TYPE OP __VA_ARGS__ + MERGE3_(uchar, run_merge3, dst, src1, src2, src3); + MERGE3_(ushort, run_merge3, dst, src1, src2, src3); + MERGE3_(short, run_merge3, dst, src1, src2, src3); + MERGE3_(float, run_merge3, dst, src1, src2, src3); - for (; w < width; w++) - { - out[3*w ] = in1[w]; - out[3*w + 1] = in2[w]; - out[3*w + 2] = in3[w]; - } + CV_Error(cv::Error::StsBadArg, "unsupported combination of types"); } }; @@ -2407,13 +2428,16 @@ GAPI_FLUID_KERNEL(GFluidMerge4, cv::gapi::core::GMerge4, false) static void run(const View &src1, const View &src2, const View &src3, const View &src4, Buffer &dst) { + GAPI_Assert((dst.meta().depth == CV_8U) && (src1.meta().depth == CV_8U) && + (src2.meta().depth == CV_8U) && (src3.meta().depth == CV_8U) && + (src4.meta().depth == CV_8U) && (4 == dst.meta().chan)); + const auto *in1 = src1.InLine(0); const auto *in2 = src2.InLine(0); const auto *in3 = src3.InLine(0); const auto *in4 = src4.InLine(0); auto *out = dst.OutLine(); - GAPI_Assert(4 == dst.meta().chan); int width = dst.length(); int w = 0; // cycle counter diff --git a/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp b/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp index 6171bff8020d0c7d80aa951c7d864ee029e9f69d..05d34170249bb5e917f0e30643e357e71779a80e 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp +++ b/modules/gapi/src/backends/fluid/gfluidcore_func.dispatch.cpp @@ -277,13 +277,21 @@ int split4_simd(const uchar in[], uchar out1[], uchar out2[], CV_CPU_DISPATCH_MODES_ALL); } -int 
merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[], - uchar out[], const int width) -{ - CV_CPU_DISPATCH(merge3_simd, (in1, in2, in3, out, width), - CV_CPU_DISPATCH_MODES_ALL); +#define MERGE3_SIMD(T) \ +int merge3_simd(const T in1[], const T in2[], const T in3[], \ + T out[], const int width) \ +{ \ + CV_CPU_DISPATCH(merge3_simd, (in1, in2, in3, out, width), \ + CV_CPU_DISPATCH_MODES_ALL); \ } +MERGE3_SIMD(uchar) +MERGE3_SIMD(short) +MERGE3_SIMD(ushort) +MERGE3_SIMD(float) + +#undef MERGE3_SIMD + int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[], const uchar in4[], uchar out[], const int width) { diff --git a/modules/gapi/src/backends/fluid/gfluidcore_func.hpp b/modules/gapi/src/backends/fluid/gfluidcore_func.hpp index aec03c0b50114ffaf510f85af310c6106e84851c..0511f4e095886ae1338b678be9a8128d9601f08f 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore_func.hpp +++ b/modules/gapi/src/backends/fluid/gfluidcore_func.hpp @@ -216,8 +216,16 @@ int split3_simd(const uchar in[], uchar out1[], uchar out2[], int split4_simd(const uchar in[], uchar out1[], uchar out2[], uchar out3[], uchar out4[], const int width); -int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[], - uchar out[], const int width); +#define MERGE3_SIMD(T) \ +int merge3_simd(const T in1[], const T in2[], const T in3[], \ + T out[], const int width); + +MERGE3_SIMD(uchar) +MERGE3_SIMD(short) +MERGE3_SIMD(ushort) +MERGE3_SIMD(float) + +#undef MERGE3_SIMD int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[], const uchar in4[], uchar out[], const int width); diff --git a/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp b/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp index d1fe33fa2e03031496bd6b534a81aede9fb45fa5..aed0ee97d86431708039b81fdc3ec148b821fcb8 100644 --- a/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp +++ b/modules/gapi/src/backends/fluid/gfluidcore_func.simd.hpp @@ -322,12 +322,21 @@ int 
split3_simd(const uchar in[], uchar out1[], uchar out2[], int split4_simd(const uchar in[], uchar out1[], uchar out2[], uchar out3[], uchar out4[], const int width); -int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[], - uchar out[], const int width); +#define MERGE3_SIMD(T) \ +int merge3_simd(const T in1[], const T in2[], const T in3[], \ + T out[], const int width); + +MERGE3_SIMD(uchar) +MERGE3_SIMD(short) +MERGE3_SIMD(ushort) +MERGE3_SIMD(float) + +#undef MERGE3_SIMD int merge4_simd(const uchar in1[], const uchar in2[], const uchar in3[], const uchar in4[], uchar out[], const int width); + #ifndef CV_CPU_OPTIMIZATION_DECLARATIONS_ONLY #define SRC_SHORT_OR_USHORT std::is_same::value || std::is_same::value @@ -2530,33 +2539,41 @@ int split4_simd(const uchar in[], uchar out1[], uchar out2[], // //------------------------- -int merge3_simd(const uchar in1[], const uchar in2[], const uchar in3[], - uchar out[], const int width) -{ - constexpr int nlanes = v_uint8::nlanes; - if (width < nlanes) - return 0; - - int x = 0; - for (;;) - { - for (; x <= width - nlanes; x += nlanes) - { - v_uint8 a, b, c; - a = vx_load(&in1[x]); - b = vx_load(&in2[x]); - c = vx_load(&in3[x]); - v_store_interleave(&out[3 * x], a, b, c); - } - if (x < width) - { - x = width - nlanes; - continue; - } - break; - } - return x; -} +#define MERGE3_SIMD(T) \ +int merge3_simd(const T in1[], const T in2[], const T in3[], \ + T out[], const int width) \ +{ \ + constexpr int nlanes = vector_type_of_t::nlanes; \ + if (width < nlanes) \ + return 0; \ + \ + int x = 0; \ + for (;;) \ + { \ + for (; x <= width - nlanes; x += nlanes) \ + { \ + vector_type_of_t a, b, c; \ + a = vx_load(&in1[x]); \ + b = vx_load(&in2[x]); \ + c = vx_load(&in3[x]); \ + v_store_interleave(&out[3 * x], a, b, c); \ + } \ + if (x < width) \ + { \ + x = width - nlanes; \ + continue; \ + } \ + break; \ + } \ + return x; \ +} + +MERGE3_SIMD(uchar) +MERGE3_SIMD(short) +MERGE3_SIMD(ushort) +MERGE3_SIMD(float) + 
+#undef MERGE3_SIMD //------------------------- // @@ -2926,6 +2943,8 @@ CV_ALWAYS_INLINE void convertto_simd_nocoeff_impl(const SRC* inx, float* outx) int convertto_simd(const SRC in[], DST out[], const int length) \ { \ constexpr int nlanes = vector_type_of_t::nlanes; \ + if (length < nlanes) \ + return 0; \ \ int x = 0; \ for (;;) \ @@ -3093,6 +3112,9 @@ int convertto_scaled_simd(const SRC in[], DST out[], const float alpha, \ const float beta, const int length) \ { \ constexpr int nlanes = vector_type_of_t::nlanes; \ + if (length < nlanes) \ + return 0; \ + \ v_float32 v_alpha = vx_setall_f32(alpha); \ v_float32 v_beta = vx_setall_f32(beta); \ \ diff --git a/modules/gapi/src/backends/fluid/gfluidutils.hpp b/modules/gapi/src/backends/fluid/gfluidutils.hpp index 4da16f2dee13cf5fcb909b97638fe59aa4a9865d..f7eff8d3b8f6196440155ed67964f564853b9b2a 100644 --- a/modules/gapi/src/backends/fluid/gfluidutils.hpp +++ b/modules/gapi/src/backends/fluid/gfluidutils.hpp @@ -86,6 +86,23 @@ using cv::gapi::own::rintd; return; \ } +#define MERGE3_(T, OP, ...) \ + if (cv::DataType::depth == dst.meta().depth && \ + cv::DataType::depth == src1.meta().depth) \ + { \ + GAPI_DbgAssert(dst.length() == src1.length()); \ + GAPI_DbgAssert(dst.length() == src2.length()); \ + GAPI_DbgAssert(dst.length() == src3.length()); \ + \ + GAPI_DbgAssert(1 == src1.meta().chan); \ + GAPI_DbgAssert(1 == src2.meta().chan); \ + GAPI_DbgAssert(1 == src3.meta().chan); \ + GAPI_DbgAssert(3 == dst.meta().chan); \ + \ + OP(__VA_ARGS__); \ + return; \ + } + } // namespace fluid } // namespace gapi } // namespace cv