未验证 提交 9c7adb72 编写于 作者: A Anna Khakimova 提交者: GitHub

Merge pull request #21530 from anna-khakimova:ak/simd_divrc

* GAPI Fluid: SIMD for DivRC kernel.

* Fluid: Div kernel's SIMD refactoring

* SIMD for DivRC 3 channel case

* Applied comments
上级 ebb6915e
......@@ -528,6 +528,10 @@ PERF_TEST_P_(DivRCPerfTest, TestPerformance)
// FIXIT Unstable input data for divide
initMatsRandU(type, sz, dtype, false);
// This condition is needed as a workaround for a bug in OpenCV.
// It reinitializes the divider matrix without zero values for the CV_16S DST type.
if (dtype == CV_16S || (type == CV_16S && dtype == -1))
cv::randu(in_mat1, cv::Scalar::all(1), cv::Scalar::all(255));
// OpenCV code ///////////////////////////////////////////////////////////
cv::divide(sc, in_mat1, out_mat_ocv, scale, dtype);
......
......@@ -101,8 +101,8 @@ INSTANTIATE_TEST_CASE_P(DivCPerfTestFluid, DivCPerfTest,
INSTANTIATE_TEST_CASE_P(DivRCPerfTestFluid, DivRCPerfTest,
Combine(Values(Tolerance_FloatRel_IntAbs(1e-5, 1).to_compare_f()),
Values(szSmall128, szVGA, sz720p, sz1080p),
Values(CV_8UC1, CV_8UC3, CV_32FC1),
Values(-1, CV_8U, CV_32F),
Values(CV_8UC1, CV_8UC3, CV_16UC1, CV_16SC1, CV_32FC1),
Values(-1, CV_8U, CV_16U, CV_16S, CV_32F),
Values(1.0),
Values(cv::compile_args(CORE_FLUID))));
......
......@@ -936,8 +936,8 @@ CV_ALWAYS_INLINE void run_arithm_s(Buffer &dst, const View &src, const float sca
}
template<typename DST, typename SRC>
static void run_arithm_rs(Buffer &dst, const View &src, const float scalar[4], Arithm arithm,
float scale=1)
CV_ALWAYS_INLINE void run_arithm_rs(Buffer &dst, const View &src, const float scalar[],
Arithm arithm, float scale=1)
{
const auto *in = src.InLine<SRC>(0);
auto *out = dst.OutLine<DST>();
......@@ -955,15 +955,23 @@ static void run_arithm_rs(Buffer &dst, const View &src, const float scalar[4], A
w = subrc_simd(scalar, in, out, length, chan);
#endif
for (; w < length; ++w)
{
out[w] = subr<DST>(in[w], scalar[w % chan]);
}
break;
}
// TODO: optimize division
case ARITHM_DIVIDE:
for (int w=0; w < width; w++)
for (int c=0; c < chan; c++)
out[chan*w + c] = div<DST>(scalar[c], in[chan*w + c], scale);
{
int w = 0;
#if CV_SIMD
w = divrc_simd(scalar, in, out, length, chan, scale);
#endif
for (; w < length; ++w)
{
out[w] = div<DST>(scalar[w % chan], in[w], scale);
}
break;
}
default: CV_Error(cv::Error::StsBadArg, "unsupported arithmetic operation");
}
}
......@@ -1319,7 +1327,9 @@ CV_ALWAYS_INLINE void run_divc(Buffer& dst, const View& src, Buffer& scratch,
#endif
for (; w < length; ++w)
{
out[w] = div<DST>(in[w], scalar[w % chan], scale);
}
}
GAPI_FLUID_KERNEL(GFluidDivC, cv::gapi::core::GDivC, true)
......@@ -1402,32 +1412,55 @@ GAPI_FLUID_KERNEL(GFluidDivC, cv::gapi::core::GDivC, true)
}
};
GAPI_FLUID_KERNEL(GFluidDivRC, cv::gapi::core::GDivRC, false)
GAPI_FLUID_KERNEL(GFluidDivRC, cv::gapi::core::GDivRC, true)
{
static const int Window = 1;
static void run(const cv::Scalar &_scalar, const View &src, double _scale, int /*dtype*/,
Buffer &dst)
static void run(const cv::Scalar& _scalar, const View& src, double _scale, int /*dtype*/,
Buffer& dst, Buffer& scratch)
{
const float scalar[4] = {
static_cast<float>(_scalar[0]),
static_cast<float>(_scalar[1]),
static_cast<float>(_scalar[2]),
static_cast<float>(_scalar[3])
};
GAPI_Assert(src.meta().chan <= 4);
if (dst.y() == 0)
{
const int chan = src.meta().chan;
float* _scratch = scratch.OutLine<float>();
scalar_to_scratch(_scalar, _scratch, scratch.length(), chan);
}
const float* scalar = scratch.OutLine<float>();
const float scale = static_cast<float>(_scale);
// DST SRC OP __VA_ARGS__
UNARY_(uchar , uchar , run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(uchar , short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(uchar , float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_( short, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_( float, uchar , run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_( float, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_( float, float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(uchar, uchar, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(uchar, ushort, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(uchar, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(uchar, float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(ushort, ushort, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(ushort, uchar, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(ushort, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(ushort, float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(short, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(short, uchar, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(short, ushort, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(short, float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(float, uchar, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(float, ushort, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(float, short, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
UNARY_(float, float, run_arithm_rs, dst, src, scalar, ARITHM_DIVIDE, scale);
CV_Error(cv::Error::StsBadArg, "unsupported combination of types");
}
static void initScratch(const GScalarDesc&, const GMatDesc&, double, int, Buffer& scratch)
{
initScratchBuffer(scratch);
}
static void resetScratch(Buffer& /*scratch*/)
{
}
};
//-------------------
......
......@@ -235,6 +235,33 @@ ABSDIFFC_SIMD(float)
#undef ABSDIFFC_SIMD
// Public divrc_simd() entry points, one overload per (source, destination)
// element-type pair. Every overload has the same body: it passes its arguments,
// unchanged, to the divrc_simd implementation picked by CV_CPU_DISPATCH over
// CV_CPU_DISPATCH_MODES_ALL.
#define DIVRC_SIMD(SRC_T, DST_T)                                          \
int divrc_simd(const float scalar[], const SRC_T in[], DST_T out[],       \
               const int length, const int chan, const float scale)       \
{                                                                         \
    CV_CPU_DISPATCH(divrc_simd, (scalar, in, out, length, chan, scale),   \
                    CV_CPU_DISPATCH_MODES_ALL);                           \
}

// Emits the overloads for all four source element types of one destination type.
#define DIVRC_SIMD_FOR_DST(DST_T) \
    DIVRC_SIMD(uchar,  DST_T)     \
    DIVRC_SIMD(ushort, DST_T)     \
    DIVRC_SIMD(short,  DST_T)     \
    DIVRC_SIMD(float,  DST_T)

DIVRC_SIMD_FOR_DST(uchar)
DIVRC_SIMD_FOR_DST(short)
DIVRC_SIMD_FOR_DST(ushort)
DIVRC_SIMD_FOR_DST(float)

// Both macros are local to this list of overloads.
#undef DIVRC_SIMD_FOR_DST
#undef DIVRC_SIMD
int split3_simd(const uchar in[], uchar out1[], uchar out2[],
uchar out3[], const int width)
{
......
......@@ -187,6 +187,29 @@ ABSDIFFC_SIMD(float)
#undef ABSDIFFC_SIMD
// Prototypes of the divrc_simd() overloads, one per (source, destination)
// element-type pair. The call site in run_arithm_rs uses the returned int as
// the index from which its scalar fallback loop continues.
#define DIVRC_SIMD(SRC_T, DST_T)                                      \
int divrc_simd(const float scalar[], const SRC_T in[], DST_T out[],   \
               const int length, const int chan, const float scale);

// Declares the overloads for all four source element types of one destination type.
#define DIVRC_SIMD_FOR_DST(DST_T) \
    DIVRC_SIMD(uchar,  DST_T)     \
    DIVRC_SIMD(ushort, DST_T)     \
    DIVRC_SIMD(short,  DST_T)     \
    DIVRC_SIMD(float,  DST_T)

DIVRC_SIMD_FOR_DST(uchar)
DIVRC_SIMD_FOR_DST(short)
DIVRC_SIMD_FOR_DST(ushort)
DIVRC_SIMD_FOR_DST(float)

// Both macros are local to this list of prototypes.
#undef DIVRC_SIMD_FOR_DST
#undef DIVRC_SIMD
int split3_simd(const uchar in[], uchar out1[], uchar out2[],
uchar out3[], const int width);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册