diff --git a/modules/imgproc/src/color.cpp b/modules/imgproc/src/color.cpp index dcbfb8f79f4766b1ad10d24759b933d62cfc686d..f0a8fd8584ff857ba99bd48f3c28ebd703db38b5 100644 --- a/modules/imgproc/src/color.cpp +++ b/modules/imgproc/src/color.cpp @@ -5060,7 +5060,8 @@ static bool ocl_cvtColor( InputArray _src, OutputArray _dst, int code, int dcn ) CV_Assert( scn == 2 && depth == CV_8U ); k.create("YUV2RGB_422", ocl::imgproc::cvtcolor_oclsrc, - opts + format("-D dcn=%d -D bidx=%d -D uidx=%d -D yidx=%d", dcn, bidx, uidx, yidx)); + opts + format("-D dcn=%d -D bidx=%d -D uidx=%d -D yidx=%d%s", dcn, bidx, uidx, yidx, + src.offset % 4 == 0 && src.step % 4 == 0 ? " -D USE_OPTIMIZED_LOAD" : "")); break; } case COLOR_BGR2YCrCb: diff --git a/modules/imgproc/src/opencl/cvtcolor.cl b/modules/imgproc/src/opencl/cvtcolor.cl index e660a52204927569fd114bc343adacab77d9b61b..a7cc776503f4a5263f406ab48e38fc4b13fe1fcc 100644 --- a/modules/imgproc/src/opencl/cvtcolor.cl +++ b/modules/imgproc/src/opencl/cvtcolor.cl @@ -573,22 +573,33 @@ __kernel void YUV2RGB_422(__global const uchar* srcptr, int src_step, int src_of { if (y < rows ) { + __constant float* coeffs = c_YUV2RGBCoeffs_420; + +#ifndef USE_OPTIMIZED_LOAD float U = ((float) src[uidx]) - HALF_MAX; float V = ((float) src[(2 + uidx) % 4]) - HALF_MAX; + float y00 = max(0.f, ((float) src[yidx]) - 16.f) * coeffs[0]; + float y01 = max(0.f, ((float) src[yidx + 2]) - 16.f) * coeffs[0]; +#else + int load_src = *((__global int*) src); + float vec_src[4] = { load_src & 0xff, (load_src >> 8) & 0xff, (load_src >> 16) & 0xff, (load_src >> 24) & 0xff}; + float U = vec_src[uidx] - HALF_MAX; + float V = vec_src[(2 + uidx) % 4] - HALF_MAX; + float y00 = max(0.f, vec_src[yidx] - 16.f) * coeffs[0]; + float y01 = max(0.f, vec_src[yidx + 2] - 16.f) * coeffs[0]; +#endif - __constant float* coeffs = c_YUV2RGBCoeffs_420; float ruv = fma(coeffs[4], V, 0.5f); float guv = fma(coeffs[3], V, fma(coeffs[2], U, 0.5f)); float buv = fma(coeffs[1], U, 0.5f); - float y00 = max(0.f, ((float) src[yidx]) - 16.f) * coeffs[0]; dst[2 - bidx] = convert_uchar_sat(y00 + ruv); dst[1] = convert_uchar_sat(y00 + guv); dst[bidx] = convert_uchar_sat(y00 + buv); #if dcn == 4 dst[3] = 255; #endif - float y01 = max(0.f, ((float) src[yidx + 2]) - 16.f) * coeffs[0]; + dst[dcn + 2 - bidx] = convert_uchar_sat(y01 + ruv); dst[dcn + 1] = convert_uchar_sat(y01 + guv); dst[dcn + bidx] = convert_uchar_sat(y01 + buv);