diff --git a/modules/core/include/opencv2/core/mat.hpp b/modules/core/include/opencv2/core/mat.hpp
index eeb83c074475e58483d709f11b67479c7166637f..5e667b213242de35995633559101618a0e7599fe 100644
--- a/modules/core/include/opencv2/core/mat.hpp
+++ b/modules/core/include/opencv2/core/mat.hpp
@@ -2572,27 +2572,38 @@ public:
          - number of channels
      */
     int flags;
+
     //! the matrix dimensionality, >= 2
     int dims;
-    //! the number of rows and columns or (-1, -1) when the matrix has more than 2 dimensions
-    int rows, cols;
+
+    //! number of rows in the matrix; -1 when the matrix has more than 2 dimensions
+    int rows;
+
+    //! number of columns in the matrix; -1 when the matrix has more than 2 dimensions
+    int cols;
 
     //! custom allocator
     MatAllocator* allocator;
-    UMatUsageFlags usageFlags; // usage flags for allocator
+
+    //! usage flags for allocator; recommend do not set directly, instead set during construct/create/getUMat
+    UMatUsageFlags usageFlags;
+
     //! and the standard allocator
     static MatAllocator* getStdAllocator();
 
     //! internal use method: updates the continuity flag
     void updateContinuityFlag();
 
-    // black-box container of UMat data
+    //! black-box container of UMat data
     UMatData* u;
 
-    // offset of the submatrix (or 0)
+    //! offset of the submatrix (or 0)
     size_t offset;
 
+    //! dimensional size of the matrix; accessible in various formats
     MatSize size;
+
+    //! number of bytes each matrix element/row/plane/dimension occupies
     MatStep step;
 
 protected:
diff --git a/modules/core/perf/opencl/perf_usage_flags.cpp b/modules/core/perf/opencl/perf_usage_flags.cpp
index d59087121f5111c7b6229e4c058671a405b0ee69..0717121d1cf74e116238a21e50c38dc9627db32d 100644
--- a/modules/core/perf/opencl/perf_usage_flags.cpp
+++ b/modules/core/perf/opencl/perf_usage_flags.cpp
@@ -12,25 +12,33 @@
 namespace opencv_test {
 namespace ocl {
 
-typedef TestBaseWithParam<std::tuple<cv::Size, bool> > UsageFlagsBoolFixture;
-
-OCL_PERF_TEST_P(UsageFlagsBoolFixture, UsageFlags_AllocHostMem, ::testing::Combine(OCL_TEST_SIZES, Bool()))
+typedef TestBaseWithParam<std::tuple<cv::Size, UMatUsageFlags, UMatUsageFlags, UMatUsageFlags>> SizeUsageFlagsFixture;
+
+OCL_PERF_TEST_P(SizeUsageFlagsFixture, UsageFlags_AllocMem,
+    ::testing::Combine(
+        OCL_TEST_SIZES,
+        testing::Values(USAGE_DEFAULT, USAGE_ALLOCATE_HOST_MEMORY, USAGE_ALLOCATE_DEVICE_MEMORY), // USAGE_ALLOCATE_SHARED_MEMORY
+        testing::Values(USAGE_DEFAULT, USAGE_ALLOCATE_HOST_MEMORY, USAGE_ALLOCATE_DEVICE_MEMORY), // USAGE_ALLOCATE_SHARED_MEMORY
+        testing::Values(USAGE_DEFAULT, USAGE_ALLOCATE_HOST_MEMORY, USAGE_ALLOCATE_DEVICE_MEMORY) // USAGE_ALLOCATE_SHARED_MEMORY
+    ))
 {
     Size sz = get<0>(GetParam());
-    bool allocHostMem = get<1>(GetParam());
+    UMatUsageFlags srcAllocMem = get<1>(GetParam());
+    UMatUsageFlags dstAllocMem = get<2>(GetParam());
+    UMatUsageFlags finalAllocMem = get<3>(GetParam());
 
-    UMat src(sz, CV_8UC1, Scalar::all(128));
+    UMat src(sz, CV_8UC1, Scalar::all(128), srcAllocMem);
 
     OCL_TEST_CYCLE()
     {
-        UMat dst(allocHostMem ? USAGE_ALLOCATE_HOST_MEMORY : USAGE_DEFAULT);
+        UMat dst(dstAllocMem);
 
         cv::add(src, Scalar::all(1), dst);
         {
             Mat canvas = dst.getMat(ACCESS_RW);
             cv::putText(canvas, "Test", Point(20, 20), FONT_HERSHEY_PLAIN, 1, Scalar::all(255));
         }
-        UMat final;
+        UMat final(finalAllocMem);
         cv::subtract(dst, Scalar::all(1), final);
     }
 
diff --git a/modules/core/src/ocl.cpp b/modules/core/src/ocl.cpp
index 8749b29ec8e1a7d89f944bb7eef3dbe3a0bb0ff0..6ca61acc18cc68b8f728ea9099e6af7fb5a9b2f2 100644
--- a/modules/core/src/ocl.cpp
+++ b/modules/core/src/ocl.cpp
@@ -5518,13 +5518,19 @@ public:
                     && !(u->originalUMatData && u->originalUMatData->handle)
                 )
                 {
-                    handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|createFlags,
+                    // Change the host-side origdata[size] to "pinned memory" that enables fast
+                    // DMA-transfers over PCIe to the device. Often used with clEnqueueMapBuffer/clEnqueueUnmapMemObject
+                    handle = clCreateBuffer(ctx_handle, CL_MEM_USE_HOST_PTR|(createFlags & ~CL_MEM_ALLOC_HOST_PTR),
                                             u->size, u->origdata, &retval);
-                    CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer(CL_MEM_USE_HOST_PTR|createFlags, sz=%lld, origdata=%p) => %p",
+                    CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer(CL_MEM_USE_HOST_PTR|(createFlags & ~CL_MEM_ALLOC_HOST_PTR), sz=%lld, origdata=%p) => %p",
                             (long long int)u->size, u->origdata, (void*)handle).c_str());
                 }
                 if((!handle || retval < 0) && !(accessFlags & ACCESS_FAST))
                 {
+                    // Allocate device-side memory and immediately copy data from the host-side pointer origdata[size].
+                    // If createFlags=CL_MEM_ALLOC_HOST_PTR (aka cv::USAGE_ALLOCATE_HOST_MEMORY), then
+                    // additionally allocate a host-side "pinned" duplicate of the origdata that is
+                    // managed by OpenCL. This is potentially faster in unaligned/unmanaged scenarios.
                     handle = clCreateBuffer(ctx_handle, CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags,
                                                u->size, u->origdata, &retval);
                     CV_OCL_DBG_CHECK_RESULT(retval, cv::format("clCreateBuffer(CL_MEM_COPY_HOST_PTR|CL_MEM_READ_WRITE|createFlags, sz=%lld, origdata=%p) => %p",
diff --git a/modules/core/src/umatrix.cpp b/modules/core/src/umatrix.cpp
index 09ba92ecdee73a5f657c227b701cdfad94e6dc04..c80d240ecc0211cf2921b38f4dfbc21b4a37fccd 100644
--- a/modules/core/src/umatrix.cpp
+++ b/modules/core/src/umatrix.cpp
@@ -307,8 +307,7 @@ UMat& UMat::operator=(const UMat& m)
         else
             copySize(m);
         allocator = m.allocator;
-        if (usageFlags == USAGE_DEFAULT)
-            usageFlags = m.usageFlags;
+        usageFlags = m.usageFlags;
         u = m.u;
         offset = m.offset;
     }
@@ -332,9 +331,6 @@ void UMat::assignTo(UMat& m, int _type) const
 
 void UMat::create(int _rows, int _cols, int _type, UMatUsageFlags _usageFlags)
 {
-    _type &= TYPE_MASK;
-    if( dims <= 2 && rows == _rows && cols == _cols && type() == _type && u )
-        return;
     int sz[] = {_rows, _cols};
     create(2, sz, _type, _usageFlags);
 }
@@ -426,7 +422,9 @@ UMat& UMat::operator=(UMat&& m)
         m.step.p = m.step.buf;
         m.size.p = &m.rows;
     }
-    m.flags = MAGIC_VAL; m.dims = m.rows = m.cols = 0;
+    m.flags = MAGIC_VAL;
+    m.usageFlags = USAGE_DEFAULT;
+    m.dims = m.rows = m.cols = 0;
     m.allocator = NULL;
     m.u = NULL;
     m.offset = 0;
@@ -600,6 +598,7 @@ UMat Mat::getUMat(AccessFlag accessFlags, UMatUsageFlags usageFlags) const
         CV_XADD(&(u->urefcount), 1);
     }
     hdr.flags = flags;
+    hdr.usageFlags = usageFlags;
     setSize(hdr, dims, size.p, step.p);
     finalizeHdr(hdr);
     hdr.u = new_u;
@@ -610,16 +609,21 @@ UMat Mat::getUMat(AccessFlag accessFlags, UMatUsageFlags usageFlags) const
 
 void UMat::create(int d, const int* _sizes, int _type, UMatUsageFlags _usageFlags)
 {
-    this->usageFlags = _usageFlags;
-
     int i;
     CV_Assert(0 <= d && d <= CV_MAX_DIM && _sizes);
     _type = CV_MAT_TYPE(_type);
 
-    if( u && (d == dims || (d == 1 && dims <= 2)) && _type == type() )
+    // if param value is USAGE_DEFAULT by implicit default param value -or- explicit value
+    // ...then don't change the existing usageFlags
+    // it is not possible to change usage from non-default to USAGE_DEFAULT through create()
+    // ...instead must construct UMat()
+    if (_usageFlags == cv::USAGE_DEFAULT)
+    {
+        _usageFlags = usageFlags;
+    }
+
+    if( u && (d == dims || (d == 1 && dims <= 2)) && _type == type() && _usageFlags == usageFlags )
     {
-        if( d == 2 && rows == _sizes[0] && cols == _sizes[1] )
-            return;
         for( i = 0; i < d; i++ )
             if( size[i] != _sizes[i] )
                 break;
@@ -636,6 +640,7 @@ void UMat::create(int d, const int* _sizes, int _type, UMatUsageFlags _usageFlag
     }
 
     release();
+    usageFlags = _usageFlags;
     if( d == 0 )
         return;
     flags = (_type & CV_MAT_TYPE_MASK) | MAGIC_VAL;
diff --git a/modules/core/test/test_opencl.cpp b/modules/core/test/test_opencl.cpp
index 17cd7b5c8922495c2ec65c369bdcf027d7f67b65..ff1391016e545e355002b657123db42ef2203896 100644
--- a/modules/core/test/test_opencl.cpp
+++ b/modules/core/test/test_opencl.cpp
@@ -207,9 +207,32 @@ TEST_P(OCL_OpenCLExecutionContext_P, ScopeTest)
     executeUMatCall();
 }
 
+INSTANTIATE_TEST_CASE_P(/*nothing*/, OCL_OpenCLExecutionContext_P, getOpenCLTestConfigurations());
 
 
-INSTANTIATE_TEST_CASE_P(/*nothing*/, OCL_OpenCLExecutionContext_P, getOpenCLTestConfigurations());
+typedef testing::TestWithParam<UMatUsageFlags> UsageFlagsFixture;
+OCL_TEST_P(UsageFlagsFixture, UsageFlagsRetained)
+{
+    if (!cv::ocl::useOpenCL())
+    {
+        throw SkipTestException("OpenCL is not available / disabled");
+    }
+
+    const UMatUsageFlags usage = GetParam();
+    cv::UMat flip_in(10, 10, CV_32F, usage);
+    cv::UMat flip_out(usage);
+    cv::flip(flip_in, flip_out, 1);
+    cv::ocl::finish();
+
+    ASSERT_EQ(usage, flip_in.usageFlags);
+    ASSERT_EQ(usage, flip_out.usageFlags);
+}
+
+INSTANTIATE_TEST_CASE_P(
+    /*nothing*/,
+    UsageFlagsFixture,
+    testing::Values(USAGE_DEFAULT, USAGE_ALLOCATE_HOST_MEMORY, USAGE_ALLOCATE_DEVICE_MEMORY)
+);
 
 
 } } // namespace opencv_test::ocl