Commit 8660e048 authored by Alexander Smorkalov

Dynamic CUDA support library loading implemented for Linux.

Logical mistake in macro fixed;
DeviceInfo delegate reimplemented;
Build and warning fixes.
Parent d4087f19
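The change below replaces opencv_core's direct CUDA dependency with a runtime dlopen/dlsym lookup: a separate support library exports extern "C" factory functions, and core falls back to empty stub tables when that library cannot be loaded. A minimal, self-contained sketch of the pattern follows; it is not the commit's code, and the library name, the backendFactory symbol and the Backend types are purely illustrative.

// Sketch only: runtime loading of an optional backend, with a stub fallback.
#include <dlfcn.h>
#include <cstdio>

struct Backend { virtual int deviceCount() const = 0; virtual ~Backend() {} };
struct StubBackend : Backend { int deviceCount() const { return 0; } };

typedef Backend* (*BackendFactory)();

static Backend* loadBackend()
{
    static StubBackend stub;
    void* handle = dlopen("libexample_backend.so", RTLD_LAZY); // hypothetical name
    if (!handle)
        return &stub;                          // library absent: use the stub
    BackendFactory factory = (BackendFactory)dlsym(handle, "backendFactory");
    if (!factory)
    {
        dlclose(handle);
        return &stub;                          // symbol missing: use the stub
    }
    return factory();                          // keep the handle open so the object stays valid
}

int main()
{
    std::printf("devices: %d\n", loadBackend()->deviceCount());
    return 0;
}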
set(the_description "The Core Functionality")
macro(ocv_glob_module_sources_no_cuda)
file(GLOB_RECURSE lib_srcs "src/*.cpp")
file(GLOB_RECURSE lib_int_hdrs "src/*.hpp" "src/*.h")
file(GLOB lib_hdrs "include/opencv2/${name}/*.hpp" "include/opencv2/${name}/*.h")
file(GLOB lib_hdrs_detail "include/opencv2/${name}/detail/*.hpp" "include/opencv2/${name}/detail/*.h")
set(cuda_objs "")
set(lib_cuda_hdrs "")
if(HAVE_CUDA)
ocv_include_directories(${CUDA_INCLUDE_DIRS})
file(GLOB lib_cuda_hdrs "src/cuda/*.hpp")
endif()
source_group("Src" FILES ${lib_srcs} ${lib_int_hdrs})
file(GLOB cl_kernels "src/opencl/*.cl")
if(HAVE_opencv_ocl AND cl_kernels)
ocv_include_directories(${OPENCL_INCLUDE_DIRS})
add_custom_command(
OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.hpp"
COMMAND ${CMAKE_COMMAND} -DCL_DIR="${CMAKE_CURRENT_SOURCE_DIR}/src/opencl" -DOUTPUT="${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" -P "${OpenCV_SOURCE_DIR}/cmake/cl2cpp.cmake"
DEPENDS ${cl_kernels} "${OpenCV_SOURCE_DIR}/cmake/cl2cpp.cmake")
source_group("OpenCL" FILES ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.hpp")
list(APPEND lib_srcs ${cl_kernels} "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.cpp" "${CMAKE_CURRENT_BINARY_DIR}/opencl_kernels.hpp")
endif()
source_group("Include" FILES ${lib_hdrs})
source_group("Include\\detail" FILES ${lib_hdrs_detail})
ocv_set_module_sources(${ARGN} HEADERS ${lib_hdrs} ${lib_hdrs_detail}
SOURCES ${lib_srcs} ${lib_int_hdrs} ${cuda_objs} ${lib_cuda_hdrs})
endmacro()
ocv_add_module(core PRIVATE_REQUIRED ${ZLIB_LIBRARIES})
ocv_module_include_directories(${ZLIB_INCLUDE_DIR})
if(HAVE_WINRT)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /ZW /GS /Gm- /AI\"${WINDOWS_SDK_PATH}/References/CommonConfiguration/Neutral\" /AI\"${VISUAL_STUDIO_PATH}/vcpackages\"")
endif()
file(GLOB lib_cuda_hdrs "include/opencv2/${name}/cuda/*.hpp" "include/opencv2/${name}/cuda/*.h")
file(GLOB lib_cuda_hdrs_detail "include/opencv2/${name}/cuda/detail/*.hpp" "include/opencv2/${name}/cuda/detail/*.h")
source_group("Cuda Headers" FILES ${lib_cuda_hdrs})
source_group("Cuda Headers\\Detail" FILES ${lib_cuda_hdrs_detail})
if(DYNAMIC_CUDA_SUPPORT)
add_definitions(-DDYNAMIC_CUDA_SUPPORT)
else()
add_definitions(-DUSE_CUDA)
endif()
ocv_add_module(core PRIVATE_REQUIRED ${ZLIB_LIBRARIES})
ocv_module_include_directories(${ZLIB_INCLUDE_DIR})
if(HAVE_CUDA)
ocv_include_directories("${OpenCV_SOURCE_DIR}/modules/gpu/include")
ocv_warnings_disable(CMAKE_CXX_FLAGS -Wundef)
endif()
ocv_glob_module_sources(SOURCES "${opencv_core_BINARY_DIR}/version_string.inc"
HEADERS ${lib_cuda_hdrs} ${lib_cuda_hdrs_detail})
file(GLOB lib_cuda_hdrs "include/opencv2/${name}/cuda/*.hpp" "include/opencv2/${name}/cuda/*.h")
file(GLOB lib_cuda_hdrs_detail "include/opencv2/${name}/cuda/detail/*.hpp" "include/opencv2/${name}/cuda/detail/*.h")
source_group("Cuda Headers" FILES ${lib_cuda_hdrs})
source_group("Cuda Headers\\Detail" FILES ${lib_cuda_hdrs_detail})
if (DYNAMIC_CUDA_SUPPORT)
ocv_glob_module_sources_no_cuda(SOURCES "${opencv_core_BINARY_DIR}/version_string.inc"
HEADERS ${lib_cuda_hdrs} ${lib_cuda_hdrs_detail})
else()
ocv_glob_module_sources(SOURCES "${opencv_core_BINARY_DIR}/version_string.inc"
HEADERS ${lib_cuda_hdrs} ${lib_cuda_hdrs_detail})
endif()
ocv_create_module()
ocv_add_precompiled_headers(${the_module})
@@ -31,6 +71,6 @@ ocv_add_precompiled_headers(${the_module})
ocv_add_accuracy_tests()
ocv_add_perf_tests()
if(DYNAMIC_CUDA_SUPPORT)
if (DYNAMIC_CUDA_SUPPORT)
add_subdirectory(cuda)
endif()
project(opencv_core_cuda)
set(HAVE_CUDA FALSE)
add_definitions("-DHAVE_CUDA")
add_definitions(-DUSE_CUDA)
include_directories(${CUDA_INCLUDE_DIRS}
"../src/"
"../include/opencv2/core/"
......
#include "cvconfig.h"
#include "opencv2/core/core.hpp"
#include "opencv2/core/gpumat.hpp"
#include <stdio.h>
#include <iostream>
#ifdef HAVE_CUDA
#include <cuda_runtime.h>
#include <npp.h>
@@ -17,7 +21,30 @@
#endif
#endif
using namespace std;
using namespace cv;
using namespace cv::gpu;
#include "gpumat_cuda.hpp"
\ No newline at end of file
#include "gpumat_cuda.hpp"
#ifdef HAVE_CUDA
static CudaDeviceInfoFuncTable deviceInfoTable;
static CudaFuncTable gpuTable;
#else
static EmptyDeviceInfoFuncTable deviceInfoTable;
static EmptyFuncTable gpuTable;
#endif
extern "C" {
DeviceInfoFuncTable* deviceInfoFactory()
{
return (DeviceInfoFuncTable*)&deviceInfoTable;
}
GpuFuncTable* gpuFactory()
{
return (GpuFuncTable*)&gpuTable;
}
}
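The DeviceInfoFuncTable and GpuFuncTable delegate types, together with their Cuda* and Empty* implementations, are declared in gpumat_cuda.hpp, whose diff is collapsed at the end of this page. As a rough sketch of the shape of the interface these factories return, assuming just a couple of representative methods per table (the real headers declare many more, matching the wrappers in gpumat.cpp below):

// Approximation only; the real declarations live in gpumat_cuda.hpp.
#include <cstddef>

struct DeviceInfoFuncTable
{
    virtual std::size_t sharedMemPerBlock() const = 0;
    virtual bool isCompatible() const = 0;
    virtual ~DeviceInfoFuncTable() {}
};

struct GpuFuncTable
{
    virtual int getCudaEnabledDeviceCount() const = 0;
    virtual void printCudaDeviceInfo(int device) const = 0;
    virtual ~GpuFuncTable() {}
};

// Stub variants used when no CUDA support library is available.
struct EmptyDeviceInfoFuncTable : DeviceInfoFuncTable
{
    std::size_t sharedMemPerBlock() const { return 0; }
    bool isCompatible() const { return false; }
};

struct EmptyFuncTable : GpuFuncTable
{
    int getCudaEnabledDeviceCount() const { return 0; }
    void printCudaDeviceInfo(int) const {}
};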
@@ -137,6 +137,9 @@ namespace cv { namespace gpu
int deviceID() const { return device_id_; }
private:
// The private section is fictive, kept only to preserve binary compatibility.
// Changes to the private fields here have no effect;
// see the delegate code.
void query();
int device_id_;
......
@@ -43,8 +43,9 @@
#include "precomp.hpp"
#include "opencv2/core/gpumat.hpp"
#include <iostream>
#include <dlfcn.h>
#if defined(HAVE_CUDA)
#if defined(HAVE_CUDA) || defined(DYNAMIC_CUDA_SUPPORT)
#include <cuda_runtime.h>
#include <npp.h>
@@ -66,15 +67,81 @@ using namespace cv::gpu;
#include "gpumat_cuda.hpp"
namespace
typedef GpuFuncTable* (*GpuFactoryType)();
typedef DeviceInfoFuncTable* (*DeviceInfoFactoryType)();
static GpuFactoryType gpuFactory = NULL;
static DeviceInfoFactoryType deviceInfoFactory = NULL;
static const std::string getCudaSupportLibName()
{
return "libopencv_core_cuda.so";
}
static bool loadCudaSupportLib()
{
void* handle;
const std::string name = getCudaSupportLibName();
handle = dlopen(name.c_str(), RTLD_LAZY);
if (!handle)
return false;
deviceInfoFactory = (DeviceInfoFactoryType)dlsym(handle, "deviceInfoFactory");
if (!deviceInfoFactory)
{
dlclose(handle);
return false;
}
gpuFactory = (GpuFactoryType)dlsym(handle, "gpuFactory");
if (!gpuFactory)
{
dlclose(handle);
return false;
}
dlclose(handle);
return true;
}
static GpuFuncTable* gpuFuncTable()
{
#ifdef DYNAMIC_CUDA_SUPPORT
static EmptyFuncTable stub;
static GpuFuncTable* libFuncTable = loadCudaSupportLib() ? gpuFactory(): (GpuFuncTable*)&stub;
static GpuFuncTable *funcTable = libFuncTable ? libFuncTable : (GpuFuncTable*)&stub;
#else
# ifdef USE_CUDA
static CudaFuncTable impl;
static GpuFuncTable* funcTable = &impl;
#else
static EmptyFuncTable stub;
static GpuFuncTable* funcTable = &stub;
#endif
#endif
return funcTable;
}
static DeviceInfoFuncTable* deviceInfoFuncTable()
{
#ifdef DYNAMIC_CUDA_SUPPORT
static EmptyDeviceInfoFuncTable stub;
static DeviceInfoFuncTable* libFuncTable = loadCudaSupportLib() ? deviceInfoFactory(): (DeviceInfoFuncTable*)&stub;
static DeviceInfoFuncTable* funcTable = libFuncTable ? libFuncTable : (DeviceInfoFuncTable*)&stub;
#else
# ifdef USE_CUDA
static CudaDeviceInfoFuncTable impl;
static DeviceInfoFuncTable* funcTable = &impl;
#else
static EmptyDeviceInfoFuncTable stub;
static DeviceInfoFuncTable* funcTable = &stub;
#endif
#endif
return funcTable;
}
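Both accessors rely on function-local statics: each probes the support library only on its first call and reuses the cached table pointer afterwards. A tiny sketch of that idiom, with illustrative names (note that pre-C++11 compilers do not guarantee thread-safe initialization of such statics; C++11 does):

#include <cstdio>

static int expensiveProbe()
{
    std::printf("probing...\n");            // runs exactly once
    return 42;
}

static int* cachedProbe()
{
    static int value = expensiveProbe();    // initialized on the first call only
    return &value;
}

int main()
{
    cachedProbe();
    cachedProbe();                          // no second "probing..." line
    return 0;
}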
//////////////////////////////// Initialization & Info ////////////////////////
int cv::gpu::getCudaEnabledDeviceCount() { return gpuFuncTable()->getCudaEnabledDeviceCount(); }
@@ -95,13 +162,13 @@ bool cv::gpu::TargetArchs::hasEqualOrGreater(int major, int minor) { return gpuF
bool cv::gpu::TargetArchs::hasEqualOrGreaterPtx(int major, int minor) { return gpuFuncTable()->hasEqualOrGreaterPtx(major, minor); }
bool cv::gpu::TargetArchs::hasEqualOrGreaterBin(int major, int minor) { return gpuFuncTable()->hasEqualOrGreaterBin(major, minor); }
size_t cv::gpu::DeviceInfo::sharedMemPerBlock() const { return gpuFuncTable()->sharedMemPerBlock(); }
void cv::gpu::DeviceInfo::queryMemory(size_t& total_memory, size_t& free_memory) const { gpuFuncTable()->queryMemory(total_memory, free_memory); }
size_t cv::gpu::DeviceInfo::freeMemory() const { return gpuFuncTable()->freeMemory(); }
size_t cv::gpu::DeviceInfo::totalMemory() const { return gpuFuncTable()->totalMemory(); }
bool cv::gpu::DeviceInfo::supports(FeatureSet feature_set) const { return gpuFuncTable()->supports(feature_set); }
bool cv::gpu::DeviceInfo::isCompatible() const { return gpuFuncTable()->isCompatible(); }
void cv::gpu::DeviceInfo::query() { gpuFuncTable()->query(); }
size_t cv::gpu::DeviceInfo::sharedMemPerBlock() const { return deviceInfoFuncTable()->sharedMemPerBlock(); }
void cv::gpu::DeviceInfo::queryMemory(size_t& total_memory, size_t& free_memory) const { deviceInfoFuncTable()->queryMemory(total_memory, free_memory); }
size_t cv::gpu::DeviceInfo::freeMemory() const { return deviceInfoFuncTable()->freeMemory(); }
size_t cv::gpu::DeviceInfo::totalMemory() const { return deviceInfoFuncTable()->totalMemory(); }
bool cv::gpu::DeviceInfo::supports(FeatureSet feature_set) const { return deviceInfoFuncTable()->supports(feature_set); }
bool cv::gpu::DeviceInfo::isCompatible() const { return deviceInfoFuncTable()->isCompatible(); }
void cv::gpu::DeviceInfo::query() { deviceInfoFuncTable()->query(); }
void cv::gpu::printCudaDeviceInfo(int device) { gpuFuncTable()->printCudaDeviceInfo(device); }
void cv::gpu::printShortCudaDeviceInfo(int device) { gpuFuncTable()->printShortCudaDeviceInfo(device); }
@@ -556,7 +623,7 @@ namespace cv { namespace gpu
void setTo(GpuMat& src, Scalar s, cudaStream_t stream)
{
gpuFuncTable()->setTo(src, s, stream);
gpuFuncTable()->setTo(src, s, cv::gpu::GpuMat(), stream);
}
void setTo(GpuMat& src, Scalar s, const GpuMat& mask, cudaStream_t stream)
......
This diff is collapsed.