From d8ddbcc6d85cf19d727890a6ed49f5410a763a94 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=E7=9F=B3=E6=99=93=E4=BC=9F?= <39303645+Shixiaowei02@users.noreply.github.com> Date: Thu, 21 Nov 2019 10:14:18 +0800 Subject: [PATCH] fix cuda build error, test=develop (#2464) * fix cuda building, test=develop * remove sequence_pool from cmake because build error, test=develop --- cmake/cuda.cmake | 22 +++++++++++++++------- cmake/cudnn.cmake | 3 ++- lite/api/CMakeLists.txt | 3 +-- lite/backends/cuda/CMakeLists.txt | 3 +-- lite/backends/cuda/math/CMakeLists.txt | 3 +-- lite/core/context.h | 7 ------- lite/kernels/cuda/CMakeLists.txt | 6 +++--- lite/tools/debug/debug_utils.h | 6 +++--- 8 files changed, 26 insertions(+), 27 deletions(-) diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake index 9ff908a4c8..b80c35a0e6 100644 --- a/cmake/cuda.cmake +++ b/cmake/cuda.cmake @@ -174,15 +174,23 @@ if(NOT WITH_DSO) endif(WIN32) endif(NOT WITH_DSO) -get_filename_component(CUDA_LIB_PATH ${CUDA_curand_LIBRARY} DIRECTORY) -function(import_static_library alias path) +function(add_cuda_static_lib alias cuda_lib_paths file_name) + unset(ABS_PATH CACHE) + find_library(ABS_PATH NAMES ${file_name} PATHS ${${cuda_lib_paths}} NO_DEFAULT_PATH) add_library(${alias} STATIC IMPORTED GLOBAL) - set_property(TARGET ${alias} PROPERTY IMPORTED_LOCATION ${path}) + set_property(TARGET ${alias} PROPERTY IMPORTED_LOCATION ${ABS_PATH}) + set(CUDA_STATIC_MODULES ${CUDA_STATIC_MODULES} ${alias} PARENT_SCOPE) endfunction() -import_static_library(cudart_static ${CUDA_LIB_PATH}/libcudart_static.a) -import_static_library(cublas_static ${CUDA_LIB_PATH}/libcublas_static.a) -import_static_library(curand_static ${CUDA_LIB_PATH}/libcurand_static.a) -import_static_library(culibos_static ${CUDA_LIB_PATH}/libculibos.a) + +add_cuda_static_lib(cudart_static CUDNN_CHECK_LIBRARY_DIRS libcudart_static.a) +add_cuda_static_lib(cublas_static CUDNN_CHECK_LIBRARY_DIRS libcublas_static.a) +add_cuda_static_lib(curand_static CUDNN_CHECK_LIBRARY_DIRS libcurand_static.a) +add_cuda_static_lib(culibos_static CUDNN_CHECK_LIBRARY_DIRS libculibos.a) +if((${CUDA_VERSION} GREATER 10.0) OR (${CUDA_VERSION} EQUAL 10.0)) + add_cuda_static_lib(cublasLt_static CUDNN_CHECK_LIBRARY_DIRS libcublasLt_static.a) +endif() + +set_property(GLOBAL PROPERTY CUDA_STATIC_MODULES cudnn_static ${CUDA_STATIC_MODULES}) # setting nvcc arch flags select_nvcc_arch_flags(NVCC_FLAGS_EXTRA) diff --git a/cmake/cudnn.cmake b/cmake/cudnn.cmake index c0cb4ccea6..574baa86a8 100644 --- a/cmake/cudnn.cmake +++ b/cmake/cudnn.cmake @@ -33,7 +33,8 @@ list(APPEND CUDNN_CHECK_LIBRARY_DIRS $ENV{CUDNN_ROOT}/lib /usr/lib ${CUDA_TOOLKIT_ROOT_DIR} - ${CUDA_TOOLKIT_ROOT_DIR}/lib/x64 + ${CUDA_TOOLKIT_ROOT_DIR}/lib/x64 + ${CUDA_TOOLKIT_ROOT_DIR}/lib64 ) if((${CUDA_VERSION} GREATER 10.0) OR (${CUDA_VERSION} EQUAL 10.0)) diff --git a/lite/api/CMakeLists.txt b/lite/api/CMakeLists.txt index 63d53869ea..408a63e3f5 100644 --- a/lite/api/CMakeLists.txt +++ b/lite/api/CMakeLists.txt @@ -87,9 +87,8 @@ endif() set(light_api_deps scope target_wrapper_host model_parser program) if(LITE_WITH_CUDA) + get_property(cuda_static_deps GLOBAL PROPERTY CUDA_STATIC_MODULES) set(light_api_deps ${light_api_deps} target_wrapper_cuda) - set(cuda_static_deps cudart_static cublas_static curand_static - cudnn_static culibos_static) endif() lite_cc_library(light_api SRCS light_api.cc DEPS scope target_wrapper_host model_parser diff --git a/lite/backends/cuda/CMakeLists.txt b/lite/backends/cuda/CMakeLists.txt index a6c3fcc66a..f73b4120e6 100644 --- a/lite/backends/cuda/CMakeLists.txt +++ b/lite/backends/cuda/CMakeLists.txt @@ -1,8 +1,7 @@ if(NOT LITE_WITH_CUDA) return() endif() -set(cuda_static_deps cudnn_static cublas_static curand_static - culibos_static cudart_static) +get_property(cuda_static_deps GLOBAL PROPERTY CUDA_STATIC_MODULES) nv_library(target_wrapper_cuda SRCS target_wrapper.cc DEPS ${cuda_static_deps}) nv_library(cuda_blas SRCS blas.cc DEPS ${cuda_static_deps}) diff --git a/lite/backends/cuda/math/CMakeLists.txt b/lite/backends/cuda/math/CMakeLists.txt index 1829bcf330..82acd2d0ea 100644 --- a/lite/backends/cuda/math/CMakeLists.txt +++ b/lite/backends/cuda/math/CMakeLists.txt @@ -2,8 +2,7 @@ if(NOT LITE_WITH_CUDA) return() endif() -set(cuda_static_deps cudnn_static cublas_static curand_static - culibos_static cudart_static) +get_property(cuda_static_deps GLOBAL PROPERTY CUDA_STATIC_MODULES) nv_library(cuda_activation SRCS activation.cu DEPS ${cuda_static_deps}) nv_library(cuda_scale SRCS scale.cu DEPS ${cuda_static_deps}) diff --git a/lite/core/context.h b/lite/core/context.h index 545c6d2e88..eb25e7e1d9 100644 --- a/lite/core/context.h +++ b/lite/core/context.h @@ -207,13 +207,6 @@ class Context { ctx->cublas_fp32_ = cublas_fp32_; } - CUDAContext& operator=(const CUDAContext& context) { - this->Init( - context.device_id_, context.exec_stream_id_, context.io_stream_id_); - this->cublas_fp32_ = context.cublas_fp32_; - return *this; - } - const cudaStream_t& exec_stream() const { return exec_stream_; } void SetExecStream(cudaStream_t stream) { exec_stream_ = stream; } diff --git a/lite/kernels/cuda/CMakeLists.txt b/lite/kernels/cuda/CMakeLists.txt index 2b7cd648c9..8b26b772bd 100644 --- a/lite/kernels/cuda/CMakeLists.txt +++ b/lite/kernels/cuda/CMakeLists.txt @@ -9,7 +9,7 @@ add_kernel(io_copy_compute_cuda CUDA basic SRCS io_copy_compute.cc DEPS ${lite_k add_kernel(leaky_relu_compute_cuda CUDA basic SRCS leaky_relu_compute.cu DEPS ${lite_kernel_deps}) add_kernel(relu_compute_cuda CUDA basic SRCS relu_compute.cu DEPS ${lite_kernel_deps}) add_kernel(yolo_box_compute_cuda CUDA basic SRCS yolo_box_compute.cu DEPS ${lite_kernel_deps}) -add_kernel(sequence_pool_compute_cuda CUDA extra SRCS sequence_pool_compute.cu DEPS ${lite_kernel_deps}) +#add_kernel(sequence_pool_compute_cuda CUDA extra SRCS sequence_pool_compute.cu DEPS ${lite_kernel_deps}) add_kernel(transpose_compute_cuda CUDA basic SRCS transpose_compute.cu DEPS ${lite_kernel_deps} ${math_cuda} cuda_transpose) add_kernel(nearest_interp_compute_cuda CUDA basic SRCS nearest_interp_compute.cu DEPS ${lite_kernel_deps}) add_kernel(conv2d_cuda CUDA basic SRCS conv_compute.cc DEPS ${lite_kernel_deps} ${math_cuda}) @@ -44,7 +44,7 @@ nv_test(yolo_box_compute_cuda_test SRCS yolo_box_compute_test.cc DEPS yolo_box_c nv_test(transpose_compute_cuda_test SRCS transpose_compute_test.cc DEPS transpose_compute_cuda) nv_test(concat_compute_cuda_test SRCS concat_compute_test.cc DEPS concat_compute_cuda) nv_test(elementwise_add_compute_cuda_test SRCS elementwise_add_compute_test.cc DEPS elementwise_add_compute_cuda) -nv_test(sequence_pool_compute_cuda_test SRCS sequence_pool_compute_test.cc DEPS sequence_pool_compute_cuda) +#nv_test(sequence_pool_compute_cuda_test SRCS sequence_pool_compute_test.cc DEPS sequence_pool_compute_cuda) nv_test(softmax_compute_cuda_test SRCS softmax_compute_test.cc DEPS softmax_compute_cuda) #nv_test(layout_cuda_test SRCS layout_compute_test.cc DEPS layout_compute_cuda) nv_test(mul_compute_cuda_test SRCS mul_compute_test.cc DEPS mul_compute_cuda) @@ -60,7 +60,7 @@ if(LITE_BUILD_EXTRA) nv_test(search_seq_depadding_compute_cuda_test SRCS search_seq_depadding_compute_test.cc DEPS search_seq_depadding_compute_cuda) nv_test(match_matrix_tensor_compute_cuda_test SRCS match_matrix_tensor_compute_test.cc DEPS match_matrix_tensor_compute_cuda) nv_test(search_grnn_compute_cuda_test SRCS search_grnn_compute_test.cc DEPS search_grnn_compute_cuda) - nv_test(sequence_pool_compute_cuda_test SRCS sequence_pool_compute_test.cc DEPS sequence_pool_compute_cuda sequence_pooling) + #nv_test(sequence_pool_compute_cuda_test SRCS sequence_pool_compute_test.cc DEPS sequence_pool_compute_cuda sequence_pooling) nv_test(lookup_table_compute_cuda_test SRCS lookup_table_compute_test.cc DEPS lookup_table_compute_cuda) nv_test(search_aligned_mat_mul_compute_cuda_test SRCS search_aligned_mat_mul_compute_test.cc DEPS search_aligned_mat_mul_compute_cuda) nv_test(search_seq_fc_compute_cuda_test SRCS search_seq_fc_compute_test.cc DEPS search_seq_fc_compute_cuda) diff --git a/lite/tools/debug/debug_utils.h b/lite/tools/debug/debug_utils.h index 7f77b90488..ff08c47e52 100644 --- a/lite/tools/debug/debug_utils.h +++ b/lite/tools/debug/debug_utils.h @@ -27,7 +27,7 @@ #include "lite/model_parser/pb/var_desc.h" #include "lite/utils/all.h" -DEFINE_string(model_dir, "", "Model dir path"); +DEFINE_string(model_path, "", "Model dir path"); DEFINE_string(input_file, "", "Input datas file path"); DEFINE_string(topo_output_file, "", "Runtime topology order output file path"); DEFINE_bool(output_topo, true, "Dump runtime topology or not"); @@ -185,7 +185,7 @@ void ParseConfig(DebugConfig* conf) { CHECK(conf); #define CHECK_NON_EMPTY(name__) \ CHECK(!FLAGS_##name__.empty()) << "Option " << #name__ << " can't be empty." - CHECK_NON_EMPTY(model_dir); + CHECK_NON_EMPTY(model_path); if (FLAGS_output_topo) { CHECK_NON_EMPTY(topo_output_file); } @@ -193,7 +193,7 @@ void ParseConfig(DebugConfig* conf) { CHECK_NON_EMPTY(tensor_output_file); } #undef CHECK_NON_EMPTY - conf->model_dir = FLAGS_model_dir; + conf->model_dir = FLAGS_model_path; conf->topo_output_file = FLAGS_topo_output_file; conf->tensor_output_file = FLAGS_tensor_output_file; conf->input_file = FLAGS_input_file; -- GitLab