未验证 提交 d8ddbcc6 编写于 作者: 石晓伟 提交者: GitHub

fix cuda build error, test=develop (#2464)

* fix cuda building, test=develop

* remove sequence_pool from cmake because build error, test=develop
上级 43f1358f
...@@ -174,15 +174,23 @@ if(NOT WITH_DSO) ...@@ -174,15 +174,23 @@ if(NOT WITH_DSO)
endif(WIN32) endif(WIN32)
endif(NOT WITH_DSO) endif(NOT WITH_DSO)
get_filename_component(CUDA_LIB_PATH ${CUDA_curand_LIBRARY} DIRECTORY) function(add_cuda_static_lib alias cuda_lib_paths file_name)
function(import_static_library alias path) unset(ABS_PATH CACHE)
find_library(ABS_PATH NAMES ${file_name} PATHS ${${cuda_lib_paths}} NO_DEFAULT_PATH)
add_library(${alias} STATIC IMPORTED GLOBAL) add_library(${alias} STATIC IMPORTED GLOBAL)
set_property(TARGET ${alias} PROPERTY IMPORTED_LOCATION ${path}) set_property(TARGET ${alias} PROPERTY IMPORTED_LOCATION ${ABS_PATH})
set(CUDA_STATIC_MODULES ${CUDA_STATIC_MODULES} ${alias} PARENT_SCOPE)
endfunction() endfunction()
import_static_library(cudart_static ${CUDA_LIB_PATH}/libcudart_static.a)
import_static_library(cublas_static ${CUDA_LIB_PATH}/libcublas_static.a) add_cuda_static_lib(cudart_static CUDNN_CHECK_LIBRARY_DIRS libcudart_static.a)
import_static_library(curand_static ${CUDA_LIB_PATH}/libcurand_static.a) add_cuda_static_lib(cublas_static CUDNN_CHECK_LIBRARY_DIRS libcublas_static.a)
import_static_library(culibos_static ${CUDA_LIB_PATH}/libculibos.a) add_cuda_static_lib(curand_static CUDNN_CHECK_LIBRARY_DIRS libcurand_static.a)
add_cuda_static_lib(culibos_static CUDNN_CHECK_LIBRARY_DIRS libculibos.a)
if((${CUDA_VERSION} GREATER 10.0) OR (${CUDA_VERSION} EQUAL 10.0))
add_cuda_static_lib(cublasLt_static CUDNN_CHECK_LIBRARY_DIRS libcublasLt_static.a)
endif()
set_property(GLOBAL PROPERTY CUDA_STATIC_MODULES cudnn_static ${CUDA_STATIC_MODULES})
# setting nvcc arch flags # setting nvcc arch flags
select_nvcc_arch_flags(NVCC_FLAGS_EXTRA) select_nvcc_arch_flags(NVCC_FLAGS_EXTRA)
......
...@@ -33,7 +33,8 @@ list(APPEND CUDNN_CHECK_LIBRARY_DIRS ...@@ -33,7 +33,8 @@ list(APPEND CUDNN_CHECK_LIBRARY_DIRS
$ENV{CUDNN_ROOT}/lib $ENV{CUDNN_ROOT}/lib
/usr/lib /usr/lib
${CUDA_TOOLKIT_ROOT_DIR} ${CUDA_TOOLKIT_ROOT_DIR}
${CUDA_TOOLKIT_ROOT_DIR}/lib/x64 ${CUDA_TOOLKIT_ROOT_DIR}/lib/x64
${CUDA_TOOLKIT_ROOT_DIR}/lib64
) )
if((${CUDA_VERSION} GREATER 10.0) OR (${CUDA_VERSION} EQUAL 10.0)) if((${CUDA_VERSION} GREATER 10.0) OR (${CUDA_VERSION} EQUAL 10.0))
......
...@@ -87,9 +87,8 @@ endif() ...@@ -87,9 +87,8 @@ endif()
set(light_api_deps set(light_api_deps
scope target_wrapper_host model_parser program) scope target_wrapper_host model_parser program)
if(LITE_WITH_CUDA) if(LITE_WITH_CUDA)
get_property(cuda_static_deps GLOBAL PROPERTY CUDA_STATIC_MODULES)
set(light_api_deps ${light_api_deps} target_wrapper_cuda) set(light_api_deps ${light_api_deps} target_wrapper_cuda)
set(cuda_static_deps cudart_static cublas_static curand_static
cudnn_static culibos_static)
endif() endif()
lite_cc_library(light_api SRCS light_api.cc lite_cc_library(light_api SRCS light_api.cc
DEPS scope target_wrapper_host model_parser DEPS scope target_wrapper_host model_parser
......
if(NOT LITE_WITH_CUDA) if(NOT LITE_WITH_CUDA)
return() return()
endif() endif()
set(cuda_static_deps cudnn_static cublas_static curand_static get_property(cuda_static_deps GLOBAL PROPERTY CUDA_STATIC_MODULES)
culibos_static cudart_static)
nv_library(target_wrapper_cuda SRCS target_wrapper.cc DEPS ${cuda_static_deps}) nv_library(target_wrapper_cuda SRCS target_wrapper.cc DEPS ${cuda_static_deps})
nv_library(cuda_blas SRCS blas.cc DEPS ${cuda_static_deps}) nv_library(cuda_blas SRCS blas.cc DEPS ${cuda_static_deps})
......
...@@ -2,8 +2,7 @@ if(NOT LITE_WITH_CUDA) ...@@ -2,8 +2,7 @@ if(NOT LITE_WITH_CUDA)
return() return()
endif() endif()
set(cuda_static_deps cudnn_static cublas_static curand_static get_property(cuda_static_deps GLOBAL PROPERTY CUDA_STATIC_MODULES)
culibos_static cudart_static)
nv_library(cuda_activation SRCS activation.cu DEPS ${cuda_static_deps}) nv_library(cuda_activation SRCS activation.cu DEPS ${cuda_static_deps})
nv_library(cuda_scale SRCS scale.cu DEPS ${cuda_static_deps}) nv_library(cuda_scale SRCS scale.cu DEPS ${cuda_static_deps})
......
...@@ -207,13 +207,6 @@ class Context<TargetType::kCUDA> { ...@@ -207,13 +207,6 @@ class Context<TargetType::kCUDA> {
ctx->cublas_fp32_ = cublas_fp32_; ctx->cublas_fp32_ = cublas_fp32_;
} }
CUDAContext& operator=(const CUDAContext& context) {
this->Init(
context.device_id_, context.exec_stream_id_, context.io_stream_id_);
this->cublas_fp32_ = context.cublas_fp32_;
return *this;
}
const cudaStream_t& exec_stream() const { return exec_stream_; } const cudaStream_t& exec_stream() const { return exec_stream_; }
void SetExecStream(cudaStream_t stream) { exec_stream_ = stream; } void SetExecStream(cudaStream_t stream) { exec_stream_ = stream; }
......
...@@ -9,7 +9,7 @@ add_kernel(io_copy_compute_cuda CUDA basic SRCS io_copy_compute.cc DEPS ${lite_k ...@@ -9,7 +9,7 @@ add_kernel(io_copy_compute_cuda CUDA basic SRCS io_copy_compute.cc DEPS ${lite_k
add_kernel(leaky_relu_compute_cuda CUDA basic SRCS leaky_relu_compute.cu DEPS ${lite_kernel_deps}) add_kernel(leaky_relu_compute_cuda CUDA basic SRCS leaky_relu_compute.cu DEPS ${lite_kernel_deps})
add_kernel(relu_compute_cuda CUDA basic SRCS relu_compute.cu DEPS ${lite_kernel_deps}) add_kernel(relu_compute_cuda CUDA basic SRCS relu_compute.cu DEPS ${lite_kernel_deps})
add_kernel(yolo_box_compute_cuda CUDA basic SRCS yolo_box_compute.cu DEPS ${lite_kernel_deps}) add_kernel(yolo_box_compute_cuda CUDA basic SRCS yolo_box_compute.cu DEPS ${lite_kernel_deps})
add_kernel(sequence_pool_compute_cuda CUDA extra SRCS sequence_pool_compute.cu DEPS ${lite_kernel_deps}) #add_kernel(sequence_pool_compute_cuda CUDA extra SRCS sequence_pool_compute.cu DEPS ${lite_kernel_deps})
add_kernel(transpose_compute_cuda CUDA basic SRCS transpose_compute.cu DEPS ${lite_kernel_deps} ${math_cuda} cuda_transpose) add_kernel(transpose_compute_cuda CUDA basic SRCS transpose_compute.cu DEPS ${lite_kernel_deps} ${math_cuda} cuda_transpose)
add_kernel(nearest_interp_compute_cuda CUDA basic SRCS nearest_interp_compute.cu DEPS ${lite_kernel_deps}) add_kernel(nearest_interp_compute_cuda CUDA basic SRCS nearest_interp_compute.cu DEPS ${lite_kernel_deps})
add_kernel(conv2d_cuda CUDA basic SRCS conv_compute.cc DEPS ${lite_kernel_deps} ${math_cuda}) add_kernel(conv2d_cuda CUDA basic SRCS conv_compute.cc DEPS ${lite_kernel_deps} ${math_cuda})
...@@ -44,7 +44,7 @@ nv_test(yolo_box_compute_cuda_test SRCS yolo_box_compute_test.cc DEPS yolo_box_c ...@@ -44,7 +44,7 @@ nv_test(yolo_box_compute_cuda_test SRCS yolo_box_compute_test.cc DEPS yolo_box_c
nv_test(transpose_compute_cuda_test SRCS transpose_compute_test.cc DEPS transpose_compute_cuda) nv_test(transpose_compute_cuda_test SRCS transpose_compute_test.cc DEPS transpose_compute_cuda)
nv_test(concat_compute_cuda_test SRCS concat_compute_test.cc DEPS concat_compute_cuda) nv_test(concat_compute_cuda_test SRCS concat_compute_test.cc DEPS concat_compute_cuda)
nv_test(elementwise_add_compute_cuda_test SRCS elementwise_add_compute_test.cc DEPS elementwise_add_compute_cuda) nv_test(elementwise_add_compute_cuda_test SRCS elementwise_add_compute_test.cc DEPS elementwise_add_compute_cuda)
nv_test(sequence_pool_compute_cuda_test SRCS sequence_pool_compute_test.cc DEPS sequence_pool_compute_cuda) #nv_test(sequence_pool_compute_cuda_test SRCS sequence_pool_compute_test.cc DEPS sequence_pool_compute_cuda)
nv_test(softmax_compute_cuda_test SRCS softmax_compute_test.cc DEPS softmax_compute_cuda) nv_test(softmax_compute_cuda_test SRCS softmax_compute_test.cc DEPS softmax_compute_cuda)
#nv_test(layout_cuda_test SRCS layout_compute_test.cc DEPS layout_compute_cuda) #nv_test(layout_cuda_test SRCS layout_compute_test.cc DEPS layout_compute_cuda)
nv_test(mul_compute_cuda_test SRCS mul_compute_test.cc DEPS mul_compute_cuda) nv_test(mul_compute_cuda_test SRCS mul_compute_test.cc DEPS mul_compute_cuda)
...@@ -60,7 +60,7 @@ if(LITE_BUILD_EXTRA) ...@@ -60,7 +60,7 @@ if(LITE_BUILD_EXTRA)
nv_test(search_seq_depadding_compute_cuda_test SRCS search_seq_depadding_compute_test.cc DEPS search_seq_depadding_compute_cuda) nv_test(search_seq_depadding_compute_cuda_test SRCS search_seq_depadding_compute_test.cc DEPS search_seq_depadding_compute_cuda)
nv_test(match_matrix_tensor_compute_cuda_test SRCS match_matrix_tensor_compute_test.cc DEPS match_matrix_tensor_compute_cuda) nv_test(match_matrix_tensor_compute_cuda_test SRCS match_matrix_tensor_compute_test.cc DEPS match_matrix_tensor_compute_cuda)
nv_test(search_grnn_compute_cuda_test SRCS search_grnn_compute_test.cc DEPS search_grnn_compute_cuda) nv_test(search_grnn_compute_cuda_test SRCS search_grnn_compute_test.cc DEPS search_grnn_compute_cuda)
nv_test(sequence_pool_compute_cuda_test SRCS sequence_pool_compute_test.cc DEPS sequence_pool_compute_cuda sequence_pooling) #nv_test(sequence_pool_compute_cuda_test SRCS sequence_pool_compute_test.cc DEPS sequence_pool_compute_cuda sequence_pooling)
nv_test(lookup_table_compute_cuda_test SRCS lookup_table_compute_test.cc DEPS lookup_table_compute_cuda) nv_test(lookup_table_compute_cuda_test SRCS lookup_table_compute_test.cc DEPS lookup_table_compute_cuda)
nv_test(search_aligned_mat_mul_compute_cuda_test SRCS search_aligned_mat_mul_compute_test.cc DEPS search_aligned_mat_mul_compute_cuda) nv_test(search_aligned_mat_mul_compute_cuda_test SRCS search_aligned_mat_mul_compute_test.cc DEPS search_aligned_mat_mul_compute_cuda)
nv_test(search_seq_fc_compute_cuda_test SRCS search_seq_fc_compute_test.cc DEPS search_seq_fc_compute_cuda) nv_test(search_seq_fc_compute_cuda_test SRCS search_seq_fc_compute_test.cc DEPS search_seq_fc_compute_cuda)
......
...@@ -27,7 +27,7 @@ ...@@ -27,7 +27,7 @@
#include "lite/model_parser/pb/var_desc.h" #include "lite/model_parser/pb/var_desc.h"
#include "lite/utils/all.h" #include "lite/utils/all.h"
DEFINE_string(model_dir, "", "Model dir path"); DEFINE_string(model_path, "", "Model dir path");
DEFINE_string(input_file, "", "Input datas file path"); DEFINE_string(input_file, "", "Input datas file path");
DEFINE_string(topo_output_file, "", "Runtime topology order output file path"); DEFINE_string(topo_output_file, "", "Runtime topology order output file path");
DEFINE_bool(output_topo, true, "Dump runtime topology or not"); DEFINE_bool(output_topo, true, "Dump runtime topology or not");
...@@ -185,7 +185,7 @@ void ParseConfig(DebugConfig* conf) { ...@@ -185,7 +185,7 @@ void ParseConfig(DebugConfig* conf) {
CHECK(conf); CHECK(conf);
#define CHECK_NON_EMPTY(name__) \ #define CHECK_NON_EMPTY(name__) \
CHECK(!FLAGS_##name__.empty()) << "Option " << #name__ << " can't be empty." CHECK(!FLAGS_##name__.empty()) << "Option " << #name__ << " can't be empty."
CHECK_NON_EMPTY(model_dir); CHECK_NON_EMPTY(model_path);
if (FLAGS_output_topo) { if (FLAGS_output_topo) {
CHECK_NON_EMPTY(topo_output_file); CHECK_NON_EMPTY(topo_output_file);
} }
...@@ -193,7 +193,7 @@ void ParseConfig(DebugConfig* conf) { ...@@ -193,7 +193,7 @@ void ParseConfig(DebugConfig* conf) {
CHECK_NON_EMPTY(tensor_output_file); CHECK_NON_EMPTY(tensor_output_file);
} }
#undef CHECK_NON_EMPTY #undef CHECK_NON_EMPTY
conf->model_dir = FLAGS_model_dir; conf->model_dir = FLAGS_model_path;
conf->topo_output_file = FLAGS_topo_output_file; conf->topo_output_file = FLAGS_topo_output_file;
conf->tensor_output_file = FLAGS_tensor_output_file; conf->tensor_output_file = FLAGS_tensor_output_file;
conf->input_file = FLAGS_input_file; conf->input_file = FLAGS_input_file;
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册