diff --git a/cmake/cuda.cmake b/cmake/cuda.cmake
index 9ff908a4c87d55e87468a06ae0e6085ac165a1b1..b80c35a0e6a849a94ecbe1b4650af4998f2d88c3 100644
--- a/cmake/cuda.cmake
+++ b/cmake/cuda.cmake
@@ -174,15 +174,37 @@ if(NOT WITH_DSO)
     endif(WIN32)
 endif(NOT WITH_DSO)
 
-get_filename_component(CUDA_LIB_PATH ${CUDA_curand_LIBRARY} DIRECTORY)
-function(import_static_library alias path)
+# add_cuda_static_lib(<alias> <cuda_lib_paths> <file_name>)
+#
+# Declares <alias> as a GLOBAL IMPORTED STATIC library. Its location is found by
+# searching for <file_name> only in the directories stored in the variable whose
+# *name* is passed as <cuda_lib_paths> (default system paths are not searched).
+# <alias> is also appended to CUDA_STATIC_MODULES in the caller's scope.
+function(add_cuda_static_lib alias cuda_lib_paths file_name)
+    # find_library() caches its result; clear it so this call searches again
+    # instead of reusing the library found by a previous call.
+    unset(CUDA_STATIC_LIB_ABS_PATH CACHE)
+    find_library(CUDA_STATIC_LIB_ABS_PATH NAMES ${file_name} PATHS ${${cuda_lib_paths}} NO_DEFAULT_PATH)
+    if(NOT CUDA_STATIC_LIB_ABS_PATH)
+        message(WARNING "add_cuda_static_lib: ${file_name} (target ${alias}) was not found in: ${${cuda_lib_paths}}")
+    endif()
     add_library(${alias} STATIC IMPORTED GLOBAL)
-    set_property(TARGET ${alias} PROPERTY IMPORTED_LOCATION ${path})
+    set_property(TARGET ${alias} PROPERTY IMPORTED_LOCATION "${CUDA_STATIC_LIB_ABS_PATH}")
+    set(CUDA_STATIC_MODULES ${CUDA_STATIC_MODULES} ${alias} PARENT_SCOPE)
 endfunction()
-import_static_library(cudart_static ${CUDA_LIB_PATH}/libcudart_static.a)
-import_static_library(cublas_static ${CUDA_LIB_PATH}/libcublas_static.a)
-import_static_library(curand_static ${CUDA_LIB_PATH}/libcurand_static.a)
-import_static_library(culibos_static ${CUDA_LIB_PATH}/libculibos.a)
+
+# The calls below define the order of CUDA_STATIC_MODULES, which consumers pass
+# on as their DEPS list. culibos is registered last so that it follows the
+# archives that use it (cublas, cublasLt, curand) on a static link line.
+add_cuda_static_lib(cudart_static CUDNN_CHECK_LIBRARY_DIRS libcudart_static.a)
+add_cuda_static_lib(cublas_static CUDNN_CHECK_LIBRARY_DIRS libcublas_static.a)
+if(NOT (CUDA_VERSION VERSION_LESS 10.0))
+  add_cuda_static_lib(cublasLt_static CUDNN_CHECK_LIBRARY_DIRS libcublasLt_static.a)
+endif()
+add_cuda_static_lib(curand_static CUDNN_CHECK_LIBRARY_DIRS libcurand_static.a)
+add_cuda_static_lib(culibos_static CUDNN_CHECK_LIBRARY_DIRS libculibos.a)
+
+set_property(GLOBAL PROPERTY CUDA_STATIC_MODULES cudnn_static ${CUDA_STATIC_MODULES})
 
 # setting nvcc arch flags
 select_nvcc_arch_flags(NVCC_FLAGS_EXTRA)
diff --git a/cmake/cudnn.cmake b/cmake/cudnn.cmake
index c0cb4ccea67cd493a30a6be43ee6ee48f70c36bf..574baa86a82963ffa76795e029a6ba14f537c80a 100644
--- a/cmake/cudnn.cmake
+++ b/cmake/cudnn.cmake
@@ -33,7 +33,8 @@ list(APPEND CUDNN_CHECK_LIBRARY_DIRS
     $ENV{CUDNN_ROOT}/lib
     /usr/lib
 	${CUDA_TOOLKIT_ROOT_DIR}
 	${CUDA_TOOLKIT_ROOT_DIR}/lib/x64
+	${CUDA_TOOLKIT_ROOT_DIR}/lib64
 	)
 
 if((${CUDA_VERSION} GREATER 10.0) OR (${CUDA_VERSION} EQUAL 10.0))
diff --git a/lite/api/CMakeLists.txt b/lite/api/CMakeLists.txt
index 63d53869ea530212ea03b24ef746d980fd13a19b..408a63e3f5bd911ec93575d7cd6b2e2ef3b2b2d8 100644
--- a/lite/api/CMakeLists.txt
+++ b/lite/api/CMakeLists.txt
@@ -87,9 +87,8 @@ endif()
 set(light_api_deps
     scope target_wrapper_host model_parser program)
 if(LITE_WITH_CUDA)
+    get_property(cuda_static_deps GLOBAL PROPERTY CUDA_STATIC_MODULES)  # CUDA static lib targets; property is set in cmake/cuda.cmake
     set(light_api_deps ${light_api_deps} target_wrapper_cuda)
-    set(cuda_static_deps cudart_static cublas_static curand_static
-        cudnn_static culibos_static)
 endif()
 lite_cc_library(light_api SRCS light_api.cc
         DEPS scope target_wrapper_host model_parser
diff --git a/lite/backends/cuda/CMakeLists.txt b/lite/backends/cuda/CMakeLists.txt
index a6c3fcc66a789f159cd3a756ed893627b393e1fe..f73b4120e6a48bfdec04d0706a47bcc4a54fcf5e 100644
--- a/lite/backends/cuda/CMakeLists.txt
+++ b/lite/backends/cuda/CMakeLists.txt
@@ -1,8 +1,7 @@
 if(NOT LITE_WITH_CUDA)
     return()
 endif()
-set(cuda_static_deps cudnn_static cublas_static curand_static
-    culibos_static cudart_static)
+get_property(cuda_static_deps GLOBAL PROPERTY CUDA_STATIC_MODULES)  # CUDA static lib targets; property is set in cmake/cuda.cmake
 
 nv_library(target_wrapper_cuda SRCS target_wrapper.cc DEPS ${cuda_static_deps})
 nv_library(cuda_blas SRCS blas.cc DEPS ${cuda_static_deps})
diff --git a/lite/backends/cuda/math/CMakeLists.txt b/lite/backends/cuda/math/CMakeLists.txt
index 1829bcf330aba31708ac97c97d093afbda197908..82acd2d0eab44cf6bad8e5b6a92803ae4afe60b3 100644
--- a/lite/backends/cuda/math/CMakeLists.txt
+++ b/lite/backends/cuda/math/CMakeLists.txt
@@ -2,8 +2,7 @@ if(NOT LITE_WITH_CUDA)
     return()
 endif()
 
-set(cuda_static_deps cudnn_static cublas_static curand_static
-    culibos_static cudart_static)
+get_property(cuda_static_deps GLOBAL PROPERTY CUDA_STATIC_MODULES)  # CUDA static lib targets; property is set in cmake/cuda.cmake
 
 nv_library(cuda_activation SRCS activation.cu DEPS ${cuda_static_deps})
 nv_library(cuda_scale SRCS scale.cu DEPS ${cuda_static_deps})
diff --git a/lite/core/context.h b/lite/core/context.h
index 545c6d2e8804f72a0bde854f9e5ae82c80b2b53c..eb25e7e1d980de9e8f633591fc1320f2a7cd476d 100644
--- a/lite/core/context.h
+++ b/lite/core/context.h
@@ -207,13 +207,6 @@ class Context<TargetType::kCUDA> {
     ctx->cublas_fp32_ = cublas_fp32_;
   }
 
-  CUDAContext& operator=(const CUDAContext& context) {
-    this->Init(
-        context.device_id_, context.exec_stream_id_, context.io_stream_id_);
-    this->cublas_fp32_ = context.cublas_fp32_;
-    return *this;
-  }
-
   const cudaStream_t& exec_stream() const { return exec_stream_; }
   void SetExecStream(cudaStream_t stream) { exec_stream_ = stream; }
 
diff --git a/lite/kernels/cuda/CMakeLists.txt b/lite/kernels/cuda/CMakeLists.txt
index 2b7cd648c947d6b90e7e273dc5eb4cea9ee16b83..8b26b772bd65f9227cb3a063d37d2cd37b127722 100644
--- a/lite/kernels/cuda/CMakeLists.txt
+++ b/lite/kernels/cuda/CMakeLists.txt
@@ -9,7 +9,7 @@ add_kernel(io_copy_compute_cuda CUDA basic SRCS io_copy_compute.cc DEPS ${lite_k
 add_kernel(leaky_relu_compute_cuda CUDA basic SRCS leaky_relu_compute.cu DEPS ${lite_kernel_deps})
 add_kernel(relu_compute_cuda CUDA basic SRCS relu_compute.cu DEPS ${lite_kernel_deps})
 add_kernel(yolo_box_compute_cuda CUDA basic SRCS yolo_box_compute.cu DEPS ${lite_kernel_deps})
-add_kernel(sequence_pool_compute_cuda CUDA extra SRCS sequence_pool_compute.cu DEPS ${lite_kernel_deps})
+#add_kernel(sequence_pool_compute_cuda CUDA extra SRCS sequence_pool_compute.cu DEPS ${lite_kernel_deps})  # TODO(review): kernel disabled without a stated reason -- restore or delete
 add_kernel(transpose_compute_cuda CUDA basic SRCS transpose_compute.cu DEPS ${lite_kernel_deps} ${math_cuda} cuda_transpose)
 add_kernel(nearest_interp_compute_cuda CUDA basic SRCS nearest_interp_compute.cu DEPS ${lite_kernel_deps})
 add_kernel(conv2d_cuda CUDA basic SRCS conv_compute.cc DEPS ${lite_kernel_deps} ${math_cuda})
@@ -44,7 +44,7 @@ nv_test(yolo_box_compute_cuda_test SRCS yolo_box_compute_test.cc DEPS yolo_box_c
 nv_test(transpose_compute_cuda_test SRCS transpose_compute_test.cc DEPS transpose_compute_cuda)
 nv_test(concat_compute_cuda_test SRCS concat_compute_test.cc DEPS concat_compute_cuda)
 nv_test(elementwise_add_compute_cuda_test SRCS elementwise_add_compute_test.cc DEPS elementwise_add_compute_cuda)
-nv_test(sequence_pool_compute_cuda_test SRCS sequence_pool_compute_test.cc DEPS sequence_pool_compute_cuda)
+#nv_test(sequence_pool_compute_cuda_test SRCS sequence_pool_compute_test.cc DEPS sequence_pool_compute_cuda)  # TODO(review): disabled; its DEPS kernel sequence_pool_compute_cuda is commented out earlier in this file
 nv_test(softmax_compute_cuda_test SRCS softmax_compute_test.cc DEPS softmax_compute_cuda)
 #nv_test(layout_cuda_test SRCS layout_compute_test.cc DEPS layout_compute_cuda)
 nv_test(mul_compute_cuda_test SRCS mul_compute_test.cc DEPS mul_compute_cuda)
@@ -60,7 +60,7 @@ if(LITE_BUILD_EXTRA)
     nv_test(search_seq_depadding_compute_cuda_test SRCS search_seq_depadding_compute_test.cc DEPS search_seq_depadding_compute_cuda)
     nv_test(match_matrix_tensor_compute_cuda_test SRCS match_matrix_tensor_compute_test.cc DEPS match_matrix_tensor_compute_cuda)
     nv_test(search_grnn_compute_cuda_test SRCS search_grnn_compute_test.cc DEPS search_grnn_compute_cuda)
-    nv_test(sequence_pool_compute_cuda_test SRCS sequence_pool_compute_test.cc DEPS sequence_pool_compute_cuda sequence_pooling)
+   #nv_test(sequence_pool_compute_cuda_test SRCS sequence_pool_compute_test.cc DEPS sequence_pool_compute_cuda sequence_pooling)  # TODO(review): disabled; its DEPS kernel sequence_pool_compute_cuda is commented out earlier in this file
     nv_test(lookup_table_compute_cuda_test SRCS lookup_table_compute_test.cc DEPS lookup_table_compute_cuda)
     nv_test(search_aligned_mat_mul_compute_cuda_test SRCS search_aligned_mat_mul_compute_test.cc DEPS search_aligned_mat_mul_compute_cuda)
     nv_test(search_seq_fc_compute_cuda_test SRCS search_seq_fc_compute_test.cc DEPS search_seq_fc_compute_cuda)
diff --git a/lite/tools/debug/debug_utils.h b/lite/tools/debug/debug_utils.h
index 7f77b90488657aab96c7942d703e86d64723f5fc..ff08c47e524cacee37e95572a7f7a2fb444d4d16 100644
--- a/lite/tools/debug/debug_utils.h
+++ b/lite/tools/debug/debug_utils.h
@@ -27,7 +27,7 @@
 #include "lite/model_parser/pb/var_desc.h"
 #include "lite/utils/all.h"
 
-DEFINE_string(model_dir, "", "Model dir path");
+DEFINE_string(model_path, "", "Model dir path");  // read by ParseConfig()
 DEFINE_string(input_file, "", "Input datas file path");
 DEFINE_string(topo_output_file, "", "Runtime topology order output file path");
 DEFINE_bool(output_topo, true, "Dump runtime topology or not");
@@ -185,7 +185,7 @@ void ParseConfig(DebugConfig* conf) {
   CHECK(conf);
 #define CHECK_NON_EMPTY(name__) \
   CHECK(!FLAGS_##name__.empty()) << "Option " << #name__ << " can't be empty."
-  CHECK_NON_EMPTY(model_dir);
+  CHECK_NON_EMPTY(model_path);
   if (FLAGS_output_topo) {
     CHECK_NON_EMPTY(topo_output_file);
   }
@@ -193,7 +193,7 @@ void ParseConfig(DebugConfig* conf) {
     CHECK_NON_EMPTY(tensor_output_file);
   }
 #undef CHECK_NON_EMPTY
-  conf->model_dir = FLAGS_model_dir;
+  conf->model_dir = FLAGS_model_path;
   conf->topo_output_file = FLAGS_topo_output_file;
   conf->tensor_output_file = FLAGS_tensor_output_file;
   conf->input_file = FLAGS_input_file;