diff --git a/cmake/paddlepaddle.cmake b/cmake/paddlepaddle.cmake
index 7670444ed1e021376fa44491973bb748cf611ecf..54aae0bdc249c6eacbd4bf6b5cc42cbba9f08784 100644
--- a/cmake/paddlepaddle.cmake
+++ b/cmake/paddlepaddle.cmake
@@ -31,10 +31,10 @@ message( "WITH_GPU = ${WITH_GPU}")
 # Paddle Version should be one of:
 # latest: latest develop build
 # version number like 1.5.2
-SET(PADDLE_VERSION "1.7.2")
+SET(PADDLE_VERSION "1.8.1")
 
 if (WITH_GPU)
-  SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda${CUDA_VERSION_MAJOR}-cudnn7-avx-mkl")
+  SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda10-cudnn7-avx-mkl")
 else()
   if (WITH_AVX)
     if (WITH_MKLML)
@@ -92,8 +92,15 @@ LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib)
 ADD_LIBRARY(openblas STATIC IMPORTED GLOBAL)
 SET_PROPERTY(TARGET openblas PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/openblas/lib/libopenblas.a)
 
-ADD_LIBRARY(paddle_fluid STATIC IMPORTED GLOBAL)
-SET_PROPERTY(TARGET paddle_fluid PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.a)
+ADD_LIBRARY(paddle_fluid SHARED IMPORTED GLOBAL)
+SET_PROPERTY(TARGET paddle_fluid PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.so)
+
+ADD_LIBRARY(nvinfer SHARED IMPORTED GLOBAL)
+SET_PROPERTY(TARGET nvinfer PROPERTY IMPORTED_LOCATION /paddle/third_party/TensorRT-6.0.1.5/lib/libnvinfer.so)
+
+ADD_LIBRARY(nvinfer_plugin SHARED IMPORTED GLOBAL)
+SET_PROPERTY(TARGET nvinfer_plugin PROPERTY IMPORTED_LOCATION /paddle/third_party/TensorRT-6.0.1.5/lib/libnvinfer_plugin.so)
+
 ADD_LIBRARY(xxhash STATIC IMPORTED GLOBAL)
 SET_PROPERTY(TARGET xxhash PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/xxhash/lib/libxxhash.a)
@@ -101,4 +108,4 @@ SET_PROPERTY(TARGET xxhash PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/thir
 
 LIST(APPEND external_project_dependencies paddle)
 LIST(APPEND paddle_depend_libs
-  xxhash)
+  xxhash nvinfer nvinfer_plugin)
diff --git a/core/predictor/framework/infer.h b/core/predictor/framework/infer.h
index 51cfb95a8d56d4261b9dab99df5216c5e6c79733..aedf951d6bb61002a946b3a4737491c3073ed728 100644
--- a/core/predictor/framework/infer.h
+++ b/core/predictor/framework/infer.h
@@ -553,7 +553,7 @@ class CloneDBReloadableInferEngine
 };
 
 template <typename FluidFamilyCore>
-class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
+class FluidInferEngine : public DBReloadableInferEngine<FluidFamilyCore> {
  public:
  FluidInferEngine() {}
  ~FluidInferEngine() {}
diff --git a/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h b/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h
index 2fc6ae587ff26f5f05ff9332f08067ab49d06254..2a4da4b9b03e716b9e8148dbfd0200b887ee66e1 100644
--- a/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h
+++ b/paddle_inference/inferencer-fluid-gpu/include/fluid_gpu_engine.h
@@ -190,7 +190,7 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
 
     paddle::AnalysisConfig analysis_config;
     analysis_config.SetModel(data_path);
-    analysis_config.EnableUseGpu(100, FLAGS_gpuid);
+    analysis_config.EnableUseGpu(1500, FLAGS_gpuid);
     analysis_config.SwitchSpecifyInputNames(true);
     analysis_config.SetCpuMathLibraryNumThreads(1);
@@ -198,11 +198,50 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
       analysis_config.EnableMemoryOptim();
     }
 
-    if (params.enable_ir_optimization()) {
-      analysis_config.SwitchIrOptim(true);
-    } else {
-      analysis_config.SwitchIrOptim(false);
-    }
+    int min_seq_len = 1;
+    int max_seq_len = 512;
+    int opt_seq_len = 128;
+    int head_number = 12;
+    int batch = 50;
+
+    std::vector<int> min_in_shape = {batch, min_seq_len, 1};
+    std::vector<int> max_in_shape = {batch, max_seq_len, 1};
+    std::vector<int> opt_in_shape = {batch, opt_seq_len, 1};
+
+    std::string input1_name = "src_text_a_ids";
+    std::string input2_name = "pos_text_a_ids";
+    std::string input3_name = "sent_text_a_ids";
+    std::string input4_name = "stack_0.tmp_0";
+
+    std::map<std::string, std::vector<int>> min_input_shape = {
+        {input1_name, min_in_shape},
+        {input2_name, min_in_shape},
+        {input3_name, min_in_shape},
+        {input4_name, {batch, head_number, min_seq_len, min_seq_len}},
+    };
+
+    std::map<std::string, std::vector<int>> max_input_shape = {
+        {input1_name, max_in_shape},
+        {input2_name, max_in_shape},
+        {input3_name, max_in_shape},
+        {input4_name, {batch, head_number, max_seq_len, max_seq_len}},
+    };
+    std::map<std::string, std::vector<int>> opt_input_shape = {
+        {input1_name, opt_in_shape},
+        {input2_name, opt_in_shape},
+        {input3_name, opt_in_shape},
+        {input4_name, {batch, head_number, opt_seq_len, opt_seq_len}},
+    };
+
+    analysis_config.EnableTensorRtEngine(
+        1 << 30,
+        batch,
+        5,
+        paddle::AnalysisConfig::Precision::kHalf,
+        true,
+        true);
+    analysis_config.SetTRTDynamicShapeInfo(
+        min_input_shape, max_input_shape, opt_input_shape);
 
     AutoLock lock(GlobalPaddleCreateMutex::instance());
     _core =
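The fluid_gpu_engine.h hunk hard-codes a dynamic-shape profile for an ERNIE-style model (three id inputs plus the stack_0.tmp_0 attention-mask tensor) and switches the analysis predictor to a TensorRT FP16 subgraph engine. Below is a minimal standalone sketch of the same AnalysisConfig setup against the paddle_inference C++ API, useful for confirming that the TensorRT engine actually builds before the configuration is baked into the serving core. The input names and shape ranges mirror the diff; the model-directory argument, the helper name MakeTrtPredictor, and the zero-copy switch are illustrative assumptions, not part of the patch.

```cpp
#include <map>
#include <memory>
#include <string>
#include <vector>

#include "paddle_inference_api.h"  // paddle::AnalysisConfig, paddle::CreatePaddlePredictor

// Sketch only: mirrors the TensorRT dynamic-shape setup from the diff above.
std::unique_ptr<paddle::PaddlePredictor> MakeTrtPredictor(
    const std::string& model_dir, int gpu_id) {
  paddle::AnalysisConfig config;
  config.SetModel(model_dir);
  config.EnableUseGpu(1500, gpu_id);    // 1500 MB initial GPU memory pool
  config.SwitchSpecifyInputNames(true);
  config.SwitchUseFeedFetchOps(false);  // needed for the zero-copy run API

  const int batch = 50, head_number = 12;
  const int min_seq = 1, opt_seq = 128, max_seq = 512;
  std::map<std::string, std::vector<int>> min_shape, max_shape, opt_shape;
  const std::vector<std::string> id_inputs = {
      "src_text_a_ids", "pos_text_a_ids", "sent_text_a_ids"};
  for (const auto& name : id_inputs) {
    min_shape[name] = {batch, min_seq, 1};
    max_shape[name] = {batch, max_seq, 1};
    opt_shape[name] = {batch, opt_seq, 1};
  }
  // Attention mask produced by a stack op; its shape tracks seq_len twice.
  min_shape["stack_0.tmp_0"] = {batch, head_number, min_seq, min_seq};
  max_shape["stack_0.tmp_0"] = {batch, head_number, max_seq, max_seq};
  opt_shape["stack_0.tmp_0"] = {batch, head_number, opt_seq, opt_seq};

  config.EnableTensorRtEngine(1 << 30,  // TensorRT workspace size in bytes
                              batch,    // max batch size
                              5,        // min ops per offloaded subgraph
                              paddle::AnalysisConfig::Precision::kHalf,
                              true,     // use_static: cache serialized engines
                              true);    // use_calib_mode (no-op for FP16)
  config.SetTRTDynamicShapeInfo(min_shape, max_shape, opt_shape);
  return paddle::CreatePaddlePredictor(config);
}
```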
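SetTRTDynamicShapeInfo only declares the [min, max] envelope the engine will specialize for; each request still has to arrive padded to one concrete shape inside that envelope. A usage sketch under the same assumptions (zero-copy tensors, int64 ids, float32 mask; the dtypes are guesses at the ERNIE-style inputs, which the patch itself does not pin down):

```cpp
#include <cstdint>
#include <vector>

#include "paddle_inference_api.h"

// Sketch only: feeds fixed-size padded inputs that fall inside the
// [min, max] dynamic-shape range declared when the predictor was built.
void RunOnce(paddle::PaddlePredictor* predictor) {
  const int batch = 50, seq_len = 128, head_number = 12;

  // Padded token / position / sentence ids (assumed int64, all zeros here).
  std::vector<int64_t> ids(static_cast<size_t>(batch) * seq_len, 0);
  for (const char* name :
       {"src_text_a_ids", "pos_text_a_ids", "sent_text_a_ids"}) {
    auto t = predictor->GetInputTensor(name);
    t->Reshape({batch, seq_len, 1});
    t->copy_from_cpu(ids.data());
  }

  // Attention mask (assumed float32), matching the stack_0.tmp_0 profile.
  std::vector<float> mask(
      static_cast<size_t>(batch) * head_number * seq_len * seq_len, 1.0f);
  auto m = predictor->GetInputTensor("stack_0.tmp_0");
  m->Reshape({batch, head_number, seq_len, seq_len});
  m->copy_from_cpu(mask.data());

  predictor->ZeroCopyRun();

  // Size the output buffer from the reported shape, then copy it out
  // (assumes a float32 output).
  auto out = predictor->GetOutputTensor(predictor->GetOutputNames()[0]);
  int numel = 1;
  for (int d : out->shape()) numel *= d;
  std::vector<float> result(numel);
  out->copy_to_cpu(result.data());
}
```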