diff --git a/inference/CMakeLists.txt b/inference/CMakeLists.txt
index d168639f6aac124308f276459594302eec19ef11..cd7a767888d9a8a8c85f3ac3fd02fa2ce459f613 100644
--- a/inference/CMakeLists.txt
+++ b/inference/CMakeLists.txt
@@ -92,7 +92,7 @@ if (WIN32)
         add_definitions(-DSTATIC_LIB)
     endif()
 else()
-    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -o2 -std=c++11")
+    set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -o2 -fopenmp -std=c++11")
     set(CMAKE_STATIC_LIBRARY_PREFIX "")
 endif()
 
@@ -195,8 +195,8 @@ endif(NOT WIN32)
 if(WITH_GPU)
   if(NOT WIN32)
     if (USE_TENSORRT)
-      set(DEPS ${DEPS} ${PADDLE_DIR}/third_party/install/tensorrt/lib/libnvinfer${CMAKE_STATIC_LIBRARY_SUFFIX})
-      set(DEPS ${DEPS} ${PADDLE_DIR}/third_party/install/tensorrt/lib/libnvinfer_plugin${CMAKE_STATIC_LIBRARY_SUFFIX})
+      set(DEPS ${DEPS} ${PADDLE_DIR}/third_party/install/tensorrt/lib/libnvinfer${CMAKE_SHARED_LIBRARY_SUFFIX})
+      set(DEPS ${DEPS} ${PADDLE_DIR}/third_party/install/tensorrt/lib/libnvinfer_plugin${CMAKE_SHARED_LIBRARY_SUFFIX})
     endif()
     set(DEPS ${DEPS} ${CUDA_LIB}/libcudart${CMAKE_SHARED_LIBRARY_SUFFIX})
     set(DEPS ${DEPS} ${CUDNN_LIB}/libcudnn${CMAKE_SHARED_LIBRARY_SUFFIX})
diff --git a/inference/README.md b/inference/README.md
index 71c6c5d300e1fb3529d4e0c9382d1b7f762c3882..4850cbc47d76e4c88619f881e30ff4c751a1b8e2 100644
--- a/inference/README.md
+++ b/inference/README.md
@@ -123,7 +123,10 @@ DEPLOY:
     RESIZE_MAX_SIZE: 1333
     # Number of input tensors.
     FEEDS_SIZE: 3
-
+    # Whether to enable TensorRT.
+    USE_TRT: 0
+    # Precision used when TensorRT is enabled; FP16, FP32 and INT8 are supported.
+    TRT_MODE: FP16
 ```
 
 Set the `MODEL_PATH` field to the directory containing the model files you downloaded and extracted in the **previous step**. For more details on the configuration fields, see the [deployment configuration reference](./docs/configuration.md).
diff --git a/inference/conf/detection_rcnn.yaml b/inference/conf/detection_rcnn.yaml
index a53698d7fedaaaec790318dade1621ea578eb0b6..f6b24b068ba1b291c78cb9f983b59c27dddd20eb 100644
--- a/inference/conf/detection_rcnn.yaml
+++ b/inference/conf/detection_rcnn.yaml
@@ -13,6 +13,8 @@ DEPLOY:
     CHANNELS : 3
     PRE_PROCESSOR: "DetectionPreProcessor"
     PREDICTOR_MODE: "ANALYSIS"
-    BATCH_SIZE : 1 
+    BATCH_SIZE : 1
     RESIZE_MAX_SIZE: 1333
     FEEDS_SIZE: 3
+    USE_TRT : 0
+    TRT_MODE : FP32
diff --git a/inference/conf/detection_rcnn_fpn.yaml b/inference/conf/detection_rcnn_fpn.yaml
index 9d6635ef8c2b29fb0ca9318d1ec08f1f7be037f7..520471b99bead43e2f9e37b3b08245c393f58258 100644
--- a/inference/conf/detection_rcnn_fpn.yaml
+++ b/inference/conf/detection_rcnn_fpn.yaml
@@ -17,3 +17,5 @@ DEPLOY:
     RESIZE_MAX_SIZE: 1333
     FEEDS_SIZE: 3
     COARSEST_STRIDE: 32
+    USE_TRT : 0
+    TRT_MODE : FP32
diff --git a/inference/conf/yolov3_mobilenet.yaml b/inference/conf/yolov3_mobilenet.yaml
new file mode 100644
index 0000000000000000000000000000000000000000..86dea413fce40d125e3e15cce01d64d55e546880
--- /dev/null
+++ b/inference/conf/yolov3_mobilenet.yaml
@@ -0,0 +1,20 @@
+DEPLOY:
+    USE_GPU: 1
+    MODEL_PATH: "/root/projects/models/yolov3_mobilenet_v1/"
+    MODEL_FILENAME: "__model__"
+    PARAMS_FILENAME: "__params__"
+    EVAL_CROP_SIZE: (320, 320)
+    RESIZE_TYPE: "UNPADDING"
+    TARGET_SHORT_SIZE : 256
+    MEAN: [0.4647, 0.4647, 0.4647]
+    STD: [0.0834, 0.0834, 0.0834]
+    IMAGE_TYPE: "rgb"
+    NUM_CLASSES: 1
+    CHANNELS : 3
+    PRE_PROCESSOR: "DetectionPreProcessor"
+    PREDICTOR_MODE: "ANALYSIS"
+    BATCH_SIZE : 3
+    RESIZE_MAX_SIZE: -1
+    FEEDS_SIZE: 2
+    USE_TRT : 1
+    TRT_MODE : "FP16"
diff --git a/inference/images/detection_rcnn/000000014439.jpg b/inference/images/detection_rcnn/000000014439.jpg
index 0abbdab06eb5950b93908cc91adfa640e8a3ac78..5428bb6b1bb8e58a5f19d107e535ba1e6fa3e658 100644
Binary files a/inference/images/detection_rcnn/000000014439.jpg and b/inference/images/detection_rcnn/000000014439.jpg differ
diff --git a/inference/images/detection_rcnn/000000014439_640x640.jpg b/inference/images/detection_rcnn/000000014439_640x640.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..e1e134ba1bc3596e456b40a1dc0299caa6d65384
Binary files /dev/null and b/inference/images/detection_rcnn/000000014439_640x640.jpg differ
diff --git a/inference/predictor/detection_predictor.cpp b/inference/predictor/detection_predictor.cpp
index e47125c5e3eff159ea6f43740906f58e08dc78aa..9c1791e4104cdd7e69a666b4e1940f26ce1bb142 100644
--- a/inference/predictor/detection_predictor.cpp
+++ b/inference/predictor/detection_predictor.cpp
@@ -32,17 +32,18 @@ namespace PaddleSolution {
         int max_h = -1;
         int max_w = -1;
         for (int i = 0; i < batch_size; ++i) {
-            max_h = (max_h > resize_heights[i])? max_h:resize_heights[i];
-            max_w = (max_w > resize_widths[i])? max_w:resize_widths[i];
+            max_h = (max_h > resize_heights[i])? max_h : resize_heights[i];
+            max_w = (max_w > resize_widths[i])? max_w : resize_widths[i];
         }
+
         max_h = static_cast<int>(ceil(static_cast<float>(max_h)
-            / static_cast<float>(coarsest_stride)) * coarsest_stride);
+                / static_cast<float>(coarsest_stride)) * coarsest_stride);
         max_w = static_cast<int>(ceil(static_cast<float>(max_w)
-            / static_cast<float>(coarsest_stride)) * coarsest_stride);
-        std::cout << "max_w: " << max_w << " max_h: " << max_h << std::endl;
+                / static_cast<float>(coarsest_stride)) * coarsest_stride);
         input_buffer.insert(input_buffer.end(),
                             batch_size * channels * max_h * max_w, 0);
         // flatten tensor and padding
+        #pragma omp parallel for
         for (int i = 0; i < lod_buffer.size(); ++i) {
             float *input_buffer_ptr = input_buffer.data()
                                       + i * channels * max_h * max_w;
@@ -121,6 +122,8 @@
         }
 
         bool use_gpu = _model_config._use_gpu;
+        bool enable_trt = _model_config._enable_trt & use_gpu;
+        auto trt_precision = _model_config._trt_precision;
         const auto& model_dir = _model_config._model_path;
         const auto& model_filename = _model_config._model_file_name;
         const auto& params_filename = _model_config._param_file_name;
@@ -136,11 +139,17 @@
             config.use_gpu = use_gpu;
             config.device = 0;
             _main_predictor = paddle::CreatePaddlePredictor(config);
+
         } else if (_model_config._predictor_mode == "ANALYSIS") {
             paddle::AnalysisConfig config;
             if (use_gpu) {
                 config.EnableUseGpu(100, 0);
             }
+            if (enable_trt) {
+                auto use_cab = (trt_precision == paddle::AnalysisConfig::Precision::kInt8);
+                config.EnableTensorRtEngine(1 << 20, _model_config._batch_size, 40,
+                                            trt_precision, false, use_cab);
+            }
             auto prog_file = utils::path_join(model_dir, model_filename);
             auto param_file = utils::path_join(model_dir, params_filename);
             config.SetModel(prog_file, param_file);
@@ -288,7 +297,6 @@
             }
             feeds.push_back(im_size_tensor);
             _outputs.clear();
-            auto t1 = std::chrono::high_resolution_clock::now();
 
             if (!_main_predictor->Run(feeds, &_outputs, batch_size)) {
 #ifdef _WIN32
@@ -376,7 +384,6 @@
                 std::cout << "Failed to preprocess!"
                           << std::endl;
                 return -1;
             }
-            // flatten tensor
             padding_minibatch(lod_buffer, input_buffer,
                               resize_heights, resize_widths, channels,
@@ -423,7 +430,6 @@
             im_size_tensor->Reshape({batch_size, 2});
             im_size_tensor->copy_from_cpu(image_size.data());
         }
-        auto t1 = std::chrono::high_resolution_clock::now();
 
         _main_predictor->ZeroCopyRun();
         auto t2 = std::chrono::high_resolution_clock::now();
diff --git a/inference/preprocessor/preprocessor_detection.cpp b/inference/preprocessor/preprocessor_detection.cpp
index 15d2dc3c4b6255b8639d3296da7be4d22764f475..d60a625947d041cb663e97ce2057c093eeb540b4 100644
--- a/inference/preprocessor/preprocessor_detection.cpp
+++ b/inference/preprocessor/preprocessor_detection.cpp
@@ -62,16 +62,22 @@ bool DetectionPreProcessor::single_process(const std::string& fname,
     int rw = im.cols;
     int rh = im.rows;
     float im_scale_ratio;
-    utils::scaling(_config->_resize_type, rw, rh, _config->_resize[0],
-                   _config->_resize[1], _config->_target_short_size,
-                   _config->_resize_max_size, im_scale_ratio);
+    if (!_config->_enable_trt) {
+        utils::scaling(_config->_resize_type, rw, rh, _config->_resize[0],
+                       _config->_resize[1], _config->_target_short_size,
+                       _config->_resize_max_size, im_scale_ratio);
+    } else {
+        // TensorRT 5 only supports fixed-shape input
+        rw = _config->_resize[0];
+        rh = _config->_resize[1];
+    }
     cv::Size resize_size(rw, rh);
     *resize_w = rw;
     *resize_h = rh;
     *scale_ratio = im_scale_ratio;
     if (*ori_h != rh || *ori_w != rw) {
         cv::Mat im_temp;
-        if (_config->_resize_type == utils::SCALE_TYPE::UNPADDING) {
+        if (_config->_enable_trt || _config->_resize_type == utils::SCALE_TYPE::UNPADDING) {
             cv::resize(im, im_temp, resize_size, 0, 0, cv::INTER_LINEAR);
         } else if (_config->_resize_type == utils::SCALE_TYPE::RANGE_SCALING) {
             cv::resize(im, im_temp, cv::Size(), im_scale_ratio,
@@ -85,6 +91,7 @@ bool DetectionPreProcessor::single_process(const std::string& fname,
 
     float* pmean = _config->_mean.data();
     float* pscale = _config->_std.data();
+    #pragma omp parallel for
     for (int h = 0; h < rh; ++h) {
         const uchar* uptr = im.ptr<uchar>(h);
         const float* fptr = im.ptr<float>(h);
diff --git a/inference/utils/conf_parser.h b/inference/utils/conf_parser.h
index 97461a6e92eee1373cec0ff22e9f9387b77766d3..a4055b2faae5d67a67f8971a96c83f915d70c7c0 100644
--- a/inference/utils/conf_parser.h
+++ b/inference/utils/conf_parser.h
@@ -18,6 +18,7 @@
 #include
 #include
 #include
+#include
 
 namespace PaddleSolution {
 
@@ -30,6 +31,7 @@ class PaddleModelConfigPaser {
           _channels(0),
           _use_gpu(0),
           _batch_size(1),
+          _enable_trt(false),
           _target_short_size(0),
           _model_file_name("__model__"),
           _param_file_name("__params__"),
@@ -58,6 +60,7 @@
         _resize_max_size = 0;
         _feeds_size = 1;
         _coarsest_stride = 1;
+        _enable_trt = false;
     }
 
     std::string process_parenthesis(const std::string& str) {
@@ -214,6 +217,34 @@
         if (config["DEPLOY"]["COARSEST_STRIDE"].IsDefined()) {
             _coarsest_stride = config["DEPLOY"]["COARSEST_STRIDE"].as<int>();
         }
+        // 20. enable_trt
+        if (config["DEPLOY"]["USE_TRT"].IsDefined()) {
+            _enable_trt = config["DEPLOY"]["USE_TRT"].as<int>();
+            _enable_trt &= _use_gpu;
+        } else {
+            _enable_trt = false;
+        }
+        if (_enable_trt) {
+            std::string trt_mode = "";
+            if (config["DEPLOY"]["TRT_MODE"].IsDefined()) {
+                trt_mode = config["DEPLOY"]["TRT_MODE"].as<std::string>();
+            } else {
+                trt_mode = "FP32";
+            }
+
+            if (trt_mode == "FP16") {
+                _trt_precision = paddle::AnalysisConfig::Precision::kHalf;
+            } else if (trt_mode == "FP32") {
+                _trt_precision = paddle::AnalysisConfig::Precision::kFloat32;
+            } else if (trt_mode == "INT8") {
+                _trt_precision = paddle::AnalysisConfig::Precision::kInt8;
+            } else {
+                _enable_trt = false;
+            }
+        }
+        if (_predictor_mode == "NATIVE") {
+            _enable_trt = false;
+        }
 
         return true;
     }
@@ -293,5 +324,9 @@
     std::string _predictor_mode;
     // DEPLOY.BATCH_SIZE
     int _batch_size;
+    // whether to enable TensorRT (DEPLOY.USE_TRT)
+    bool _enable_trt;
+    // TensorRT precision (DEPLOY.TRT_MODE)
+    paddle::AnalysisConfig::Precision _trt_precision;
 };
 }  // namespace PaddleSolution