Unverified commit 12d28500 authored by TeslaZhao, committed by GitHub

Merge pull request #2 from PaddlePaddle/develop

Sync PaddlePaddle/Serving to TeslaZhao/Serving 
@@ -54,6 +54,7 @@ option(SERVER "Compile Paddle Serving Server" OFF)
 option(APP "Compile Paddle Serving App package" OFF)
 option(WITH_ELASTIC_CTR "Compile ELASITC-CTR solution" OFF)
 option(PACK "Compile for whl" OFF)
+option(WITH_TRT "Compile Paddle Serving with TRT" OFF)
 set(WITH_MKLML ${WITH_MKL})
 if (NOT DEFINED WITH_MKLDNN)
......
@@ -45,9 +45,10 @@ nvidia-docker exec -it test bash
 ```
 ```shell
-pip install paddle-serving-client
-pip install paddle-serving-server # CPU
-pip install paddle-serving-server-gpu # GPU
+pip install paddle-serving-client==0.3.2
+pip install paddle-serving-server==0.3.2 # CPU
+pip install paddle-serving-server-gpu==0.3.2.post9 # GPU with CUDA9.0
+pip install paddle-serving-server-gpu==0.3.2.post10 # GPU with CUDA10.0
 ```
 You may need to use a domestic mirror source (in China, you can use the Tsinghua mirror source, add `-i https://pypi.tuna.tsinghua.edu.cn/simple` to pip command) to speed up the download.
@@ -127,6 +128,7 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
 | `mem_optim_off` | - | - | Disable memory / graphic memory optimization |
 | `ir_optim` | - | - | Enable analysis and optimization of calculation graph |
 | `use_mkl` (Only for cpu version) | - | - | Run inference with MKL |
+| `use_trt` (Only for trt version) | - | - | Run inference with TensorRT |
 Here, we use `curl` to send a HTTP POST request to the service we just started. Users can use any python library to send HTTP POST as well, e.g, [requests](https://requests.readthedocs.io/en/master/).
 </center>
......
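The README hunk above notes that any HTTP client works in place of `curl`. As a minimal, hypothetical sketch (assuming the uci_housing quick-start service was started with `--name uci` on the port 9292 mapped in the docker commands, and using purely illustrative feed values for the 13 housing features), such a request could look like:

```shell
# Hypothetical request; host, port, URL name and feed values all depend on how the service was started.
curl -H "Content-Type:application/json" -X POST \
    -d '{"feed": [{"x": [0.0137, -0.1136, 0.2553, -0.0062, 0.0749, 0.0291, -0.5562, -0.0273, -0.4737, -0.0651, -0.2183, 0.0893, 0.1583]}], "fetch": ["price"]}' \
    http://127.0.0.1:9292/uci/prediction
```

The same JSON body can be posted from Python with `requests.post("http://127.0.0.1:9292/uci/prediction", json=payload)`.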
@@ -47,9 +47,10 @@ nvidia-docker run -p 9292:9292 --name test -dit hub.baidubce.com/paddlepaddle/se
 nvidia-docker exec -it test bash
 ```
 ```shell
-pip install paddle-serving-client
-pip install paddle-serving-server # CPU
-pip install paddle-serving-server-gpu # GPU
+pip install paddle-serving-client==0.3.2
+pip install paddle-serving-server==0.3.2 # CPU
+pip install paddle-serving-server-gpu==0.3.2.post9 # GPU with CUDA9.0
+pip install paddle-serving-server-gpu==0.3.2.post10 # GPU with CUDA10.0
 ```
 You may need to use a domestic mirror source (for example the Tsinghua mirror; add `-i https://pypi.tuna.tsinghua.edu.cn/simple` to the pip command) to speed up the download.
@@ -123,6 +124,7 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po
 | `mem_optim_off` | - | - | Disable memory optimization |
 | `ir_optim` | - | - | Enable analysis and optimization of calculation graph |
 | `use_mkl` (Only for cpu version) | - | - | Run inference with MKL |
+| `use_trt` (Only for trt version) | - | - | Run inference with TensorRT |
 We use the `curl` command to send an HTTP POST request to the service we just started. Users can also send HTTP POST requests from a python library; see [requests](https://requests.readthedocs.io/en/master/).
 </center>
......
@@ -31,10 +31,14 @@ message( "WITH_GPU = ${WITH_GPU}")
 # Paddle Version should be one of:
 # latest: latest develop build
 # version number like 1.5.2
-SET(PADDLE_VERSION "1.7.2")
+SET(PADDLE_VERSION "1.8.4")
 
 if (WITH_GPU)
-  SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda${CUDA_VERSION_MAJOR}-cudnn7-avx-mkl")
+  if (WITH_TRT)
+    SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda10.1-cudnn7.6-avx-mkl-trt6")
+  else()
+    SET(PADDLE_LIB_VERSION "${PADDLE_VERSION}-gpu-cuda10-cudnn7-avx-mkl")
+  endif()
 else()
   if (WITH_AVX)
     if (WITH_MKLML)
@@ -50,21 +54,38 @@ endif()
 SET(PADDLE_LIB_PATH "http://paddle-inference-lib.bj.bcebos.com/${PADDLE_LIB_VERSION}/fluid_inference.tgz")
 MESSAGE(STATUS "PADDLE_LIB_PATH=${PADDLE_LIB_PATH}")
 if (WITH_GPU OR WITH_MKLML)
-  ExternalProject_Add(
-    "extern_paddle"
-    ${EXTERNAL_PROJECT_LOG_ARGS}
-    URL "${PADDLE_LIB_PATH}"
-    PREFIX "${PADDLE_SOURCES_DIR}"
-    DOWNLOAD_DIR "${PADDLE_DOWNLOAD_DIR}"
-    CONFIGURE_COMMAND ""
-    BUILD_COMMAND ""
-    UPDATE_COMMAND ""
-    INSTALL_COMMAND
-      ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/paddle/include ${PADDLE_INSTALL_DIR}/include &&
-      ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/paddle/lib ${PADDLE_INSTALL_DIR}/lib &&
-      ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/third_party ${PADDLE_INSTALL_DIR}/third_party &&
-      ${CMAKE_COMMAND} -E copy ${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib/libmkldnn.so.0 ${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib/libmkldnn.so
-  )
+  if (WITH_TRT)
+    ExternalProject_Add(
+      "extern_paddle"
+      ${EXTERNAL_PROJECT_LOG_ARGS}
+      URL "${PADDLE_LIB_PATH}"
+      PREFIX "${PADDLE_SOURCES_DIR}"
+      DOWNLOAD_DIR "${PADDLE_DOWNLOAD_DIR}"
+      CONFIGURE_COMMAND ""
+      BUILD_COMMAND ""
+      UPDATE_COMMAND ""
+      INSTALL_COMMAND
+        ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/paddle/include ${PADDLE_INSTALL_DIR}/include &&
+        ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/paddle/lib ${PADDLE_INSTALL_DIR}/lib &&
+        ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/third_party ${PADDLE_INSTALL_DIR}/third_party
+    )
+  else()
+    ExternalProject_Add(
+      "extern_paddle"
+      ${EXTERNAL_PROJECT_LOG_ARGS}
+      URL "${PADDLE_LIB_PATH}"
+      PREFIX "${PADDLE_SOURCES_DIR}"
+      DOWNLOAD_DIR "${PADDLE_DOWNLOAD_DIR}"
+      CONFIGURE_COMMAND ""
+      BUILD_COMMAND ""
+      UPDATE_COMMAND ""
+      INSTALL_COMMAND
+        ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/paddle/include ${PADDLE_INSTALL_DIR}/include &&
+        ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/paddle/lib ${PADDLE_INSTALL_DIR}/lib &&
+        ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/third_party ${PADDLE_INSTALL_DIR}/third_party &&
+        ${CMAKE_COMMAND} -E copy ${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib/libmkldnn.so.0 ${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib/libmkldnn.so
+    )
+  endif()
 else()
   ExternalProject_Add(
     "extern_paddle"
@@ -92,8 +113,16 @@ LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib)
 ADD_LIBRARY(openblas STATIC IMPORTED GLOBAL)
 SET_PROPERTY(TARGET openblas PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/openblas/lib/libopenblas.a)
 
-ADD_LIBRARY(paddle_fluid STATIC IMPORTED GLOBAL)
-SET_PROPERTY(TARGET paddle_fluid PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.a)
+ADD_LIBRARY(paddle_fluid SHARED IMPORTED GLOBAL)
+SET_PROPERTY(TARGET paddle_fluid PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_fluid.so)
+
+if (WITH_TRT)
+  ADD_LIBRARY(nvinfer SHARED IMPORTED GLOBAL)
+  SET_PROPERTY(TARGET nvinfer PROPERTY IMPORTED_LOCATION ${TENSORRT_ROOT}/lib/libnvinfer.so)
+
+  ADD_LIBRARY(nvinfer_plugin SHARED IMPORTED GLOBAL)
+  SET_PROPERTY(TARGET nvinfer_plugin PROPERTY IMPORTED_LOCATION ${TENSORRT_ROOT}/lib/libnvinfer_plugin.so)
+endif()
 
 ADD_LIBRARY(xxhash STATIC IMPORTED GLOBAL)
 SET_PROPERTY(TARGET xxhash PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/xxhash/lib/libxxhash.a)
@@ -101,4 +130,9 @@ SET_PROPERTY(TARGET xxhash PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/thir
 LIST(APPEND external_project_dependencies paddle)
 
 LIST(APPEND paddle_depend_libs
   xxhash)
+
+if(WITH_TRT)
+  LIST(APPEND paddle_depend_libs
+    nvinfer nvinfer_plugin)
+endif()
@@ -44,6 +44,7 @@ message EngineDesc {
   optional bool static_optimization = 14;
   optional bool force_update_static_cache = 15;
   optional bool enable_ir_optimization = 16;
+  optional bool use_trt = 17;
 };
 
 // model_toolkit conf
......
@@ -12,8 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License
 
+#execute_process(COMMAND go env -w GO111MODULE=off)
 add_subdirectory(cube-server)
 add_subdirectory(cube-api)
 add_subdirectory(cube-builder)
-add_subdirectory(cube-transfer)
-add_subdirectory(cube-agent)
+#add_subdirectory(cube-transfer)
+#add_subdirectory(cube-agent)
@@ -9,7 +9,7 @@ endif()
 target_include_directories(serving PUBLIC
     ${CMAKE_CURRENT_BINARY_DIR}/../../core/predictor
     )
+include_directories(${CUDNN_ROOT}/include/)
 if(WITH_GPU)
     target_link_libraries(serving -Wl,--whole-archive fluid_gpu_engine
         -Wl,--no-whole-archive)
@@ -29,7 +29,11 @@ if(WITH_GPU)
 endif()
 
 if(WITH_MKL OR WITH_GPU)
+if (WITH_TRT)
+    target_link_libraries(serving -liomp5 -lmklml_intel -lpthread -lcrypto -lm -lrt -lssl -ldl -lz -lbz2)
+else()
     target_link_libraries(serving -liomp5 -lmklml_intel -lmkldnn -lpthread -lcrypto -lm -lrt -lssl -ldl -lz -lbz2)
+endif()
 else()
     target_link_libraries(serving openblas -lpthread -lcrypto -lm -lrt -lssl -ldl -lz -lbz2)
 endif()
......
@@ -155,9 +155,11 @@ int GeneralResponseOp::inference() {
   }
   if (model_config->_is_lod_fetch[idx]) {
-    for (int j = 0; j < in->at(idx).lod[0].size(); ++j) {
-      fetch_p->mutable_tensor_array(var_idx)->add_lod(
-          in->at(idx).lod[0][j]);
+    if (in->at(idx).lod.size() > 0) {
+      for (int j = 0; j < in->at(idx).lod[0].size(); ++j) {
+        fetch_p->mutable_tensor_array(var_idx)->add_lod(
+            in->at(idx).lod[0][j]);
+      }
     }
   }
......
@@ -13,7 +13,9 @@ set_source_files_properties(
     PROPERTIES
     COMPILE_FLAGS "-Wno-strict-aliasing -Wno-unused-variable -Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
 add_dependencies(pdserving protobuf boost brpc leveldb pdcodegen configure)
+if (WITH_TRT)
+    add_definitions(-DWITH_TRT)
+endif()
 target_link_libraries(pdserving
     brpc protobuf boost leveldb configure -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
......
@@ -38,6 +38,7 @@ class InferEngineCreationParams {
     _enable_ir_optimization = false;
     _static_optimization = false;
     _force_update_static_cache = false;
+    _use_trt = false;
   }
 
   void set_path(const std::string& path) { _path = path; }
@@ -50,12 +51,16 @@ class InferEngineCreationParams {
     _enable_ir_optimization = enable_ir_optimization;
   }
 
+  void set_use_trt(bool use_trt) { _use_trt = use_trt; }
+
   bool enable_memory_optimization() const {
     return _enable_memory_optimization;
   }
 
   bool enable_ir_optimization() const { return _enable_ir_optimization; }
 
+  bool use_trt() const { return _use_trt; }
+
   void set_static_optimization(bool static_optimization = false) {
     _static_optimization = static_optimization;
   }
@@ -86,6 +91,7 @@ class InferEngineCreationParams {
   bool _enable_ir_optimization;
   bool _static_optimization;
   bool _force_update_static_cache;
+  bool _use_trt;
 };
 
 class InferEngine {
@@ -172,6 +178,10 @@ class ReloadableInferEngine : public InferEngine {
           force_update_static_cache);
     }
 
+    if (conf.has_use_trt()) {
+      _infer_engine_params.set_use_trt(conf.use_trt());
+    }
+
     if (!check_need_reload() || load(_infer_engine_params) != 0) {
       LOG(ERROR) << "Failed load model_data_path" << _model_data_path;
       return -1;
@@ -553,8 +563,12 @@ class CloneDBReloadableInferEngine
 };
 
 template <typename FluidFamilyCore>
+#ifdef WITH_TRT
+class FluidInferEngine : public DBReloadableInferEngine<FluidFamilyCore> {
+#else
 class FluidInferEngine : public CloneDBReloadableInferEngine<FluidFamilyCore> {
- public:
+#endif
+ public:  // NOLINT
   FluidInferEngine() {}
   ~FluidInferEngine() {}
......
@@ -51,8 +51,8 @@ class WeightedRandomRender : public EndpointRouterBase {
         new (std::nothrow) Factory<WeightedRandomRender, EndpointRouterBase>();
     if (factory == NULL) {
       RAW_LOG(ERROR,
-              "Failed regist factory: WeightedRandomRender->EndpointRouterBase \
-          in macro!");
+              "Failed regist factory: WeightedRandomRender->EndpointRouterBase "
+              "in macro!");
       return -1;
     }
@@ -63,8 +63,8 @@ class WeightedRandomRender : public EndpointRouterBase {
     if (FactoryPool<EndpointRouterBase>::instance().register_factory(
             "WeightedRandomRender", factory) != 0) {
       RAW_LOG(INFO,
-              "Factory has been registed: \
-          WeightedRandomRender->EndpointRouterBase.");
+              "Factory has been registed: "
+              "WeightedRandomRender->EndpointRouterBase.");
     }
     return 0;
......
@@ -75,10 +75,12 @@ export PATH=$PATH:$GOPATH/bin
 ## Get go packages
 
 ```shell
-go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway
-go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger
-go get -u github.com/golang/protobuf/protoc-gen-go
-go get -u google.golang.org/grpc
+go env -w GO111MODULE=on
+go env -w GOPROXY=https://goproxy.cn,direct
+go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway@v1.15.2
+go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger@v1.15.2
+go get -u github.com/golang/protobuf/protoc-gen-go@v1.4.3
+go get -u google.golang.org/grpc@v1.33.0
 ```
@@ -89,9 +91,9 @@ go get -u google.golang.org/grpc
 ``` shell
 mkdir server-build-cpu && cd server-build-cpu
 cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
     -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
     -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
     -DSERVER=ON ..
 make -j10
 ```
@@ -102,10 +104,28 @@ you can execute `make install` to put targets under directory `./output`, you ne
 ``` shell
 mkdir server-build-gpu && cd server-build-gpu
 cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
     -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
     -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
-    -DSERVER=ON \
-    -DWITH_GPU=ON ..
+    -DCUDA_TOOLKIT_ROOT_DIR=${CUDA_PATH} \
+    -DCUDNN_LIBRARY=${CUDNN_LIBRARY} \
+    -DSERVER=ON \
+    -DWITH_GPU=ON ..
+make -j10
+```
+
+### Integrated TRT version paddle inference library
+
+```
+mkdir server-build-trt && cd server-build-trt
+cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
+    -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
+    -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
+    -DTENSORRT_ROOT=${TENSORRT_LIBRARY_PATH} \
+    -DCUDA_TOOLKIT_ROOT_DIR=${CUDA_PATH} \
+    -DCUDNN_LIBRARY=${CUDNN_LIBRARY} \
+    -DSERVER=ON \
+    -DWITH_GPU=ON \
+    -DWITH_TRT=ON ..
 make -j10
 ```
@@ -134,7 +154,10 @@ execute `make install` to put targets under directory `./output`
 ```bash
 mkdir app-build && cd app-build
-cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DAPP=ON ..
+cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
+    -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
+    -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
+    -DAPP=ON ..
 make
 ```
@@ -165,7 +188,9 @@ Please use the example under `python/examples` to verify.
 | WITH_AVX | Compile Paddle Serving with AVX intrinsics | OFF |
 | WITH_MKL | Compile Paddle Serving with MKL support | OFF |
 | WITH_GPU | Compile Paddle Serving with NVIDIA GPU | OFF |
-| CUDNN_ROOT | Define CuDNN library and header path | |
+| CUDNN_LIBRARY | Define CuDNN library and header path | |
+| CUDA_TOOLKIT_ROOT_DIR | Define CUDA PATH | |
+| TENSORRT_ROOT | Define TensorRT PATH | |
 | CLIENT | Compile Paddle Serving Client | OFF |
 | SERVER | Compile Paddle Serving Server | OFF |
 | APP | Compile Paddle Serving App package | OFF |
@@ -180,7 +205,8 @@ To compile the Paddle Serving GPU version on bare metal, you need to install the
 - CUDA
 - CuDNN
-- NCCL2
+
+To compile the TensorRT version, you need to install the TensorRT library.
 
 Note here:
@@ -190,21 +216,12 @@ Note here:
 The following is the base library version matching relationship used by the PaddlePaddle release version for reference:
 
-| | CUDA | CuDNN | NCCL2 |
-| :----: | :-----: | :----------------------: | :----: |
-| CUDA 8 | 8.0.61 | CuDNN 7.1.2 for CUDA 8.0 | 2.1.4 |
-| CUDA 9 | 9.0.176 | CuDNN 7.3.1 for CUDA 9.0 | 2.2.12 |
+| | CUDA | CuDNN | TensorRT |
+| :----: | :-----: | :----------------------: | :----: |
+| post9 | 9.0 | CuDNN 7.3.1 for CUDA 9.0 | |
+| post10 | 10.0 | CuDNN 7.5.1 for CUDA 10.0| |
+| trt | 10.1 | CuDNN 7.5.1 for CUDA 10.1| 6.0.1.5 |
 
 ### How to make the compiler detect the CuDNN library
 
 Download the corresponding CUDNN version from NVIDIA developer official website and decompressing it, add `-DCUDNN_ROOT` to cmake command, to specify the path of CUDNN.
-
-### How to make the compiler detect the nccl library
-
-After downloading the corresponding version of the nccl2 library from the NVIDIA developer official website and decompressing it, add the following environment variables (take nccl2.1.4 as an example):
-
-```shell
-export C_INCLUDE_PATH=/path/to/nccl2/cuda8/nccl_2.1.4-1+cuda8.0_x86_64/include:$C_INCLUDE_PATH
-export CPLUS_INCLUDE_PATH=/path/to/nccl2/cuda8/nccl_2.1.4-1+cuda8.0_x86_64/include:$CPLUS_INCLUDE_PATH
-export LD_LIBRARY_PATH=/path/to/nccl2/cuda8/nccl_2.1.4-1+cuda8.0_x86_64/lib/:$LD_LIBRARY_PATH
-```
@@ -72,10 +72,12 @@ export PATH=$PATH:$GOPATH/bin
 ## Get Go packages
 
 ```shell
-go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway
-go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger
-go get -u github.com/golang/protobuf/protoc-gen-go
-go get -u google.golang.org/grpc
+go env -w GO111MODULE=on
+go env -w GOPROXY=https://goproxy.cn,direct
+go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway@v1.15.2
+go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger@v1.15.2
+go get -u github.com/golang/protobuf/protoc-gen-go@v1.4.3
+go get -u google.golang.org/grpc@v1.33.0
 ```
@@ -85,7 +87,10 @@ go get -u google.golang.org/grpc
 ``` shell
 mkdir server-build-cpu && cd server-build-cpu
-cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DSERVER=ON ..
+cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
+    -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
+    -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
+    -DSERVER=ON ..
 make -j10
 ```
@@ -95,21 +100,44 @@ make -j10
 ``` shell
 mkdir server-build-gpu && cd server-build-gpu
-cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DSERVER=ON -DWITH_GPU=ON ..
+cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
+    -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
+    -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
+    -DCUDA_TOOLKIT_ROOT_DIR=${CUDA_PATH} \
+    -DCUDNN_LIBRARY=${CUDNN_LIBRARY} \
+    -DSERVER=ON \
+    -DWITH_GPU=ON ..
 make -j10
 ```
 
-Run `make install` to place the build outputs under the `./output` directory.
-**Note:** after a successful build, the `SERVING_BIN` path must be set; see the [notes](https://github.com/PaddlePaddle/Serving/blob/develop/doc/COMPILE_CN.md#注意事项) later in this document.
+### Integrate the TensorRT version of the Paddle Inference Library
+
+```
+mkdir server-build-trt && cd server-build-trt
+cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
+    -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
+    -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
+    -DTENSORRT_ROOT=${TENSORRT_LIBRARY_PATH} \
+    -DCUDA_TOOLKIT_ROOT_DIR=${CUDA_PATH} \
+    -DCUDNN_LIBRARY=${CUDNN_LIBRARY} \
+    -DSERVER=ON \
+    -DWITH_GPU=ON \
+    -DWITH_TRT=ON ..
+make -j10
+```
+
+Run `make install` to place the build outputs under the `./output` directory.
+
+**Note:** after a successful build, the `SERVING_BIN` path must be set; see the [notes](https://github.com/PaddlePaddle/Serving/blob/develop/doc/COMPILE_CN.md#注意事项) later in this document.
 
 ## Compile the Client part
 
 ``` shell
 mkdir client-build && cd client-build
-cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DCLIENT=ON ..
+cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
+    -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
+    -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
+    -DCLIENT=ON ..
 make -j10
 ```
@@ -121,7 +149,11 @@ make -j10
 ```bash
 mkdir app-build && cd app-build
-cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DCMAKE_INSTALL_PREFIX=./output -DAPP=ON ..
+cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \
+    -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so \
+    -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \
+    -DCMAKE_INSTALL_PREFIX=./output \
+    -DAPP=ON ..
 make
 ```
@@ -152,7 +184,10 @@ make
 | WITH_AVX | Compile Paddle Serving with AVX intrinsics | OFF |
 | WITH_MKL | Compile Paddle Serving with MKL support | OFF |
 | WITH_GPU | Compile Paddle Serving with NVIDIA GPU | OFF |
-| CUDNN_ROOT | Define CuDNN library and header path | |
+| WITH_TRT | Compile Paddle Serving with TensorRT | OFF |
+| CUDNN_LIBRARY | Define CuDNN library and header path | |
+| CUDA_TOOLKIT_ROOT_DIR | Define CUDA PATH | |
+| TENSORRT_ROOT | Define TensorRT PATH | |
 | CLIENT | Compile Paddle Serving Client | OFF |
 | SERVER | Compile Paddle Serving Server | OFF |
 | APP | Compile Paddle Serving App package | OFF |
@@ -167,7 +202,8 @@ Paddle Serving supports GPU prediction through the PaddlePaddle inference library. The WITH_GPU
 - CUDA
 - CuDNN
-- NCCL2
+
+To build the TensorRT version, the TensorRT library must be installed.
 
 Note the following:
@@ -176,21 +212,12 @@ Paddle Serving supports GPU prediction through the PaddlePaddle inference library. The WITH_GPU
 The base library version pairings used by PaddlePaddle release builds are listed below for reference:
 
-| | CUDA | CuDNN | NCCL2 |
-| :----: | :-----: | :----------------------: | :----: |
-| CUDA 8 | 8.0.61 | CuDNN 7.1.2 for CUDA 8.0 | 2.1.4 |
-| CUDA 9 | 9.0.176 | CuDNN 7.3.1 for CUDA 9.0 | 2.2.12 |
+| | CUDA | CuDNN | TensorRT |
+| :----: | :-----: | :----------------------: | :----: |
+| post9 | 9.0 | CuDNN 7.3.1 for CUDA 9.0 | |
+| post10 | 10.0 | CuDNN 7.5.1 for CUDA 10.0| |
+| trt | 10.1 | CuDNN 7.5.1 for CUDA 10.1| 6.0.1.5 |
 
 ### How to make the Paddle Serving build system detect the CuDNN library
 
-Download the matching CuDNN version from the NVIDIA developer site, extract it locally, then add the `-DCUDNN_ROOT` parameter to the cmake command to point at the CuDNN library path.
+Download the matching CuDNN version from the NVIDIA developer site, extract it locally, then add the `-DCUDNN_LIBRARY` parameter to the cmake command to point at the CuDNN library path.
-
-### How to make the Paddle Serving build system detect the nccl library
-
-Download the matching nccl2 library from the NVIDIA developer site, extract it, and add the following environment variables (using nccl2.1.4 as an example):
-
-```shell
-export C_INCLUDE_PATH=/path/to/nccl2/cuda8/nccl_2.1.4-1+cuda8.0_x86_64/include:$C_INCLUDE_PATH
-export CPLUS_INCLUDE_PATH=/path/to/nccl2/cuda8/nccl_2.1.4-1+cuda8.0_x86_64/include:$CPLUS_INCLUDE_PATH
-export LD_LIBRARY_PATH=/path/to/nccl2/cuda8/nccl_2.1.4-1+cuda8.0_x86_64/lib/:$LD_LIBRARY_PATH
-```
@@ -15,13 +15,47 @@
 ```
 
 - Q: How do I run prediction with a Paddle Serving that I compiled myself?
   A: Install the self-built whl package via pip, and set the SERVING_BIN environment variable to the path of the compiled serving binary.
 
 - Q: GPU prediction fails with InvalidArgumentError: Device id must be less than GPU count, but received id is: 0. GPU count is: 0.
   A: Add the directory of the libcuda.so that belongs to the GPU driver to the LD_LIBRARY_PATH environment variable.
 
 - Q: GPU prediction fails with ExternalError: Cudnn error, CUDNN_STATUS_BAD_PARAM at (/home/scmbuild/workspaces_cluster.dev/baidu.lib.paddlepaddle/baidu/lib/paddlepaddle/Paddle/paddle/fluid/operators/batch_norm_op.cu:198)
   A: Add the cudnn lib64 path to LD_LIBRARY_PATH. The post9 build of Paddle Serving installed from pypi uses cudnn 7.3 and post10 uses cudnn 7.5; for a self-compiled Paddle Serving, the cudnn version in use is recorded in the log/serving.INFO log file.
 
 - Q: GPU prediction fails with Error: Failed to find dynamic library: libcublas.so
   A: Add the cuda lib64 path to LD_LIBRARY_PATH. The post9 build of Paddle Serving uses cuda 9.0 and post10 uses cuda 10.0.
 
+- Q: Where can I view the log output of deployment and prediction?
+  - A: Server-side logs come in two parts: some are printed to standard output, and some go to the log/serving.INFO file under the directory from which the service was started.
+  Client-side logs are printed directly to standard output.
+  Running 'export GLOG_v=3' before deploying the service produces more verbose logs.
+
+- Q: Running Serving in a GPU environment fails with GPU count is: 0.
+  ```
+  terminate called after throwing an instance of 'paddle::platform::EnforceNotMet'
+  what():
+  --------------------------------------------
+  C++ Call Stacks (More useful to developers):
+  --------------------------------------------
+  0   std::string paddle::platform::GetTraceBackString<std::string const&>(std::string const&, char const*, int)
+  1   paddle::platform::SetDeviceId(int)
+  2   paddle::AnalysisConfig::fraction_of_gpu_memory_for_pool() const
+  3   std::unique_ptr<paddle::PaddlePredictor, std::default_delete<paddle::PaddlePredictor> > paddle::CreatePaddlePredictor<paddle::AnalysisConfig, (paddle::PaddleEngineKind)2>(paddle::AnalysisConfig const&)
+  4   std::unique_ptr<paddle::PaddlePredictor, std::default_delete<paddle::PaddlePredictor> > paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(paddle::AnalysisConfig const&)
+  ----------------------
+  Error Message Summary:
+  ----------------------
+  InvalidArgumentError: Device id must be less than GPU count, but received id is: 0. GPU count is: 0.
+    [Hint: Expected id < GetCUDADeviceCount(), but received id:0 >= GetCUDADeviceCount():0.] at (/home/scmbuild/workspaces_cluster.dev/baidu.lib.paddlepaddle/baidu/lib/paddlepaddle/Paddle/paddle/fluid/platform/gpu_info.cc:211)
+  ```
+  A: libcuda.so was not linked successfully. First locate libcuda.so on the machine and use ldd to confirm that the libnvidia version matches the one reported by nvidia-smi (libnvidia-fatbinaryloader.so.418.39 for "NVIDIA-SMI 418.39 Driver Version: 418.39"), then export the directory that contains libcuda.so (for example, if libcuda.so is in /usr/lib64/, run export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib64/).
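The LD_LIBRARY_PATH answers above all come down to exporting the right directories before starting the service. A sketch, with example paths only (the real directories depend on the local driver, CUDA and CuDNN installs):

```shell
# Example paths only; point these at the directories that actually contain the libraries.
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/lib64/             # libcuda.so shipped with the GPU driver
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cuda/lib64/  # libcublas.so (cuda 9.0 for post9, 10.0 for post10)
export LD_LIBRARY_PATH=$LD_LIBRARY_PATH:/usr/local/cudnn/lib64/ # libcudnn.so (cudnn 7.3 for post9, 7.5 for post10)
export GLOG_v=3  # optional: more verbose serving logs, as described above
```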
@@ -3,51 +3,59 @@
 ## CPU server
 ### Python 3
 ```
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server-0.3.2-py3-none-any.whl
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server-0.0.0-py3-none-any.whl
 ```
 ### Python 2
 ```
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server-0.3.2-py2-none-any.whl
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server-0.0.0-py2-none-any.whl
 ```
 
 ## GPU server
 ### Python 3
 ```
 #cuda 9.0
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.3.2.post9-py3-none-any.whl
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post9-py3-none-any.whl
 #cuda 10.0
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.3.2.post10-py3-none-any.whl
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post10-py3-none-any.whl
+#cuda10.1 with TensorRT 6
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.trt-py3-none-any.whl
 ```
 ### Python 2
 ```
 #cuda 9.0
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.3.2.post9-py2-none-any.whl
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post9-py2-none-any.whl
 #cuda 10.0
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.3.2.post10-py2-none-any.whl
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.post10-py2-none-any.whl
+##cuda10.1 with TensorRT 6
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.0.0.trt-py2-none-any.whl
 ```
 
 ## Client
 ### Python 3.7
 ```
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.3.2-cp37-none-any.whl
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.0.0-cp37-none-any.whl
 ```
 ### Python 3.6
 ```
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.3.2-cp36-none-any.whl
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.0.0-cp36-none-any.whl
 ```
+### Python 3.5
+```
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.0.0-cp35-none-any.whl
+```
 ### Python 2.7
 ```
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.3.2-cp27-none-any.whl
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.0.0-cp27-none-any.whl
 ```
 
 ## App
 ### Python 3
 ```
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_app-0.1.2-py3-none-any.whl
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_app-0.0.0-py3-none-any.whl
 ```
 ### Python 2
 ```
-https://paddle-serving.bj.bcebos.com/whl/paddle_serving_app-0.1.2-py2-none-any.whl
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_app-0.0.0-py2-none-any.whl
 ```
+## Java Demo
+### Install package
+```
+mvn compile
+mvn install
+cd examples
+mvn compile
+mvn install
+```
+### Start Server
+take the fit_a_line demo as example
+```
+python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --use_multilang #CPU
+python -m paddle_serving_server_gpu.serve --model uci_housing_model --thread 10 --port 9393 --use_multilang #GPU
+```
+### Client Predict
+```
+java -cp paddle-serving-sdk-java-examples-0.0.1-jar-with-dependencies.jar PaddleServingClientExample fit_a_line
+```
+The Java example also contains the prediction client of Bert, Model_enaemble, asyn_predict, batch_predict, Cube_local, Cube_quant, and Yolov4 models.
+## Java demo
+### Install client dependencies
+```
+mvn compile
+mvn install
+cd examples
+mvn compile
+mvn install
+```
+### Start the server
+Take the fit_a_line model as an example
+```
+python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --use_multilang #CPU
+python -m paddle_serving_server_gpu.serve --model uci_housing_model --thread 10 --port 9393 --use_multilang #GPU
+```
+### Client prediction
+```
+java -cp paddle-serving-sdk-java-examples-0.0.1-jar-with-dependencies.jar PaddleServingClientExample fit_a_line
+```
+The Java demo also includes prediction clients for the bert, model_enaemble, asyn_predict, batch_predict, cube_local, cube_quant, and yolov4 models.
@@ -23,7 +23,6 @@
 #include "core/configure/inferencer_configure.pb.h"
 #include "core/predictor/framework/infer.h"
 #include "paddle_inference_api.h"  // NOLINT
-//#include "predictor/framework/infer.h"
 
 namespace baidu {
 namespace paddle_serving {
......
@@ -2,6 +2,7 @@ FILE(GLOB fluid_gpu_engine_srcs ${CMAKE_CURRENT_LIST_DIR}/src/*.cpp)
 add_library(fluid_gpu_engine ${fluid_gpu_engine_srcs})
 target_include_directories(fluid_gpu_engine PUBLIC
     ${CMAKE_BINARY_DIR}/Paddle/fluid_install_dir/)
 
 add_dependencies(fluid_gpu_engine pdserving extern_paddle configure)
 target_link_libraries(fluid_gpu_engine pdserving paddle_fluid iomp5 mklml_intel -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
......
@@ -190,7 +190,7 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
     paddle::AnalysisConfig analysis_config;
     analysis_config.SetModel(data_path);
-    analysis_config.EnableUseGpu(100, FLAGS_gpuid);
+    analysis_config.EnableUseGpu(1500, FLAGS_gpuid);
     analysis_config.SwitchSpecifyInputNames(true);
     analysis_config.SetCpuMathLibraryNumThreads(1);
@@ -198,12 +198,68 @@ class FluidGpuAnalysisDirCore : public FluidFamilyCore {
       analysis_config.EnableMemoryOptim();
     }
-    if (params.enable_ir_optimization()) {
-      analysis_config.SwitchIrOptim(true);
-    } else {
-      analysis_config.SwitchIrOptim(false);
-    }
+#if 0  // todo: support flexible shape
+
+    int min_seq_len = 1;
+    int max_seq_len = 512;
+    int opt_seq_len = 128;
+    int head_number = 12;
+    int batch = 50;
+
+    std::vector<int> min_in_shape = {batch, min_seq_len, 1};
+    std::vector<int> max_in_shape = {batch, max_seq_len, 1};
+    std::vector<int> opt_in_shape = {batch, opt_seq_len, 1};
+
+    std::string input1_name = "src_text_a_ids";
+    std::string input2_name = "pos_text_a_ids";
+    std::string input3_name = "sent_text_a_ids";
+    std::string input4_name = "stack_0.tmp_0";
+
+    std::map<std::string, std::vector<int>> min_input_shape = {
+        {input1_name, min_in_shape},
+        {input2_name, min_in_shape},
+        {input3_name, min_in_shape},
+        {input4_name, {batch, head_number, min_seq_len, min_seq_len}},
+    };
+    std::map<std::string, std::vector<int>> max_input_shape = {
+        {input1_name, max_in_shape},
+        {input2_name, max_in_shape},
+        {input3_name, max_in_shape},
+        {input4_name, {batch, head_number, max_seq_len, max_seq_len}},
+    };
+    std::map<std::string, std::vector<int>> opt_input_shape = {
+        {input1_name, opt_in_shape},
+        {input2_name, opt_in_shape},
+        {input3_name, opt_in_shape},
+        {input4_name, {batch, head_number, opt_seq_len, opt_seq_len}},
+    };
+
+    analysis_config.SetTRTDynamicShapeInfo(
+        min_input_shape, max_input_shape, opt_input_shape);
+#endif
+    int max_batch = 32;
+    int min_subgraph_size = 3;
+    if (params.use_trt()) {
+      analysis_config.EnableTensorRtEngine(
+          1 << 20,
+          max_batch,
+          min_subgraph_size,
+          paddle::AnalysisConfig::Precision::kFloat32,
+          false,
+          false);
+      LOG(INFO) << "create TensorRT predictor";
+    } else {
+      if (params.enable_memory_optimization()) {
+        analysis_config.EnableMemoryOptim();
+      }
+
+      if (params.enable_ir_optimization()) {
+        analysis_config.SwitchIrOptim(true);
+      } else {
+        analysis_config.SwitchIrOptim(false);
+      }
+    }
 
     AutoLock lock(GlobalPaddleCreateMutex::instance());
     _core =
         paddle::CreatePaddlePredictor<paddle::AnalysisConfig>(analysis_config);
......
@@ -80,6 +80,16 @@ if (SERVER)
         COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
         DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
     add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
+  elseif(WITH_TRT)
+    add_custom_command(
+        OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
+        COMMAND cp -r
+            ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server_gpu/ ${PADDLE_SERVING_BINARY_DIR}/python/
+        COMMAND env ${py_env} ${PYTHON_EXECUTABLE} gen_version.py
+            "server_gpu" trt
+        COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
+        DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
+    add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
   else()
     add_custom_command(
         OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
......
@@ -16,6 +16,7 @@ from paddle_serving_client import Client
 from paddle_serving_app.reader import *
 import sys
 import numpy as np
+from paddle_serving_app.reader import BlazeFacePostprocess
 
 preprocess = Sequential([
     File2Image(),
......
@@ -90,6 +90,7 @@ def single_func(idx, resource):
                     image = base64.b64encode(
                         open("./image_data/n01440764/" + file_list[i]).read())
                 else:
+                    image_path = "./image_data/n01440764/" + file_list[i]
                     image = base64.b64encode(open(image_path, "rb").read()).decode(
                         "utf-8")
                 req = json.dumps({"feed": [{"image": image}], "fetch": ["score"]})
......
@@ -13,7 +13,7 @@
 # limitations under the License.
 import sys
 from paddle_serving_client import Client
-from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize
+from paddle_serving_app.reader import Sequential, URL2Image, Resize, CenterCrop, RGB2BGR, Transpose, Div, Normalize, Base64ToImage
 
 if len(sys.argv) != 4:
     print("python resnet50_web_service.py model device port")
......
@@ -12,9 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from .chinese_bert_reader import ChineseBertReader
-from .image_reader import ImageReader, File2Image, URL2Image, Sequential, Normalize
+from .image_reader import ImageReader, File2Image, URL2Image, Sequential, Normalize, Base64ToImage
 from .image_reader import CenterCrop, Resize, Transpose, Div, RGB2BGR, BGR2RGB, ResizeByFactor
-from .image_reader import RCNNPostprocess, SegPostprocess, PadStride
+from .image_reader import RCNNPostprocess, SegPostprocess, PadStride, BlazeFacePostprocess
 from .image_reader import DBPostProcess, FilterBoxes, GetRotateCropImage, SortedBoxes
 from .lac_reader import LACReader
 from .senta_reader import SentaReader
......
@@ -317,7 +317,7 @@ class RCNNPostprocess(object):
                     self.clip_bbox([xmin, ymin, xmax, ymax])
                 w = xmax - xmin
                 h = ymax - ymin
-                im_shape = t['im_shape'][0][i].tolist()
+                im_shape = t['im_shape'].tolist()
                 im_height, im_width = int(im_shape[0]), int(im_shape[1])
                 xmin *= im_width
                 ymin *= im_height
@@ -420,7 +420,7 @@ class RCNNPostprocess(object):
         for key in image_with_bbox:
             if key == "image":
                 continue
-            if ".lod" in key:
+            if ".lod" in key or "im_shape" in key:
                 continue
             fetch_name = key
             bbox_result = self._get_bbox_result(image_with_bbox, fetch_name,
......
@@ -12,5 +12,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ Paddle Serving App version string """
-serving_app_version = "0.1.2"
+serving_app_version = "0.0.0"
 commit_id = ""
@@ -354,8 +354,9 @@ class Client(object):
                             name))
                     result_map[name].shape = shape
                     if name in self.lod_tensor_set:
-                        result_map["{}.lod".format(
-                            name)] = result_batch_handle.get_lod(mi, name)
+                        tmp_lod = result_batch_handle.get_lod(mi, name)
+                        if np.size(tmp_lod) > 0:
+                            result_map["{}.lod".format(name)] = tmp_lod
                 elif self.fetch_names_to_type_[name] == float32_type:
                     result_map[name] = result_batch_handle.get_float_by_name(
                         mi, name)
@@ -367,9 +368,9 @@ class Client(object):
                     shape = result_batch_handle.get_shape(mi, name)
                     result_map[name].shape = shape
                     if name in self.lod_tensor_set:
-                        result_map["{}.lod".format(
-                            name)] = result_batch_handle.get_lod(mi, name)
+                        tmp_lod = result_batch_handle.get_lod(mi, name)
+                        if np.size(tmp_lod) > 0:
+                            result_map["{}.lod".format(name)] = tmp_lod
                 elif self.fetch_names_to_type_[name] == int32_type:
                     # result_map[name] will be py::array(numpy array)
                     result_map[name] = result_batch_handle.get_int32_by_name(
@@ -382,8 +383,9 @@ class Client(object):
                     shape = result_batch_handle.get_shape(mi, name)
                     result_map[name].shape = shape
                     if name in self.lod_tensor_set:
-                        result_map["{}.lod".format(
-                            name)] = result_batch_handle.get_lod(mi, name)
+                        tmp_lod = result_batch_handle.get_lod(mi, name)
+                        if np.size(tmp_lod) > 0:
+                            result_map["{}.lod".format(name)] = tmp_lod
                 multi_result_map.append(result_map)
             ret = None
             if len(model_engine_names) == 1:
......
@@ -74,7 +74,8 @@ def save_model(server_model_folder,
         fetch_var = model_conf.FetchVar()
         fetch_var.alias_name = key
         fetch_var.name = fetch_var_dict[key].name
-        fetch_var.is_lod_tensor = fetch_var_dict[key].lod_level >= 1
+        #fetch_var.is_lod_tensor = fetch_var_dict[key].lod_level >= 1
+        fetch_var.is_lod_tensor = 1
         if fetch_var_dict[key].dtype == core.VarDesc.VarType.INT64:
             fetch_var.fetch_type = 0
         if fetch_var_dict[key].dtype == core.VarDesc.VarType.FP32:
......
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ Paddle Serving Client version string """
-serving_client_version = "0.3.2"
-serving_server_version = "0.3.2"
-module_proto_version = "0.3.2"
+serving_client_version = "0.0.0"
+serving_server_version = "0.0.0"
+module_proto_version = "0.0.0"
 commit_id = ""
@@ -584,7 +584,7 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc.
                 else:
                     raise Exception("error type.")
                 tensor.shape.extend(list(model_result[name].shape))
-                if name in self.lod_tensor_set_:
+                if "{}.lod".format(name) in model_result:
                     tensor.lod.extend(model_result["{}.lod".format(name)]
                                       .tolist())
                 inst.tensor_array.append(tensor)
......
@@ -12,7 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ Paddle Serving Client version string """
-serving_client_version = "0.3.2"
-serving_server_version = "0.3.2"
-module_proto_version = "0.3.2"
+serving_client_version = "0.0.0"
+serving_server_version = "0.0.0"
+module_proto_version = "0.0.0"
 commit_id = ""
@@ -123,7 +123,7 @@ class WebService(object):
                     feed=request.json["feed"], fetch=fetch, fetch_map=fetch_map)
                 result = {"result": result}
             except ValueError as err:
-                result = {"result": err}
+                result = {"result": str(err)}
             return result
 
     def run_rpc_service(self):
......
@@ -73,6 +73,8 @@ def serve_args():
         default=False,
         action="store_true",
         help="Use Multi-language-service")
+    parser.add_argument(
+        "--use_trt", default=False, action="store_true", help="Use TensorRT")
     parser.add_argument(
         "--product_name",
         type=str,
@@ -205,6 +207,7 @@ class Server(object):
         self.cur_path = os.getcwd()
         self.use_local_bin = False
         self.gpuid = 0
+        self.use_trt = False
        self.model_config_paths = None  # for multi-model in a workflow
        self.product_name = None
        self.container_id = None
@@ -271,6 +274,9 @@ class Server(object):
     def set_gpuid(self, gpuid=0):
         self.gpuid = gpuid
 
+    def set_trt(self):
+        self.use_trt = True
+
     def _prepare_engine(self, model_config_paths, device):
         if self.model_toolkit_conf == None:
             self.model_toolkit_conf = server_sdk.ModelToolkitConf()
@@ -290,6 +296,7 @@ class Server(object):
             engine.enable_ir_optimization = self.ir_optimization
             engine.static_optimization = False
             engine.force_update_static_cache = False
+            engine.use_trt = self.use_trt
 
             if device == "cpu":
                 engine.type = "FLUID_CPU_ANALYSIS_DIR"
@@ -396,7 +403,10 @@ class Server(object):
         for line in version_file.readlines():
             if re.match("cuda_version", line):
                 cuda_version = line.split("\"")[1]
-                device_version = "serving-gpu-cuda" + cuda_version + "-"
+                if cuda_version != "trt":
+                    device_version = "serving-gpu-cuda" + cuda_version + "-"
+                else:
+                    device_version = "serving-gpu-" + cuda_version + "-"
 
         folder_name = device_version + serving_server_version
         tar_name = folder_name + ".tar.gz"
@@ -645,7 +655,7 @@ class MultiLangServerServiceServicer(multi_lang_general_model_service_pb2_grpc.
                 else:
                     raise Exception("error type.")
                 tensor.shape.extend(list(model_result[name].shape))
-                if name in self.lod_tensor_set_:
+                if "{}.lod".format(name) in model_result:
                     tensor.lod.extend(model_result["{}.lod".format(name)]
                                       .tolist())
                 inst.tensor_array.append(tensor)
......
@@ -64,6 +64,8 @@ def start_gpu_card_model(index, gpuid, args):  # pylint: disable=doc-string-miss
     server.set_memory_optimize(mem_optim)
     server.set_ir_optimize(ir_optim)
     server.set_max_body_size(max_body_size)
+    if args.use_trt:
+        server.set_trt()
 
     if args.product_name != None:
         server.set_product_name(args.product_name)
......
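The hunks above wire `--use_trt` from `serve_args()` through `Server.set_trt()` into the engine config. As a sketch, assuming a TRT build of paddle-serving-server-gpu and the uci_housing example model from the quick start, a TensorRT-enabled server could be launched with:

```shell
python -m paddle_serving_server_gpu.serve --model uci_housing_model --thread 10 --port 9292 --use_trt
```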
@@ -12,8 +12,8 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ Paddle Serving Client version string """
-serving_client_version = "0.3.2"
-serving_server_version = "0.3.2"
-module_proto_version = "0.3.2"
+serving_client_version = "0.0.0"
+serving_server_version = "0.0.0"
+module_proto_version = "0.0.0"
 cuda_version = "9"
 commit_id = ""
@@ -178,7 +178,7 @@ class WebService(object):
                     feed=request.json["feed"], fetch=fetch, fetch_map=fetch_map)
                 result = {"result": result}
             except ValueError as err:
-                result = {"result": err}
+                result = {"result": str(err)}
             return result
     def run_rpc_service(self):
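The small-looking change matters: an exception object is not JSON-serializable, so returning `{"result": err}` would blow up when Flask encodes the response, while `str(err)` keeps the message and keeps the dict encodable. A two-line illustration:

```python
import json

err = ValueError("Wrong feed var")       # stand-in for the caught error
# json.dumps({"result": err})            # TypeError: not JSON serializable
print(json.dumps({"result": str(err)}))  # {"result": "Wrong feed var"}
```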
......
@@ -25,7 +25,7 @@ import (
 	"github.com/grpc-ecosystem/grpc-gateway/runtime"
 	"google.golang.org/grpc"
-	gw "./proto"
+	gw "serving-gateway/proto"
 )
 //export run_proxy_server
......
@@ -28,17 +28,11 @@ import util
 py_version = sys.version_info
 def copy_lib():
-    if py_version[0] == 2:
-        lib_list = ['libpython2.7.so.1.0', 'libssl.so.10', 'libcrypto.so.10']
-    elif py_version[1] == 6:
-        lib_list = ['libpython3.6m.so.1.0', 'libssl.so.10', 'libcrypto.so.10']
-    elif py_version[1] == 7:
-        lib_list = ['libpython3.7m.so.1.0', 'libssl.so.10', 'libcrypto.so.10']
     os.popen('mkdir -p paddle_serving_client/lib')
+    lib_list = ['${OPENSSL_CRYPTO_LIBRARY}', '${OPENSSL_SSL_LIBRARY}',
+                '${PYTHON_LIBRARY}']
     for lib in lib_list:
-        r = os.popen('whereis {}'.format(lib))
-        text = r.read()
-        os.popen('cp {} ./paddle_serving_client/lib'.format(text.strip().split(' ')[1]))
+        os.popen('cp {} ./paddle_serving_client/lib'.format(lib))
 max_version, mid_version, min_version = util.python_version()
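The per-interpreter `if/elif` table and the `whereis` lookup are gone: `setup.py` is now treated as a template, and the build system substitutes the exact library paths (`${OPENSSL_CRYPTO_LIBRARY}`, `${OPENSSL_SSL_LIBRARY}`, `${PYTHON_LIBRARY}`) at configure time, so packaging copies precisely the libraries the binaries were linked against. After substitution the generated list would look roughly like this (paths are purely illustrative):

```python
# Hypothetical result of the configure-time substitution:
lib_list = ['/usr/lib64/libcrypto.so', '/usr/lib64/libssl.so',
            '/usr/local/python3.6/lib/libpython3.6m.so.1.0']
```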
@@ -53,9 +47,6 @@ REQUIRED_PACKAGES = [
     'grpcio-tools >= 1.28.1'
 ]
-if not util.find_package("paddlepaddle") and not util.find_package("paddlepaddle-gpu"):
-    REQUIRED_PACKAGES.append("paddlepaddle")
 packages=['paddle_serving_client',
           'paddle_serving_client.proto',
......
@@ -29,7 +29,7 @@ util.gen_pipeline_code("paddle_serving_server")
 REQUIRED_PACKAGES = [
     'six >= 1.10.0', 'protobuf >= 3.11.0', 'grpcio >= 1.28.1', 'grpcio-tools >= 1.28.1',
-    'paddle_serving_client', 'flask >= 1.1.1', 'paddle_serving_app'
+    'paddle_serving_client', 'flask >= 1.1.1', 'paddle_serving_app', 'func_timeout', 'pyyaml'
 ]
 packages=['paddle_serving_server',
......
@@ -19,17 +19,19 @@ from __future__ import print_function
 from setuptools import setup, Distribution, Extension
 from setuptools import find_packages
 from setuptools import setup
-from paddle_serving_server_gpu.version import serving_server_version
+from paddle_serving_server_gpu.version import serving_server_version, cuda_version
 import util
-max_version, mid_version, min_version = util.python_version()
+if cuda_version != "trt":
+    cuda_version = "post" + cuda_version
+max_version, mid_version, min_version = util.python_version()
 # gen pipeline proto code
 util.gen_pipeline_code("paddle_serving_server_gpu")
 REQUIRED_PACKAGES = [
     'six >= 1.10.0', 'protobuf >= 3.11.0', 'grpcio >= 1.28.1', 'grpcio-tools >= 1.28.1',
-    'paddle_serving_client', 'flask >= 1.1.1', 'paddle_serving_app'
+    'paddle_serving_client', 'flask >= 1.1.1', 'paddle_serving_app', 'func_timeout', 'pyyaml'
 ]
 packages=['paddle_serving_server_gpu',
@@ -56,7 +58,7 @@ package_data={'paddle_serving_server_gpu': ['pipeline/gateway/libproxy_server.so
 setup(
     name='paddle-serving-server-gpu',
-    version=serving_server_version.replace('-', '') + '.post@CUDA_VERSION_MAJOR@',
+    version=serving_server_version.replace('-', '') + "." + cuda_version,
     description=
     ('Paddle Serving Package for saved model with PaddlePaddle'),
     url='https://github.com/PaddlePaddle/Serving',
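Together with the `cuda_version` rewrite at the top of this file, the wheel version is now computed from the build configuration instead of a hard-coded `@CUDA_VERSION_MAJOR@` placeholder. A worked example of the resulting version strings (a sketch that just re-applies the two lines above):

```python
def wheel_version(serving_server_version, cuda_version):
    # Same transformation as in setup.py: CUDA majors become a .postN
    # suffix, "trt" stays as a plain suffix.
    if cuda_version != "trt":
        cuda_version = "post" + cuda_version
    return serving_server_version.replace('-', '') + "." + cuda_version

print(wheel_version("0.3.2", "9"))    # 0.3.2.post9
print(wheel_version("0.3.2", "10"))   # 0.3.2.post10
print(wheel_version("0.3.2", "trt"))  # 0.3.2.trt
```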
......
@@ -44,8 +44,8 @@ def gen_pipeline_code(package_name):
     ret = os.system(
         "cd {}/pipeline/gateway/proto/ && "
         "../../../../../third_party/install/protobuf/bin/protoc -I. "
-        "-I$GOPATH/src "
-        "-I$GOPATH/src/github.com/grpc-ecosystem/grpc-gateway/third_party/googleapis "
+        "-I$GOPATH/pkg/mod "
+        "-I$GOPATH/pkg/mod/github.com/grpc-ecosystem/grpc-gateway\@v1.15.2/third_party/googleapis "
         "--go_out=plugins=grpc:. "
         "gateway.proto".format(package_name))
     if ret != 0:
@@ -54,14 +54,18 @@ def gen_pipeline_code(package_name):
     ret = os.system(
         "cd {}/pipeline/gateway/proto/ && "
         "../../../../../third_party/install/protobuf/bin/protoc -I. "
-        "-I$GOPATH/src "
-        "-I$GOPATH/src/github.com/grpc-ecosystem/grpc-gateway/third_party/googleapis "
+        "-I$GOPATH/pkg/mod "
+        "-I$GOPATH/pkg/mod/github.com/grpc-ecosystem/grpc-gateway\@v1.15.2/third_party/googleapis "
         "--grpc-gateway_out=logtostderr=true:. "
         "gateway.proto".format(package_name))
     if ret != 0:
         exit(1)
     # pipeline grpc-gateway shared-lib
+    ret = os.system("cd {}/pipeline/gateway/ && go mod init serving-gateway".
+                    format(package_name))
+    ret = os.system("cd {}/pipeline/gateway/ && go mod vendor && go mod tidy".
+                    format(package_name))
     ret = os.system(
         "cd {}/pipeline/gateway && "
         "go build -buildmode=c-shared -o libproxy_server.so proxy_server.go".
......
@@ -41,6 +41,12 @@ RUN yum -y install wget && \
     echo 'export LD_LIBRARY_PATH=/usr/local/python3.6/lib:$LD_LIBRARY_PATH' >> /root/.bashrc && \
     source /root/.bashrc && \
     cd .. && rm -rf Python-3.6.8* && \
+    wget https://github.com/protocolbuffers/protobuf/releases/download/v3.11.2/protobuf-all-3.11.2.tar.gz && \
+    tar zxf protobuf-all-3.11.2.tar.gz && \
+    cd protobuf-3.11.2 && \
+    ./configure && make -j4 && make install && \
+    make clean && \
+    cd .. && rm -rf protobuf-* && \
     yum -y install epel-release && yum -y install patchelf libXext libSM libXrender && \
     yum clean all && \
     echo "export LANG=en_US.utf8" >> /root/.bashrc && \
......
@@ -41,6 +41,12 @@ RUN yum -y install wget && \
     echo 'export LD_LIBRARY_PATH=/usr/local/python3.6/lib:$LD_LIBRARY_PATH' >> /root/.bashrc && \
     source /root/.bashrc && \
     cd .. && rm -rf Python-3.6.8* && \
+    wget https://github.com/protocolbuffers/protobuf/releases/download/v3.11.2/protobuf-all-3.11.2.tar.gz && \
+    tar zxf protobuf-all-3.11.2.tar.gz && \
+    cd protobuf-3.11.2 && \
+    ./configure && make -j4 && make install && \
+    make clean && \
+    cd .. && rm -rf protobuf-* && \
     yum -y install epel-release && yum -y install patchelf libXext libSM libXrender && \
     yum clean all && \
     localedef -c -i en_US -f UTF-8 en_US.UTF-8 && \
......
@@ -34,6 +34,13 @@ RUN wget http://nixos.org/releases/patchelf/patchelf-0.10/patchelf-0.10.tar.bz2
     && cd .. \
     && rm -rf patchelf-0.10*
+RUN wget https://github.com/protocolbuffers/protobuf/releases/download/v3.11.2/protobuf-all-3.11.2.tar.gz && \
+    tar zxf protobuf-all-3.11.2.tar.gz && \
+    cd protobuf-3.11.2 && \
+    ./configure && make -j4 && make install && \
+    make clean && \
+    cd .. && rm -rf protobuf-*
 RUN yum install -y python3 python3-devel
 RUN yum -y update >/dev/null \
......
@@ -5,7 +5,14 @@ RUN yum -y install wget >/dev/null \
     && yum -y install git openssl-devel curl-devel bzip2-devel python-devel \
     && yum -y install libSM-1.2.2-2.el7.x86_64 --setopt=protected_multilib=false \
     && yum -y install libXrender-0.9.10-1.el7.x86_64 --setopt=protected_multilib=false \
     && yum -y install libXext-1.3.3-3.el7.x86_64 --setopt=protected_multilib=false
+RUN wget https://github.com/protocolbuffers/protobuf/releases/download/v3.11.2/protobuf-all-3.11.2.tar.gz && \
+    tar zxf protobuf-all-3.11.2.tar.gz && \
+    cd protobuf-3.11.2 && \
+    ./configure && make -j4 && make install && \
+    make clean && \
+    cd .. && rm -rf protobuf-*
 RUN wget https://cmake.org/files/v3.2/cmake-3.2.0-Linux-x86_64.tar.gz >/dev/null \
     && tar xzf cmake-3.2.0-Linux-x86_64.tar.gz \
@@ -32,3 +39,5 @@ RUN yum install -y python3 python3-devel \
 RUN localedef -c -i en_US -f UTF-8 en_US.UTF-8 \
     && echo "export LANG=en_US.utf8" >> /root/.bashrc \
     && echo "export LANGUAGE=en_US.utf8" >> /root/.bashrc
@@ -6,6 +6,13 @@ RUN yum -y install wget >/dev/null \
     && yum -y install libXrender-0.9.10-1.el7.x86_64 --setopt=protected_multilib=false \
     && yum -y install libXext-1.3.3-3.el7.x86_64 --setopt=protected_multilib=false
+RUN wget https://github.com/protocolbuffers/protobuf/releases/download/v3.11.2/protobuf-all-3.11.2.tar.gz && \
+    tar zxf protobuf-all-3.11.2.tar.gz && \
+    cd protobuf-3.11.2 && \
+    ./configure && make -j4 && make install && \
+    make clean && \
+    cd .. && rm -rf protobuf-*
 RUN wget https://cmake.org/files/v3.2/cmake-3.2.0-Linux-x86_64.tar.gz >/dev/null \
     && tar xzf cmake-3.2.0-Linux-x86_64.tar.gz \
     && mv cmake-3.2.0-Linux-x86_64 /usr/local/cmake3.2.0 \
......
@@ -18,14 +18,20 @@ function init() {
     export PYTHONROOT=/usr
     cd Serving
     export SERVING_WORKDIR=$PWD
     $PYTHONROOT/bin/python -m pip install -r python/requirements.txt
+    $PYTHONROOT/bin/python -m pip install paddlepaddle
     export GOPATH=$HOME/go
     export PATH=$PATH:$GOPATH/bin
-    go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway
-    go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger
-    go get -u github.com/golang/protobuf/protoc-gen-go
-    go get -u google.golang.org/grpc
+    go env -w GO111MODULE=on
+    go env -w GOPROXY=https://goproxy.cn,direct
+    go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-grpc-gateway@v1.15.2
+    go get -u github.com/grpc-ecosystem/grpc-gateway/protoc-gen-swagger@v1.15.2
+    go get -u github.com/golang/protobuf/protoc-gen-go@v1.4.3
+    go get -u google.golang.org/grpc@v1.33.0
 }
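`init()` now pins the whole Go toolchain: modules switched on, a domestic module proxy, and exact versions of the protoc plugins that the pipeline gateway build consumes. A hedged sketch for verifying that the plugins actually landed on `PATH` after `go get` (no version flags are assumed, only the binary names):

```python
import shutil

for tool in ("protoc-gen-go", "protoc-gen-grpc-gateway", "protoc-gen-swagger"):
    path = shutil.which(tool)
    print(tool, "->", path or "NOT FOUND (is $GOPATH/bin on PATH?)")
```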
 function check_cmd() {
@@ -605,7 +611,7 @@ function python_test_grpc_impl() {
             # test load server config and client config in Server side
             cd criteo_ctr_with_cube # pwd: /Serving/python/examples/grpc_impl_example/criteo_ctr_with_cube
+<<COMMENT #comment for compile bug, todo fix conflict between grpc-gateway and cube-agent
             check_cmd "wget https://paddle-serving.bj.bcebos.com/unittest/ctr_cube_unittest.tar.gz > /dev/null"
             check_cmd "tar xf ctr_cube_unittest.tar.gz"
             check_cmd "mv models/ctr_client_conf ./"
@@ -626,9 +632,11 @@ function python_test_grpc_impl() {
                 echo "error with criteo_ctr_with_cube inference auc test, auc should > 0.67"
                 exit 1
             fi
+COMMENT
             echo "grpc impl test success"
             kill_server_process
-            ps -ef | grep "cube" | grep -v grep | awk '{print $2}' | xargs kill
+            #ps -ef | grep "cube" | grep -v grep | awk '{print $2}' | xargs kill
             cd .. # pwd: /Serving/python/examples/grpc_impl_example
             ;;
@@ -665,6 +673,7 @@ function python_test_grpc_impl() {
             cd .. # pwd: /Serving/python/examples/grpc_impl_example
             # test load server config and client config in Server side
+<<COMMENT #comment for compile bug, todo fix conflict between grpc-gateway and cube-agent
             cd criteo_ctr_with_cube # pwd: /Serving/python/examples/grpc_impl_example/criteo_ctr_with_cube
             check_cmd "wget https://paddle-serving.bj.bcebos.com/unittest/ctr_cube_unittest.tar.gz"
@@ -689,10 +698,11 @@ function python_test_grpc_impl() {
                 echo "error with criteo_ctr_with_cube inference auc test, auc should > 0.67"
                 exit 1
             fi
+COMMENT
             echo "grpc impl test success"
             kill_server_process
             ps -ef | grep "test_server_gpu" | grep -v serving_build | grep -v grep | awk '{print $2}' | xargs kill
-            ps -ef | grep "cube" | grep -v grep | awk '{print $2}' | xargs kill
+            #ps -ef | grep "cube" | grep -v grep | awk '{print $2}' | xargs kill
             cd .. # pwd: /Serving/python/examples/grpc_impl_example
             ;;
         *)
@@ -829,8 +839,8 @@ EOF
     kill_process_by_port 18080
     # test: process servicer & thread op
-    pip uninstall grpcio -y
-    pip install grpcio --no-binary=grpcio
+    #pip uninstall grpcio -y
+    #pip install grpcio --no-binary=grpcio
     cat << EOF > config.yml
 rpc_port: 18080
 worker_num: 4
@@ -944,7 +954,7 @@ function python_run_test() {
     local TYPE=$1 # pwd: /Serving
     cd python/examples # pwd: /Serving/python/examples
     python_test_fit_a_line $TYPE # pwd: /Serving/python/examples
-    python_run_criteo_ctr_with_cube $TYPE # pwd: /Serving/python/examples
+    #python_run_criteo_ctr_with_cube $TYPE # pwd: /Serving/python/examples
     python_test_bert $TYPE # pwd: /Serving/python/examples
     python_test_imdb $TYPE # pwd: /Serving/python/examples
     python_test_lac $TYPE # pwd: /Serving/python/examples
......