diff --git a/README.md b/README.md index 360b677f41228a915a1137a197b5aaa44a5d7f65..870f2f587f35ba1526c49f58698ae4db17ff0f81 100644 --- a/README.md +++ b/README.md @@ -54,8 +54,11 @@ You may need to use a domestic mirror source (in China, you can use the Tsinghua If you need install modules compiled with develop branch, please download packages from [latest packages list](./doc/LATEST_PACKAGES.md) and install with `pip install` command. -Packages of Paddle Serving support Centos 6/7 and Ubuntu 16/18, or you can use HTTP service without install client. +Packages of paddle-serving-server and paddle-serving-server-gpu support Centos 6/7 and Ubuntu 16/18. +Packages of paddle-serving-client and paddle-serving-app support Linux and Windows, but paddle-serving-client only supports python2.7/3.6/3.7. + +It is recommended to install paddle >= 1.8.2.
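A quick way to confirm the recommendation above — a minimal sketch, assuming a standard PaddlePaddle wheel that exposes `paddle.__version__`:

```python
# Sketch: check the installed paddle version against the >= 1.8.2 recommendation.
# Assumes a standard PaddlePaddle release wheel, which exposes paddle.__version__.
from distutils.version import LooseVersion

import paddle

if LooseVersion(paddle.__version__) < LooseVersion("1.8.2"):
    print("paddle {} found; >= 1.8.2 is recommended for Paddle Serving"
          .format(paddle.__version__))
else:
    print("paddle {} meets the recommendation".format(paddle.__version__))
```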

Pre-built services with Paddle Serving

@@ -121,7 +124,7 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po | `port` | int | `9292` | Exposed port of current service to users| | `name` | str | `""` | Service name, can be used to generate HTTP request url | | `model` | str | `""` | Path of paddle model directory to be served | -| `mem_optim` | - | - | Enable memory / graphic memory optimization | +| `mem_optim_off` | - | - | Disable memory / graphic memory optimization | | `ir_optim` | - | - | Enable analysis and optimization of calculation graph | | `use_mkl` (Only for cpu version) | - | - | Run inference with MKL | diff --git a/README_CN.md b/README_CN.md index f954877b08ed793dd641f7541ff2717feac2070f..6317a79513a3d5e3247d249885d8bfe06de0e1c9 100644 --- a/README_CN.md +++ b/README_CN.md @@ -56,7 +56,11 @@ pip install paddle-serving-server-gpu # GPU 如果需要使用develop分支编译的安装包,请从[最新安装包列表](./doc/LATEST_PACKAGES.md)中获取下载地址进行下载,使用`pip install`命令进行安装。 -Paddle Serving安装包支持Centos 6/7和Ubuntu 16/18,或者您可以使用HTTP服务,这种情况下不需要安装客户端。 +paddle-serving-server和paddle-serving-server-gpu安装包支持Centos 6/7和Ubuntu 16/18。 + +paddle-serving-client和paddle-serving-app安装包支持Linux和Windows,其中paddle-serving-client仅支持python2.7/3.5/3.6。 + +推荐安装1.8.2及以上版本的paddle
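With the server and client wheels installed, a prediction call over RPC looks roughly like the sketch below. It assumes the uci_housing demo model is being served on port 9292 (as in the `python -m paddle_serving_server.serve` command above) and that the client config path and feed/fetch names (`x`, `price`) follow that demo; substitute your own model's names otherwise.

```python
# Sketch of an RPC client call against a model started with
#   python -m paddle_serving_server.serve --model uci_housing_model --port 9292
# The config path and the feed/fetch names ("x", "price") follow the
# uci_housing demo and are assumptions here.
from paddle_serving_client import Client

client = Client()
client.load_client_config("uci_housing_client/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9292"])

sample = [0.0] * 13  # one record with 13 normalized UCI housing features
fetch_map = client.predict(feed={"x": sample}, fetch=["price"])
print(fetch_map["price"])
```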

Paddle Serving预装的服务
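The `name` parameter in the parameter tables builds the HTTP endpoint for a web service. A sketch of posting a prediction over HTTP, assuming a service started with `--name uci` on port 9292, the `/{name}/prediction` URL pattern and `{"feed": [...], "fetch": [...]}` payload used by the uci_housing HTTP demo, and the `requests` package being available:

```python
# Sketch: HTTP prediction request. The URL pattern and payload layout follow
# the uci_housing web demo and are assumptions for other models.
import json

import requests

url = "http://127.0.0.1:9292/uci/prediction"
payload = {"feed": [{"x": [0.0] * 13}], "fetch": ["price"]}
resp = requests.post(url, data=json.dumps(payload),
                     headers={"Content-Type": "application/json"})
print(resp.json())
```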

@@ -116,7 +120,7 @@ python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --po | `port` | int | `9292` | Exposed port of current service to users| | `name` | str | `""` | Service name, can be used to generate HTTP request url | | `model` | str | `""` | Path of paddle model directory to be served | -| `mem_optim` | - | - | Enable memory optimization | +| `mem_optim_off` | - | - | Disable memory optimization | | `ir_optim` | - | - | Enable analysis and optimization of calculation graph | | `use_mkl` (Only for cpu version) | - | - | Run inference with MKL | diff --git a/cmake/external/brpc.cmake b/cmake/external/brpc.cmake index f5ef70379a5562617e77a9e2ff46587cd48a0f6c..39412f6950b7d4fe71f294079b69707b202f0876 100644 --- a/cmake/external/brpc.cmake +++ b/cmake/external/brpc.cmake @@ -40,8 +40,8 @@ ExternalProject_Add( extern_brpc ${EXTERNAL_PROJECT_LOG_ARGS} # TODO(gongwb): change to de newst repo when they changed. - GIT_REPOSITORY "https://github.com/gongweibao/brpc" - GIT_TAG "e9b67ec1b7458f2af5fae76451afe1e27e01b4b4" + GIT_REPOSITORY "https://github.com/wangjiawei04/brpc" + GIT_TAG "6d79e0b17f25107c35b705ea58d888083f59ff47" PREFIX ${BRPC_SOURCES_DIR} UPDATE_COMMAND "" CMAKE_ARGS -DCMAKE_CXX_COMPILER=${CMAKE_CXX_COMPILER} diff --git a/doc/COMPILE.md b/doc/COMPILE.md index 46ebfb4f1a882b6645cb1e9bb6155743e520951d..84b1b65cbdbb0dcf6079d30bd7ebc9baf4a8c6e1 100644 --- a/doc/COMPILE.md +++ b/doc/COMPILE.md @@ -4,12 +4,26 @@ ## Compilation environment requirements -- OS: CentOS 7 -- GCC: 4.8.2 and later -- Golang: 1.9.2 and later -- Git:2.17.1 and later -- CMake:3.2.2 and later -- Python:2.7.2 and later / 3.6 and later +| module | version | +| :--------------------------: | :----------------------------------------------------------: | +| OS | CentOS 7 | +| gcc | 4.8.5 and later | +| gcc-c++ | 4.8.5 and later | +| git | 3.82 and later | +| cmake | 3.2.0 and later | +| Python | 2.7.2 and later / 3.6 and later | +| Go | 1.9.2 and later | +| git | 2.17.1 and later | +| glibc-static | 2.17 | +| openssl-devel | 1.0.2k | +| bzip2-devel | 1.0.6 and later | +| python-devel / python3-devel | 2.7.5 and later / 3.6.8 and later | +| sqlite-devel | 3.7.17 and later | +| patchelf | 0.9 and later | +| libXext | 1.3.3 | +| libSM | 1.2.2 | +| libXrender | 0.9.10 | +| python-whl | numpy>=1.12, <=1.16.4
google>=2.0.3
protobuf>=3.12.2
grpcio-tools>=1.28.1
grpcio>=1.28.1
func-timeout>=4.3.5
pyyaml>=1.3.0
sentencepiece==0.1.92
flask>=1.1.2
ujson>=2.0.3 | It is recommended to use Docker for compilation. We have prepared the Paddle Serving compilation environment for you, see [this document](DOCKER_IMAGES.md). diff --git a/doc/COMPILE_CN.md b/doc/COMPILE_CN.md index 54f80d54d334835600d08846dc0fb42efe6558ee..a38faff4289a4946d82f8b4a71afd521c7cd48fd 100644 --- a/doc/COMPILE_CN.md +++ b/doc/COMPILE_CN.md @@ -4,12 +4,26 @@ ## 编译环境设置 -- OS: CentOS 7 -- GCC: 4.8.2及以上 -- Golang: 1.9.2及以上 -- Git:2.17.1及以上 -- CMake:3.2.2及以上 -- Python:2.7.2及以上 / 3.6及以上 +| 组件 | 版本要求 | +| :--------------------------: | :----------------------------------------------------------: | +| OS | CentOS 7 | +| gcc | 4.8.5 and later | +| gcc-c++ | 4.8.5 and later | +| git | 3.82 and later | +| cmake | 3.2.0 and later | +| Python | 2.7.2 and later / 3.6 and later | +| Go | 1.9.2 and later | +| git | 2.17.1 and later | +| glibc-static | 2.17 | +| openssl-devel | 1.0.2k | +| bzip2-devel | 1.0.6 and later | +| python-devel / python3-devel | 2.7.5 and later / 3.6.8 and later | +| sqlite-devel | 3.7.17 and later | +| patchelf | 0.9 | +| libXext | 1.3.3 | +| libSM | 1.2.2 | +| libXrender | 0.9.10 | +| python-whl | numpy>=1.12, <=1.16.4
google>=2.0.3
protobuf>=3.12.2
grpcio-tools>=1.28.1
grpcio>=1.28.1
func-timeout>=4.3.5
pyyaml>=1.3.0
sentencepiece==0.1.92
flask>=1.1.2
ujson>=2.0.3 | 推荐使用Docker编译,我们已经为您准备好了Paddle Serving编译环境,详见[该文档](DOCKER_IMAGES_CN.md)。 diff --git a/doc/CONTRIBUTE.md b/doc/CONTRIBUTE.md index 1d0f473ce0edfa6092ac1fe81440b53510d3f7a9..a3bfd0f274623cca0413e3eccf6c34e72a082031 100644 --- a/doc/CONTRIBUTE.md +++ b/doc/CONTRIBUTE.md @@ -68,7 +68,7 @@ Paddle Serving uses this [Git branching model](http://nvie.com/posts/a-successfu 1. Build and test - Users can build Paddle Serving natively on Linux, see the [BUILD steps](doc/INSTALL.md). + Users can build Paddle Serving natively on Linux, see the [BUILD steps](https://github.com/PaddlePaddle/Serving/blob/develop/doc/COMPILE.md). 1. Keep pulling diff --git a/doc/CUBE_LOCAL.md b/doc/CUBE_LOCAL.md index 4a8859b2958acfd4af5a3474f88afc48f3645c19..175a7037fe02525f3cc5215f71cdba4c12ec2bbd 100644 --- a/doc/CUBE_LOCAL.md +++ b/doc/CUBE_LOCAL.md @@ -6,7 +6,8 @@ There are two examples on CTR under python / examples, they are criteo_ctr, criteo_ctr_with_cube. The former is to save the entire model during training, including sparse parameters. The latter is to cut out the sparse parameters and save them into two parts, one is the sparse parameter and the other is the dense parameter. Because the scale of sparse parameters is very large in industrial cases, reaching the order of 10 ^ 9. Therefore, it is not practical to start large-scale sparse parameter prediction on one machine. Therefore, we introduced Baidu's industrial-grade product Cube to provide the sparse parameter service for many years to provide distributed sparse parameter services. -The local mode of Cube is different from distributed Cube, which is designed to be convenient for developers to use in experiments and demos. If there is a demand for distributed sparse parameter service, please continue reading [Distributed Cube User Guide](./Distributed_Cube) after reading this document (still developing). +The local mode of Cube is different from distributed Cube, which is designed to be convenient for developers to use in experiments and demos. + This document uses the original model without any compression algorithm. 
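The python-whl rows in the compilation requirement tables above can be verified inside the build environment with a short sketch; the requirement strings mirror python/requirements.txt in this change.

```python
# Sketch: check that the wheels listed in the python-whl row are installed at
# the required versions. Requirement strings mirror python/requirements.txt.
import pkg_resources

REQUIREMENTS = [
    "numpy>=1.12,<=1.16.4",
    "google>=2.0.3",
    "protobuf>=3.12.2",
    "grpcio-tools>=1.28.1",
    "grpcio>=1.28.1",
    "func-timeout>=4.3.5",
    "pyyaml>=1.3.0",
    "sentencepiece==0.1.92",
    "flask>=1.1.2",
    "ujson>=2.0.3",
]

for req in REQUIREMENTS:
    try:
        pkg_resources.require(req)
        print("ok      {}".format(req))
    except (pkg_resources.DistributionNotFound,
            pkg_resources.VersionConflict) as exc:
        print("missing {} ({})".format(req, exc))
```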
If there is a need for a quantitative model to go online, please read the [Quantization Storage on Cube Sparse Parameter Indexing](./CUBE_QUANT.md) diff --git a/doc/CUBE_LOCAL_CN.md b/doc/CUBE_LOCAL_CN.md index 2c5b478af1b0fa7eb51d89507431459bb6ed033e..9191fe8f54d3e9695d4da04adb82d3c3d33567b2 100644 --- a/doc/CUBE_LOCAL_CN.md +++ b/doc/CUBE_LOCAL_CN.md @@ -6,7 +6,7 @@ 在python/examples下有两个关于CTR的示例,他们分别是criteo_ctr, criteo_ctr_with_cube。前者是在训练时保存整个模型,包括稀疏参数。后者是将稀疏参数裁剪出来,保存成两个部分,一个是稀疏参数,另一个是稠密参数。由于在工业级的场景中,稀疏参数的规模非常大,达到10^9数量级。因此在一台机器上启动大规模稀疏参数预测是不实际的,因此我们引入百度多年来在稀疏参数索引领域的工业级产品Cube,提供分布式的稀疏参数服务。 -单机版Cube是分布式Cube的弱化版本,旨在方便开发者做实验和Demo时使用。如果有分布式稀疏参数服务的需求,请在读完此文档之后,继续阅读 [稀疏参数索引服务Cube使用指南](分布式Cube)(正在建设中)。 + 本文档使用的都是未经过任何压缩算法处理的原始模型,如果有量化模型上线需求,请阅读[Cube稀疏参数索引量化存储使用指南](./CUBE_QUANT_CN.md) diff --git a/doc/DESIGN_CN.md b/doc/DESIGN_CN.md index 4059c0ee4814abe2959d02e3a2268ac519951244..e795ad6da36ddd391826b8fa79f5ffd801e82368 100644 --- a/doc/DESIGN_CN.md +++ b/doc/DESIGN_CN.md @@ -106,7 +106,7 @@ class FluidFamilyCore { ![预测服务Service](predict-service.png) -关于OP之间的依赖关系,以及通过OP组建workflow,可以参考[从零开始写一个预测服务](CREATING.md)的相关章节 +关于OP之间的依赖关系,以及通过OP组建workflow,可以参考[从零开始写一个预测服务](https://github.com/PaddlePaddle/Serving/blob/develop/doc/deprecated/CREATING.md)的相关章节 服务端实例透视图 diff --git a/doc/FAQ.md b/doc/FAQ.md index 3bdd2dfd4739b54bf39b6b3f561c43bab3edabde..eb4f05a28594effcf59aac880cf4d81846a3a925 100644 --- a/doc/FAQ.md +++ b/doc/FAQ.md @@ -12,4 +12,7 @@ client.load_client_config(sys.argv[1]) client.set_rpc_timeout_ms(100000) client.connect(["127.0.0.1:9393"]) - ``` + ``` + +- Q: 如何使用自己编译的Paddle Serving进行预测? + A:通过pip命令安装自己编译出的whl包,并设置SERVING_BIN环境变量为编译出的serving二进制文件路径。 diff --git a/doc/LATEST_PACKAGES.md b/doc/LATEST_PACKAGES.md index 8dc196c0b2d91262c284edcbf5d724f11d200713..038641afd38192da5b99f714d278232d3ad79fb4 100644 --- a/doc/LATEST_PACKAGES.md +++ b/doc/LATEST_PACKAGES.md @@ -3,45 +3,51 @@ ## CPU server ### Python 3 ``` -https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server-0.3.1-py3-none-any.whl +https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server-0.3.2-py3-none-any.whl ``` ### Python 2 ``` -https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server-0.3.1-py2-none-any.whl +https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server-0.3.2-py2-none-any.whl ``` ## GPU server ### Python 3 ``` -https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.3.1-py3-none-any.whl +#cuda 9.0 +https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.3.2.post9-py3-none-any.whl +#cuda 10.0 +https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.3.2.post10-py3-none-any.whl ``` ### Python 2 ``` -https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.3.1-py2-none-any.whl +#cuda 9.0 +https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.3.2.post9-py2-none-any.whl +#cuda 10.0 +https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.3.2.post10-py2-none-any.whl ``` ## Client ### Python 3.7 ``` -https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.3.1-cp37-none-any.whl +https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.3.2-cp37-none-any.whl ``` ### Python 3.6 ``` -https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.3.1-cp36-none-any.whl +https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.3.2-cp36-none-any.whl ``` ### Python 2.7 ``` -https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.3.1-cp27-none-any.whl 
+https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.3.2-cp27-none-any.whl ``` ## App ### Python 3 ``` -https://paddle-serving.bj.bcebos.com/whl/paddle_serving_app-0.1.1-py3-none-any.whl +https://paddle-serving.bj.bcebos.com/whl/paddle_serving_app-0.1.2-py3-none-any.whl ``` ### Python 2 ``` -https://paddle-serving.bj.bcebos.com/whl/paddle_serving_app-0.1.1-py2-none-any.whl +https://paddle-serving.bj.bcebos.com/whl/paddle_serving_app-0.1.2-py2-none-any.whl ``` diff --git a/doc/PERFORMANCE_OPTIM.md b/doc/PERFORMANCE_OPTIM.md index 651be1c139b5960fa287fc3e981f3039f9f098a2..e87e9541cccadf318821807aa63ca4b0e6809a1b 100644 --- a/doc/PERFORMANCE_OPTIM.md +++ b/doc/PERFORMANCE_OPTIM.md @@ -14,7 +14,35 @@ Under the same conditions, the communication time of the HTTP prediction service Parameters for performance optimization: +The memory/graphic memory optimization option is enabled by default in Paddle Serving, which can reduce the memory/video memory usage and usually does not affect performance. If you need to turn it off, you can use --mem_optim_off in the command line. + +r_optim can optimize the calculation graph and increase the inference speed. It is turned off by default and turned on by --ir_optim in the command line. + | Parameters | Type | Default | Description | | ---------- | ---- | ------- | ------------------------------------------------------------ | -| mem_optim | - | - | Enable memory / graphic memory optimization | +| mem_optim_off | - | - | Disable memory / graphic memory optimization | | ir_optim | - | - | Enable analysis and optimization of calculation graph,including OP fusion, etc | + + +For the mode of using Python code to start the prediction service, the API of the above two parameters is as follows: + +RPC Service +``` +from paddle_serving_server import Server +server = Server() +... +server.set_memory_optimize(mem_optim) +server.set_ir_optimize(ir_optim) +... +``` + +HTTP Service +``` +from paddle_serving_server import WebService +class NewService(WebService): +... +new_service = NewService(name="new") +... +new_service.prepare_server(mem_optim=True, ir_optim=False) +... +``` diff --git a/doc/PERFORMANCE_OPTIM_CN.md b/doc/PERFORMANCE_OPTIM_CN.md index c35ea7a11c40ad2a5752d9add8fd8d9f8ddb2b64..2fa5bdef1bee1cdc9e9daceaf853403485a06b84 100644 --- a/doc/PERFORMANCE_OPTIM_CN.md +++ b/doc/PERFORMANCE_OPTIM_CN.md @@ -14,7 +14,33 @@ 性能优化相关参数: +Paddle Serving中默认开启内存/显存优化选项,可以减少对内存/显存的占用,通常不会对性能造成影响,如果需要关闭可以在命令行启动模式中使用--mem_optim_off。 +ir_optim可以优化计算图,提升推理速度,默认关闭,在命令行启动的模式中通过--ir_optim开启。 + | 参数 | 类型 | 默认值 | 含义 | | --------- | ---- | ------ | -------------------------------- | -| mem_optim | - | - | 开启内存/显存优化 | +| mem_optim_off | - | - | 关闭内存/显存优化 | | ir_optim | - | - | 开启计算图分析优化,包括OP融合等 | + + +对于使用Python代码启动预测服务的模式,以上两个参数的接口如下: +RPC服务 +``` +from paddle_serving_server import Server +server = Server() +... +server.set_memory_optimize(mem_optim) +server.set_ir_optimize(ir_optim) +... +``` + +HTTP服务 +``` +from paddle_serving_server import WebService +class NewService(WebService): +... +new_service = NewService(name="new") +... +new_service.prepare_server(mem_optim=True, ir_optim=False) +... 
+``` diff --git a/doc/deprecated/CREATING.md b/doc/deprecated/CREATING.md index d057af4c38ef97c14b532cc563157a514745acec..7fcd3edb5b0176ad54afb63b607cb528396a3802 100644 --- a/doc/deprecated/CREATING.md +++ b/doc/deprecated/CREATING.md @@ -77,7 +77,7 @@ service ImageClassifyService { 关于Serving端的配置的详细信息,可以参考[Serving端配置](SERVING_CONFIGURE.md) -以下配置文件将ReaderOP, ClassifyOP和WriteJsonOP串联成一个workflow (关于OP/workflow等概念,可参考[设计文档](DESIGN.md)) +以下配置文件将ReaderOP, ClassifyOP和WriteJsonOP串联成一个workflow (关于OP/workflow等概念,可参考[设计文档](../DESIGN.md)) - 配置文件示例: diff --git a/doc/deprecated/CTR_PREDICTION.md b/doc/deprecated/CTR_PREDICTION.md index 513b4560f025a08f3fc2ffe9a7fb96ada0b076c5..a55bcc3d883c31eb3ec12bc06676f11e69e23006 100755 --- a/doc/deprecated/CTR_PREDICTION.md +++ b/doc/deprecated/CTR_PREDICTION.md @@ -26,7 +26,7 @@ 第1) - 第5)步裁剪完毕后的模型网络配置如下: -![Pruned CTR prediction network](pruned-ctr-network.png) +![Pruned CTR prediction network](../pruned-ctr-network.png) 整个裁剪过程具体说明如下: diff --git a/doc/deprecated/DOCKER.md b/doc/deprecated/DOCKER.md deleted file mode 100644 index 0e865c66e2da32a4e0ed15df9f2632b98ffbcedf..0000000000000000000000000000000000000000 --- a/doc/deprecated/DOCKER.md +++ /dev/null @@ -1,72 +0,0 @@ -# Docker compilation environment preparation - -([简体中文](./DOCKER_CN.md)|English) - -## Environmental requirements - -+ Docker is installed on the development machine. -+ Compiling the GPU version requires nvidia-docker. - -## Dockerfile - -[CPU Version Dockerfile](../tools/Dockerfile) - -[GPU Version Dockerfile](../tools/Dockerfile.gpu) - -## Instructions - -### Building Docker Image - -Create a new directory and copy the Dockerfile to this directory. - -Run - -```bash -docker build -t serving_compile:cpu . -``` - -Or - -```bash -docker build -t serving_compile:cuda9 . -``` - -## Enter Docker Container - -CPU Version please run - -```bash -docker run -it serving_compile:cpu bash -``` - -GPU Version please run - -```bash -docker run -it --runtime=nvidia -it serving_compile:cuda9 bash -``` - -## List of supported environments compiled by Docker - -The list of supported environments is as follows:: - -| System Environment Supported by CPU Docker Compiled Executables | -| -------------------------- | -| Centos6 | -| Centos7 | -| Ubuntu16.04 | -| Ubuntu18.04 | - - - -| System Environment Supported by GPU Docker Compiled Executables | -| ---------------------------------- | -| Centos6_cuda9_cudnn7 | -| Centos7_cuda9_cudnn7 | -| Ubuntu16.04_cuda9_cudnn7 | -| Ubuntu16.04_cuda10_cudnn7 | - - - -**Remarks:** -+ If you cannot find libcrypto.so.10 and libssl.so.10 when you execute the pre-compiled version, you can change /usr/lib64/libssl.so.10 and /usr/lib64/libcrypto.so in the Docker environment. 10 Copy to the directory where the executable is located. -+ CPU pre-compiled version can only be executed on CPU machines, GPU pre-compiled version can only be executed on GPU machines. diff --git a/doc/deprecated/DOCKER_CN.md b/doc/deprecated/DOCKER_CN.md deleted file mode 100644 index 92cc3ac6ea34d6399d6204ff7b9ec2d12b412601..0000000000000000000000000000000000000000 --- a/doc/deprecated/DOCKER_CN.md +++ /dev/null @@ -1,72 +0,0 @@ -# Docker编译环境准备 - -(简体中文|[English](./DOCKER.md)) - -## 环境要求 - -+ 开发机上已安装Docker。 -+ 编译GPU版本需要安装nvidia-docker。 - -## Dockerfile文件 - -[CPU版本Dockerfile](../tools/Dockerfile) - -[GPU版本Dockerfile](../tools/Dockerfile.gpu) - -## 使用方法 - -### 构建Docker镜像 - -建立新目录,复制Dockerfile内容到该目录下Dockerfile文件。 - -执行 - -```bash -docker build -t serving_compile:cpu . 
-``` - -或者 - -```bash -docker build -t serving_compile:cuda9 . -``` - -## 进入Docker - -CPU版本请执行 - -```bash -docker run -it serving_compile:cpu bash -``` - -GPU版本请执行 - -```bash -docker run -it --runtime=nvidia -it serving_compile:cuda9 bash -``` - -## Docker编译出的可执行文件支持的环境列表 - -经过验证的环境列表如下: - -| CPU Docker编译出的可执行文件支持的系统环境 | -| -------------------------- | -| Centos6 | -| Centos7 | -| Ubuntu16.04 | -| Ubuntu18.04 | - - - -| GPU Docker编译出的可执行文件支持的系统环境 | -| ---------------------------------- | -| Centos6_cuda9_cudnn7 | -| Centos7_cuda9_cudnn7 | -| Ubuntu16.04_cuda9_cudnn7 | -| Ubuntu16.04_cuda10_cudnn7 | - - - -**备注:** -+ 若执行预编译版本出现找不到libcrypto.so.10、libssl.so.10的情况,可以将Docker环境中的/usr/lib64/libssl.so.10与/usr/lib64/libcrypto.so.10复制到可执行文件所在目录。 -+ CPU预编译版本仅可在CPU机器上执行,GPU预编译版本仅可在GPU机器上执行。 diff --git a/doc/deprecated/GETTING_STARTED.md b/doc/deprecated/GETTING_STARTED.md deleted file mode 100644 index e7eed4f41518ec2ca8b191a6d93da86aafc09e9e..0000000000000000000000000000000000000000 --- a/doc/deprecated/GETTING_STARTED.md +++ /dev/null @@ -1,27 +0,0 @@ - -# Getting Started - -请先按照[编译安装说明](INSTALL.md)完成编译 - -## 运行示例 -说明:Imagenet图像分类模型,默认采用CPU模式(GPU模式当前版本暂未提供支持) - -Step1:启动Server端: -```shell -cd /path/to/paddle-serving/output/demo/serving/ && ./bin/serving & -``` - -默认启动后日志写在./log/下,可tail日志查看serving端接收请求的日志: -```shell -tail -f log/serving.INFO -``` - -Step2:启动Client端: -```shell -cd path/to/paddle-serving/output/demo/client/image_classification && ./bin/ximage & -``` - -默认启动后日志写在./log/下,可tail日志查看分类结果: -```shell -tail -f log/ximage.INFO -``` diff --git a/doc/deprecated/HTTP_INTERFACE.md b/doc/deprecated/HTTP_INTERFACE.md index 5be35c745010ef87caae66e60dd577f04408b167..96df2edc7b98aaa995e93fcd806cded01d044bd7 100644 --- a/doc/deprecated/HTTP_INTERFACE.md +++ b/doc/deprecated/HTTP_INTERFACE.md @@ -72,7 +72,7 @@ for i in range(0, len(samples) - BATCH_SIZE, BATCH_SIZE): print e.reason ``` -完整示例请参考[text_classification.py](../demo-client/python/text_classification.py) +完整示例请参考[text_classification.py](https://github.com/PaddlePaddle/Serving/blob/develop/tools/cpp_examples/demo-client/python/text_classification.py) ## 3. 
PHP访问HTTP Serving @@ -128,4 +128,4 @@ for ($i = 0; $i < count($samples) - BATCH_SIZE; $i += BATCH_SIZE) { curl_close($ch); ``` -完整代码请参考[text_classification.php](../demo-client/php/text_classification.php) +完整代码请参考[text_classification.php](https://github.com/PaddlePaddle/Serving/blob/develop/tools/cpp_examples/demo-client/php/text_classification.php) diff --git a/doc/deprecated/INDEX.md b/doc/deprecated/INDEX.md deleted file mode 100644 index 11f330a10eefa83522631e9f630fc27da93dabfe..0000000000000000000000000000000000000000 --- a/doc/deprecated/INDEX.md +++ /dev/null @@ -1,21 +0,0 @@ -[Design](DESIGN.md) - -[Installation](INSTALL.md) - -[Getting Started](GETTING_STARTED.md) - -[Creating a Prediction Service](CREATING.md) - -[Client Configure](CLIENT_CONFIGURE.md) - -[Server Side Configuration](SERVING_CONFIGURE.md) - -[How to Configure a Clustered Service](CLUSTERING.md) - -[Multiple Serving Instances over Single GPU Card](MULTI_SERVING_OVER_SINGLE_GPU_CARD.md) - -[Benchmarking](BENCHMARKING.md) - -[GPU Benchmarking](GPU_BENCHMARKING.md) - -[FAQ](FAQ.md) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 098453a2da2411f5bb83cbdd248898e8879a3922..edec41573b67f50feca52ee017bae2d7fa2b28ac 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -83,6 +83,7 @@ if (SERVER) OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server_gpu/ ${PADDLE_SERVING_BINARY_DIR}/python/ + COMMAND env ${py_env} ${PYTHON_EXECUTABLE} paddle_serving_server_gpu/gen_cuda_version.py ${CUDA_VERSION_MAJOR} COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES}) add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp) diff --git a/python/examples/grpc_impl_example/criteo_ctr_with_cube/README_CN.md b/python/examples/grpc_impl_example/criteo_ctr_with_cube/README_CN.md deleted file mode 100644 index 07fc1acc18903256c49d77e2af8e9c2d74b21c16..0000000000000000000000000000000000000000 --- a/python/examples/grpc_impl_example/criteo_ctr_with_cube/README_CN.md +++ /dev/null @@ -1,40 +0,0 @@ -## 带稀疏参数索引服务的CTR预测服务 - -该样例是为了展示gRPC Server 端 `load_model_config` 函数,在这个例子中,bRPC Server 端与 bRPC Client 端的配置文件是不同的(bPRC Client 端的数据先交给 cube,经过 cube 处理后再交给预测库) - -### 获取样例数据 -``` -sh get_data.sh -``` - -### 下载模型和稀疏参数序列文件 -``` -wget https://paddle-serving.bj.bcebos.com/unittest/ctr_cube_unittest.tar.gz -tar xf ctr_cube_unittest.tar.gz -mv models/ctr_client_conf ./ -mv models/ctr_serving_model_kv ./ -mv models/data ./cube/ -``` -执行脚本后会在当前目录有ctr_server_model_kv和ctr_client_config文件夹。 - -### 启动稀疏参数索引服务 -``` -wget https://paddle-serving.bj.bcebos.com/others/cube_app.tar.gz -tar xf cube_app.tar.gz -mv cube_app/cube* ./cube/ -sh cube_prepare.sh & -``` - -此处,模型当中的稀疏参数会被存放在稀疏参数索引服务Cube当中,关于稀疏参数索引服务Cube的介绍,请阅读[稀疏参数索引服务Cube单机版使用指南](../../../doc/CUBE_LOCAL_CN.md) - -### 启动RPC预测服务,服务端线程数为4(可在test_server.py配置) - -``` -python test_server.py ctr_serving_model_kv ctr_client_conf/serving_client_conf.prototxt -``` - -### 执行预测 - -``` -python test_client.py ./raw_data -``` diff --git a/python/examples/imagenet/resnet50_web_service.py b/python/examples/imagenet/resnet50_web_service.py index 3966d31c951d83d8f984e5a265504035ed273125..e7d1914973f2aeb58a912f7d85e35f85718d7a9b 100644 --- a/python/examples/imagenet/resnet50_web_service.py +++ b/python/examples/imagenet/resnet50_web_service.py @@ -54,6 +54,7 @@ class ImageService(WebService): score_list = fetch_map["score"] result = {"label": [], 
"prob": []} for score in score_list: + score = score.tolist() max_score = max(score) result["label"].append(self.label_dict[score.index(max_score)] .strip().replace(",", "")) @@ -65,7 +66,7 @@ image_service = ImageService(name="image") image_service.load_model_config(sys.argv[1]) image_service.init_imagenet_setting() if device == "gpu": - image_service.set_gpus("0,1") + image_service.set_gpus("0") image_service.prepare_server( workdir="workdir", port=int(sys.argv[3]), device=device) image_service.run_rpc_service() diff --git a/python/paddle_serving_server/serve.py b/python/paddle_serving_server/serve.py index 009a6ce00af2290b64716e211429385d09189831..704cf0304adf1ac647c244063c2b23049f92b221 100644 --- a/python/paddle_serving_server/serve.py +++ b/python/paddle_serving_server/serve.py @@ -40,7 +40,7 @@ def parse_args(): # pylint: disable=doc-string-missing parser.add_argument( "--device", type=str, default="cpu", help="Type of device") parser.add_argument( - "--mem_optim", + "--mem_optim_off", default=False, action="store_true", help="Memory optimize") @@ -68,7 +68,7 @@ def start_standard_model(): # pylint: disable=doc-string-missing port = args.port workdir = args.workdir device = args.device - mem_optim = args.mem_optim + mem_optim = args.mem_optim_off is False ir_optim = args.ir_optim max_body_size = args.max_body_size use_mkl = args.use_mkl diff --git a/python/paddle_serving_server/web_service.py b/python/paddle_serving_server/web_service.py index d9b9e3f1b1dcfa9502096d0eab4e3be61d2bbaa6..b0c1b79bda5041b4eca114d778a23d3a123c226e 100755 --- a/python/paddle_serving_server/web_service.py +++ b/python/paddle_serving_server/web_service.py @@ -41,6 +41,8 @@ class WebService(object): server = Server() server.set_op_sequence(op_seq_maker.get_op_sequence()) server.set_num_threads(16) + server.set_memory_optimize(self.mem_optim) + server.set_ir_optimize(self.ir_optim) server.load_model_config(self.model_config) server.prepare_server( workdir=self.workdir, port=self.port_list[0], device=self.device) @@ -55,12 +57,19 @@ class WebService(object): else: return False - def prepare_server(self, workdir="", port=9393, device="cpu"): + def prepare_server(self, + workdir="", + port=9393, + device="cpu", + mem_optim=True, + ir_optim=False): self.workdir = workdir self.port = port self.device = device default_port = 12000 self.port_list = [] + self.mem_optim = mem_optim + self.ir_optim = ir_optim for i in range(1000): if self.port_is_available(default_port + i): self.port_list.append(default_port + i) @@ -83,8 +92,6 @@ class WebService(object): if isinstance(feed, dict) and "fetch" in feed: del feed["fetch"] fetch_map = self.client.predict(feed=feed, fetch=fetch) - for key in fetch_map: - fetch_map[key] = fetch_map[key].tolist() result = self.postprocess( feed=request.json["feed"], fetch=fetch, fetch_map=fetch_map) result = {"result": result} @@ -128,4 +135,6 @@ class WebService(object): return feed, fetch def postprocess(self, feed=[], fetch=[], fetch_map=None): + for key in fetch_map: + fetch_map[key] = fetch_map[key].tolist() return fetch_map diff --git a/python/paddle_serving_server_gpu/__init__.py b/python/paddle_serving_server_gpu/__init__.py index 0261003a7863d11fb342d1572b124d1cbb533a2b..1d94bf3093e6d76b260f53acd0c799080627c0ab 100644 --- a/python/paddle_serving_server_gpu/__init__.py +++ b/python/paddle_serving_server_gpu/__init__.py @@ -41,7 +41,7 @@ from concurrent import futures def serve_args(): parser = argparse.ArgumentParser("serve") parser.add_argument( - "--thread", type=int, default=10, 
help="Concurrency of server") + "--thread", type=int, default=2, help="Concurrency of server") parser.add_argument( "--model", type=str, default="", help="Model for serving") parser.add_argument( @@ -57,7 +57,7 @@ def serve_args(): parser.add_argument( "--name", type=str, default="None", help="Default service name") parser.add_argument( - "--mem_optim", + "--mem_optim_off", default=False, action="store_true", help="Memory optimize") @@ -187,7 +187,7 @@ class Server(object): self.cube_config_fn = "cube.conf" self.workdir = "" self.max_concurrency = 0 - self.num_threads = 4 + self.num_threads = 2 self.port = 8080 self.reload_interval_s = 10 self.max_body_size = 64 * 1024 * 1024 @@ -363,7 +363,15 @@ class Server(object): def download_bin(self): os.chdir(self.module_path) need_download = False - device_version = "serving-gpu-" + + #acquire lock + version_file = open("{}/version.py".format(self.module_path), "r") + import re + for line in version_file.readlines(): + if re.match("cuda_version", line): + cuda_version = line.split("\"")[1] + device_version = "serving-gpu-cuda" + cuda_version + "-" + folder_name = device_version + serving_server_version tar_name = folder_name + ".tar.gz" bin_url = "https://paddle-serving.bj.bcebos.com/bin/" + tar_name @@ -372,8 +380,6 @@ class Server(object): download_flag = "{}/{}.is_download".format(self.module_path, folder_name) - #acquire lock - version_file = open("{}/version.py".format(self.module_path), "r") fcntl.flock(version_file, fcntl.LOCK_EX) if os.path.exists(download_flag): @@ -385,6 +391,7 @@ class Server(object): os.system("touch {}/{}.is_download".format(self.module_path, folder_name)) print('Frist time run, downloading PaddleServing components ...') + r = os.system('wget ' + bin_url + ' --no-check-certificate') if r != 0: if os.path.exists(tar_name): diff --git a/python/paddle_serving_server_gpu/gen_cuda_version.py b/python/paddle_serving_server_gpu/gen_cuda_version.py new file mode 100644 index 0000000000000000000000000000000000000000..4a320a0e4dd9f9145a2c7682d5eecb7f582862b5 --- /dev/null +++ b/python/paddle_serving_server_gpu/gen_cuda_version.py @@ -0,0 +1,27 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import sys +import re +import os + +new_str = "" +with open("paddle_serving_server_gpu/version.py", "r") as f: + for line in f.readlines(): + if re.match("cuda_version", line): + line = re.sub(r"\d+", sys.argv[1], line) + new_str = new_str + line + +with open("paddle_serving_server_gpu/version.py", "w") as f: + f.write(new_str) diff --git a/python/paddle_serving_server_gpu/serve.py b/python/paddle_serving_server_gpu/serve.py index e26b32c2699d09b714b2658cafad0ae8c5138071..3b0941a97560f11a52808fc7e152419e2cec0ba0 100644 --- a/python/paddle_serving_server_gpu/serve.py +++ b/python/paddle_serving_server_gpu/serve.py @@ -34,7 +34,7 @@ def start_gpu_card_model(index, gpuid, args): # pylint: disable=doc-string-miss port = args.port + index thread_num = args.thread model = args.model - mem_optim = args.mem_optim + mem_optim = args.mem_optim_off is False ir_optim = args.ir_optim max_body_size = args.max_body_size use_multilang = args.use_multilang diff --git a/python/paddle_serving_server_gpu/version.py b/python/paddle_serving_server_gpu/version.py index f7fc14b2a7f0c25b471e8d3bb44e9d6db6839d01..2272c3aa91f999697ea8ef3e2cdb585b01db8bed 100644 --- a/python/paddle_serving_server_gpu/version.py +++ b/python/paddle_serving_server_gpu/version.py @@ -15,3 +15,4 @@ serving_client_version = "0.3.2" serving_server_version = "0.3.2" module_proto_version = "0.3.2" +cuda_version = "9" diff --git a/python/paddle_serving_server_gpu/web_service.py b/python/paddle_serving_server_gpu/web_service.py index 6750de86f1750f2ab9dc36eca9d4307f7821e2d8..5e9fdf4f4fda84dfb7c4f598fae6cf2381c377ca 100644 --- a/python/paddle_serving_server_gpu/web_service.py +++ b/python/paddle_serving_server_gpu/web_service.py @@ -41,7 +41,9 @@ class WebService(object): workdir="conf", port=9292, gpuid=0, - thread_num=10): + thread_num=2, + mem_optim=True, + ir_optim=False): device = "gpu" if gpuid == -1: device = "cpu" @@ -58,6 +60,8 @@ class WebService(object): server = Server() server.set_op_sequence(op_seq_maker.get_op_sequence()) server.set_num_threads(thread_num) + server.set_memory_optimize(mem_optim) + server.set_ir_optimize(ir_optim) server.load_model_config(self.model_config) if gpuid >= 0: @@ -77,7 +81,13 @@ class WebService(object): else: return False - def prepare_server(self, workdir="", port=9393, device="gpu", gpuid=0): + def prepare_server(self, + workdir="", + port=9393, + device="gpu", + gpuid=0, + mem_optim=True, + ir_optim=False): self.workdir = workdir self.port = port self.device = device @@ -94,7 +104,12 @@ class WebService(object): # init cpu service self.rpc_service_list.append( self.default_rpc_service( - self.workdir, self.port_list[0], -1, thread_num=10)) + self.workdir, + self.port_list[0], + -1, + thread_num=2, + mem_optim=mem_optim, + ir_optim=ir_optim)) else: for i, gpuid in enumerate(self.gpus): self.rpc_service_list.append( @@ -102,7 +117,9 @@ class WebService(object): "{}_{}".format(self.workdir, i), self.port_list[i], gpuid, - thread_num=10)) + thread_num=2, + mem_optim=mem_optim, + ir_optim=ir_optim)) def _launch_web_service(self): gpu_num = len(self.gpus) @@ -204,4 +221,6 @@ class WebService(object): return feed, fetch def postprocess(self, feed=[], fetch=[], fetch_map=None): + for key in fetch_map.iterkeys(): + fetch_map[key] = fetch_map[key].tolist() return fetch_map diff --git a/python/requirements.txt b/python/requirements.txt index 5f5cfdc52464d5c9dc9ad40ec11be72c86dc6b2c..697b24fd4db6aff6b30913d8a5d23416dc208c80 100644 --- a/python/requirements.txt +++ b/python/requirements.txt @@ -1,6 +1,10 @@ 
numpy>=1.12, <=1.16.4 ; python_version<"3.5" +google>=2.0.3 protobuf>=3.12.2 grpcio-tools>=1.28.1 grpcio>=1.28.1 func-timeout>=4.3.5 pyyaml>=1.3.0 +sentencepiece==0.1.92 +flask>=1.1.2 +ujson>=2.0.3 diff --git a/python/setup.py.server_gpu.in b/python/setup.py.server_gpu.in index 65dec4621fceba3967ff21814b218c0229a5124b..4554c1d368f70a32d16ceeabb54d63625f9f256d 100644 --- a/python/setup.py.server_gpu.in +++ b/python/setup.py.server_gpu.in @@ -41,7 +41,6 @@ REQUIRED_PACKAGES = [ 'paddle_serving_client', 'flask >= 1.1.1', 'paddle_serving_app' ] - packages=['paddle_serving_server_gpu', 'paddle_serving_server_gpu.proto', 'paddle_serving_server_gpu.pipeline', @@ -58,7 +57,7 @@ package_dir={'paddle_serving_server_gpu': setup( name='paddle-serving-server-gpu', - version=serving_server_version.replace('-', ''), + version=serving_server_version.replace('-', '') + '.post@CUDA_VERSION_MAJOR@', description= ('Paddle Serving Package for saved model with PaddlePaddle'), url='https://github.com/PaddlePaddle/Serving', diff --git a/tools/Dockerfile b/tools/Dockerfile index dd18a773562bd078771d7df44123ac530764af93..6c61937755ea5e0257e70ce27cab528b76222b12 100644 --- a/tools/Dockerfile +++ b/tools/Dockerfile @@ -2,7 +2,7 @@ FROM centos:7.3.1611 RUN yum -y install wget && \ yum -y install epel-release && yum -y install patchelf && \ - yum -y install gcc make python-devel && \ + yum -y install gcc gcc-c++ make python-devel && \ yum -y install libSM-1.2.2-2.el7.x86_64 --setopt=protected_multilib=false && \ yum -y install libXrender-0.9.10-1.el7.x86_64 --setopt=protected_multilib=false && \ yum -y install libXext-1.3.3-3.el7.x86_64 --setopt=protected_multilib=false && \ diff --git a/tools/Dockerfile.cuda10.0-cudnn7.devel b/tools/Dockerfile.cuda10.0-cudnn7.devel index 8021ef31f05622cec6fb3aff681feb5107d2be2c..b46f9b96cf0d081cf9cdfc12cb46be037677ac86 100644 --- a/tools/Dockerfile.cuda10.0-cudnn7.devel +++ b/tools/Dockerfile.cuda10.0-cudnn7.devel @@ -1,4 +1,4 @@ -FROM nvidia/cuda:10.0-cudnn7-runtime-centos7 +FROM nvidia/cuda:10.0-cudnn7-devel-centos7 RUN yum -y install wget >/dev/null \ && yum -y install gcc gcc-c++ make glibc-static which \ diff --git a/tools/serving_build.sh b/tools/serving_build.sh index 4bb68d938bafaa0a0ac8641284b66024e6b38d6a..ac6e5f8cb5fcb7db5e7890c09b08d12ba14d0294 100644 --- a/tools/serving_build.sh +++ b/tools/serving_build.sh @@ -54,7 +54,6 @@ function build_app() { local DIRNAME=build-app-$TYPE mkdir $DIRNAME # pwd: /Serving cd $DIRNAME # pwd: /Serving/build-app-$TYPE - pip install numpy sentencepiece case $TYPE in CPU|GPU) cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \ @@ -295,8 +294,6 @@ function python_run_criteo_ctr_with_cube() { function python_test_bert() { # pwd: /Serving/python/examples local TYPE=$1 - yum install -y libXext libSM libXrender >/dev/null - pip install ujson export SERVING_BIN=${SERVING_WORKDIR}/build-server-${TYPE}/core/general-server/serving cd bert # pwd: /Serving/python/examples/bert case $TYPE in
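Tying together the switches touched in this change (the `--mem_optim_off` flag in serve.py, the `mem_optim`/`ir_optim` arguments on `WebService.prepare_server`, and `Server.set_memory_optimize` / `Server.set_ir_optimize`), the sketch below starts an RPC server programmatically with both options explicit. The OpMaker/OpSeqMaker sequence (`general_reader` → `general_infer` → `general_response`) and `run_server()` follow typical paddle_serving_server usage and are assumptions here, as is the uci_housing_model path.

```python
# Sketch: programmatic equivalent of
#   python -m paddle_serving_server.serve --model uci_housing_model --port 9292 --ir_optim
# with memory optimization left on (the default) and graph optimization enabled.
from paddle_serving_server import OpMaker, OpSeqMaker, Server

op_maker = OpMaker()
read_op = op_maker.create('general_reader')
infer_op = op_maker.create('general_infer')
response_op = op_maker.create('general_response')

op_seq_maker = OpSeqMaker()
op_seq_maker.add_op(read_op)
op_seq_maker.add_op(infer_op)
op_seq_maker.add_op(response_op)

server = Server()
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.set_num_threads(4)
server.set_memory_optimize(True)   # same effect as not passing --mem_optim_off
server.set_ir_optimize(True)       # same effect as passing --ir_optim
server.load_model_config("uci_housing_model")
server.prepare_server(workdir="workdir", port=9292, device="cpu")
server.run_server()
```

For the HTTP path, the same two switches are exposed through `prepare_server(mem_optim=..., ir_optim=...)` on WebService, as shown in the PERFORMANCE_OPTIM.md snippets in this change.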