Commit 36b9eac6 authored by H HexToString

Merge branch 'fix_rpc_bug' of github.com:HexToString/Serving into benchmark_merge

...@@ -136,8 +136,10 @@ LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/mklml/lib)
SET(CMAKE_INSTALL_RPATH "${CMAKE_INSTALL_RPATH}" "${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib")
LINK_DIRECTORIES(${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib)
if (NOT WITH_MKLML)
ADD_LIBRARY(openblas STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET openblas PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/third_party/install/openblas/lib/libopenblas.a)
endif()
ADD_LIBRARY(paddle_inference STATIC IMPORTED GLOBAL)
SET_PROPERTY(TARGET paddle_inference PROPERTY IMPORTED_LOCATION ${PADDLE_INSTALL_DIR}/lib/libpaddle_inference.a)
......
...@@ -22,7 +22,7 @@ cmake -DPYTHON_INCLUDE_DIR=/usr/include/python3.7m/ \
-DSERVER=ON ..
make -j10
```
You can run `make install` to produce the target in the `./output` directory. Add `-DCMAKE_INSTALL_PREFIX=./output` to the CMake command shown above to specify the output path. Please specify `-DWITH_MKL=ON` on Intel CPU platforms with AVX2 support.
* Compile the Serving Client
```
mkdir -p client-build-arm && cd client-build-arm
......
...@@ -25,10 +25,10 @@ kubectl apply -f https://bit.ly/kong-ingress-dbless
The script is `tools/generate_runtime_docker.sh`, and it is used as follows:
```bash
bash tools/generate_runtime_docker.sh --env cuda10.1 --python 3.6 --serving 0.6.0 --paddle 2.0.1 --name serving_runtime:cuda10.1-py36
```
This generates a runtime image with cuda10.1, python 3.6, Serving version 0.6.0 and Paddle version 2.0.1. If you have any other questions, run the following command for help information.
```
bash tools/generate_runtime_docker.sh --help
...@@ -39,7 +39,7 @@ bash tools/generate_runtime_docker.sh --help
- paddle-serving-server, paddle-serving-client, paddle-serving-app, paddlepaddle; the exact versions can be found in tools/runtime.dockerfile, which can also be customized there if needed.
- the paddle-serving-server binary executable

In other words, once the runtime image is generated, we only need to copy our code (if any) and the model into the image. The generated image is named `paddle_serving:cuda10.2-py36`.

### Add your code and model
...@@ -50,8 +50,8 @@ bash tools/generate_runtime_docker.sh --help
For pipeline mode, we need to make sure that the model, program files, configuration files and other dependencies can all run inside the image, so we can place our executable files under `/home/project`. We take `Serving/python/example/pipeline/ocr` as an example; it is an OCR text recognition task.
```bash
#Assume you already have a Serving runtime image named paddle_serving:cuda10.2-py36
docker run --rm -dit --name pipeline_serving_demo paddle_serving:cuda10.2-py36 bash
cd Serving/python/example/pipeline/ocr
# get models
python -m paddle_serving_app.package --get_model ocr_rec
...@@ -71,7 +71,7 @@ docker commit pipeline_serving_demo ocr_serving:latest
```
docker exec -it pipeline_serving_demo bash
cd /home/ocr
python3.6 web_service.py
```
After entering the project directory inside the container, the remaining steps are the same as debugging code locally.
...@@ -83,8 +83,8 @@ python3.7 web_service.py
Web service mode is essentially similar to pipeline mode, so we take `Serving/python/examples/bert` as an example.
```bash
#Assume you already have a Serving runtime image named registry.baidubce.com/paddlepaddle/serving:0.6.0-cuda10.2-py36
docker run --rm -dit --name webservice_serving_demo registry.baidubce.com/paddlepaddle/serving:0.6.0-cpu-py36 bash
cd Serving/python/examples/bert
### download model
wget https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/SemanticModel/bert_chinese_L-12_H-768_A-12.tar.gz
...@@ -102,7 +102,7 @@ docker commit webservice_serving_demo bert_serving:latest
```bash
docker exec -it webservice_serving_demo bash
cd /home/bert
python3.6 bert_web_service.py bert_seq128_model 9292
```
After entering the project directory inside the container, the remaining steps are the same as debugging code locally.
...@@ -118,14 +118,15 @@ Kubernetes cluster operations require `kubectl` to manipulate yaml files. Here we provide
- pipeline ocr example
```bash
sh tools/generate_k8s_yamls.sh --app_name ocr --image_name registry.baidubce.com/paddlepaddle/serving:k8s-pipeline-demo --workdir /home/ocr --command "python3.6 web_service.py" --port 9999
```
- web service bert example
```bash
sh tools/generate_k8s_yamls.sh --app_name bert --image_name registry.baidubce.com/paddlepaddle/serving:k8s-web-demo --workdir /home/bert --command "python3.6 bert_web_service.py bert_seq128_model 9292" --port 9292
```

**Note that app_name must be the same as the function name in the URL. For example, the access URL of bert in the example is `https://127.0.0.1:9292/bert/prediction`, so app_name should be bert.**
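
To sanity-check that mapping, you can post to the prediction URL directly. A minimal sketch (the `feed`/`fetch` payload keys and the field names below are illustrative placeholders, not taken from this repo; adjust them and the URL scheme to your actual deployment):
```python
# Hypothetical smoke test: app_name "bert" must appear in the URL path.
import requests

url = "http://127.0.0.1:9292/bert/prediction"  # scheme/host depend on your deployment
payload = {"feed": [{"words": "hello"}], "fetch": ["pooled_output"]}  # placeholder names
print(requests.post(url, json=payload).json())
```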
Next we will see two yaml files, `k8s_serving.yaml` and `k8s_ingress.yaml`.
...@@ -174,7 +175,7 @@ spec:
      workingDir: /home/ocr
      name: ocr
      command: ['/bin/bash', '-c']
      args: ["python3.6 bert_web_service.py bert_seq128_model 9292"]
      env:
      - name: NODE_NAME
        valueFrom:
...@@ -216,7 +217,8 @@ spec:
Finally, we can run the following to start the containers and the API gateway.
```
kubectl apply -f k8s_serving.yaml
kubectl apply -f k8s_ingress.yaml
```
Enter
......
...@@ -27,38 +27,6 @@ mvn compile
mvn install
```
### Start the server (not pipeline)
Take the fit_a_line model as an example; start the server:
```
cd ../../python/examples/fit_a_line
sh get_data.sh
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --use_multilang &
```
Client prediction
```
cd ../../../java/examples/target
java -cp paddle-serving-sdk-java-examples-0.0.1-jar-with-dependencies.jar PaddleServingClientExample fit_a_line
```
Take yolov4 as an example; start the server:
```
python -m paddle_serving_app.package --get_model yolov4
tar -xzvf yolov4.tar.gz
python -m paddle_serving_server_gpu.serve --model yolov4_model --port 9393 --gpu_ids 0 --use_multilang & #This must be executed in GPU Docker; otherwise use the CPU launch method.
```
Client prediction
```
# in /Serving/java/examples/target
java -cp paddle-serving-sdk-java-examples-0.0.1-jar-with-dependencies.jar PaddleServingClientExample yolov4 ../../../python/examples/yolov4/000000570688.jpg
# The yolov4 case requires an image as input
```
### Start the server (pipeline)
For input data type = string, take the IMDB model ensemble as an example; start the server:
......
...@@ -27,40 +27,6 @@ mvn compile
mvn install
```
### Start the server (not pipeline)
Take the fit_a_line model as an example; start the server:
```
cd ../../python/examples/fit_a_line
sh get_data.sh
python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9393 --use_multilang &
```
Client prediction
```
cd ../../../java/examples/target
java -cp paddle-serving-sdk-java-examples-0.0.1-jar-with-dependencies.jar PaddleServingClientExample fit_a_line
```
Take yolov4 as an example; start the server:
```
python -m paddle_serving_app.package --get_model yolov4
tar -xzvf yolov4.tar.gz
python -m paddle_serving_server_gpu.serve --model yolov4_model --port 9393 --gpu_ids 0 --use_multilang & #This must be executed in GPU Docker; otherwise use the CPU launch method.
```
Client prediction
```
# in /Serving/java/examples/target
java -cp paddle-serving-sdk-java-examples-0.0.1-jar-with-dependencies.jar PaddleServingClientExample yolov4 ../../../python/examples/yolov4/000000570688.jpg
# The yolov4 case requires an image as input
```
### Start the server (Pipeline)
For input data type = string, take the IMDB model ensemble as an example; start the server:
......
...@@ -79,6 +79,9 @@ class RecOp(Op):
        feed_list = []
        img_list = []
        max_wh_ratio = 0
        ## One batch, the type of feed_data is dict.
        """
        for i, dtbox in enumerate(dt_boxes):
            boximg = self.get_rotate_crop_image(im, dt_boxes[i])
            img_list.append(boximg)
...@@ -92,14 +95,73 @@ class RecOp(Op):
            norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio)
            imgs[id] = norm_img
        feed = {"image": imgs.copy()}
        return feed, False, None, ""
        """

        ## Many mini-batches, the type of feed_data is list.
        max_batch_size = 6  # len(dt_boxes)

        # If max_batch_size is 0, skip the predict stage
        if max_batch_size == 0:
            return {}, True, None, ""
        boxes_size = len(dt_boxes)
        batch_size = boxes_size // max_batch_size
        rem = boxes_size % max_batch_size
        #_LOGGER.info("max_batch_len:{}, batch_size:{}, rem:{}, boxes_size:{}".format(max_batch_size, batch_size, rem, boxes_size))
        for bt_idx in range(0, batch_size + 1):
            imgs = None
            boxes_num_in_one_batch = 0
            if bt_idx == batch_size:
                if rem == 0:
                    continue
                else:
                    boxes_num_in_one_batch = rem
            elif bt_idx < batch_size:
                boxes_num_in_one_batch = max_batch_size
            else:
                _LOGGER.error("batch_size error, bt_idx={}, batch_size={}".
                              format(bt_idx, batch_size))
                break

            start = bt_idx * max_batch_size
            end = start + boxes_num_in_one_batch
            img_list = []
            for box_idx in range(start, end):
                boximg = self.get_rotate_crop_image(im, dt_boxes[box_idx])
                img_list.append(boximg)
                h, w = boximg.shape[0:2]
                wh_ratio = w * 1.0 / h
                max_wh_ratio = max(max_wh_ratio, wh_ratio)
            _, w, h = self.ocr_reader.resize_norm_img(img_list[0],
                                                      max_wh_ratio).shape
            #_LOGGER.info("---- idx:{}, w:{}, h:{}".format(bt_idx, w, h))
            imgs = np.zeros((boxes_num_in_one_batch, 3, w, h)).astype('float32')
            for id, img in enumerate(img_list):
                norm_img = self.ocr_reader.resize_norm_img(img, max_wh_ratio)
                imgs[id] = norm_img
            feed = {"image": imgs.copy()}
            feed_list.append(feed)
        #_LOGGER.info("feed_list : {}".format(feed_list))
        return feed_list, False, None, ""

    def postprocess(self, input_dicts, fetch_data, log_id):
        res_list = []
        if isinstance(fetch_data, dict):
            if len(fetch_data) > 0:
                rec_batch_res = self.ocr_reader.postprocess(
                    fetch_data, with_score=True)
                for res in rec_batch_res:
                    res_list.append(res[0])
        elif isinstance(fetch_data, list):
            for one_batch in fetch_data:
                one_batch_res = self.ocr_reader.postprocess(
                    one_batch, with_score=True)
                for res in one_batch_res:
                    res_list.append(res[0])
        res = {"res": str(res_list)}
        return res, None, ""
......
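
The mini-batch arithmetic in the `bt_idx` loop above is easy to verify in isolation. A standalone sketch (the helper name `split_into_batches` is ours, introduced only for illustration):
```python
def split_into_batches(boxes_size, max_batch_size=6):
    """Return [start, end) pairs mirroring the bt_idx loop in RecOp.preprocess."""
    if max_batch_size == 0:
        return []  # corresponds to skipping the predict stage
    batch_size = boxes_size // max_batch_size
    rem = boxes_size % max_batch_size
    spans = [(i * max_batch_size, (i + 1) * max_batch_size)
             for i in range(batch_size)]
    if rem != 0:  # the final, smaller mini-batch
        spans.append((batch_size * max_batch_size, boxes_size))
    return spans

print(split_into_batches(13))  # [(0, 6), (6, 12), (12, 13)]
```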
...@@ -26,7 +26,7 @@ this script will download Chinese Dictionary File vocab.txt and Chinese Sample D
### Start Service
```
python3 -m paddle_serving_server.serve --model serving_server --port 7703 --use_lite --use_xpu --ir_optim
```
### Client Prediction
......
...@@ -31,7 +31,7 @@ client.connect(endpoint_list)
for line in sys.stdin:
    feed_dict = reader.process(line)
    for key in feed_dict.keys():
        feed_dict[key] = np.array(feed_dict[key]).reshape((1, 128))
    #print(feed_dict)
    result = client.predict(feed=feed_dict, fetch=fetch, batch=True)
    print(result)
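
The reshape above gives every feed variable an explicit leading batch dimension of 1, which is why `predict` is now called with `batch=True`. A minimal numpy sketch of the layout change (assuming max_seq_len is 128):
```python
import numpy as np

ids = np.arange(128)            # one tokenized sequence
old = ids.reshape((128, 1))     # old layout: reads as 128 samples of length 1
new = ids.reshape((1, 128))     # new layout: a batch of 1 sample with seq_len 128
print(old.shape, new.shape)     # (128, 1) (1, 128)
```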
...@@ -23,7 +23,7 @@ args = benchmark_args()
reader = ChineseErnieReader({"max_seq_len": 128})
fetch = ["save_infer_model/scale_0"]
endpoint_list = ['127.0.0.1:7704']
client = Client()
client.load_client_config(args.model)
client.connect(endpoint_list)
......
...@@ -400,7 +400,7 @@ class Op(object):
            log_id: global unique id for RTT, 0 default
        Return:
            output_data: data for process stage
            is_skip_process: skip process stage or not, False default
            prod_errcode: None default, otherwise, product errors occurred.
                It is handled in the same way as exception.
...@@ -453,20 +453,23 @@ class Op(object):
        call_result.pop("serving_status_code")
        return call_result

    def postprocess(self, input_data, fetch_data, log_id=0):
        """
        In postprocess stage, assemble data for next op or output.
        Args:
            input_data: data returned in preprocess stage, dict(for single predict) or list(for batch predict)
            fetch_data: data returned in process stage, dict(for single predict) or list(for batch predict)
            log_id: logid, 0 default
        Returns:
            fetch_dict: fetch result must be dict type.
            prod_errcode: None default, otherwise, product errors occurred.
                It is handled in the same way as exception.
            prod_errinfo: "" default
        """
        fetch_dict = {}
        if isinstance(fetch_data, dict):
            fetch_dict = fetch_data
        return fetch_dict, None, ""

    def _parse_channeldata(self, channeldata_dict):
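
For user-defined Ops, the new contract is worth spelling out: `fetch_data` arrives as a dict for a single predict call, or as a list of dicts when preprocess returned a list of mini-batch feeds, and the fetch result returned must be a dict. A hedged sketch of an override following that contract (`MyOp` and the decoding step are illustrative only; the import path may differ across versions):
```python
from paddle_serving_server.web_service import Op  # adjust to your installed version

class MyOp(Op):
    def postprocess(self, input_data, fetch_data, log_id=0):
        # Normalize both shapes of fetch_data to a list of mini-batch dicts.
        batches = fetch_data if isinstance(fetch_data, list) else [fetch_data]
        res_list = []
        for one_batch in batches:
            res_list.append(one_batch)  # replace with your own decoding
        # The fetch result must be a dict; errcode/errinfo keep their defaults.
        return {"res": str(res_list)}, None, ""
```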
...@@ -685,180 +688,211 @@ class Op(object):
        _LOGGER.debug("{} Running process".format(op_info_prefix))
        midped_data_dict = collections.OrderedDict()
        err_channeldata_dict = collections.OrderedDict()
        ### if (batch_num == 1 && skip == True), then skip the process stage.
        is_skip_process = False
        data_ids = list(preped_data_dict.keys())
        if len(data_ids) == 1 and skip_process_dict.get(data_ids[0]) == True:
            is_skip_process = True
        if self.with_serving is False or is_skip_process is True:
            midped_data_dict = preped_data_dict
            _LOGGER.warning("(data_id={} log_id={}) OP={} skip process stage. " \
                "with_serving={}, is_skip_process={}".format(data_ids[0],
                logid_dict.get(data_ids[0]), self.name, self.with_serving,
                is_skip_process))
            return midped_data_dict, err_channeldata_dict

        # use typical_logid to mark batch data
        # data_ids is one self-increasing unique key.
        typical_logid = data_ids[0]
        if len(data_ids) != 1:
            for data_id in data_ids:
                _LOGGER.info(
                    "(data_id={} logid={}) Auto-batching is On Op={}!!" \
                    "We selected logid={} (from batch: {}) as a " \
                    "representative for logging.".format(
                        data_id, logid_dict.get(data_id), self.name,
                        typical_logid, data_ids))

        one_input = preped_data_dict[data_ids[0]]
        feed_batch = []
        feed_dict = {}
        cur_offset = 0
        input_offset_dict = {}
        batch_input = False

        if isinstance(one_input, dict):
            # For dict type, data structure is dict.
            # Merge multiple dicts for data_ids into one dict.
            # feed_batch is the input param of predict func.
            # input_offset_dict is used for data restoration[data_ids]
            if len(data_ids) == 1:
                feed_batch = [preped_data_dict[data_id] for data_id in data_ids]
            else:
                for data_id in data_ids:
                    for key, val in preped_data_dict[data_id].items():
                        has_val = feed_dict.get(key)
                        if has_val is None:
                            feed_dict[key] = val
                            continue
                        # merge 2 np.array
                        if isinstance(val, np.ndarray):
                            feed_dict[key] = np.append(
                                feed_dict[key], val, axis=0)
                feed_batch.append(feed_dict)

            for data_id in data_ids:
                start = cur_offset
                for key, val in preped_data_dict[data_id].items():
                    if isinstance(val, (list, np.ndarray)):
                        cur_offset += len(val)
                    else:
                        cur_offset += 1
                    break
                input_offset_dict[data_id] = [start, cur_offset]
        elif isinstance(one_input, list):
            # For list type, data structure of one_input is [dict, dict, ...]
            # Data structure of feed_batch is [dict1_1, dict1_2, dict2_1, ...]
            # Data structure of input_offset_dict is { data_id : [start, end] }
            batch_input = True
            for data_id in data_ids:
                feed_batch.extend(preped_data_dict[data_id])
                data_size = len(preped_data_dict[data_id])
                start = cur_offset
                cur_offset = start + data_size
                input_offset_dict[data_id] = [start, cur_offset]
        else:
            _LOGGER.critical(
                "(data_id={} log_id={}){} Failed to process: expect input type is dict"
                " or list(batch input), but get {}".format(data_ids[
                    0], typical_logid, op_info_prefix, type(one_input)))
            for data_id in data_ids:
                error_code = ChannelDataErrcode.TYPE_ERROR.value
                error_info = "expect input type is dict or list, but get {}".format(
                    type(one_input))
                err_channeldata_dict[data_id] = ChannelData(
                    error_code=error_code,
                    error_info=error_info,
                    data_id=data_id,
                    log_id=logid_dict.get(data_id))
            return midped_data_dict, err_channeldata_dict

        midped_batch = None
        error_code = ChannelDataErrcode.OK.value
        if self._timeout <= 0:
            # No retry
            try:
                if batch_input is False:
                    midped_batch = self.process(feed_batch, typical_logid)
                else:
                    midped_batch = []
                    for idx in range(len(feed_batch)):
                        predict_res = self.process([feed_batch[idx]],
                                                   typical_logid)
                        midped_batch.append(predict_res)
            except Exception as e:
                error_code = ChannelDataErrcode.UNKNOW.value
                error_info = "(data_id={} log_id={}) {} Failed to process(batch: {}): {}".format(
                    data_ids[0], typical_logid, op_info_prefix, data_ids, e)
                _LOGGER.error(error_info, exc_info=True)
        else:
            # retry N times configed in yaml files.
            for i in range(self._retry):
                try:
                    # time out for each process
                    if batch_input is False:
                        midped_batch = func_timeout.func_timeout(
                            self._timeout,
                            self.process,
                            args=(feed_batch, typical_logid))
                    else:
                        midped_batch = []
                        for idx in range(len(feed_batch)):
                            predict_res = func_timeout.func_timeout(
                                self._timeout,
                                self.process,
                                args=([feed_batch[idx]], typical_logid))
                            midped_batch.append(predict_res)
                except func_timeout.FunctionTimedOut as e:
                    if i + 1 >= self._retry:
                        error_code = ChannelDataErrcode.TIMEOUT.value
                        error_info = "(log_id={}) {} Failed to process(batch: {}): " \
                            "exceeded retry count.".format(typical_logid, op_info_prefix, data_ids)
                        _LOGGER.error(error_info)
                    else:
                        _LOGGER.warning(
                            "(log_id={}) {} Failed to process(batch: {}): timeout,"
                            " and retrying({}/{})...".format(
                                typical_logid, op_info_prefix, data_ids, i + 1,
                                self._retry))
                except Exception as e:
                    error_code = ChannelDataErrcode.UNKNOW.value
                    error_info = "(log_id={}) {} Failed to process(batch: {}): {}".format(
                        typical_logid, op_info_prefix, data_ids, e)
                    _LOGGER.error(error_info, exc_info=True)
                    break
                else:
                    break

        # 2 kinds of errors
        if error_code != ChannelDataErrcode.OK.value or midped_batch is None:
            error_info = "(log_id={}) {} failed to predict.".format(
                typical_logid, self.name)
            _LOGGER.error(error_info)
            for data_id in data_ids:
                err_channeldata_dict[data_id] = ChannelData(
                    error_code=ChannelDataErrcode.CLIENT_ERROR.value,
                    error_info=error_info,
                    data_id=data_id,
                    log_id=logid_dict.get(data_id))
            return midped_data_dict, err_channeldata_dict

        # Split batch infer result to each data_ids
        if batch_input is False:
            # midped_batch is dict type for single input
            var_names = midped_batch.keys()
            lod_var_names = set()
            lod_offset_names = set()
            for name in var_names:
                lod_offset_name = "{}.lod".format(name)
                if lod_offset_name in var_names:
                    _LOGGER.debug("(log_id={}) {} {} is LodTensor".format(
                        typical_logid, op_info_prefix, name))
                    lod_var_names.add(name)
                    lod_offset_names.add(lod_offset_name)

            for idx, data_id in enumerate(data_ids):
                midped_data_dict[data_id] = {}

            for name, value in midped_batch.items():
                if name in lod_offset_names:
                    continue
                if name in lod_var_names:
                    # lodtensor
                    lod_offset_name = "{}.lod".format(name)
                    lod_offset = midped_batch[lod_offset_name]
                    for idx, data_id in enumerate(data_ids):
                        data_offset_left = input_offset_dict[data_id][0]
                        data_offset_right = input_offset_dict[data_id][1]
                        lod_offset_left = lod_offset[data_offset_left]
                        lod_offset_right = lod_offset[data_offset_right]
                        midped_data_dict[data_id][name] = value[
                            lod_offset_left:lod_offset_right]
                        midped_data_dict[data_id][lod_offset_name] = \
                            lod_offset[data_offset_left:data_offset_right + 1] - lod_offset[data_offset_left]
                else:
                    # normal tensor
                    for idx, data_id in enumerate(data_ids):
                        start = input_offset_dict[data_id][0]
                        end = input_offset_dict[data_id][1]
                        midped_data_dict[data_id][name] = value[start:end]
        else:
            # midped_batch is list type for batch input
            for idx, data_id in enumerate(data_ids):
                start = input_offset_dict[data_id][0]
                end = input_offset_dict[data_id][1]
                midped_data_dict[data_id] = midped_batch[start:end]
        return midped_data_dict, err_channeldata_dict
    def _run_postprocess(self, parsed_data_dict, midped_data_dict,
......
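
The offset bookkeeping above is the core of the auto-batching split: `input_offset_dict` maps each `data_id` to its `[start, end)` slice of the merged batch, and the fetch result is cut back apart along the same offsets. A standalone numpy sketch (the names are ours, for illustration only):
```python
import numpy as np

# Two requests, merged along axis 0 exactly as _run_process does for dict inputs.
preped = {"req_a": np.ones((2, 4)), "req_b": np.ones((3, 4))}
merged = np.concatenate([preped["req_a"], preped["req_b"]], axis=0)  # shape (5, 4)
input_offset_dict = {"req_a": [0, 2], "req_b": [2, 5]}

fetch = {"y": merged}  # pretend the service returned one tensor per batch row
split = {data_id: fetch["y"][start:end]
         for data_id, (start, end) in input_offset_dict.items()}
print(split["req_a"].shape, split["req_b"].shape)  # (2, 4) (3, 4)
```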
# An image for building paddle binaries
# Use cuda devel base image for both cpu and gpu environments
# When you modify it, please be aware of cudnn-runtime version
FROM nvidia/cuda:11.2.0-cudnn8-devel-ubuntu16.04
MAINTAINER PaddlePaddle Authors <paddle-dev@baidu.com>

# ENV variables
...@@ -10,40 +10,45 @@ ARG WITH_AVX
ENV WITH_GPU=${WITH_GPU:-ON}
ENV WITH_AVX=${WITH_AVX:-ON}
ENV HOME /root

# Add bash enhancements
COPY tools/dockerfiles/root/ /root/
# Prepare packages for Python
RUN apt-get update && \
    apt-get install -y make build-essential libssl-dev zlib1g-dev libbz2-dev \
    libreadline-dev libsqlite3-dev wget curl llvm libncurses5-dev libncursesw5-dev \
    xz-utils tk-dev libffi-dev liblzma-dev

RUN apt-get update && \
    apt-get install -y --allow-downgrades --allow-change-held-packages \
    patchelf git python-pip python-dev python-opencv openssh-server bison \
    wget unzip unrar tar xz-utils bzip2 gzip coreutils ntp \
    curl sed grep graphviz libjpeg-dev zlib1g-dev \
    python-matplotlib unzip \
    automake locales clang-format swig \
    liblapack-dev liblapacke-dev libcurl4-openssl-dev \
    net-tools libtool module-init-tools vim && \
    apt-get clean -y

RUN ln -s /usr/lib/x86_64-linux-gnu/libssl.so /usr/lib/libssl.so.10 && \
    ln -s /usr/lib/x86_64-linux-gnu/libcrypto.so /usr/lib/libcrypto.so.10

RUN wget https://github.com/koalaman/shellcheck/releases/download/v0.7.1/shellcheck-v0.7.1.linux.x86_64.tar.xz -O shellcheck-v0.7.1.linux.x86_64.tar.xz && \
    tar -xf shellcheck-v0.7.1.linux.x86_64.tar.xz && cp shellcheck-v0.7.1/shellcheck /usr/bin/shellcheck && \
    rm -rf shellcheck-v0.7.1.linux.x86_64.tar.xz shellcheck-v0.7.1
# Downgrade gcc&&g++
WORKDIR /usr/bin
COPY tools/dockerfiles/build_scripts /build_scripts
RUN bash /build_scripts/install_gcc.sh gcc82 && rm -rf /build_scripts
RUN cp gcc gcc.bak && cp g++ g++.bak && rm gcc && rm g++
RUN ln -s /usr/local/gcc-8.2/bin/gcc /usr/local/bin/gcc
RUN ln -s /usr/local/gcc-8.2/bin/g++ /usr/local/bin/g++
RUN ln -s /usr/local/gcc-8.2/bin/gcc /usr/bin/gcc
RUN ln -s /usr/local/gcc-8.2/bin/g++ /usr/bin/g++
ENV PATH=/usr/local/gcc-8.2/bin:$PATH

# install cmake
WORKDIR /home
RUN wget -q https://cmake.org/files/v3.16/cmake-3.16.0-Linux-x86_64.tar.gz && tar -zxvf cmake-3.16.0-Linux-x86_64.tar.gz && rm cmake-3.16.0-Linux-x86_64.tar.gz
ENV PATH=/home/cmake-3.16.0-Linux-x86_64/bin:$PATH
...@@ -53,75 +58,30 @@ ENV PATH=/home/cmake-3.16.0-Linux-x86_64/bin:$PATH
# Install Python3.6
RUN mkdir -p /root/python_build/ && wget -q https://www.sqlite.org/2018/sqlite-autoconf-3250300.tar.gz && \
    tar -zxf sqlite-autoconf-3250300.tar.gz && cd sqlite-autoconf-3250300 && \
    ./configure -prefix=/usr/local && make -j8 && make install && cd ../ && rm sqlite-autoconf-3250300.tar.gz

RUN wget -q https://www.python.org/ftp/python/3.6.0/Python-3.6.0.tgz && \
    tar -xzf Python-3.6.0.tgz && cd Python-3.6.0 && \
    CFLAGS="-Wformat" ./configure --prefix=/usr/local/ --enable-shared > /dev/null && \
    make -j8 > /dev/null && make altinstall > /dev/null && ldconfig && cd .. && rm -rf Python-3.6.0*

# Install Python3.7
RUN wget -q https://www.python.org/ftp/python/3.7.0/Python-3.7.0.tgz && \
    tar -xzf Python-3.7.0.tgz && cd Python-3.7.0 && \
    CFLAGS="-Wformat" ./configure --prefix=/usr/local/ --enable-shared > /dev/null && \
    make -j8 > /dev/null && make altinstall > /dev/null && ldconfig && cd .. && rm -rf Python-3.7.0*

# Install Python3.8
RUN wget -q https://www.python.org/ftp/python/3.8.0/Python-3.8.0.tgz && \
    tar -xzf Python-3.8.0.tgz && cd Python-3.8.0 && \
    CFLAGS="-Wformat" ./configure --prefix=/usr/local/ --enable-shared > /dev/null && \
    make -j8 > /dev/null && make altinstall > /dev/null && ldconfig && cd .. && rm -rf Python-3.8.0*

ENV LD_LIBRARY_PATH=/usr/local/lib:${LD_LIBRARY_PATH}
RUN ln -sf /usr/local/bin/python3.6 /usr/local/bin/python3 && ln -sf /usr/local/bin/python3.6 /usr/bin/python3 && ln -sf /usr/local/bin/pip3.6 /usr/local/bin/pip3 && ln -sf /usr/local/bin/pip3.6 /usr/bin/pip3

RUN rm -r /root/python_build
# Install Go and glide
RUN wget -qO- https://dl.google.com/go/go1.14.linux-amd64.tar.gz | \
    tar -xz -C /usr/local && \
...@@ -132,8 +92,8 @@ RUN wget -qO- https://dl.google.com/go/go1.14.linux-amd64.tar.gz | \
    echo "GOPATH=/root/go" >> /root/.bashrc && \
    echo "PATH=/usr/local/go/bin:/root/go/bin:$PATH" >> /root/.bashrc
ENV GOROOT=/usr/local/go GOPATH=/root/go
# should not be on the same line as the GOROOT definition, otherwise docker build could not find GOROOT.
ENV PATH=/usr/local/go/bin:/root/go/bin:${PATH}
# Install TensorRT
# following TensorRT.tar.gz is not the default official one, we make two minor changes:
...@@ -142,9 +102,9 @@ ENV PATH=/usr/local/go/bin:/root/go/bin:$PATH
# 2. Manually add ~IPluginFactory() in IPluginFactory class of NvInfer.h, otherwise, it couldn't work in paddle.
# See https://github.com/PaddlePaddle/Paddle/issues/10129 for details.

# Downgrade TensorRT
COPY tools/dockerfiles/build_scripts /build_scripts
RUN bash /build_scripts/install_trt.sh
RUN rm -rf /build_scripts
# git credential to skip password typing
...@@ -162,8 +122,8 @@ RUN wget -q https://paddle-ci.cdn.bcebos.com/patchelf_0.10-2_amd64.deb && \
    dpkg -i patchelf_0.10-2_amd64.deb

# Configure OpenSSH server. c.f. https://docs.docker.com/engine/examples/running_ssh_service
RUN mkdir /var/run/sshd && echo 'root:root' | chpasswd && sed -ri 's/^PermitRootLogin\s+.*/PermitRootLogin yes/' /etc/ssh/sshd_config && sed -ri 's/UsePAM yes/#UsePAM yes/g' /etc/ssh/sshd_config
CMD source ~/.bashrc
# ccache 3.7.9
RUN wget https://paddle-ci.gz.bcebos.com/ccache-3.7.9.tar.gz && \
...@@ -172,11 +132,9 @@ RUN wget https://paddle-ci.gz.bcebos.com/ccache-3.7.9.tar.gz && \
    make -j8 && make install && \
    ln -s /usr/local/ccache-3.7.9/bin/ccache /usr/local/bin/ccache

RUN python3.8 -m pip install --upgrade pip requests && \
    python3.7 -m pip install --upgrade pip requests && \
    python3.6 -m pip install --upgrade pip requests

RUN wget https://paddle-serving.bj.bcebos.com/others/centos_ssl.tar && \
    tar xf centos_ssl.tar && rm -rf centos_ssl.tar && \
......
...@@ -28,7 +28,7 @@ WORKDIR /home
# install whl and bin
WORKDIR /home
COPY tools/dockerfiles/build_scripts /build_scripts
RUN bash /build_scripts/install_whl.sh <<serving_version>> <<paddle_version>> <<run_env>> <<python_version>> && rm -rf /build_scripts

# install tensorrt
WORKDIR /home
......
...@@ -20,6 +20,7 @@
set -ex
if [ -f "/etc/redhat-release" ];then
lib_so_3=/usr/lib64/libgfortran.so.3
lib_so_5=/usr/lib64/libgfortran.so.5
lib_so_6=/usr/lib64/libstdc++.so.6
lib_path=/usr/lib64
...@@ -44,4 +45,20 @@ if [ "$1" == "gcc82" ]; then
ln -s /usr/local/gcc-8.2/lib64/libgfortran.so.5 ${lib_so_5} && \
ln -s /usr/local/gcc-8.2/lib64/libstdc++.so.6 ${lib_so_6} && \
cp /usr/local/gcc-8.2/lib64/libstdc++.so.6.0.25 ${lib_path}
elif [ "$1" == "gcc54" ]; then
wget -q https://paddle-ci.gz.bcebos.com/gcc-5.4.0.tar.gz
tar -xvf gcc-5.4.0.tar.gz
cd gcc-5.4.0 && \
unset LIBRARY_PATH CPATH C_INCLUDE_PATH PKG_CONFIG_PATH CPLUS_INCLUDE_PATH INCLUDE && \
./contrib/download_prerequisites && \
cd .. && mkdir temp_gcc54 && cd temp_gcc54 && \
../gcc-5.4.0/configure --prefix=/usr/local/gcc-5.4 --enable-threads=posix --disable-checking --disable-multilib && \
make -j8 && make install
cd .. && rm -rf temp_gcc54
rm -rf gcc-5.4.0 gcc-5.4.0.tar.gz
cp ${lib_so_6} ${lib_so_6}.bak && rm -f ${lib_so_6} &&
ln -s /usr/local/gcc-5.4/lib64/libgfortran.so.3 ${lib_so_3} && \
ln -s /usr/local/gcc-5.4/lib64/libstdc++.so.6 ${lib_so_6} && \
cp /usr/local/gcc-5.4/lib64/libstdc++.so.6.0.21 ${lib_path}
fi fi
...@@ -37,4 +37,13 @@ elif [[ "$VERSION" == "3.7" ]];then
make -j8 > /dev/null && make altinstall > /dev/null && ldconfig
cd .. && rm -rf Python-3.7.0*
python3.7 -m pip install -U pip
elif [[ "$VERSION" == "3.8" ]];then
wget -q https://www.python.org/ftp/python/3.8.0/Python-3.8.0.tgz && \
tar -xzf Python-3.8.0.tgz && cd Python-3.8.0 && \
CFLAGS="-Wformat" ./configure --prefix=/usr/local/ --enable-shared > /dev/null && \
make -j8 > /dev/null && make altinstall > /dev/null && ldconfig
cd .. && rm -rf Python-3.8.0*
python3.8 -m pip install -U pip
fi fi
...@@ -18,37 +18,74 @@ SERVING_VERSION=$1
PADDLE_VERSION=$2
RUN_ENV=$3 # cpu/10.1 10.2
PYTHON_VERSION=$4
serving_release=
client_release="paddle-serving-client==$SERVING_VERSION"
app_release="paddle-serving-app==0.3.1"
if [[ $PYTHON_VERSION == "3.6" ]];then
CPYTHON="36"
elif [[ $PYTHON_VERSION == "3.7" ]];then
CPYTHON="37"
elif [[ $PYTHON_VERSION == "3.8" ]];then
CPYTHON="38"
fi
if [[ $SERVING_VERSION == "0.5.0" ]]; then
if [[ "$RUN_ENV" == "cpu" ]];then
server_release="paddle-serving-server==$SERVING_VERSION"
serving_bin="https://paddle-serving.bj.bcebos.com/bin/serving-cpu-noavx-openblas-${SERVING_VERSION}.tar.gz"
elif [[ "$RUN_ENV" == "cuda10.1" ]];then
server_release="paddle-serving-server-gpu==$SERVING_VERSION.post101"
serving_bin="https://paddle-serving.bj.bcebos.com/bin/serving-gpu-101-${SERVING_VERSION}.tar.gz"
elif [[ "$RUN_ENV" == "cuda10.2" ]];then
server_release="paddle-serving-server-gpu==$SERVING_VERSION.post102"
serving_bin="https://paddle-serving.bj.bcebos.com/bin/serving-gpu-102-${SERVING_VERSION}.tar.gz"
fi
client_release="paddle-serving-client==$SERVING_VERSION"
app_release="paddle-serving-app==0.3.1"
elif [[ $SERVING_VERSION == "0.6.0" ]]; then
if [[ "$RUN_ENV" == "cpu" ]];then
server_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server-$SERVING_VERSION-py3-none-any.whl"
serving_bin="https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-cpu-noavx-openblas-$SERVING_VERSION.tar.gz"
elif [[ "$RUN_ENV" == "cuda10.1" ]];then
server_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server_gpu-$SERVING_VERSION.post101-py3-none-any.whl"
serving_bin="https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-gpu-101-$SERVING_VERSION.tar.gz"
elif [[ "$RUN_ENV" == "cuda10.2" ]];then
server_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_server_gpu-$SERVING_VERSION.post102-py3-none-any.whl"
serving_bin="https://paddle-serving.bj.bcebos.com/test-dev/bin/serving-gpu-102-$SERVING_VERSION.tar.gz"
fi
client_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_client-$SERVING_VERSION-cp$CPYTHON-none-any.whl"
app_release="https://paddle-serving.bj.bcebos.com/test-dev/whl/paddle_serving_app-$SERVING_VERSION-py3-none-any.whl"
fi
if [[ "$RUN_ENV" == "cpu" ]];then if [[ "$RUN_ENV" == "cpu" ]];then
server_release="paddle-serving-server==$SERVING_VERSION"
python$PYTHON_VERSION -m pip install $client_release $app_release $server_release python$PYTHON_VERSION -m pip install $client_release $app_release $server_release
python$PYTHON_VERSION -m pip install paddlepaddle==${PADDLE_VERSION} python$PYTHON_VERSION -m pip install paddlepaddle==${PADDLE_VERSION}
cd /usr/local/ cd /usr/local/
wget https://paddle-serving.bj.bcebos.com/bin/serving-cpu-noavx-openblas-${SERVING_VERSION}.tar.gz wget $serving_bin
tar xf serving-cpu-noavx-openblas-${SERVING_VERSION}.tar.gz tar xf serving-cpu-noavx-openblas-${SERVING_VERSION}.tar.gz
echo "export SERVING_BIN=$PWD/serving-cpu-noavx-openblas-${SERVING_VERSION}/serving">>/root/.bashrc mv $PWD/serving-cpu-noavx-openblas-${SERVING_VERSION} $PWD/serving_bin
echo "export SERVING_BIN=$PWD/serving_bin/serving">>/root/.bashrc
rm -rf serving-cpu-noavx-openblas-${SERVING_VERSION}.tar.gz rm -rf serving-cpu-noavx-openblas-${SERVING_VERSION}.tar.gz
cd - cd -
elif [[ "$RUN_ENV" == "cuda10.1" ]];then elif [[ "$RUN_ENV" == "cuda10.1" ]];then
server_release="paddle-serving-server-gpu==$SERVING_VERSION.post101"
python$PYTHON_VERSION -m pip install $client_release $app_release $server_release python$PYTHON_VERSION -m pip install $client_release $app_release $server_release
python$PYTHON_VERSION -m pip install paddlepaddle-gpu==${PADDLE_VERSION} python$PYTHON_VERSION -m pip install paddlepaddle-gpu==${PADDLE_VERSION}
cd /usr/local/ cd /usr/local/
wget https://paddle-serving.bj.bcebos.com/bin/serving-gpu-101-${SERVING_VERSION}.tar.gz wget $serving_bin
tar xf serving-gpu-101-${SERVING_VERSION}.tar.gz tar xf serving-gpu-101-${SERVING_VERSION}.tar.gz
echo "export SERVING_BIN=$PWD/serving-gpu-101-${SERVING_VERSION}/serving">>/root/.bashrc mv $PWD/serving-gpu-101-${SERVING_VERSION} $PWD/serving_bin
echo "export SERVING_BIN=$PWD/serving_bin/serving">>/root/.bashrc
rm -rf serving-gpu-101-${SERVING_VERSION}.tar.gz rm -rf serving-gpu-101-${SERVING_VERSION}.tar.gz
cd - cd -
elif [[ "$RUN_ENV" == "cuda10.2" ]];then elif [[ "$RUN_ENV" == "cuda10.2" ]];then
server_release="paddle-serving-server-gpu==$SERVING_VERSION.post102"
python$PYTHON_VERSION -m pip install $client_release $app_release $server_release python$PYTHON_VERSION -m pip install $client_release $app_release $server_release
python$PYTHON_VERSION -m pip install paddlepaddle-gpu==${PADDLE_VERSION} python$PYTHON_VERSION -m pip install paddlepaddle-gpu==${PADDLE_VERSION}
cd /usr/local/ cd /usr/local/
wget https://paddle-serving.bj.bcebos.com/bin/serving-gpu-102-${SERVING_VERSION}.tar.gz wget $serving_bin
tar xf serving-gpu-102-${SERVING_VERSION}.tar.gz tar xf serving-gpu-102-${SERVING_VERSION}.tar.gz
echo "export SERVING_BIN=$PWD/serving-gpu-102-${SERVING_VERSION}/serving">>/root/.bashrc mv $PWD/serving-gpu-102-${SERVING_VERSION} $PWD/serving_bin
echo "export SERVING_BIN=$PWD/serving_bin/serving">>/root/.bashrc
rm -rf serving-gpu-102-${SERVING_VERSION}.tar.gz rm -rf serving-gpu-102-${SERVING_VERSION}.tar.gz
cd - cd -
fi fi
......
...@@ -60,8 +60,8 @@ function run
echo "named arg: command: $start_command"
echo "named arg: port: $port"

sed -e "s/<< APP_NAME >>/$app_name/g" -e "s/<< IMAGE_NAME >>/$(echo $image_name | sed -e 's/\\/\\\\/g; s/\//\\\//g; s/&/\\\&/g')/g" -e "s/<< WORKDIR >>/$(echo $workdir | sed -e 's/\\/\\\\/g; s/\//\\\//g; s/&/\\\&/g')/g" -e "s/<< COMMAND >>/\"$(echo $start_command | sed -e 's/\\/\\\\/g; s/\//\\\//g; s/&/\\\&/g')\"/g" -e "s/<< PORT >>/$port/g" tools/k8s_serving.yaml_template > k8s_serving.yaml
sed -e "s/<< APP_NAME >>/$app_name/g" -e "s/<< IMAGE_NAME >>/$(echo $image_name | sed -e 's/\\/\\\\/g; s/\//\\\//g; s/&/\\\&/g')/g" -e "s/<< WORKDIR >>/$(echo $workdir | sed -e 's/\\/\\\\/g; s/\//\\\//g; s/&/\\\&/g')/g" -e "s/<< COMMAND >>/\"$(echo $start_command | sed -e 's/\\/\\\\/g; s/\//\\\//g; s/&/\\\&/g')\"/g" -e "s/<< PORT >>/$port/g" tools/k8s_ingress.yaml_template > k8s_ingress.yaml
echo "check k8s_serving.yaml and k8s_ingress.yaml please."
}
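
The fix above escapes the characters that are special on the replacement side of a sed `s///` command (backslash, slash, ampersand), so a start command containing paths no longer breaks the substitution. The same escaping, sketched in Python for clarity (the function name and sample command are ours, for illustration):
```python
def sed_escape(s):
    """Escape backslash, slash and ampersand for the right-hand side of sed s///."""
    return s.replace("\\", "\\\\").replace("/", "\\/").replace("&", "\\&")

print(sed_escape("python3.6 /home/bert/bert_web_service.py bert_seq128_model 9292"))
# -> python3.6 \/home\/bert\/bert_web_service.py bert_seq128_model 9292
```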
......
...@@ -8,7 +8,7 @@ function usage
echo "usage: sh tools/generate_runtime_docker.sh --SOME_ARG ARG_VALUE"
echo " ";
echo " --env : running env, cpu/cuda10.1/cuda10.2/cuda11";
echo " --python : python version, 3.6/3.7/3.8 ";
echo " --serving : serving version(0.5.0)";
echo " --paddle : paddle version(2.0.1)"
echo " --image_name : image name(default serving_runtime:env-python)"
...@@ -73,8 +73,8 @@ function run
echo "named arg: paddle: $paddle"
echo "named arg: image_name: $image_name"

sed -e "s/<<base_image>>/$base_image/g" -e "s/<<python_version>>/$python/g" -e "s/<<run_env>>/$env/g" -e "s/<<serving_version>>/$serving/g" -e "s/<<paddle_version>>/$paddle/g" tools/Dockerfile.runtime_template > Dockerfile.tmp
docker build --network=host --build-arg ftp_proxy=http://172.19.57.45:3128 --build-arg https_proxy=http://172.19.57.45:3128 --build-arg http_proxy=http://172.19.57.45:3128 --build-arg HTTP_PROXY=http://172.19.57.45:3128 --build-arg HTTPS_PROXY=http://172.19.57.45:3128 -t $image_name -f Dockerfile.tmp .
}

run "$@";
...@@ -34,6 +34,7 @@ spec:
      containers:
      - image: << IMAGE_NAME >>
        name: << APP_NAME >>
        imagePullPolicy: Always
        ports:
        - containerPort: << PORT >>
        workingDir: << WORKDIR >>
...@@ -41,6 +42,8 @@ spec:
        command: ['/bin/bash', '-c']
        args: [<< COMMAND >>]
        env:
        - name: SERVING_BIN
          value: "/usr/local/serving_bin/serving"
        - name: NODE_NAME
          valueFrom:
            fieldRef:
......