Unverified · Commit 787691bc · authored by TeslaZhao · committed by GitHub

Merge pull request #40 from PaddlePaddle/develop

Sync 
@@ -60,6 +60,13 @@ option(WITH_TRT "Compile Paddle Serving with TRT"
 option(PADDLE_ON_INFERENCE "Compile for encryption" ON)
 option(WITH_OPENCV "Compile Paddle Serving with OPENCV" OFF)
+if(NOT DEFINED VERSION_TAG)
+    set(VERSION_TAG "0.0.0")
+endif()
+if (WITH_PYTHON)
+    message(STATUS "Compile Version Tag for wheel: ${VERSION_TAG}")
+endif()
 if (WITH_OPENCV)
 SET(OPENCV_DIR "" CACHE PATH "Location of libraries")
 if(NOT DEFINED OPENCV_DIR)
......
@@ -59,11 +59,11 @@ For example, after the Server Compilation step, the whl package will be produced
 # Request parameters description
 In order to deploy the serving
 service on an ARM server with Baidu Kunlun XPU chips and use the acceleration capability of Paddle-Lite, please specify the following parameters during deployment.
-|param|param description|about|
-|:--|:--|:--|
-|use_lite|using Paddle-Lite Engine|use the inference capability of Paddle-Lite|
-|use_xpu|using Baidu Kunlun for inference|need to be used with the use_lite option|
-|ir_optim|open the graph optimization|refer to[Paddle-Lite](https://github.com/PaddlePaddle/Paddle-Lite)|
+| param    | param description                | about                                                               |
+| :------- | :------------------------------- | :------------------------------------------------------------------ |
+| use_lite | use the Paddle-Lite engine       | enables the inference capability of Paddle-Lite                     |
+| use_xpu  | use Baidu Kunlun for inference   | must be used together with the use_lite option                      |
+| ir_optim | enable graph optimization        | refer to [Paddle-Lite](https://github.com/PaddlePaddle/Paddle-Lite) |
 # Deployment examples
 ## Download the model
 ```
@@ -78,15 +78,15 @@ There are mainly three deployment methods:
 The first two deployment methods are recommended.
 Start the RPC service, deploying on an ARM server with Baidu Kunlun chips, accelerated with Paddle-Lite and Baidu Kunlun XPU.
 ```
 python3 -m paddle_serving_server_gpu.serve --model uci_housing_model --thread 6 --port 9292 --use_lite --use_xpu --ir_optim
 ```
 Start the RPC service, deploying on an ARM server, accelerated with Paddle-Lite.
 ```
 python3 -m paddle_serving_server_gpu.serve --model uci_housing_model --thread 6 --port 9292 --use_lite --ir_optim
 ```
 Start the RPC service, deploying on an ARM server.
 ```
 python3 -m paddle_serving_server_gpu.serve --model uci_housing_model --thread 6 --port 9292
 ```
@@ -103,7 +103,7 @@ fetch_map = client.predict(feed={"x": np.array(data).reshape(1,13,1)}, fetch=["p
 print(fetch_map)
 ```
 Some examples are provided below; other models can be adapted with reference to these examples.
-|sample name|sample links|
-|:-----|:--|
-|fit_a_line|[fit_a_line_xpu](../python/examples/xpu/fit_a_line_xpu)|
-|resnet|[resnet_v2_50_xpu](../python/examples/xpu/resnet_v2_50_xpu)|
+| sample name | sample links                                                |
+| :---------- | :---------------------------------------------------------- |
+| fit_a_line  | [fit_a_line_xpu](../python/examples/xpu/fit_a_line_xpu)     |
+| resnet      | [resnet_v2_50_xpu](../python/examples/xpu/resnet_v2_50_xpu) |
@@ -170,7 +170,7 @@ cmake -DPYTHON_INCLUDE_DIR=$PYTHON_INCLUDE_DIR/ \
 make -j10
 ```
-**Note:** After the compilation is successful, you need to set the `SERVING_BIN` path, see the following [Notes](COMPILE.md#Notes) ).
+**Note:** After the compilation is successful, you need to set the `SERVING_BIN` path; see the following [Notes](https://github.com/PaddlePaddle/Serving/blob/develop/doc/COMPILE.md#Notes).
 ## Compile Client
......
# Low-Precision Deployment for Paddle Serving
(English|[简体中文](./LOW_PRECISION_DEPLOYMENT_CN.md))
Intel CPUs support int8 and bfloat16 models; NVIDIA TensorRT supports int8 and float16 models.
## Obtain the quantized model through the PaddleSlim tool
To train low-precision models, please refer to [PaddleSlim](https://paddleslim.readthedocs.io/zh_CN/latest/tutorials/quant/overview.html).
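The snippet below is a rough post-training quantization sketch, assuming the PaddleSlim 2.x static-graph API (`paddleslim.quant.quant_post_static`), a float32 inference model under `./ResNet50_infer`, and a synthetic calibration reader standing in for real data; it is illustrative, not the guide's exact recipe.
```
import numpy as np
import paddle
import paddleslim

paddle.enable_static()
exe = paddle.static.Executor(paddle.CPUPlace())

def calib_reader():
    # Yields single calibration samples; PaddleSlim batches them internally.
    # Random data here is a placeholder -- use real samples in practice.
    for _ in range(32):
        yield [np.random.rand(3, 224, 224).astype("float32")]

paddleslim.quant.quant_post_static(
    executor=exe,
    model_dir="./ResNet50_infer",        # placeholder: float32 inference model
    quantize_model_path="./ResNet50_quant",
    sample_generator=calib_reader,
    batch_size=16,
    batch_nums=2)
```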
## Deploy the quantized model from PaddleSlim using Paddle Serving with Nvidia TensorRT int8 mode
Firstly, download the [Resnet50 int8 model](https://paddle-inference-dist.bj.bcebos.com/inference_demo/python/resnet50/ResNet50_quant.tar.gz) and convert it to Paddle Serving's saved model.
```
wget https://paddle-inference-dist.bj.bcebos.com/inference_demo/python/resnet50/ResNet50_quant.tar.gz
tar zxvf ResNet50_quant.tar.gz
python -m paddle_serving_client.convert --dirname ResNet50_quant
```
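The same conversion can also be driven from Python. A minimal sketch, assuming the `inference_model_to_serving` helper in `paddle_serving_client.io` with its default output directories:
```
import paddle_serving_client.io as serving_io

# Equivalent of the CLI convert step above; writes serving_server/ and
# serving_client/ (the default output directory names) next to the model.
serving_io.inference_model_to_serving(dirname="ResNet50_quant")
```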
Start the RPC service, specifying the GPU id and the precision mode:
```
python -m paddle_serving_server.serve --model serving_server --port 9393 --gpu_ids 0 --use_gpu --use_trt --precision int8
```
Request the service with the client:
```
from paddle_serving_client import Client
from paddle_serving_app.reader import Sequential, File2Image, Resize, CenterCrop
from paddle_serving_app.reader import RGB2BGR, Transpose, Div, Normalize
client = Client()
client.load_client_config(
"resnet_v2_50_imagenet_client/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9393"])
seq = Sequential([
File2Image(), Resize(256), CenterCrop(224), RGB2BGR(), Transpose((2, 0, 1)),
Div(255), Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], True)
])
image_file = "daisy.jpg"
img = seq(image_file)
fetch_map = client.predict(feed={"image": img}, fetch=["score"])
print(fetch_map["score"].reshape(-1))
```
## Reference
* [PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim)
* [Deploy the quantized model Using Paddle Inference on Intel CPU](https://paddle-inference.readthedocs.io/en/latest/optimize/paddle_x86_cpu_int8.html)
* [Deploy the quantized model Using Paddle Inference on Nvidia GPU](https://paddle-inference.readthedocs.io/en/latest/optimize/paddle_trt.html)
\ No newline at end of file
# Low-Precision Deployment for Paddle Serving
(简体中文|[English](./LOW_PRECISION_DEPLOYMENT.md))
For low-precision deployment, Intel CPUs support int8 and bfloat16 models, and Nvidia TensorRT supports int8 and float16 models.
## Generate low-precision models through PaddleSlim quantization
For details, see [PaddleSlim quantization](https://paddleslim.readthedocs.io/zh_CN/latest/tutorials/quant/overview.html).
## Deploy a PaddleSlim int8 quantized model with TensorRT int8
First, download the Resnet50 [PaddleSlim quantized model](https://paddle-inference-dist.bj.bcebos.com/inference_demo/python/resnet50/ResNet50_quant.tar.gz) and convert it to the deployment format supported by Paddle Serving.
```
wget https://paddle-inference-dist.bj.bcebos.com/inference_demo/python/resnet50/ResNet50_quant.tar.gz
tar zxvf ResNet50_quant.tar.gz
python -m paddle_serving_client.convert --dirname ResNet50_quant
```
Start the RPC service, setting the chosen GPU id and the model precision:
```
python -m paddle_serving_server.serve --model serving_server --port 9393 --gpu_ids 0 --use_gpu --use_trt --precision int8
```
Send requests with the client:
```
from paddle_serving_client import Client
from paddle_serving_app.reader import Sequential, File2Image, Resize, CenterCrop
from paddle_serving_app.reader import RGB2BGR, Transpose, Div, Normalize
client = Client()
client.load_client_config(
"resnet_v2_50_imagenet_client/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9393"])
seq = Sequential([
File2Image(), Resize(256), CenterCrop(224), RGB2BGR(), Transpose((2, 0, 1)),
Div(255), Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], True)
])
image_file = "daisy.jpg"
img = seq(image_file)
fetch_map = client.predict(feed={"image": img}, fetch=["score"])
print(fetch_map["score"].reshape(-1))
```
## References
* [PaddleSlim](https://github.com/PaddlePaddle/PaddleSlim)
* [Documentation](https://paddle-inference.readthedocs.io/en/latest/optimize/paddle_x86_cpu_int8.html) on deploying quantized models with Paddle Inference on Intel CPU
* [Documentation](https://paddle-inference.readthedocs.io/en/latest/optimize/paddle_trt.html) on deploying quantized models with Paddle Inference on Nvidia GPU
\ No newline at end of file
## Prepare
### Convert model
```
python -m paddle_serving_client.convert --dirname infer_bert-base-chinese_ft_model_4000.pdparams
```
## RPC Service
### Start Service
```
python bert_web_service.py serving_server 7703
```
### Client Prediction
```
python bert_client.py
```
# coding:utf-8
# pylint: disable=doc-string-missing
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
from paddle_serving_client import Client
from paddle_serving_client.utils import benchmark_args
from chinese_bert_reader import ChineseBertReader
import numpy as np
args = benchmark_args()
reader = ChineseBertReader({"max_seq_len": 128})
fetch = ["save_infer_model/scale_0.tmp_1"]
endpoint_list = ['127.0.0.1:7703']
client = Client()
client.load_client_config(args.model)
client.connect(endpoint_list)
for line in sys.stdin:
    feed_dict = reader.process(line)
    for key in feed_dict.keys():
        # every field is padded to max_seq_len, fed as a (128, 1) column
        feed_dict[key] = np.array(feed_dict[key]).reshape((128, 1))
    result = client.predict(feed=feed_dict, fetch=fetch, batch=False)
    print(result)
# coding=utf-8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_server.web_service import WebService
from paddle_serving_app.reader import ChineseBertReader
import sys
import os
import numpy as np
class BertService(WebService):
def load(self):
self.reader = ChineseBertReader({
"vocab_file": "vocab.txt",
"max_seq_len": 128
})
def preprocess(self, feed=[], fetch=[]):
feed_res = []
is_batch = False
for ins in feed:
feed_dict = self.reader.process(ins["words"].encode("utf-8"))
for key in feed_dict.keys():
feed_dict[key] = np.array(feed_dict[key]).reshape(
(len(feed_dict[key]), 1))
feed_res.append(feed_dict)
return feed_res, fetch, is_batch
bert_service = BertService(name="bert")
bert_service.load()
bert_service.load_model_config(sys.argv[1])
bert_service.prepare_server(
workdir="workdir", port=int(sys.argv[2]), use_lite=True, use_xpu=True, ir_optim=True)
bert_service.run_rpc_service()
bert_service.run_web_service()
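For a quick smoke test over HTTP, a minimal client sketch follows; it assumes the `http://<ip>:<port>/<name>/prediction` endpoint pattern of `paddle_serving_server`'s WebService and the fetch name used by `bert_client.py`:
```
import requests

# Hypothetical request against the service started above (name="bert",
# web port 7703); payload keys follow the WebService feed/fetch convention.
payload = {
    "feed": [{"words": "hello world"}],
    "fetch": ["save_infer_model/scale_0.tmp_1"],
}
resp = requests.post("http://127.0.0.1:7703/bert/prediction", json=payload)
print(resp.json())
```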
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# coding=utf-8
from paddle_serving_app.reader.bert_base_reader import BertBaseReader
from paddle_serving_app.reader.batching import pad_batch_data
from paddle_serving_app.reader.tokenization import FullTokenizer, convert_to_unicode
class ChineseBertReader(BertBaseReader):
"""
ChineseBertReader handles the most common Chinese BERT
preprocessing; a user can define the vocab file through initialization
Examples:
from paddle_serving_app import ChineseBertReader
line = ["this is China"]
reader = ChineseBertReader()
reader.process(line[0])
"""
def __init__(self, args={}):
super(ChineseBertReader, self).__init__()
vocab_file = ""
if "vocab_file" in args:
vocab_file = args["vocab_file"]
else:
vocab_file = self._download_or_not()
self.tokenizer = FullTokenizer(vocab_file=vocab_file)
if "max_seq_len" in args:
self.max_seq_len = args["max_seq_len"]
else:
self.max_seq_len = 20
self.vocab = self.tokenizer.vocab
self.pad_id = self.vocab["[PAD]"]
self.cls_id = self.vocab["[CLS]"]
self.sep_id = self.vocab["[SEP]"]
self.mask_id = self.vocab["[MASK]"]
self.feed_keys = [
"input_ids", "token_type_ids"
]
"""
inner function
"""
def _download_or_not(self):
import os
import paddle_serving_app
module_path = os.path.dirname(paddle_serving_app.__file__)
full_path = "{}/tmp/chinese_bert".format(module_path)
os.system("mkdir -p {}".format(full_path))
if os.path.exists("{}/vocab.txt".format(full_path)):
pass
else:
url = "https://paddle-serving.bj.bcebos.com/reader/chinese_bert/vocab.txt"
r = os.system("wget --no-check-certificate " + url)
os.system("mv vocab.txt {}".format(full_path))
if r != 0:
raise SystemExit('Download failed, please check your network')
return "{}/vocab.txt".format(full_path)
"""
inner function
"""
def _pad_batch(self, token_ids, text_type_ids):
batch_token_ids = [token_ids]
batch_text_type_ids = [text_type_ids]
padded_token_ids, input_mask = pad_batch_data(
batch_token_ids,
max_seq_len=self.max_seq_len,
pad_idx=self.pad_id,
return_input_mask=True)
padded_text_type_ids = pad_batch_data(
batch_text_type_ids,
max_seq_len=self.max_seq_len,
pad_idx=self.pad_id)
return padded_token_ids, padded_text_type_ids
"""
process function deals with a raw Chinese string as a sentence;
this function returns a feed_dict
default keys of the returned feed_dict: input_ids, token_type_ids
"""
def process(self, line):
text_a = convert_to_unicode(line)
tokens_a = self.tokenizer.tokenize(text_a)
if len(tokens_a) > self.max_seq_len - 2:
tokens_a = tokens_a[0:(self.max_seq_len - 2)]
tokens = []
text_type_ids = []
tokens.append("[CLS]")
text_type_ids.append(0)
for token in tokens_a:
tokens.append(token)
text_type_ids.append(0)
token_ids = self.tokenizer.convert_tokens_to_ids(tokens)
#position_ids = list(range(len(token_ids)))
p_token_ids, p_text_type_ids= \
self._pad_batch(token_ids, text_type_ids)
feed_result = {
self.feed_keys[0]: p_token_ids.reshape(-1).tolist(),
#self.feed_keys[1]: p_pos_ids.reshape(-1).tolist(),
self.feed_keys[1]: p_text_type_ids.reshape(-1).tolist(),
#self.feed_keys[3]: input_mask.reshape(-1).tolist()
}
return feed_result
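A short usage sketch of the reader defined above (assuming `vocab.txt` is present or downloadable via `_download_or_not`):
```
# process() pads every field to max_seq_len and keys the result by
# feed_keys, so a single sentence yields fixed-length feeds.
reader = ChineseBertReader({"max_seq_len": 128})
feed = reader.process("百度是一家高科技公司")
print(sorted(feed.keys()))     # ['input_ids', 'token_type_ids']
print(len(feed["input_ids"]))  # 128, padded to max_seq_len
```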
## Prepare
### Convert model
```
python3 -m paddle_serving_client.convert --dirname erine
```
## RPC Service
### Start Service
```
python3 -m paddle_serving_server.serve --model serving_server --port 7704 --use_lite --use_xpu --ir_optim
```
### Client Prediction
```
head data-c.txt | python3 ernie_client.py --model serving_client/serving_client_conf.prototxt
```
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# coding=utf-8
from paddle_serving_app.reader.bert_base_reader import BertBaseReader
from paddle_serving_app.reader.batching import pad_batch_data
from paddle_serving_app.reader.tokenization import FullTokenizer, convert_to_unicode
class ChineseErnieReader(BertBaseReader):
"""
ChineseErnieReader handles the most common Chinese ERNIE
preprocessing; a user can define the vocab file through initialization
Examples:
from paddle_serving_app import ChineseErnieReader
line = ["this is China"]
reader = ChineseErnieReader()
reader.process(line[0])
"""
def __init__(self, args={}):
super(ChineseErnieReader, self).__init__()
vocab_file = ""
if "vocab_file" in args:
vocab_file = args["vocab_file"]
print("vocab")
else:
vocab_file = self._download_or_not()
self.tokenizer = FullTokenizer(vocab_file=vocab_file)
print(self.tokenizer)
if "max_seq_len" in args:
self.max_seq_len = args["max_seq_len"]
else:
self.max_seq_len = 20
self.vocab = self.tokenizer.vocab
self.pad_id = self.vocab["[PAD]"]
self.cls_id = self.vocab["[CLS]"]
self.sep_id = self.vocab["[SEP]"]
self.mask_id = self.vocab["[MASK]"]
self.feed_keys = [
"placeholder_0", "placeholder_1", "placeholder_2", "placeholder_3"
]
"""
inner function
"""
def _download_or_not(self):
import os
import paddle_serving_app
module_path = os.path.dirname(paddle_serving_app.__file__)
full_path = "{}/tmp/chinese_bert".format(module_path)
os.system("mkdir -p {}".format(full_path))
if os.path.exists("{}/vocab.txt".format(full_path)):
pass
else:
url = "https://paddle-serving.bj.bcebos.com/reader/chinese_bert/vocab.txt"
r = os.system("wget --no-check-certificate " + url)
os.system("mv vocab.txt {}".format(full_path))
if r != 0:
raise SystemExit('Download failed, please check your network')
return "{}/vocab.txt".format(full_path)
"""
inner function
"""
def _pad_batch(self, token_ids, text_type_ids, position_ids):
batch_token_ids = [token_ids]
batch_text_type_ids = [text_type_ids]
batch_position_ids = [position_ids]
padded_token_ids, input_mask = pad_batch_data(
batch_token_ids,
max_seq_len=self.max_seq_len,
pad_idx=self.pad_id,
return_input_mask=True)
padded_text_type_ids = pad_batch_data(
batch_text_type_ids,
max_seq_len=self.max_seq_len,
pad_idx=self.pad_id)
padded_position_ids = pad_batch_data(
batch_position_ids,
max_seq_len=self.max_seq_len,
pad_idx=self.pad_id)
return padded_token_ids, padded_position_ids, padded_text_type_ids, input_mask
"""
process function deals with a raw Chinese string as a sentence;
this function returns a feed_dict
default keys of the returned feed_dict: placeholder_0 (input_ids), placeholder_1 (position_ids), placeholder_2 (segment_ids), placeholder_3 (input_mask)
"""
def process(self, line):
text_a = convert_to_unicode(line)
tokens_a = self.tokenizer.tokenize(text_a)
if len(tokens_a) > self.max_seq_len - 2:
tokens_a = tokens_a[0:(self.max_seq_len - 2)]
tokens = []
text_type_ids = []
tokens.append("[CLS]")
text_type_ids.append(0)
for token in tokens_a:
tokens.append(token)
text_type_ids.append(0)
token_ids = self.tokenizer.convert_tokens_to_ids(tokens)
position_ids = list(range(len(token_ids)))
p_token_ids, p_pos_ids, p_text_type_ids, input_mask = \
self._pad_batch(token_ids, text_type_ids, position_ids)
feed_result = {
self.feed_keys[0]: p_token_ids.reshape(-1).tolist(),
self.feed_keys[1]: p_pos_ids.reshape(-1).tolist(),
self.feed_keys[2]: p_text_type_ids.reshape(-1).tolist(),
self.feed_keys[3]: input_mask.reshape(-1).tolist()
}
return feed_result
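The ERNIE reader works the same way but emits four fields; a short sketch:
```
# The four placeholder_* keys map to input_ids, position_ids,
# segment_ids and input_mask (see feed_keys and _pad_batch above).
reader = ChineseErnieReader({"max_seq_len": 128})
feed = reader.process("百度是一家高科技公司")
print(sorted(feed.keys()))
# ['placeholder_0', 'placeholder_1', 'placeholder_2', 'placeholder_3']
```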
# coding=utf-8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_server.web_service import WebService
from paddle_serving_app.reader import ChineseBertReader
import sys
import os
import numpy as np
class BertService(WebService):
def load(self):
self.reader = ChineseBertReader({
"vocab_file": "vocab.txt",
"max_seq_len": 128
})
def preprocess(self, feed=[], fetch=[]):
feed_res = []
is_batch = False
for ins in feed:
feed_dict = self.reader.process(ins["words"].encode("utf-8"))
for key in feed_dict.keys():
feed_dict[key] = np.array(feed_dict[key]).reshape(
(len(feed_dict[key]), 1))
feed_res.append(feed_dict)
return feed_res, fetch, is_batch
bert_service = BertService(name="bert")
bert_service.load()
bert_service.load_model_config(sys.argv[1])
bert_service.prepare_server(
workdir="workdir", port=int(sys.argv[2]), use_lite=True, use_xpu=True, ir_optim=True)
bert_service.run_rpc_service()
bert_service.run_web_service()
# coding:utf-8
# pylint: disable=doc-string-missing
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
from paddle_serving_client import Client
from paddle_serving_client.utils import benchmark_args
from chinese_ernie_reader import ChineseErnieReader
import numpy as np
args = benchmark_args()
reader = ChineseErnieReader({"max_seq_len": 128})
fetch = ["save_infer_model/scale_0"]
endpoint_list = ['127.0.0.1:7704']
client = Client()
client.load_client_config(args.model)
client.connect(endpoint_list)
for line in sys.stdin:
    feed_dict = reader.process(line)
    for key in feed_dict.keys():
        # every field is padded to max_seq_len, fed as a (128, 1) column
        feed_dict[key] = np.array(feed_dict[key]).reshape((128, 1))
    result = client.predict(feed=feed_dict, fetch=fetch, batch=False)
    print(result)
## Prepare
### Convert model
```
python -m paddle_serving_client.convert --dirname VGG19
```
## RPC Service
### Start Service
```
python -m paddle_serving_server.serve --model serving_server --port 7702 --use_lite --use_xpu --ir_optim
```
### Client Prediction
```
python vgg19_client.py
```
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle_serving_client import Client
from paddle_serving_app.reader import Sequential, File2Image, Resize, CenterCrop
from paddle_serving_app.reader import RGB2BGR, Transpose, Div, Normalize
client = Client()
client.load_client_config(
"serving_client/serving_client_conf.prototxt")
client.connect(["127.0.0.1:7702"])
seq = Sequential([
File2Image(), Resize(256), CenterCrop(224), RGB2BGR(), Transpose((2, 0, 1)),
Div(255), Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225], True)
])
image_file = "daisy.jpg"
img = seq(image_file)
fetch_map = client.predict(feed={"image": img}, fetch=["save_infer_model/scale_0"])
#print(fetch_map)
print(fetch_map["save_infer_model/scale_0"].reshape(-1))
@@ -47,3 +47,5 @@ elif package_name.endswith('xpu'):
 path = "paddle_serving_" + sys.argv[1]
 commit_id = subprocess.check_output(['git', 'rev-parse', 'HEAD'])
 update_info(path + "/version.py", "commit_id", commit_id)
+
+update_info(path + "/version.py", "version_tag", "${VERSION_TAG}")
@@ -12,3 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 from .models import ServingModels
+
+from . import version
+__version__ = version.version_tag
@@ -12,5 +12,5 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ Paddle Serving App version string """
-serving_app_version = "0.0.0"
+version_tag = "0.0.0"
 commit_id = ""
@@ -17,4 +17,4 @@ from . import version
 from . import client
 from .client import *
-__version__ = version.serving_client_version
+__version__ = version.version_tag
@@ -12,7 +12,6 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ Paddle Serving Client version string """
-serving_client_version = "0.0.0"
-serving_server_version = "0.0.0"
+version_tag = "0.0.0"
 module_proto_version = "0.0.0"
 commit_id = ""
@@ -31,4 +31,4 @@ from paddle_serving_server import (
 from .dag import *
 from .server import *
-__version__ = version.serving_server_version
+__version__ = version.version_tag
@@ -22,7 +22,7 @@ from .proto import general_model_config_pb2 as m_config
 from .proto import multi_lang_general_model_service_pb2_grpc
 import google.protobuf.text_format
 import time
-from .version import serving_server_version, version_suffix, device_type
+from .version import version_tag, version_suffix, device_type
 from contextlib import closing
 import argparse
@@ -369,7 +369,7 @@ class Server(object):
         version_file = open("{}/version.py".format(self.module_path), "r")
         folder_name = "serving-%s-%s" % (self.get_serving_bin_name(),
-                                         serving_server_version)
+                                         version_tag)
         tar_name = "%s.tar.gz" % folder_name
         bin_url = "https://paddle-serving.bj.bcebos.com/bin/%s" % tar_name
......
@@ -12,10 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """ Paddle Serving Server version string """
-serving_client_version = "0.0.0"
-serving_server_version = "0.0.0"
-module_proto_version = "0.0.0"
+version_tag = "0.0.0"
 version_suffix = ""
+module_proto_version = "0.0.0"
 device_type = "0"
 cuda_version = "9"
 commit_id = ""
@@ -22,7 +22,7 @@ import os
 from setuptools import setup, Distribution, Extension
 from setuptools import find_packages
 from setuptools import setup
-from paddle_serving_app.version import serving_app_version
+from paddle_serving_app.version import version_tag
 from pkg_resources import DistributionNotFound, get_distribution
 def python_version():
@@ -78,7 +78,7 @@ package_dir={'paddle_serving_app':
 setup(
     name='paddle-serving-app',
-    version=serving_app_version.replace('-', ''),
+    version=version_tag.replace('-', ''),
     description=
     ('Paddle Serving Package for saved model with PaddlePaddle'),
     url='https://github.com/PaddlePaddle/Serving',
......
@@ -22,7 +22,7 @@ import sys
 from setuptools import setup, Distribution, Extension
 from setuptools import find_packages
 from setuptools import setup
-from paddle_serving_client.version import serving_client_version
+from paddle_serving_client.version import version_tag
 import util
 py_version = sys.version_info
@@ -79,7 +79,7 @@ package_dir={'paddle_serving_client':
 setup(
     name='paddle-serving-client',
-    version=serving_client_version.replace('-', ''),
+    version=version_tag.replace('-', ''),
     description=
     ('Paddle Serving Package for saved model with PaddlePaddle'),
     url='https://github.com/PaddlePaddle/Serving',
......
@@ -19,7 +19,7 @@ from __future__ import print_function
 from setuptools import setup, Distribution, Extension
 from setuptools import find_packages
 from setuptools import setup
-from paddle_serving.version import serving_client_version
+from paddle_serving.version import version_tag
 from grpc_tools import protoc
 import util
@@ -43,7 +43,7 @@ package_dir={'paddle_serving.serving_client':
 setup(
     name='paddle-serving-client',
-    version=serving_client_version.replace('-', ''),
+    version=version_tag.replace('-', ''),
     description=
     ('Paddle Serving Package for saved model with PaddlePaddle'),
     url='https://github.com/PaddlePaddle/Serving',
......
@@ -19,10 +19,10 @@ from __future__ import print_function
 from setuptools import setup, Distribution, Extension
 from setuptools import find_packages
 from setuptools import setup
-from paddle_serving_server.version import serving_server_version, version_suffix
+from paddle_serving_server.version import version_tag, version_suffix
 import util
-package_version = serving_server_version.replace('-', '')
+package_version = version_tag.replace('-', '')
 if version_suffix != "":
     version_suffix = "post" + version_suffix
     package_version = package_version + "." + version_suffix
......