提交 4c9826b0 编写于 作者: W wangjiawei04

Merge branch 'pyserving' of https://github.com/barrierye/Serving into pyserving

......@@ -56,6 +56,8 @@ pip install paddle-serving-server-gpu # GPU
You may need to use a domestic mirror source (in China, you can use the Tsinghua mirror source, add `-i https://pypi.tuna.tsinghua.edu.cn/simple` to pip command) to speed up the download.
If you need install modules compiled with develop branch, please download packages from [latest packages list](./doc/LATEST_PACKAGES.md) and install with `pip install` command.
The client package supports CentOS 7 and Ubuntu 18; alternatively, you can use the HTTP service without installing the client package.
<h2 align="center">Quick Start Example</h2>
......
......@@ -57,6 +57,8 @@ pip install paddle-serving-server-gpu # GPU
您可能需要使用国内镜像源(例如清华源, 在pip命令中添加`-i https://pypi.tuna.tsinghua.edu.cn/simple`)来加速下载。
如果需要使用develop分支编译的安装包,请从[最新安装包列表](./doc/LATEST_PACKAGES.md)中获取下载地址进行下载,使用`pip install`命令进行安装。
客户端安装包支持Centos 7和Ubuntu 18,或者您可以使用HTTP服务,这种情况下不需要安装客户端。
<h2 align="center">快速启动示例</h2>
......
# Latest Wheel Packages
## CPU server
### Python 3
```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server-0.3.0-py3-none-any.whl
```
### Python 2
```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server-0.3.0-py2-none-any.whl
```
## GPU server
### Python 3
```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.3.0-py3-none-any.whl
```
### Python 2
```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_server_gpu-0.3.0-py2-none-any.whl
```
## Client
### Python 3.7
```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.3.0-cp37-none-manylinux1_x86_64.whl
```
### Python 3.6
```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.3.0-cp36-none-manylinux1_x86_64.whl
```
### Python 2.7
```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_client-0.3.0-cp27-none-manylinux1_x86_64.whl
```
## App
### Python 3
```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_app-0.1.0-py3-none-any.whl
```
### Python 2
```
https://paddle-serving.bj.bcebos.com/whl/paddle_serving_app-0.1.0-py2-none-any.whl
```
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle_serving_client import Client
from paddle_serving_app.reader import ChineseBertReader
import sys

# Smoke-test client: reads text lines from stdin, sends them to a BERT
# serving endpoint in fixed-size batches, and verifies the shapes of the
# fetched output tensors.
client = Client()
client.load_client_config("./bert_seq32_client/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9292"])

reader = ChineseBertReader({"max_seq_len": 32})
fetch = ["sequence_10", "sequence_12", "pooled_output"]
# Expected tensor shapes for a full batch of `batch_size` samples; the
# first dimension is the batch size, the rest are fixed by the model.
expected_shape = {
    "sequence_10": (4, 32, 768),
    "sequence_12": (4, 32, 768),
    "pooled_output": (4, 768)
}
batch_size = 4
feed_batch = []


def _check_shapes(fetch_map, cur_batch):
    """Exit with status 1 if any fetched var's shape is unexpected.

    The first dimension of the expected shape is replaced with the actual
    batch size so a trailing partial batch is checked correctly.
    """
    for var_name in fetch:
        expected = (cur_batch, ) + expected_shape[var_name][1:]
        if fetch_map[var_name].shape != expected:
            print("fetch var {} shape error.".format(var_name))
            sys.exit(1)


for line in sys.stdin:
    # BUG FIX: the original appended OR predicted, which silently dropped
    # the sample that triggered the flush and never sent a trailing
    # partial batch. Always append first, then flush on a full batch.
    feed_batch.append(reader.process(line))
    if len(feed_batch) == batch_size:
        _check_shapes(
            client.predict(feed=feed_batch, fetch=fetch), batch_size)
        feed_batch = []

# Flush any remaining samples so no input line is skipped.
if feed_batch:
    _check_shapes(
        client.predict(feed=feed_batch, fetch=fetch), len(feed_batch))
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle_serving_client import Client
from paddle_serving_client.utils import MultiThreadRunner
import paddle
def single_func(idx, resource):
    """One load-test worker: push every uci_housing test sample through the
    serving endpoints.

    Args:
        idx: worker index assigned by MultiThreadRunner (unused here).
        resource: shared resource dict given to every worker (unused here).

    Returns:
        A nested-list placeholder so MultiThreadRunner can gather results.
    """
    worker_client = Client()
    worker_client.load_client_config(
        "./uci_housing_client/serving_client_conf.prototxt")
    worker_client.connect(["127.0.0.1:9293", "127.0.0.1:9292"])
    shuffled = paddle.reader.shuffle(
        paddle.dataset.uci_housing.test(), buf_size=500)
    batched = paddle.batch(shuffled, batch_size=1)
    for sample in batched():
        fetch_map = worker_client.predict(
            feed={"x": sample[0][0]}, fetch=["price"])
    return [[0]]


# Fan the worker out over several threads against the two server ports.
multi_thread_runner = MultiThreadRunner()
thread_num = 4
result = multi_thread_runner.run(single_func, thread_num, {})
......@@ -29,11 +29,11 @@ x = np.array(
lp = LineProfiler()
lp_wrapper = lp(client.predict)
for i in range(3):
for i in range(1):
fetch_map = lp_wrapper(
feed={"x": x}, fetch_with_type={"combine_op_output": "float"})
# fetch_map = client.predict(
# feed={"x": x}, fetch_with_type={"combine_op_output": "float"})
print(fetch_map)
lp.print_stats()
#lp.print_stats()
......@@ -72,7 +72,9 @@ cnn_op = UciOp(
client_config="uci_housing_client/serving_client_conf.prototxt",
server_name="127.0.0.1:9393",
fetch_names=["price"],
concurrency=1)
concurrency=1,
timeout=0.01,
retry=2)
bow_op = UciOp(
name="bow",
......@@ -86,7 +88,9 @@ bow_op = UciOp(
client_config="uci_housing_client/serving_client_conf.prototxt",
server_name="127.0.0.1:9393",
fetch_names=["price"],
concurrency=1)
concurrency=1,
timeout=-1,
retry=1)
combine_op = CombineOp(
name="combine",
......@@ -94,12 +98,14 @@ combine_op = CombineOp(
in_dtype='float',
outputs=[out_channel],
out_dtype='float',
concurrency=1)
concurrency=1,
timeout=-1,
retry=1)
logging.info(read_channel.debug())
logging.info(combine_channel.debug())
logging.info(out_channel.debug())
pyserver = PyServer(profile=False)
pyserver = PyServer(profile=False, retry=1)
pyserver.add_channel(read_channel)
pyserver.add_channel(combine_channel)
pyserver.add_channel(out_channel)
......
wget https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/SentimentAnalysis/senta_bilstm.tar.gz --no-check-certificate
tar -xzvf senta_bilstm.tar.gz
wget https://paddle-serving.bj.bcebos.com/paddle_hub_models/text/LexicalAnalysis/lac.tar.gz --no-check-certificate
tar -xzvf lac_model.tar.gz
tar -xzvf lac.tar.gz
wget https://paddle-serving.bj.bcebos.com/reader/lac/lac_dict.tar.gz --no-check-certificate
tar -xzvf lac_dict.tar.gz
wget https://paddle-serving.bj.bcebos.com/reader/senta/vocab.txt --no-check-certificate
......@@ -42,12 +42,16 @@ class _TimeProfiler(object):
self._enable = enable
def record(self, name_with_tag):
if self._enable is False:
return
name_with_tag = name_with_tag.split("_")
tag = name_with_tag[-1]
name = '_'.join(name_with_tag[:-1])
self._time_record.put((name, tag, int(round(time.time() * 1000000))))
def print_profile(self):
if self._enable is False:
return
sys.stderr.write(self._print_head)
tmp = {}
while not self._time_record.empty():
......@@ -267,7 +271,8 @@ class Op(object):
server_name=None,
fetch_names=None,
concurrency=1,
timeout=-1):
timeout=-1,
retry=2):
self._run = False
# TODO: globally unique check
self._name = name # to identify the type of OP, it must be globally unique
......@@ -285,6 +290,7 @@ class Op(object):
self._server_port = server_port
self._device = device
self._timeout = timeout
self._retry = retry
def set_client(self, client_config, server_name, fetch_names):
self._client = Client()
......@@ -387,24 +393,35 @@ class Op(object):
error_info = None
if self.with_serving():
_profiler.record("{}{}-midp_0".format(self._name,
concurrency_idx))
if self._time > 0:
try:
data = func_timeout.func_timeout(
self._time, self.midprocess, args=(data, ))
except func_timeout.FunctionTimedOut:
logging.error("error: timeout")
error_info = "{}({}): timeout".format(
self._name, concurrency_idx)
except Exception as e:
logging.error("error: {}".format(e))
error_info = "{}({}): {}".format(self._name,
concurrency_idx, e)
else:
data = self.midprocess(data)
_profiler.record("{}{}-midp_1".format(self._name,
concurrency_idx))
for i in range(self._retry):
_profiler.record("{}{}-midp_0".format(self._name,
concurrency_idx))
if self._timeout > 0:
try:
middata = func_timeout.func_timeout(
self._timeout,
self.midprocess,
args=(data, ))
except func_timeout.FunctionTimedOut:
logging.error("error: timeout")
error_info = "{}({}): timeout".format(
self._name, concurrency_idx)
except Exception as e:
logging.error("error: {}".format(e))
error_info = "{}({}): {}".format(
self._name, concurrency_idx, e)
else:
middata = self.midprocess(data)
_profiler.record("{}{}-midp_1".format(self._name,
concurrency_idx))
if error_info is None:
data = middata
break
if i + 1 < self._retry:
error_info = None
logging.warn(
self._log("warn: timeout, retry({})".format(i +
1)))
_profiler.record("{}{}-postp_0".format(self._name,
concurrency_idx))
......@@ -509,6 +526,7 @@ class GeneralPythonService(
data = python_service_channel_pb2.ChannelData()
data_id = self._get_next_id()
data.id = data_id
data.is_error = 0
for idx, name in enumerate(request.feed_var_names):
logging.debug(
self._log('name: {}'.format(request.feed_var_names[idx])))
......
......@@ -331,6 +331,75 @@ function python_test_bert() {
cd ..
}
function python_test_multi_fetch() {
    # pwd: /Serving/python/examples
    # Verify that a single RPC call can fetch multiple output variables
    # from a BERT server. $1 selects the build type (CPU or GPU).
    # BUG FIX: was `local TYPT=$1` (typo), which left TYPE unset in this
    # function and only worked via dynamic scoping from the caller.
    local TYPE=$1
    export SERVING_BIN=${SERVING_WORKDIR}/build-server-${TYPE}/core/general-server/serving
    cd bert # pwd: /Serving/python/examples/bert
    case $TYPE in
    CPU)
        #download model (max_seq_len=32)
        wget https://paddle-serving.bj.bcebos.com/bert_example/bert_multi_fetch.tar.gz
        tar -xzvf bert_multi_fetch.tar.gz
        check_cmd "python -m paddle_serving_server.serve --model bert_seq32_model --port 9292 &"
        sleep 5
        check_cmd "head -n 8 data-c.txt | python test_multi_fetch_client.py"
        kill_server_process
        # BUG FIX: "mutli" typo in the user-visible message.
        echo "bert multi fetch RPC inference pass"
        ;;
    GPU)
        #download model (max_seq_len=32)
        wget https://paddle-serving.bj.bcebos.com/bert_example/bert_multi_fetch.tar.gz
        tar -xzvf bert_multi_fetch.tar.gz
        check_cmd "python -m paddle_serving_server_gpu.serve --model bert_seq32_model --port 9292 --gpu_ids 0 &"
        sleep 5
        check_cmd "head -n 8 data-c.txt | python test_multi_fetch_client.py"
        kill_server_process
        echo "bert multi fetch RPC inference pass"
        ;;
    *)
        echo "error type"
        exit 1
        ;;
    esac
    echo "test multi fetch $TYPE finished as expected."
    unset SERVING_BIN
    cd ..
}
function python_test_multi_process(){
    # pwd: /Serving/python/examples
    # Start two uci_housing servers and hit both from a multi-threaded
    # client. $1 selects the build type (CPU or GPU).
    # BUG FIX: was `local TYPT=$1` (typo), which left TYPE unset in this
    # function and only worked via dynamic scoping from the caller.
    local TYPE=$1
    export SERVING_BIN=${SERVING_WORKDIR}/build-server-${TYPE}/core/general-server/serving
    cd fit_a_line # pwd: /Serving/python/examples/fit_a_line
    sh get_data.sh
    case $TYPE in
    CPU)
        check_cmd "python -m paddle_serving_server.serve --model uci_housing_model --port 9292 &"
        check_cmd "python -m paddle_serving_server.serve --model uci_housing_model --port 9293 &"
        sleep 5
        check_cmd "python test_multi_process_client.py"
        kill_server_process
        # BUG FIX: message wrongly said "bert mutli rpc"; this test runs
        # the fit_a_line/uci_housing model with multiple processes.
        echo "fit_a_line multi process RPC inference pass"
        ;;
    GPU)
        check_cmd "python -m paddle_serving_server_gpu.serve --model uci_housing_model --port 9292 --gpu_ids 0 &"
        check_cmd "python -m paddle_serving_server_gpu.serve --model uci_housing_model --port 9293 --gpu_ids 0 &"
        sleep 5
        check_cmd "python test_multi_process_client.py"
        kill_server_process
        echo "fit_a_line multi process RPC inference pass"
        ;;
    *)
        echo "error type"
        exit 1
        ;;
    esac
    echo "test multi process $TYPE finished as expected."
    unset SERVING_BIN
    cd ..
}
function python_test_imdb() {
# pwd: /Serving/python/examples
local TYPE=$1
......@@ -436,7 +505,9 @@ function python_run_test() {
python_run_criteo_ctr_with_cube $TYPE # pwd: /Serving/python/examples
python_test_bert $TYPE # pwd: /Serving/python/examples
python_test_imdb $TYPE # pwd: /Serving/python/examples
python_test_lac $TYPE
python_test_lac $TYPE # pwd: /Serving/python/examples
python_test_multi_process $TYPE # pwd: /Serving/python/examples
python_test_multi_fetch $TYPE # pwd: /Serving/python/examples
echo "test python $TYPE part finished as expected."
cd ../.. # pwd: /Serving
}
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册