Merge branch 'develop' into add-dockerfile

fa9cac61 · TeslaZhao · GitHub · ee8293e1 · 5713b507 · fa9cac61
10 changed file
--- a/doc/FAQ.md
+++ b/doc/FAQ.md
@@ -4,6 +4,35 @@
 ## 基础知识
+#### Q: Paddle Serving、Paddle Inference、PaddleHub Serving三者的区别及联系？
+**A:** paddle serving是远程服务，即发起预测的设备（手机、浏览器、客户端等）与实际预测的硬件不在一起。paddle inference是一个library，适合嵌入到一个大系统中保证预测效率，paddle serving调用了paddle inference做远程服务。paddlehub serving可以认为是一个示例，都会使用paddle serving作为统一预测服务入口。如果在web端交互，一般是调用远程服务的形式，可以使用paddle serving的web service搭建。
+#### Q: paddle-serving是否支持Int32
+**A:** 支持。在protobuf中定义的feed_type和fetch_type编号与数据类型的对应关系如下：
+     0-int64
+     1-float32
+     2-int32
+#### Q: paddle-serving是否支持windows和Linux环境下的多线程调用 
+**A:** 客户端可以发起多线程访问调用服务端 
+#### Q: paddle-serving如何修改消息大小限制
+**A:** 在server端和client端通过FLAGS_max_body_size来扩大数据量限制，单位为字节，默认为64MB
+#### Q: paddle-serving客户端目前支持哪些语言
+**A:** Java、C++、Python
+#### Q: paddle-serving目前支持哪些协议
+**A:** HTTP、RPC
 ## 编译问题
@@ -46,7 +75,15 @@ InvalidArgumentError: Device id must be less than GPU count, but received id is:
 **A:** 目前（0.4.0）仅支持CentOS，具体列表查阅[这里](https://github.com/PaddlePaddle/Serving/blob/develop/doc/DOCKER_IMAGES.md)
+#### Q: python编译的GCC版本与serving的版本不匹配
+**A:** 1)使用[GPU docker](https://github.com/PaddlePaddle/Serving/blob/develop/doc/RUN_IN_DOCKER.md#gpunvidia-docker)解决环境问题
+       2)修改anaconda的虚拟环境下安装的python的gcc版本[参考](https://www.jianshu.com/p/c498b3d86f77)
+#### Q: paddle-serving是否支持本地离线安装 
+**A:** 支持离线部署，需要把一些相关的[依赖包](https://github.com/PaddlePaddle/Serving/blob/develop/doc/COMPILE.md)提前准备安装好
 ## 预测问题
@@ -105,6 +142,19 @@ client端的日志直接打印到标准输出。
 通过在部署服务之前 'export  GLOG_v=3'可以输出更为详细的日志信息。
+#### Q: paddle-serving启动成功后，相关的日志在哪里设置
+**A:** 1)警告是glog组件打印的，告知glog初始化之前日志打印在STDERR
+	   2)一般采用GLOG_v方式启动服务同时设置日志级别。
+例如：
+```
+GLOG_v=2 python -m paddle_serving_server.serve --model xxx_conf/ --port 9999 
+```
 #### Q: （GLOG_v=2下）Server端日志一切正常，但Client端始终得不到正确的预测结果
 **A:** 可能是配置文件有问题，检查下配置文件（is_load_tensor，fetch_type等有没有问题）

--- a/python/examples/pipeline/imdb_model_ensemble/README_CN.md
+++ b/python/examples/pipeline/imdb_model_ensemble/README_CN.md
@@ -8,8 +8,8 @@ sh get_data.sh
 ## 启动服务
 ```
-python -m paddle_serving_server_gpu.serve --model imdb_cnn_model --port 9292 &> cnn.log &
+python -m paddle_serving_server.serve --model imdb_cnn_model --port 9292 &> cnn.log &
-python -m paddle_serving_server_gpu.serve --model imdb_bow_model --port 9393 &> bow.log &
+python -m paddle_serving_server.serve --model imdb_bow_model --port 9393 &> bow.log &
 python test_pipeline_server.py &>pipeline.log &
 ```
@@ -17,8 +17,3 @@ python test_pipeline_server.py &>pipeline.log &
 ```
 python test_pipeline_client.py
 ```
-## HTTP 测试
-```
-curl -X POST -k http://localhost:9999/prediction -d '{"key": ["words"], "value": ["i am very sad | 0"]}'
-```
--- a/python/examples/pipeline/imdb_model_ensemble/test_pipeline_server.py
+++ b/python/examples/pipeline/imdb_model_ensemble/test_pipeline_server.py
@@ -41,7 +41,9 @@ class ImdbRequestOp(RequestOp):
                continue
            words = request.value[idx]
            word_ids, _ = self.imdb_dataset.get_words_and_label(words)
-            dictdata[key] = np.array(word_ids)
+            word_len = len(word_ids)
+            dictdata[key] = np.array(word_ids).reshape(word_len, 1)
+            dictdata["{}.lod".format(key)] = [0, word_len]
        return dictdata
@@ -77,16 +79,18 @@ bow_op = Op(name="bow",
            server_endpoints=["127.0.0.1:9393"],
            fetch_list=["prediction"],
            client_config="imdb_bow_client_conf/serving_client_conf.prototxt",
+            client_type='brpc',
            concurrency=1,
            timeout=-1,
            retry=1,
-            batch_size=3,
+            batch_size=1,
-            auto_batching_timeout=1000)
+            auto_batching_timeout=None)
 cnn_op = Op(name="cnn",
            input_ops=[read_op],
            server_endpoints=["127.0.0.1:9292"],
            fetch_list=["prediction"],
            client_config="imdb_cnn_client_conf/serving_client_conf.prototxt",
+            client_type='brpc',
            concurrency=1,
            timeout=-1,
            retry=1,

--- a/python/examples/pipeline/ocr/local_service_pipeline_server.py
+++ b/python/examples/pipeline/ocr/local_service_pipeline_server.py
@@ -12,11 +12,11 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 # pylint: disable=doc-string-missing
-from paddle_serving_server_gpu.pipeline import Op, RequestOp, ResponseOp
+from paddle_serving_server.pipeline import Op, RequestOp, ResponseOp
-from paddle_serving_server_gpu.pipeline import PipelineServer
+from paddle_serving_server.pipeline import PipelineServer
-from paddle_serving_server_gpu.pipeline.proto import pipeline_service_pb2
+from paddle_serving_server.pipeline.proto import pipeline_service_pb2
-from paddle_serving_server_gpu.pipeline.channel import ChannelDataEcode
+from paddle_serving_server.pipeline.channel import ChannelDataEcode
-from paddle_serving_server_gpu.pipeline import LocalRpcServiceHandler
+from paddle_serving_server.pipeline import LocalServiceHandler
 import numpy as np
 import cv2
 import time
@@ -56,9 +56,11 @@ class DetOp(Op):
        data = np.fromstring(data, np.uint8)
        # Note: class variables(self.var) can only be used in process op mode
        self.im = cv2.imdecode(data, cv2.IMREAD_COLOR)
+        print(self.im)
        self.ori_h, self.ori_w, _ = self.im.shape
        det_img = self.det_preprocess(self.im)
        _, self.new_h, self.new_w = det_img.shape
+        print("image", det_img)
        return {"image": det_img}
    def postprocess(self, input_dicts, fetch_dict):
@@ -111,11 +113,11 @@ read_op = RequestOp()
 det_op = DetOp(
    name="det",
    input_ops=[read_op],
-    local_rpc_service_handler=LocalRpcServiceHandler(
+    client_type="local_predictor",
+    local_service_handler=LocalServiceHandler(
        model_config="ocr_det_model",
        workdir="det_workdir",  # defalut: "workdir"
        thread_num=2,  # defalut: 2
-        devices="0",  # gpu0. defalut: "" (cpu)
        mem_optim=True,  # defalut: True
        ir_optim=False,  # defalut: False
        available_port_generator=None),  # defalut: None
@@ -123,8 +125,8 @@ det_op = DetOp(
 rec_op = RecOp(
    name="rec",
    input_ops=[det_op],
-    local_rpc_service_handler=LocalRpcServiceHandler(
+    client_type="local_predictor",
-        model_config="ocr_rec_model"),
+    local_service_handler=LocalServiceHandler(model_config="ocr_rec_model"),
    concurrency=1)
 response_op = ResponseOp(input_ops=[rec_op])

--- a/python/examples/pipeline/ocr/pipeline_http_client.py
+++ b/python/examples/pipeline/ocr/pipeline_http_client.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from paddle_serving_server_gpu.pipeline import PipelineClient
+from paddle_serving_server.pipeline import PipelineClient
 import numpy as np
 import requests
 import json

--- a/python/examples/pipeline/ocr/pipeline_rpc_client.py
+++ b/python/examples/pipeline/ocr/pipeline_rpc_client.py
@@ -11,7 +11,7 @@
 # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 # See the License for the specific language governing permissions and
 # limitations under the License.
-from paddle_serving_server_gpu.pipeline import PipelineClient
+from paddle_serving_server.pipeline import PipelineClient
 import numpy as np
 import requests
 import json
@@ -33,6 +33,6 @@ for img_file in os.listdir(test_img_dir):
        image_data = file.read()
    image = cv2_to_base64(image_data)
-for i in range(4):
+for i in range(1):
    ret = client.predict(feed_dict={"image": image}, fetch=["res"])
    print(ret)
--- a/python/examples/pipeline/simple_web_service/config.yml
+++ b/python/examples/pipeline/simple_web_service/config.yml
@@ -7,3 +7,4 @@ op:
        local_service_conf:
            model_config: uci_housing_model
            devices: "" # "0,1"
+            client_type: brpc
--- a/python/paddle_serving_client/io/__init__.py
+++ b/python/paddle_serving_client/io/__init__.py
@@ -92,9 +92,12 @@ def save_model(server_model_folder,
            fetch_var.shape.extend(tmp_shape)
        config.fetch_var.extend([fetch_var])
-    cmd = "mkdir -p {}".format(client_config_folder)
+    try:
+        save_dirname = os.path.normpath(client_config_folder)
-    os.system(cmd)
+        os.makedirs(save_dirname)
+    except OSError as e:
+        if e.errno != errno.EEXIST:
+            raise
    with open("{}/serving_client_conf.prototxt".format(client_config_folder),
              "w") as fout:
        fout.write(str(config))

--- a/python/pipeline/local_service_handler.py
+++ b/python/pipeline/local_service_handler.py
@@ -22,6 +22,7 @@ except ImportError:
    from paddle_serving_server import OpMaker, OpSeqMaker, Server
    PACKAGE_VERSION = "CPU"
 from . import util
+from paddle_serving_app.local_predict import LocalPredictor
 _LOGGER = logging.getLogger(__name__)
 _workdir_name_gen = util.NameGenerator("workdir_")
@@ -30,6 +31,7 @@ _workdir_name_gen = util.NameGenerator("workdir_")
 class LocalServiceHandler(object):
    def __init__(self,
                 model_config,
+                 client_type='local_predictor',
                 workdir="",
                 thread_num=2,
                 devices="",
@@ -58,12 +60,13 @@ class LocalServiceHandler(object):
                self._port_list.append(available_port_generator.next())
            _LOGGER.info("Model({}) will be launch in gpu device: {}. Port({})"
                         .format(model_config, devices, self._port_list))
+        self.client_type = client_type
        self._workdir = workdir
        self._devices = devices
        self._thread_num = thread_num
        self._mem_optim = mem_optim
        self._ir_optim = ir_optim
+        self.local_predictor_client = None
        self._rpc_service_list = []
        self._server_pros = []
        self._fetch_vars = None
@@ -74,6 +77,13 @@ class LocalServiceHandler(object):
    def get_port_list(self):
        return self._port_list
+    def get_client(self):  # for local_predictor_only
+        if self.local_predictor_client is None:
+            self.local_predictor_client = LocalPredictor()
+            self.local_predictor_client.load_model_config(
+                "{}".format(self._model_config), gpu=False, profile=False)
+        return self.local_predictor_client
    def get_client_config(self):
        return os.path.join(self._model_config, "serving_server_conf.prototxt")

--- a/python/pipeline/operator.py
+++ b/python/pipeline/operator.py
@@ -51,6 +51,7 @@ class Op(object):
                 server_endpoints=None,
                 fetch_list=None,
                 client_config=None,
+                 client_type=None,
                 concurrency=None,
                 timeout=None,
                 retry=None,
@@ -68,6 +69,7 @@ class Op(object):
        self._server_endpoints = server_endpoints
        self._fetch_names = fetch_list
        self._client_config = client_config
+        self.client_type = client_type
        self._timeout = timeout
        self._retry = max(1, retry)
        self._batch_size = batch_size
@@ -138,6 +140,7 @@ class Op(object):
                        if self.client_type == "brpc" or self.client_type == "grpc":
                            service_handler = local_service_handler.LocalServiceHandler(
                                model_config=model_config,
+                                client_type=self.client_type,
                                workdir=local_service_conf["workdir"],
                                thread_num=local_service_conf["thread_num"],
                                devices=local_service_conf["devices"],
@@ -155,12 +158,13 @@ class Op(object):
                                self._fetch_names = service_handler.get_fetch_list(
                                )
                        elif self.client_type == "local_predictor":
-                            service_handler = local_service_handler.LocalPredictorServiceHandler(
+                            service_handler = local_service_handler.LocalServiceHandler(
                                model_config=model_config,
+                                client_type=self.client_type,
                                workdir=local_service_conf["workdir"],
                                thread_num=local_service_conf["thread_num"],
                                devices=local_service_conf["devices"])
-                            service_handler.prepare_server()  # get fetch_list
+                            #service_handler.prepare_server()  # get fetch_list
                            self.local_predictor = service_handler.get_client()
                            if self._client_config is None:
                                self._client_config = service_handler.get_client_config(
@@ -210,6 +214,9 @@ class Op(object):
                          " service: local_service_handler is None."))
            return
        port = self._local_service_handler.get_port_list()
+        #if self._local_service_handler.client_type == "local_predictor":
+        #    _LOGGER.info("Op({}) use local predictor.")
+        #    return
        self._local_service_handler.start_server()
        _LOGGER.info("Op({}) use local rpc service at port: {}"
                     .format(self.name, port))
@@ -248,6 +255,9 @@ class Op(object):
        else:
            raise ValueError("Failed to init client: unknow client "
                             "type {}".format(self.client_type))
+        if self._fetch_names is None:
+            self._fetch_names = client.fetch_names_
+            _LOGGER.info("Op({}) has no fetch name set. So fetch all vars")
        if self.client_type != "local_predictor":
            client.connect(server_endpoints)
        return client
@@ -310,7 +320,7 @@ class Op(object):
        (_, input_dict), = input_dicts.items()
        return input_dict
-    def process(self, feed_batch, fetch_names, typical_logid):
+    def process(self, feed_batch, typical_logid):
        err, err_info = ChannelData.check_batch_npdata(feed_batch)
        if err != 0:
            _LOGGER.critical(
@@ -320,13 +330,13 @@ class Op(object):
        if self.client_type == "local_predictor":
            call_result = self.client.predict(
                feed=feed_batch[0],
-                fetch=fetch_names,
+                fetch=self._fetch_names,
                batch=True,
                log_id=typical_logid)
        else:
            call_result = self.client.predict(
                feed=feed_batch,
-                fetch=fetch_names,
+                fetch=self._fetch_names,
                batch=True,
                log_id=typical_logid)
        if isinstance(self.client, MultiLangClient):