Unverified commit ea9f5292, authored by 走神的阿圆 and committed by GitHub

Update hub serving to support multi-card prediction

Parent b7d9adc1
{
"modules_info": {
"lac": {
"init_args": {
"version": "2.1.0"
},
"predict_args": {
"batch_size": 1
}
}
},
"use_gpu": true,
"port": 8866,
"gpu": "0,1,2"
}
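With `use_gpu` set to true in a config like the one above, starting the server (for example with `hub serving start -c config.json`) routes into the new zmq-based multi-card mode: the comma-separated `gpu` field is split into a list of card indexes and one worker process is launched per card (see `parse_args` and `start_serving` below).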
{
"modules_info": {
"lac": {
"init_args": {
"version": "2.1.0"
},
"predict_args": {
"batch_size": 1,
"use_gpu": false
}
}
},
"port": 8866,
"use_multiprocess": true,
"workers": 2,
"timeout": 30
}
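The second config keeps prediction on the CPU instead: `use_multiprocess` together with `workers` runs the app under gunicorn with multiple worker processes on non-Windows systems, while on Windows serving falls back to a single Flask process.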
# coding: utf8
import requests
import json
import cv2
import base64


def cv2_to_base64(image):
    data = cv2.imencode('.jpg', image)[1]
    return base64.b64encode(data.tostring()).decode('utf8')


if __name__ == '__main__':
    # Get the base64 encoding of the images
    img1 = cv2_to_base64(cv2.imread("../../../../docs/imgs/family_mask.jpg"))
    img2 = cv2_to_base64(cv2.imread("../../../../docs/imgs/woman_mask.jpg"))
    data = {'images': [img1, img2]}
    # Specify the content-type
    headers = {"Content-type": "application/json"}
    # Send the HTTP request
    url = "http://127.0.0.1:8866/predict/pyramidbox_lite_server_mask"
    r = requests.post(url=url, headers=headers, data=json.dumps(data))
    # Print the prediction results
    print(r.json())
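On success, the printed JSON follows the `package_result` envelope used throughout the serving code below, e.g. {"status": "000", "msg": "", "results": [...]}; the shape of `results` depends on the module being served.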
# coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import argparse
import os
import platform
import socket
import json
import multiprocessing
import time
import signal
import paddlehub as hub
from paddlehub.commands import register
from paddlehub.serving import app_compat as app
from paddlehub.env import CONF_HOME
from paddlehub.serving.http_server import run_all, StandaloneApplication
from paddlehub.utils import log
def number_of_workers():
'''
Get suitable quantity of workers based on empirical formula.
'''
return (multiprocessing.cpu_count() * 2) + 1
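This matches gunicorn's commonly cited rule of thumb of (2 × CPU cores) + 1 workers.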
def pid_is_exist(pid: int):
    '''
    Check whether a process with the given PID exists.
    Args:
        pid(int): PID of the process to check.
    Returns:
        True if the process exists.
    Examples:
        .. code-block:: python
            pid_is_exist(pid=8866)
    '''
    try:
        os.kill(pid, 0)
    except OSError:
        return False
    else:
        return True
@register(name='hub.serving', description='Start Module Serving or Bert Service for online predicting.')
class ServingCommand:
name = "serving"
module_list = []
def dump_pid_file(self):
'''
Write PID info to file.
'''
pid = os.getpid()
filepath = os.path.join(CONF_HOME, "serving_" + str(self.args.port) + ".json")
if os.path.exists(filepath):
os.remove(filepath)
with open(filepath, "w") as fp:
info = {"pid": pid, "module": self.args.modules, "start_time": time.time()}
json.dump(info, fp)
@staticmethod
def load_pid_file(filepath: str, port: int = None):
'''
Read PID info from file.
'''
if port is None:
port = os.path.basename(filepath).split(".")[0].split("_")[1]
if not os.path.exists(filepath):
log.logger.error(
"PaddleHub Serving config file is not exists, please confirm the port [%s] you specified is correct." %
port)
return False
with open(filepath, "r") as fp:
info = json.load(fp)
return info
def stop_serving(self, port: int):
'''
Stop PaddleHub-Serving by port.
'''
filepath = os.path.join(CONF_HOME, "serving_" + str(port) + ".json")
info = self.load_pid_file(filepath, port)
if info is False:
return
pid = info["pid"]
if os.path.exists(filepath):
os.remove(filepath)
if not pid_is_exist(pid):
log.logger.info("PaddleHub Serving has been stopped.")
return
log.logger.info("PaddleHub Serving will stop.")
# CacheUpdater("hub_serving_stop", module=module, addition={"period_time": time.time() - start_time}).start()
if platform.system() == "Windows":
os.kill(pid, signal.SIGTERM)
else:
os.killpg(pid, signal.SIGTERM)
@staticmethod
def start_bert_serving(args):
'''
Start bert serving server.
'''
if platform.system() != "Linux":
log.logger.error("Error. Bert Service only support linux.")
return False
if ServingCommand.is_port_occupied("127.0.0.1", args.port) is True:
log.logger.error("Port %s is occupied, please change it." % args.port)
return False
from paddle_gpu_serving.run import BertServer
bs = BertServer(with_gpu=args.use_gpu)
bs.with_model(model_name=args.modules[0])
# CacheUpdater("hub_bert_service", module=args.modules[0], version="0.0.0").start()
bs.run(gpu_index=args.gpu, port=int(args.port))
    @staticmethod
    def is_port_occupied(ip, port):
        '''
        Check if the port is occupied.
        '''
        s = socket.socket(socket.AF_INET, socket.SOCK_STREAM)
        try:
            s.connect((ip, int(port)))
            s.shutdown(2)
            return True
        except OSError:
            return False
def preinstall_modules(self):
'''
Install module by PaddleHub and get info of this module.
'''
for key, value in self.modules_info.items():
init_args = value["init_args"]
# CacheUpdater("hub_serving_start", module=key, version=init_args.get("version", "0.0.0")).start()
if "directory" not in init_args:
init_args.update({"name": key})
m = hub.Module(**init_args)
method_name = m.serving_func_name
if method_name is None:
    raise RuntimeError("{} cannot be used for predicting".format(key))
serving_method = getattr(m, method_name)
category = str(m.type).split("/")[0].upper()
self.modules_info[key].update({
"method_name": method_name,
"version": m.version,
"category": category,
"module": m,
"name": m.name,
"serving_method": serving_method
})
def start_app_with_args(self):
'''
Start one PaddleHub-Serving instance by arguments with gunicorn.
'''
module = self.args.modules
if module is not None:
port = self.args.port
if ServingCommand.is_port_occupied("127.0.0.1", port) is True:
log.logger.error("Port %s is occupied, please change it." % port)
return False
self.preinstall_modules()
options = {"bind": "0.0.0.0:%s" % port, "workers": self.args.workers}
self.dump_pid_file()
StandaloneApplication(app.create_app(init_flag=False, configs=self.modules_info), options).run()
else:
log.logger.error("Lack of necessary parameters!")
def start_zmq_serving_with_args(self):
'''
Start one PaddleHub-Serving instance by arguments with zmq.
'''
if self.modules_info is not None:
front_port = self.args.port
if ServingCommand.is_port_occupied("127.0.0.1", front_port) is True:
log.logger.error("Port %s is occupied, please change it." % front_port)
return False
back_port = int(front_port) + 1
for index in range(100):
    if not ServingCommand.is_port_occupied("127.0.0.1", back_port):
        break
    back_port = int(back_port) + 1
run_all(self.modules_info, self.args.gpu, front_port, back_port)
else:
log.logger.error("Lack of necessary parameters!")
def start_single_app_with_args(self):
'''
Start one PaddleHub-Serving instance by arguments with flask.
'''
module = self.args.modules
if module is not None:
port = self.args.port
if ServingCommand.is_port_occupied("127.0.0.1", port) is True:
log.logger.error("Port %s is occupied, please change it." % port)
return False
self.preinstall_modules()
self.dump_pid_file()
app.run(configs=self.modules_info, port=port)
else:
log.logger.error("Lack of necessary parameters!")
def start_serving(self):
'''
Start PaddleHub-Serving with flask and gunicorn
'''
if self.args.use_gpu:
if self.args.use_multiprocess:
log.logger.warning('`use_multiprocess` will be ignored if `use_gpu` is specified.')
self.start_zmq_serving_with_args()
else:
if self.args.use_multiprocess:
if platform.system() == "Windows":
log.logger.warning(
    "Windows does not support multiprocess mode; PaddleHub Serving will switch to single-process mode."
)
self.start_single_app_with_args()
else:
self.start_app_with_args()
else:
self.start_single_app_with_args()
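In short, `start_serving` dispatches to one of three modes: GPU (a zmq device with one worker process per card), multiprocess CPU (gunicorn, non-Windows only), or single-process CPU (plain Flask).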
@staticmethod
def show_help():
str = "serving <option>\n"
str += "\tManage PaddleHub Serving.\n"
str += "sub command:\n"
str += "1. start\n"
str += "\tStart PaddleHub Serving.\n"
str += "2. stop\n"
str += "\tStop PaddleHub Serving.\n"
str += "3. start bert_service\n"
str += "\tStart Bert Service.\n"
str += "\n"
str += "[start] option:\n"
str += "--modules/-m [module1==version, module2==version...]\n"
str += "\tPre-install modules via the parameter list.\n"
str += "--port/-p XXXX\n"
str += "\tUse port XXXX for serving.\n"
str += "--use_multiprocess\n"
str += "\tChoose multoprocess mode, cannot be use on Windows.\n"
str += "--modules_info\n"
str += "\tSet module config in PaddleHub Serving."
str += "--config/-c file_path\n"
str += "\tUse configs in file to start PaddleHub Serving. "
str += "Other parameters will be ignored if you specify the parameter.\n"
str += "\n"
str += "[stop] option:\n"
str += "--port/-p XXXX\n"
str += "\tStop PaddleHub Serving on port XXXX safely.\n"
str += "\n"
str += "[start bert_service] option:\n"
str += "--modules/-m\n"
str += "\tPre-install modules via the parameter.\n"
str += "--port/-p XXXX\n"
str += "\tUse port XXXX for serving.\n"
str += "--use_gpu\n"
str += "\tUse gpu for predicting if specifies the parameter.\n"
str += "--gpu\n"
str += "\tSpecify the GPU devices to use.\n"
print(str)
def parse_args(self):
if self.args.config is not None:
if os.path.exists(self.args.config):
with open(self.args.config, "r") as fp:
self.args_config = json.load(fp)
self.args.use_gpu = self.args_config.get('use_gpu', False)
self.args.use_multiprocess = self.args_config.get('use_multiprocess', False)
self.modules_info = self.args_config["modules_info"]
self.args.port = self.args_config.get('port', 8866)
if self.args.use_gpu:
self.args.gpu = self.args_config.get('gpu', '0')
else:
self.args.gpu = self.args_config.get('gpu', None)
if self.args.use_multiprocess:
self.args.workers = self.args_config.get('workers', number_of_workers())
else:
self.args.workers = self.args_config.get('workers', None)
else:
raise RuntimeError("{} not exists.".format(self.args.config))
exit(1)
else:
self.modules_info = {}
for item in self.args.modules:
version = None
if "==" in item:
module = item.split("==")[0]
version = item.split("==")[1]
else:
module = item
self.modules_info.update({module: {"init_args": {"version": version}, "predict_args": {}}})
if self.args.gpu:
self.args.gpu = self.args.gpu.split(',')
return self.modules_info
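As a concrete illustration (hypothetical values), starting serving with `-m lac==2.1.0` and no config file leaves `parse_args` with:

    # modules_info assembled by parse_args for `hub serving start -m lac==2.1.0`
    {
        'lac': {
            'init_args': {'version': '2.1.0'},  # forwarded to hub.Module(...)
            'predict_args': {}  # merged into every predict call
        }
    }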
def execute(self, argv):
self.show_in_help = True
self.description = "Start Module Serving or Bert Service for online predicting."
self.parser = argparse.ArgumentParser(
description=self.__class__.__doc__, prog='hub serving', usage='%(prog)s', add_help=True)
self.parser.add_argument("command")
self.parser.add_argument("sub_command")
self.parser.add_argument("bert_service", nargs="?")
self.sub_parse = self.parser.add_mutually_exclusive_group(required=False)
self.parser.add_argument("--use_gpu", action="store_true", default=False)
self.parser.add_argument("--use_multiprocess", action="store_true", default=False)
self.parser.add_argument("--modules", "-m", nargs="+")
self.parser.add_argument("--config", "-c", nargs="?")
self.parser.add_argument("--port", "-p", nargs="?", default=8866)
self.parser.add_argument("--gpu", "-i", nargs="?", default='0')
self.parser.add_argument("--use_singleprocess", action="store_true", default=False)
self.parser.add_argument("--modules_info", "-mi", default={}, type=json.loads)
self.parser.add_argument("--workers", "-w", nargs="?", default=number_of_workers())
try:
self.args = self.parser.parse_args()
except:
ServingCommand.show_help()
return False
if self.args.sub_command == "start":
if self.args.bert_service == "bert_service":
ServingCommand.start_bert_serving(self.args)
else:
self.parse_args()
self.start_serving()
elif self.args.sub_command == "stop":
if self.args.bert_service == "bert_service":
log.logger.warning("Please stop Bert Service by kill process by yourself")
elif self.args.bert_service is None:
self.stop_serving(port=self.args.port)
else:
ServingCommand.show_help()
@@ -123,6 +123,10 @@ class RunModule(object):
         `hub run` command.
         '''
         return self._run_func != None

+    @property
+    def serving_func_name(self):
+        return self._serving_func_name

 class Module(object):
# coding:utf-8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import traceback
import time
import logging
from flask import Flask, request
from paddlehub.serving.model_service.base_model_service import cv_module_info
from paddlehub.serving.model_service.base_model_service import nlp_module_info
from paddlehub.serving.model_service.base_model_service import v2_module_info
from paddlehub.utils import utils, log
def package_result(status: str, msg: str, data: dict):
'''
Package message of response.
Args:
status(str): Error code
======== ==============================================================================================
Code Meaning
-------- ----------------------------------------------------------------------------------------------
'000' Return results normally
'101' An error occurred in the predicting method
'111' Module is not available
'112' Use outdated and abandoned HTTP protocol format
======== ===============================================================================================
msg(str): Detailed info for error
data(dict): Result of predict api.
Returns:
dict: Message of response
Examples:
.. code-block:: python
data = {'result': 0.002}
package_result(status='000', msg='', data=data)
'''
return {"status": status, "msg": msg, "results": data}
def predict_v2(module_info: dict, input: dict):
    '''
    Predict with the `serving` API of a module.
    Args:
        module_info(dict): module info, including module name, method name
                           and other info.
        input(dict): data to feed to the predict API.
    Returns:
        dict: response packaged by func `package_result`
    Examples:
        .. code-block:: python
            module_info = {'module_name': 'lac'}
            data = {'text': ['今天天气很好']}
            predict_v2(module_info=module_info, input=data)
    '''
    serving_method_name = module_info["method_name"]
    serving_method = getattr(module_info["module"], serving_method_name)
    predict_args = module_info["predict_args"].copy()
    predict_args.update(input)
    for item in serving_method.__code__.co_varnames:
        if item in module_info.keys():
            predict_args.update({item: module_info[item]})
    try:
        output = serving_method(**predict_args)
    except Exception as err:
        log.logger.error(traceback.format_exc())
        return package_result("101", str(err), "")
    return package_result("000", "", output)
def create_app(init_flag: bool = False, configs: dict = None):
'''
Create one flask instance ready for HTTP requests.
Args:
init_flag(bool): whether the instance needs to be initialized with
`configs` or not
configs(dict): Module configs for initializing.
Returns:
One flask instance.
Examples:
.. code-block:: python
create_app(init_flag=False, configs=None)
'''
if init_flag is False:
if configs is None:
raise RuntimeError("Lack of necessary configs.")
config_with_file(configs)
app_instance = Flask(__name__)
app_instance.config["JSON_AS_ASCII"] = False
logging.basicConfig()
gunicorn_logger = logging.getLogger('gunicorn.error')
app_instance.logger.handlers = gunicorn_logger.handlers
app_instance.logger.setLevel(gunicorn_logger.level)
@app_instance.route("/", methods=["GET", "POST"])
def index():
'''
Provide index page.
'''
return '暂不提供可视化界面,请直接使用脚本进行请求。<br/>No visual ' \
'interface is provided for the time being, please use the' \
' python script to make a request directly.'
@app_instance.before_request
def before_request():
'''
Add id info to `request.data` before request.
'''
request.data = {"id": utils.md5(request.remote_addr + str(time.time()))}
@app_instance.route("/predict/<module_name>", methods=["POST"])
def predict_serving_v2(module_name: str):
'''
Http api for predicting.
Args:
module_name(str): Module name for predicting.
Returns:
Result of predicting after packaging.
'''
if module_name in v2_module_info.modules:
module_info = v2_module_info.get_module_info(module_name)
else:
msg = "Module {} is not available.".format(module_name)
return package_result("111", msg, "")
inputs = request.json
if inputs is None:
results = "This usage is out of date, please use 'application/json' as content-type to post to /predict/%s. See 'https://github.com/PaddlePaddle/PaddleHub/blob/release/v1.6/docs/tutorial/serving.md' for more details." % (
module_name)
return package_result("112", results, "")
results = predict_v2(module_info, inputs)
return results
return app_instance
def config_with_file(configs: dict):
    '''
    Config `cv_module_info` and `nlp_module_info` by configs.
    Args:
        configs(dict): module info and configs
    Examples:
        .. code-block:: python
            configs = {'lac': {'version': '1.0.0', 'category': 'NLP'}}
            config_with_file(configs=configs)
    '''
    for key, value in configs.items():
        if "CV" == value["category"]:
            cv_module_info.add_module(key, {key: value})
        elif "NLP" == value["category"]:
            nlp_module_info.add_module(key, {key: value})
        v2_module_info.add_module(key, {key: value})
        log.logger.info("%s==%s" % (key, value["version"]))
def run(configs: dict = None, port: int = 8866):
    '''
    Run a flask instance for PaddleHub Serving.
    Args:
        configs(dict): module info and configs
        port(int): port of the web server
    Examples:
        .. code-block:: python
            configs = {'lac': {'version': '1.0.0', 'category': 'NLP'}}
            run(configs=configs, port=8866)
    '''
    if configs is not None:
        config_with_file(configs)
    else:
        log.logger.error("Start failed because of missing configuration.")
        return
    my_app = create_app(init_flag=True)
    my_app.run(host="0.0.0.0", port=port, debug=False, threaded=False)
    log.logger.info("PaddleHub-Serving has been stopped.")
# coding:utf-8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import zmq
class InferenceClient(object):
def __init__(self, frontend_addr):
self.frontend_addr = frontend_addr
self.context = zmq.Context(1)
self.socket = self.context.socket(zmq.REQ)
self.socket.connect(frontend_addr)
def send_req(self, message):
self.socket.send_json(message)
result = self.socket.recv_json()
return result
class InferenceClientProxy(object):
clients = {}
@staticmethod
def get_client(pid, frontend_addr):
if pid not in InferenceClientProxy.clients.keys():
client = InferenceClient(frontend_addr)
InferenceClientProxy.clients.update({pid: client})
return InferenceClientProxy.clients[pid]
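A minimal usage sketch, assuming an InferenceDevice is already listening on tcp://localhost:5559 (as in `predict_serving_v3` below); the proxy keys clients by PID so each web worker process reuses a single REQ socket:

    import os
    from paddlehub.serving.client import InferenceClientProxy

    pid = os.getpid()
    client = InferenceClientProxy.get_client(pid, 'tcp://localhost:5559')
    # the message format matches what predict_serving_v3 and worker.py exchange
    result = client.send_req({'module_name': 'lac', 'inputs': {'text': ['今天天气很好']}})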
# coding:utf-8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import zmq
import time
import os
import json
import traceback
import subprocess
from paddlehub.utils import log
filename = 'HubServing-%s.log' % time.strftime("%Y_%m_%d", time.localtime())
log.logger = log.log_to_file(log.logger, filename)
class InferenceDevice(object):
'''
The InferenceDevice class provides zmq.device to connect with frontend and
backend.
'''
def __init__(self):
self.frontend = None
self.backend = None
def listen(self, frontend_addr: str, backend_addr: str):
'''
Start zmq.device to listen from frontend address to backend address.
'''
try:
context = zmq.Context(1)
self.frontend = context.socket(zmq.ROUTER)
self.frontend.bind(frontend_addr)
self.backend = context.socket(zmq.DEALER)
self.backend.bind(backend_addr)
zmq.device(zmq.QUEUE, self.frontend, self.backend)
except Exception as e:
log.logger.error(traceback.format_exc())
finally:
self.frontend.close()
self.backend.close()
context.term()
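The ROUTER/DEALER pair bridged by `zmq.device(zmq.QUEUE, ...)` acts as a shared queue: any number of REQ clients connect to the frontend, requests are fair-queued to whichever idle REP worker sits on the backend, and replies are routed back to the originating client.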
def start_workers(modules_info: dict, gpus: list, backend_addr: str):
    '''
    Start one inference worker subprocess per GPU device.
    Args:
        modules_info(dict): modules info, including module name and version
        gpus(list): GPU device indexes
        backend_addr(str): the PaddleHub-Serving zmq backend address
    Examples:
        .. code-block:: python
            modules_info = {'lac': {'init_args': {'version': '2.1.0'},
                                    'predict_args': {'batch_size': 1}}}
            start_workers(modules_info, ['0', '1', '2'], 'ipc://backend.ipc')
    '''
    work_file = os.path.join(os.path.split(os.path.realpath(__file__))[0], 'worker.py')
    modules_info = json.dumps(modules_info)
    for index in range(len(gpus)):
        subprocess.Popen(['python', work_file, modules_info, gpus[index], backend_addr])
class InferenceServer(object):
    '''
    InferenceServer starts the GPU worker processes and the zmq queue device.
    Args:
        modules_info(dict): modules info, including module name and version
        gpus(list): GPU device indexes
    '''

    def __init__(self, modules_info: dict, gpus: list):
        self.modules_info = modules_info
        self.gpus = gpus

    def listen(self, port: int):
        backend = "ipc://backend.ipc"
        start_workers(modules_info=self.modules_info, gpus=self.gpus, backend_addr=backend)
        d = InferenceDevice()
        d.listen('tcp://*:%s' % port, backend)
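Note that the backend address is a hard-coded `ipc://backend.ipc` endpoint, which relies on Unix domain sockets (a `backend.ipc` file is created in the working directory), consistent with the multi-card GPU mode targeting non-Windows systems.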
# coding:utf-8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import time
import os
import logging
import multiprocessing
import platform
from flask import Flask, request
from paddlehub.serving.device import InferenceServer
from paddlehub.serving.client import InferenceClientProxy
from paddlehub.utils import utils, log
filename = 'HubServing-%s.log' % time.strftime("%Y_%m_%d", time.localtime())
log.logger = log.log_to_file(log.logger, filename)
if platform.system() == "Windows":
class StandaloneApplication(object):
def __init__(self):
pass
def load_config(self):
pass
def load(self):
pass
else:
import gunicorn.app.base
class StandaloneApplication(gunicorn.app.base.BaseApplication):
'''
StandaloneApplication class provides instance of StandaloneApplication
as gunicorn backend.
'''
def __init__(self, app, options=None):
self.options = options or {}
self.application = app
super(StandaloneApplication, self).__init__()
def load_config(self):
config = {
key: value
for key, value in self.options.items() if key in self.cfg.settings and value is not None
}
for key, value in config.items():
self.cfg.set(key.lower(), value)
def load(self):
return self.application
def package_result(status: str, msg: str, data: dict):
'''
Package message of response.
Args:
status(str): Error code
======== ==============================================================================================
Code Meaning
-------- ----------------------------------------------------------------------------------------------
'000' Return results normally
'101' An error occurred in the predicting method
'111' Module is not available
'112' Use outdated and abandoned HTTP protocol format
======== ===============================================================================================
msg(str): Detailed info for error
data(dict): Result of predict api.
Returns:
dict: Message of response
Examples:
.. code-block:: python
data = {'result': 0.002}
package_result(status='000', msg='', data=data)
'''
return {"status": status, "msg": msg, "results": data}
def create_app(client_port: int = 5559, modules_name: list = []):
'''
Create one flask instance ready for HTTP requests.
Args:
client_port(int): port of the zmq backend address
modules_name(list): the name list of modules
Returns:
One flask instance.
Examples:
.. code-block:: python
create_app(client_port=5559)
'''
app_instance = Flask(__name__)
app_instance.config["JSON_AS_ASCII"] = False
pid = os.getpid()
@app_instance.route("/", methods=["GET", "POST"])
def index():
'''
Provide index page.
'''
return '暂不提供可视化界面,请直接使用脚本进行请求。<br/>No visual ' \
'interface is provided for the time being, please use the' \
' python script to make a request directly.'
@app_instance.before_request
def before_request():
'''
Add id info to `request.data` before request.
'''
request.data = {"id": utils.md5(request.remote_addr + str(time.time()))}
@app_instance.route("/predict/<module_name>", methods=["POST"])
def predict_serving_v3(module_name: str):
'''
Http api for predicting.
Args:
module_name(str): Module name for predicting.
Returns:
Result of predicting after packaging.
'''
if module_name not in modules_name:
    msg = "Module {} is not available.".format(module_name)
    return package_result("111", msg, "")
inputs = request.json
if inputs is None:
results = "This usage is out of date, please use 'application/json' as content-type to post to /predict/%s. See 'https://github.com/PaddlePaddle/PaddleHub/blob/release/v1.6/docs/tutorial/serving.md' for more details." % (
module_name)
return package_result("112", results, "")
inputs = {'module_name': module_name, 'inputs': inputs}
port_str = 'tcp://localhost:%s' % client_port
client = InferenceClientProxy.get_client(pid, port_str)
results = client.send_req(inputs)
return package_result("000", results, "")
return app_instance
def run(port: int = 8866, client_port: int = 5559, names: list = [], workers: int = 1):
'''
Run flask instance for PaddleHub-Serving
Args:
port(int): the port of the webserver
client_port(int): the port of zmq backend address
names(list): the name list of modules
workers(int): number of gunicorn workers
Examples:
.. code-block:: python
run(port=8866, client_port=5559)
'''
if platform.system() == "Windows":
my_app = create_app(client_port, modules_name=names)
my_app.run(host="0.0.0.0", port=port, debug=False, threaded=False)
else:
options = {"bind": "0.0.0.0:%s" % port, "workers": workers, "worker_class": "sync"}
StandaloneApplication(create_app(client_port, modules_name=names), options).run()
log.logger.info("PaddleHub-Serving has been stopped.")
def run_http_server(port: int = 8866, client_port: int = 5559, names: list = [], workers: int = 1):
'''
Start subprocess to run function `run`
Args:
port(int): the port of the webserver
client_port(int): the port of zmq backend address
names(list): the name list of modules
workers(int): the workers for every client
Returns:
process id of subprocess
Examples:
.. code-block:: python
run_http_server(port=8866, client_port=5559, names=['lac'])
'''
p = multiprocessing.Process(target=run, args=(port, client_port, names, workers))
p.start()
return p.pid
def run_all(modules_info: dict, gpus: list, frontend_port: int, backend_port: int):
'''
Run the flask app for frontend HTTP requests and the zmq device for
backend inference requests.
Args:
modules_info(dict): modules info, including module name and version
gpus(list): GPU device indexes
frontend_port(int): the port of PaddleHub-Serving frontend address
backend_port(int): the port of PaddleHub-Serving zmq backend address
Examples:
.. code-block:: python
modules_info = {'lac': {'init_args': {'version': '2.1.0'},
'predict_args': {'batch_size': 1}}}
run_all(modules_info, ['0', '1', '2'], 8866, 8867)
'''
run_http_server(frontend_port, backend_port, modules_info.keys(), len(gpus))
MyIS = InferenceServer(modules_info, gpus)
MyIS.listen(backend_port)
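Putting it together: `run_all` serves HTTP on `frontend_port` with one gunicorn worker per GPU, each holding a zmq REQ client to tcp://localhost:&lt;backend_port&gt;; the InferenceDevice bridges that TCP frontend to `ipc://backend.ipc`, where one `worker.py` process per GPU answers over a REP socket.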
# coding: utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import six
import abc
class BaseModuleInfo(object):
def __init__(self):
self._modules_info = {}
self._modules = []
def set_modules_info(self, modules_info):
# dict of modules info.
self._modules_info = modules_info
# list of modules name.
self._modules = list(self._modules_info.keys())
def get_module_info(self, module_name):
return self._modules_info[module_name]
def add_module(self, module_name, module_info):
self._modules_info.update(module_info)
self._modules.append(module_name)
def get_module(self, module_name):
return self.get_module_info(module_name).get("module", None)
@property
def modules_info(self):
return self._modules_info
class CVModuleInfo(BaseModuleInfo):
def __init__(self):
self.cv_module_method = {
"vgg19_imagenet": "predict_classification",
"vgg16_imagenet": "predict_classification",
"vgg13_imagenet": "predict_classification",
"vgg11_imagenet": "predict_classification",
"shufflenet_v2_imagenet": "predict_classification",
"se_resnext50_32x4d_imagenet": "predict_classification",
"se_resnext101_32x4d_imagenet": "predict_classification",
"resnet_v2_50_imagenet": "predict_classification",
"resnet_v2_34_imagenet": "predict_classification",
"resnet_v2_18_imagenet": "predict_classification",
"resnet_v2_152_imagenet": "predict_classification",
"resnet_v2_101_imagenet": "predict_classification",
"pnasnet_imagenet": "predict_classification",
"nasnet_imagenet": "predict_classification",
"mobilenet_v2_imagenet": "predict_classification",
"googlenet_imagenet": "predict_classification",
"alexnet_imagenet": "predict_classification",
"yolov3_coco2017": "predict_object_detection",
"ultra_light_fast_generic_face_detector_1mb_640": "predict_object_detection",
"ultra_light_fast_generic_face_detector_1mb_320": "predict_object_detection",
"ssd_mobilenet_v1_pascal": "predict_object_detection",
"pyramidbox_face_detection": "predict_object_detection",
"faster_rcnn_coco2017": "predict_object_detection",
"cyclegan_cityscapes": "predict_gan",
"deeplabv3p_xception65_humanseg": "predict_semantic_segmentation",
"ace2p": "predict_semantic_segmentation",
"pyramidbox_lite_server_mask": "predict_mask",
"pyramidbox_lite_mobile_mask": "predict_mask"
}
super(CVModuleInfo, self).__init__()
@property
def cv_modules(self):
return self._modules
def add_module(self, module_name, module_info):
if "CV" == module_info[module_name].get("category", ""):
self._modules_info.update(module_info)
self._modules.append(module_name)
class NLPModuleInfo(BaseModuleInfo):
def __init__(self):
super(NLPModuleInfo, self).__init__()
@property
def nlp_modules(self):
return self._modules
def add_module(self, module_name, module_info):
if "NLP" == module_info[module_name].get("category", ""):
self._modules_info.update(module_info)
self._modules.append(module_name)
class V2ModuleInfo(BaseModuleInfo):
def __init__(self):
super(V2ModuleInfo, self).__init__()
@property
def modules(self):
return self._modules
def add_module(self, module_name, module_info):
self._modules_info.update(module_info)
self._modules.append(module_name)
class BaseModelService(object):
def _initialize(self):
pass
@abc.abstractmethod
def _pre_processing(self, data):
pass
@abc.abstractmethod
def _inference(self, data):
pass
@abc.abstractmethod
def _post_processing(self, data):
pass
cv_module_info = CVModuleInfo()
nlp_module_info = NLPModuleInfo()
v2_module_info = V2ModuleInfo()
# coding:utf-8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License"
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import zmq
import os
import json
import traceback
import sys
def run_worker(modules_info: dict, gpu_index: str, addr: str):
    '''
    Start zmq.REP as a backend worker on the specified GPU.
    Args:
        modules_info(dict): mapping from module name to its predict_args and serving method
        gpu_index(str): GPU device index to use
        addr(str): address of zmq.REP
    Examples:
        .. code-block:: python
            modules_info = {'lac': {'predict_args': {},
                                    'serving_method': lexical_analysis}}
            run_worker(modules_info=modules_info,
                       gpu_index='0',
                       addr='ipc://backend.ipc')
    '''
context = zmq.Context(1)
socket = context.socket(zmq.REP)
socket.connect(addr)
log.logger.info("Using GPU device index:%s" % gpu_index)
while True:
try:
message = socket.recv_json()
inputs = message['inputs']
module_name = message['module_name']
inputs.update(modules_info[module_name]['predict_args'])
inputs.update({'use_gpu': True})
method = modules_info[module_name]['serving_method']
os.environ['CUDA_VISIBLE_DEVICES'] = gpu_index
output = method(**inputs)
except Exception as err:
log.logger.error(traceback.format_exc())
output = package_result("101", str(err), "")
socket.send_json(output)
if __name__ == '__main__':
argv = sys.argv
modules_info = json.loads(argv[1])
gpu_index = argv[2]
addr = argv[3]
os.environ['CUDA_VISIBLE_DEVICES'] = gpu_index
import paddlehub as hub
from paddlehub.serving.http_server import package_result
from paddlehub.utils import log
modules_pred_info = {}
for module_name, module_info in modules_info.items():
init_args = module_info.get('init_args', {})
init_args.update({'name': module_name})
module = hub.Module(**init_args)
method_name = module.serving_func_name
serving_method = getattr(module, method_name)
predict_args = module_info.get('predict_args', {})
modules_pred_info.update({module_name: {'predict_args': predict_args, 'serving_method': serving_method}})
run_worker(modules_pred_info, gpu_index, addr)
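Because CUDA_VISIBLE_DEVICES is set before paddlehub is imported, each worker builds its predictor on its own card. A hypothetical manual launch of a single worker, mirroring what `start_workers` does via subprocess:

    import subprocess

    # one worker pinned to GPU 0, answering on the shared ipc backend
    subprocess.Popen(['python', 'worker.py',
                      '{"lac": {"init_args": {"version": "2.1.0"}, "predict_args": {"batch_size": 1}}}',
                      '0', 'ipc://backend.ipc'])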
@@ -475,4 +475,31 @@ class Table(object):
         return seprow + content

+def log_to_file(logger, filename):
+    '''
+    Replace the logger's handlers with a FileHandler.
+    Args:
+        logger(Logger): the logger whose handlers will be replaced
+        filename(str): name of the file to log to
+    Examples:
+        .. code-block:: python
+            logger = log_to_file(logger, 'test.log')
+            logger.logger.info('test_1')
+    '''
+    old_handlers = logger.logger.handlers
+    for handler in old_handlers:
+        logger.logger.removeHandler(handler)
+    formatter = logging.Formatter('[%(asctime)-15s] [%(levelname)8s] - %(message)s')
+    fh = logging.FileHandler(filename=filename, mode='a')
+    fh.setFormatter(formatter)
+    logger.logger.addHandler(fh)
+    logger.logger.setLevel(logging.INFO)
+    return logger

 logger = Logger()
@@ -10,6 +10,9 @@ sentencepiece
 colorlog
 tqdm
 nltk
+pyzmq
+gitpython
+easydict
 # pandas no longer support python2 in version 0.25 and above
 pandas ; python_version >= "3"
@@ -13,24 +13,20 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 """Setup for pip package."""
-from __future__ import absolute_import
-from __future__ import division
-from __future__ import print_function
 import platform
-import six
 from setuptools import find_packages
 from setuptools import setup
-from paddlehub.version import hub_version
+import paddlehub as hub
 with open("requirements.txt") as fin:
     REQUIRED_PACKAGES = fin.read()
 setup(
     name='paddlehub',
-    version=hub_version.replace('-', ''),
+    version=hub.__version__.replace('-', ''),
     description=
     ('A toolkit for managing pretrained models of PaddlePaddle and helping user getting started with transfer learning more efficiently.'
     ),
@@ -41,26 +37,16 @@ setup(
     install_requires=REQUIRED_PACKAGES,
     packages=find_packages(),
     package_data={
-        'paddlehub/serving/templates': [
-            'paddlehub/serving/templates/serving_config.json',
-            'paddlehub/serving/templates/main.html'
-        ],
         'paddlehub/command/tmpl': [
-            'paddlehub/command/tmpl/init_py.tmpl',
-            'paddlehub/command/tmpl/serving_demo.tmpl',
+            'paddlehub/command/tmpl/init_py.tmpl', 'paddlehub/command/tmpl/serving_demo.tmpl',
             'paddlehub/command/tmpl/x_model.tmpl'
         ]
     },
     include_package_data=True,
-    data_files=[('paddlehub/serving/templates', [
-        'paddlehub/serving/templates/serving_config.json',
-        'paddlehub/serving/templates/main.html'
-    ]),
-                ('paddlehub/commands/tmpl', [
-                    'paddlehub/commands/tmpl/init_py.tmpl',
-                    'paddlehub/commands/tmpl/serving_demo.tmpl',
-                    'paddlehub/commands/tmpl/x_model.tmpl'
-                ])],
+    data_files=[('paddlehub/commands/tmpl', [
+        'paddlehub/commands/tmpl/init_py.tmpl', 'paddlehub/commands/tmpl/serving_demo.tmpl',
+        'paddlehub/commands/tmpl/x_model.tmpl'
+    ])],
     include_data_files=True,
     # PyPI package information.
     classifiers=[
@@ -84,4 +70,4 @@ setup(
     ],
     license='Apache 2.0',
     keywords=('paddlehub paddlepaddle fine-tune transfer-learning'),
-    entry_points={'console_scripts': ['hub=paddlehub.commands.hub:main']})
+    entry_points={'console_scripts': ['hub=paddlehub.commands.utils:execute']})