Merge branch 'develop' into ci-test

1ced0a1c · Zhang Yulong · GitHub · 58415334 · ac9f69fb · 1ced0a1c
5 changed files
--- a/core/predictor/framework/infer.cpp
+++ b/core/predictor/framework/infer.cpp
@@ -391,7 +391,8 @@ int InferManager::proc_initialize(const char* path,
    return -1;
  }
  uint32_t engine_num = model_toolkit_conf.engines_size();
-  im::bsf::TaskExecutorVector<TaskT>::instance().resize(*engine_index_ptr+engine_num);
+  im::bsf::TaskExecutorVector<TaskT>::instance().resize(*engine_index_ptr +
+                                                        engine_num);
  for (uint32_t ei = 0; ei < engine_num; ++ei) {
    LOG(INFO) << "model_toolkit_conf.engines(" << ei
              << ").name: " << model_toolkit_conf.engines(ei).name();

--- a/python/paddle_serving_client/client.py
+++ b/python/paddle_serving_client/client.py
@@ -79,7 +79,7 @@ class SDKConfig(object):
        self.tag_list = []
        self.cluster_list = []
        self.variant_weight_list = []
-        self.rpc_timeout_ms = 20000
+        self.rpc_timeout_ms = 200000
        self.load_balance_strategy = "la"

    def add_server_variant(self, tag, cluster, variant_weight):
@@ -142,7 +142,7 @@ class Client(object):
        self.profile_ = _Profiler()
        self.all_numpy_input = True
        self.has_numpy_input = False
-        self.rpc_timeout_ms = 20000
+        self.rpc_timeout_ms = 200000
        from .serving_client import PredictorRes
        self.predictorres_constructor = PredictorRes


--- a/python/paddle_serving_server/serve.py
+++ b/python/paddle_serving_server/serve.py
@@ -31,6 +31,67 @@ elif sys.version_info.major == 3:
    from http.server import BaseHTTPRequestHandler, HTTPServer


+def format_gpu_to_strlist(unformatted_gpus):
+    gpus_strlist = []
+    if isinstance(unformatted_gpus, int):
+        gpus_strlist = [str(unformatted_gpus)]
+    elif isinstance(unformatted_gpus, list):
+        if unformatted_gpus == [""]:
+            gpus_strlist = ["-1"]
+        elif len(unformatted_gpus) == 0:
+            gpus_strlist = ["-1"]
+        else:
+            gpus_strlist = [str(x) for x in unformatted_gpus]
+    elif isinstance(unformatted_gpus, str):
+        if unformatted_gpus == "":
+            gpus_strlist = ["-1"]
+        else:
+            gpus_strlist = [unformatted_gpus]
+    elif unformatted_gpus == None:
+        gpus_strlist = ["-1"]
+    else:
+        raise ValueError("error input of set_gpus")
+
+    # check cuda visible
+    if "CUDA_VISIBLE_DEVICES" in os.environ:
+        env_gpus = os.environ["CUDA_VISIBLE_DEVICES"].split(",")
+        for op_gpus_str in gpus_strlist:
+            op_gpu_list = op_gpus_str.split(",")
+            # op_gpu_list == ["-1"] means this op uses the CPU,
+            # so skip the CUDA_VISIBLE_DEVICES check for it.
+            if op_gpu_list == ["-1"]:
+                continue
+            for ids in op_gpu_list:
+                if ids not in env_gpus:
+                    print("gpu_ids is not in CUDA_VISIBLE_DEVICES.")
+                    exit(-1)
+
+    # check gpuid is valid
+    for op_gpus_str in gpus_strlist:
+        op_gpu_list = op_gpus_str.split(",")
+        use_gpu = False
+        for ids in op_gpu_list:
+            if int(ids) < -1:
+                raise ValueError("The input of gpuid error.")
+            if int(ids) >= 0:
+                use_gpu = True
+            if int(ids) == -1 and use_gpu:
+                raise ValueError("You can not use CPU and GPU in one model.")
+
+    return gpus_strlist
+
+
+def is_gpu_mode(unformatted_gpus):
+    gpus_strlist = format_gpu_to_strlist(unformatted_gpus)
+    for op_gpus_str in gpus_strlist:
+        op_gpu_list = op_gpus_str.split(",")
+        for ids in op_gpu_list:
+            if int(ids) >= 0:
+                return True
+
+    return False
+
+
 def serve_args():
    parser = argparse.ArgumentParser("serve")
    parser.add_argument(
@@ -38,7 +99,7 @@ def serve_args():
    parser.add_argument(
        "--port", type=int, default=9292, help="Port of the starting gpu")
    parser.add_argument(
-        "--device", type=str, default="gpu", help="Type of device")
+        "--device", type=str, default="cpu", help="Type of device")
    parser.add_argument(
        "--gpu_ids", type=str, default="", nargs="+", help="gpu ids")
    parser.add_argument(
@@ -118,9 +179,9 @@ def serve_args():

 def start_gpu_card_model(gpu_mode, port, args):  # pylint: disable=doc-string-missing

-    device = "gpu"
-    if gpu_mode == False:
-        device = "cpu"
+    device = "cpu"
+    if gpu_mode == True:
+        device = "gpu"

    thread_num = args.thread
    model = args.model
@@ -211,34 +272,15 @@ def start_gpu_card_model(gpu_mode, port, args):  # pylint: disable=doc-string-mi


 def start_multi_card(args, serving_port=None):  # pylint: disable=doc-string-missing
-    gpus = []
+
    if serving_port == None:
        serving_port = args.port

-    if args.gpu_ids == "":
-        gpus = []
-    else:
-        #check the gpu_id is valid or not.
-        gpus = args.gpu_ids
-        if isinstance(gpus, str):
-            gpus = [gpus]
-        if "CUDA_VISIBLE_DEVICES" in os.environ:
-            env_gpus = os.environ["CUDA_VISIBLE_DEVICES"].split(",")
-            for op_gpus_str in gpus:
-                op_gpu_list = op_gpus_str.split(",")
-                for ids in op_gpu_list:
-                    if ids not in env_gpus:
-                        print("gpu_ids is not in CUDA_VISIBLE_DEVICES.")
-                        exit(-1)
-
    if args.use_lite:
        print("run using paddle-lite.")
        start_gpu_card_model(False, serving_port, args)
-    elif len(gpus) <= 0:
-        print("gpu_ids not set, going to run cpu service.")
-        start_gpu_card_model(False, serving_port, args)
    else:
-        start_gpu_card_model(True, serving_port, args)
+        start_gpu_card_model(is_gpu_mode(args.gpu_ids), serving_port, args)


 class MainService(BaseHTTPRequestHandler):
@@ -320,7 +362,9 @@ class MainService(BaseHTTPRequestHandler):


 if __name__ == "__main__":
-
+    # args.device is not used at all;
+    # it is kept only to preserve the interface,
+    # so --device should not be recommended on the home page.
    args = serve_args()
    for single_model_config in args.model:
        if os.path.isdir(single_model_config):
@@ -346,29 +390,10 @@ if __name__ == "__main__":
        web_service = WebService(name=args.name)
        web_service.load_model_config(args.model)

-        if args.gpu_ids == "":
-            gpus = []
-        else:
-            #check the gpu_id is valid or not.
-            gpus = args.gpu_ids
-            if isinstance(gpus, str):
-                gpus = [gpus]
-            if "CUDA_VISIBLE_DEVICES" in os.environ:
-                env_gpus = os.environ["CUDA_VISIBLE_DEVICES"].split(",")
-                for op_gpus_str in gpus:
-                    op_gpu_list = op_gpus_str.split(",")
-                    for ids in op_gpu_list:
-                        if ids not in env_gpus:
-                            print("gpu_ids is not in CUDA_VISIBLE_DEVICES.")
-                            exit(-1)
-
-        if len(gpus) > 0:
-            web_service.set_gpus(gpus)
        workdir = "{}_{}".format(args.workdir, args.port)
        web_service.prepare_server(
            workdir=workdir,
            port=args.port,
-            device=args.device,
            use_lite=args.use_lite,
            use_xpu=args.use_xpu,
            ir_optim=args.ir_optim,
@@ -378,7 +403,8 @@ if __name__ == "__main__":
            use_trt=args.use_trt,
            gpu_multi_stream=args.gpu_multi_stream,
            op_num=args.op_num,
-            op_max_batch=args.op_max_batch)
+            op_max_batch=args.op_max_batch,
+            gpuid=args.gpu_ids)
        web_service.run_rpc_service()

        app_instance = Flask(__name__)

--- a/python/paddle_serving_server/server.py
+++ b/python/paddle_serving_server/server.py
@@ -17,6 +17,7 @@ import tarfile
 import socket
 import paddle_serving_server as paddle_serving_server
 from paddle_serving_server.rpc_service import MultiLangServerServiceServicer
+from paddle_serving_server.serve import format_gpu_to_strlist
 from .proto import server_configure_pb2 as server_sdk
 from .proto import general_model_config_pb2 as m_config
 from .proto import multi_lang_general_model_service_pb2_grpc
@@ -171,12 +172,7 @@ class Server(object):
        self.device = device

    def set_gpuid(self, gpuid):
-        if isinstance(gpuid, int):
-            self.gpuid = str(gpuid)
-        elif isinstance(gpuid, list):
-            self.gpuid = [str(x) for x in gpuid]
-        else:
-            self.gpuid = gpuid
+        self.gpuid = format_gpu_to_strlist(gpuid)

    def set_op_num(self, op_num):
        self.op_num = op_num
@@ -197,23 +193,20 @@ class Server(object):
        self.use_xpu = True

    def _prepare_engine(self, model_config_paths, device, use_encryption_model):
+        self.device = device
        if self.model_toolkit_conf == None:
            self.model_toolkit_conf = []
-        self.device = device
-
-        # Generally, self.gpuid = str[] or str.
-        # such as "0" or ["0"] or ["0,1"] or ["0,1" , "1,2"]
-        if isinstance(self.gpuid, str):
-            self.gpuid = [self.gpuid]

+        # Generally, self.gpuid = str[] or [].
        # when len(self.gpuid) means no gpuid is specified.
        # if self.device == "gpu" or self.use_trt:
        # we assume you forget to set gpuid, so set gpuid = ['0'];
-        if len(self.gpuid) == 0:
-            if self.device == "gpu" or self.use_trt:
-                self.gpuid.append("0")
+        if len(self.gpuid) == 0 or self.gpuid == ["-1"]:
+            if self.device == "gpu" or self.use_trt or self.gpu_multi_stream:
+                self.gpuid = ["0"]
+                self.device = "gpu"
            else:
-                self.gpuid.append("-1")
+                self.gpuid = ["-1"]

        if isinstance(self.op_num, int):
            self.op_num = [self.op_num]
@@ -254,12 +247,14 @@ class Server(object):
            for ids in op_gpu_list:
                engine.gpu_ids.extend([int(ids)])

-            if self.device == "gpu" or self.use_trt:
+            if self.device == "gpu" or self.use_trt or self.gpu_multi_stream:
                engine.use_gpu = True
                # this is for Mixed use of GPU and CPU
                # if model-1 use GPU and set the device="gpu"
                # but gpuid[1] = "-1" which means use CPU in Model-2
                # so config about GPU should be False.
+                # op_gpu_list = gpuid[index].split(",")
+                # which is the gpuid for each engine.
                if len(op_gpu_list) == 1:
                    if int(op_gpu_list[0]) == -1:
                        engine.use_gpu = False
@@ -500,10 +495,17 @@ class Server(object):
    def prepare_server(self,
                       workdir=None,
                       port=9292,
-                       device="cpu",
+                       device=None,
                       use_encryption_model=False,
                       cube_conf=None):
-        self.device = device
+        # If `device` is not passed, fall back to self.device,
+        # which is either still unchanged
+        # or was changed earlier by set_device.
+        if device == None:
+            device = self.device
+        # if `device` is set, let self.device = device.
+        else:
+            self.device = device
        if workdir == None:
            workdir = "./tmp"
            os.system("mkdir -p {}".format(workdir))
@@ -602,6 +604,7 @@ class MultiLangServer(object):
        self.body_size_ = 64 * 1024 * 1024
        self.concurrency_ = 100000
        self.is_multi_model_ = False  # for model ensemble, which is not useful right now.
+        self.device = "cpu"  # this is the default value for multilang `device`.

    def set_max_concurrency(self, concurrency):
        self.concurrency_ = concurrency
@@ -609,6 +612,7 @@ class MultiLangServer(object):

    def set_device(self, device="cpu"):
        self.device = device
+        self.bserver_.set_device(device)

    def set_num_threads(self, threads):
        self.worker_num_ = threads
@@ -727,10 +731,18 @@ class MultiLangServer(object):
    def prepare_server(self,
                       workdir=None,
                       port=9292,
-                       device="cpu",
+                       device=None,
                       use_encryption_model=False,
                       cube_conf=None):
-        self.device = device
+        # If `device` is not passed, fall back to self.device,
+        # which is either still unchanged
+        # or was changed earlier by set_device.
+        if device == None:
+            device = self.device
+        # if `device` is set, let self.device = device.
+        else:
+            self.device = device
+
        if not self._port_is_available(port):
            raise SystemExit("Port {} is already used".format(port))
        default_port = 12000

--- a/python/paddle_serving_server/web_service.py
+++ b/python/paddle_serving_server/web_service.py
@@ -26,6 +26,7 @@ import numpy as np
 import os
 from paddle_serving_server import pipeline
 from paddle_serving_server.pipeline import Op
+from paddle_serving_server.serve import format_gpu_to_strlist


 def port_is_available(port):
@@ -44,7 +45,7 @@ class WebService(object):
        # pipeline
        self._server = pipeline.PipelineServer(self.name)

-        self.gpus = []  # deprecated
+        self.gpus = ["-1"]  # deprecated
        self.rpc_service_list = []  # deprecated

    def get_pipeline_response(self, read_op):
@@ -103,19 +104,24 @@ class WebService(object):
        if client_config_path == None:
            self.client_config_path = file_path_list

+    # after this function, self.gpus should be a list of str or [].
    def set_gpus(self, gpus):
        print("This API will be deprecated later. Please do not use it")
-        if isinstance(gpus, int):
-            self.gpus = str(gpus)
-        elif isinstance(gpus, list):
-            self.gpus = [str(x) for x in gpus]
-        else:
-            self.gpus = gpus
+        self.gpus = format_gpu_to_strlist(gpus)
+
+# This function can be called by the user
+# or by the function create_rpc_config.
+# A user can either call set_gpus or pass `gpus` directly.
+# If `gpus` is None, it was not set at all,
+# so self.gpus is used instead.
+# Otherwise, `gpus` takes precedence:
+# if both set_gpus and `gpus` are set,
+# `gpus` will be used.

    def default_rpc_service(self,
                            workdir,
                            port=9292,
-                            gpus=-1,
+                            gpus=None,
                            thread_num=2,
                            mem_optim=True,
                            use_lite=False,
@@ -127,16 +133,25 @@ class WebService(object):
                            gpu_multi_stream=False,
                            op_num=None,
                            op_max_batch=None):
-        device = "gpu"
+
+        device = "cpu"
        server = Server()
+        # Only when `gpus` is None (i.e. it was not set at all)
+        # do we fall back to self.gpus.
+        if gpus == None:
+            gpus = self.gpus
+
+        gpus = format_gpu_to_strlist(gpus)
+        server.set_gpuid(gpus)

-        if gpus == -1 or gpus == "-1":
+        if len(gpus) == 0 or gpus == ["-1"]:
            if use_lite:
                device = "arm"
            else:
                device = "cpu"
        else:
-            server.set_gpuid(gpus)
+            device = "gpu"
+
        op_maker = OpMaker()
        op_seq_maker = OpSeqMaker()

@@ -190,45 +205,31 @@ class WebService(object):
    def _launch_rpc_service(self, service_idx):
        self.rpc_service_list[service_idx].run_server()

+    # self.gpus should be set before calling this function;
+    # if it is not, the default value self.gpus = ["-1"] is used.
+    # Either way, we always pass `gpus` = self.gpus.
    def create_rpc_config(self):
-        if len(self.gpus) == 0:
-            # init cpu service
-            self.rpc_service_list.append(
-                self.default_rpc_service(
-                    self.workdir,
-                    self.port_list[0],
-                    -1,
-                    thread_num=self.thread_num,
-                    mem_optim=self.mem_optim,
-                    use_lite=self.use_lite,
-                    use_xpu=self.use_xpu,
-                    ir_optim=self.ir_optim,
-                    precision=self.precision,
-                    use_calib=self.use_calib,
-                    op_num=self.op_num,
-                    op_max_batch=self.op_max_batch))
-        else:
-            self.rpc_service_list.append(
-                self.default_rpc_service(
-                    self.workdir,
-                    self.port_list[0],
-                    self.gpus,
-                    thread_num=self.thread_num,
-                    mem_optim=self.mem_optim,
-                    use_lite=self.use_lite,
-                    use_xpu=self.use_xpu,
-                    ir_optim=self.ir_optim,
-                    precision=self.precision,
-                    use_calib=self.use_calib,
-                    use_trt=self.use_trt,
-                    gpu_multi_stream=self.gpu_multi_stream,
-                    op_num=self.op_num,
-                    op_max_batch=self.op_max_batch))
+        self.rpc_service_list.append(
+            self.default_rpc_service(
+                self.workdir,
+                self.port_list[0],
+                self.gpus,
+                thread_num=self.thread_num,
+                mem_optim=self.mem_optim,
+                use_lite=self.use_lite,
+                use_xpu=self.use_xpu,
+                ir_optim=self.ir_optim,
+                precision=self.precision,
+                use_calib=self.use_calib,
+                use_trt=self.use_trt,
+                gpu_multi_stream=self.gpu_multi_stream,
+                op_num=self.op_num,
+                op_max_batch=self.op_max_batch))

    def prepare_server(self,
                       workdir,
                       port=9393,
-                       device="gpu",
+                       device="cpu",
                       precision="fp32",
                       use_calib=False,
                       use_lite=False,
@@ -240,12 +241,13 @@ class WebService(object):
                       gpu_multi_stream=False,
                       op_num=None,
                       op_max_batch=None,
-                       gpuid=-1):
+                       gpuid=None):
        print("This API will be deprecated later. Please do not use it")
        self.workdir = workdir
        self.port = port
        self.thread_num = thread_num
-        self.device = device
+        # self.device is not used at all.
+        # device is set by gpuid.
        self.precision = precision
        self.use_calib = use_calib
        self.use_lite = use_lite
@@ -257,12 +259,14 @@ class WebService(object):
        self.gpu_multi_stream = gpu_multi_stream
        self.op_num = op_num
        self.op_max_batch = op_max_batch
-        if isinstance(gpuid, int):
-            self.gpus = str(gpuid)
-        elif isinstance(gpuid, list):
-            self.gpus = [str(x) for x in gpuid]
+
+        # if gpuid != None, we will use gpuid first.
+        # otherwise, keep the self.gpus unchanged.
+        # maybe self.gpus is set by the Function set_gpus.
+        if gpuid != None:
+            self.gpus = format_gpu_to_strlist(gpuid)
        else:
-            self.gpus = gpuid
+            pass

        default_port = 12000
        for i in range(1000):
@@ -359,8 +363,8 @@ class WebService(object):
        if gpu:
            # if user forget to call function `set_gpus` to set self.gpus.
            # default self.gpus = [0].
-            if len(self.gpus) == 0:
-                self.gpus.append(0)
+            if len(self.gpus) == 0 or self.gpus == ["-1"]:
+                self.gpus = ["0"]
            # right now, local Predictor only support 1 card.
            # no matter how many gpu_id is in gpus, we only use the first one.
            gpu_id = (self.gpus[0].split(","))[0]