__init__.py 17.4 KB
Newer Older
M
MRXLT 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13
#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
B
barrierye 已提交
14
# pylint: disable=doc-string-missing
M
MRXLT 已提交
15 16 17 18 19 20

import os
from .proto import server_configure_pb2 as server_sdk
from .proto import general_model_config_pb2 as m_config
import google.protobuf.text_format
import tarfile
M
MRXLT 已提交
21
import socket
22
import paddle_serving_server_gpu as paddle_serving_server
23
import time
24
from .version import serving_server_version
M
MRXLT 已提交
25
from contextlib import closing
G
guru4elephant 已提交
26
import argparse
B
barrierye 已提交
27
import collections
M
MRXLT 已提交
28

B
barrierye 已提交
29

30 31 32 33 34 35 36 37 38 39 40 41 42 43 44
def serve_args():
    """Parse the command-line options of the ``serve`` entry point.

    Returns:
        argparse.Namespace: the parsed options, with the attributes
        ``thread``, ``model``, ``port``, ``workdir``, ``device``,
        ``gpu_ids``, ``name`` and ``mem_optim``.
    """

    def _str2bool(value):
        # ``type=bool`` would turn every non-empty string (including
        # "False" and "0") into True, so the text is parsed explicitly.
        if isinstance(value, bool):
            return value
        lowered = value.strip().lower()
        if lowered in ("true", "t", "yes", "y", "1", "on"):
            return True
        if lowered in ("false", "f", "no", "n", "0", "off", ""):
            return False
        raise argparse.ArgumentTypeError(
            "Boolean value expected, got {!r}".format(value))

    parser = argparse.ArgumentParser("serve")
    parser.add_argument(
        "--thread", type=int, default=10, help="Concurrency of server")
    parser.add_argument(
        "--model", type=str, default="", help="Model for serving")
    parser.add_argument(
        "--port", type=int, default=9292, help="Port of the starting gpu")
    parser.add_argument(
        "--workdir",
        type=str,
        default="workdir",
        help="Working dir of current service")
    parser.add_argument(
        "--device", type=str, default="gpu", help="Type of device")
    parser.add_argument("--gpu_ids", type=str, default="", help="gpu ids")
    parser.add_argument(
        "--name", type=str, default="None", help="Default service name")
    parser.add_argument(
        "--mem_optim", type=_str2bool, default=False, help="Memory optimize")
    return parser.parse_args()
M
MRXLT 已提交
51

B
barrierye 已提交
52

M
MRXLT 已提交
53 54 55
class OpMaker(object):
    """Factory producing text-format DAG node descriptions for supported ops."""

    def __init__(self):
        # Maps the user-facing op type to the value stored in DAGNode.type.
        self.op_dict = {
            "general_infer": "GeneralInferOp",
            "general_reader": "GeneralReaderOp",
            "general_response": "GeneralResponseOp",
            "general_text_reader": "GeneralTextReaderOp",
            "general_text_response": "GeneralTextResponseOp",
            "general_single_kv": "GeneralSingleKVOp",
            "general_dist_kv_infer": "GeneralDistKVInferOp",
            "general_dist_kv": "GeneralDistKVOp"
        }
        # Per-op-type counter used to generate default node names.
        self.node_name_suffix_ = collections.defaultdict(int)

    def create(self, node_type, engine_name=None, inputs=None, outputs=None):
        """Build a DAG node of ``node_type`` and return it as a string.

        Args:
            node_type: key of ``self.op_dict`` selecting the op.
            engine_name: explicit node name; when falsy a name of the form
                ``<node_type>_<counter>`` is generated.
            inputs: optional iterable of node strings (as returned by this
                method) that the new node depends on.
            outputs: accepted for interface compatibility; not used here.

        Returns:
            The protobuf text-format serialization of the node.

        Raises:
            Exception: if ``node_type`` is not a supported op type.
        """
        if node_type not in self.op_dict:
            raise Exception("Op type {} is not supported right now".format(
                node_type))
        node = server_sdk.DAGNode()
        # node.name will be used as the infer engine name
        if engine_name:
            node.name = engine_name
        else:
            node.name = '{}_{}'.format(node_type,
                                       self.node_name_suffix_[node_type])
            self.node_name_suffix_[node_type] += 1

        node.type = self.op_dict[node_type]
        # ``inputs`` defaults to None rather than a shared mutable list.
        for dep_node_str in inputs or ():
            dep_node = server_sdk.DAGNode()
            google.protobuf.text_format.Parse(dep_node_str, dep_node)
            dep = server_sdk.DAGNodeDependency()
            dep.name = dep_node.name
            dep.mode = "RO"
            node.dependencies.extend([dep])
        # Because the return value will be used as the key value of the
        # dict, and the proto object is variable which cannot be hashed,
        # so it is processed into a string. This has little effect on
        # overall efficiency.
        return google.protobuf.text_format.MessageToString(node)
M
MRXLT 已提交
94 95 96 97 98 99 100 101


class OpSeqMaker(object):
    """Builds a linear workflow by appending ops one after another."""

    def __init__(self):
        self.workflow = server_sdk.Workflow()
        self.workflow.name = "workflow1"
        self.workflow.workflow_type = "Sequence"

    def add_op(self, node_str):
        """Append the op described by ``node_str`` to the sequence.

        Args:
            node_str: protobuf text-format serialization of a DAG node.

        Raises:
            Exception: if the op declares more than one predecessor, or if
                its declared predecessor is not the op added just before it.
        """
        node = server_sdk.DAGNode()
        google.protobuf.text_format.Parse(node_str, node)
        if len(node.dependencies) > 1:
            raise Exception(
                'Set more than one predecessor for op in OpSeqMaker is not allowed.'
            )
        if len(self.workflow.nodes) >= 1:
            previous_name = self.workflow.nodes[-1].name
            if len(node.dependencies) == 0:
                # No predecessor declared: chain onto the last op added.
                dep = server_sdk.DAGNodeDependency()
                dep.name = previous_name
                dep.mode = "RO"
                node.dependencies.extend([dep])
            elif node.dependencies[0].name != previous_name:
                # Exactly one predecessor declared, and it is not the op
                # that was added last. Report the real previous op first and
                # the declared predecessor second, matching the wording.
                raise Exception(
                    'You must add op in order in OpSeqMaker. The previous op is {}, but the current op is followed by {}.'.
                    format(previous_name, node.dependencies[0].name))
        self.workflow.nodes.extend([node])

    def get_op_sequence(self):
        """Return a new WorkflowConf containing this sequence's workflow."""
        workflow_conf = server_sdk.WorkflowConf()
        workflow_conf.workflows.extend([self.workflow])
        return workflow_conf


B
barrierye 已提交
129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146
class OpGraphMaker(object):
    """Collects ops into a single workflow without checking their order."""

    def __init__(self):
        graph = server_sdk.Workflow()
        graph.name = "workflow1"
        # Currently, SDK only supports "Sequence"
        graph.workflow_type = "Sequence"
        self.workflow = graph

    def add_op(self, node_str):
        """Parse ``node_str`` (text-format DAG node) and add it as-is."""
        parsed_node = server_sdk.DAGNode()
        google.protobuf.text_format.Parse(node_str, parsed_node)
        self.workflow.nodes.extend([parsed_node])

    def get_op_graph(self):
        """Return a new WorkflowConf wrapping the collected workflow."""
        conf = server_sdk.WorkflowConf()
        conf.workflows.extend([self.workflow])
        return conf


M
MRXLT 已提交
147 148 149 150 151 152 153 154 155 156 157 158 159
class Server(object):
    """Writes the prototxt configuration for a serving process and starts it.

    Typical use, as far as this class shows: set an op sequence/graph, call
    ``load_model_config``, then ``prepare_server`` and ``run_server``.
    """

    def __init__(self):
        self.server_handle_ = None
        self.infer_service_conf = None
        self.model_toolkit_conf = None
        self.resource_conf = None
        self.memory_optimization = False
        self.model_conf = None
        self.workflow_fn = "workflow.prototxt"
        self.resource_fn = "resource.prototxt"
        self.infer_service_fn = "infer_service.prototxt"
        self.model_toolkit_fn = "model_toolkit.prototxt"
        self.general_model_config_fn = "general_model.prototxt"
        self.cube_config_fn = "cube.conf"
        self.workdir = ""
        self.max_concurrency = 0
        self.num_threads = 4
        self.port = 8080
        self.reload_interval_s = 10
        self.module_path = os.path.dirname(paddle_serving_server.__file__)
        self.cur_path = os.getcwd()
        # Exits the process when the CUDA version file cannot be read.
        self.check_cuda()
        self.use_local_bin = False
        self.gpuid = 0
        self.model_config_paths = None  # for multi-model in a workflow

    def set_max_concurrency(self, concurrency):
        self.max_concurrency = concurrency

    def set_num_threads(self, threads):
        self.num_threads = threads

    def set_port(self, port):
        self.port = port

    def set_reload_interval(self, interval):
        self.reload_interval_s = interval

    def set_op_sequence(self, op_seq):
        self.workflow_conf = op_seq

    def set_op_graph(self, op_graph):
        self.workflow_conf = op_graph

    def set_memory_optimize(self, flag=False):
        self.memory_optimization = flag

    def check_local_bin(self):
        """Use the binary named by ``$SERVING_BIN`` when that variable is set."""
        if "SERVING_BIN" in os.environ:
            self.use_local_bin = True
            self.bin_path = os.environ["SERVING_BIN"]

    def check_cuda(self):
        """Exit with a hint when ``/usr/local/cuda/version.txt`` is unreadable."""
        r = os.system("cat /usr/local/cuda/version.txt")
        if r != 0:
            raise SystemExit(
                "CUDA not found, please check your environment or use cpu version by \"pip install paddle_serving_server\""
            )

    def set_gpuid(self, gpuid=0):
        self.gpuid = gpuid

    def _prepare_engine(self, model_config_paths, device):
        """Add one EngineDesc per ``{engine_name: model_path}`` entry."""
        if self.model_toolkit_conf is None:
            self.model_toolkit_conf = server_sdk.ModelToolkitConf()

        for engine_name, model_config_path in model_config_paths.items():
            engine = server_sdk.EngineDesc()
            engine.name = engine_name
            engine.reloadable_meta = self.workdir + "/fluid_time_file"
            os.system("touch {}".format(engine.reloadable_meta))
            engine.reloadable_type = "timestamp_ne"
            engine.runtime_thread_num = 0
            engine.batch_infer_size = 0
            engine.enable_batch_align = 0
            engine.model_data_path = model_config_path
            engine.enable_memory_optimization = self.memory_optimization
            engine.static_optimization = False
            engine.force_update_static_cache = False

            # Any other device string leaves engine.type unset.
            if device == "cpu":
                engine.type = "FLUID_CPU_ANALYSIS_DIR"
            elif device == "gpu":
                engine.type = "FLUID_GPU_ANALYSIS_DIR"

            self.model_toolkit_conf.engines.extend([engine])

    def _prepare_infer_service(self, port):
        """Create the InferServiceConf once, bound to ``port``."""
        if self.infer_service_conf is None:
            self.infer_service_conf = server_sdk.InferServiceConf()
            self.infer_service_conf.port = port
            infer_service = server_sdk.InferService()
            infer_service.name = "GeneralModelService"
            infer_service.workflows.extend(["workflow1"])
            self.infer_service_conf.services.extend([infer_service])

    def _prepare_resource(self, workdir):
        """Write the general model config and build the ResourceConf once."""
        self.workdir = workdir
        if self.resource_conf is None:
            with open("{}/{}".format(workdir, self.general_model_config_fn),
                      "w") as fout:
                fout.write(str(self.model_conf))
            self.resource_conf = server_sdk.ResourceConf()
            # Nodes whose name contains "dist_kv" need the cube config.
            for workflow in self.workflow_conf.workflows:
                for node in workflow.nodes:
                    if "dist_kv" in node.name:
                        self.resource_conf.cube_config_path = workdir
                        self.resource_conf.cube_config_file = self.cube_config_fn
            self.resource_conf.model_toolkit_path = workdir
            self.resource_conf.model_toolkit_file = self.model_toolkit_fn
            self.resource_conf.general_model_path = workdir
            self.resource_conf.general_model_file = self.general_model_config_fn

    def _write_pb_str(self, filepath, pb_obj):
        with open(filepath, "w") as fout:
            fout.write(str(pb_obj))

    def load_model_config(self, model_config_paths):
        """Record the model path(s) and load the workflow's model config.

        Args:
            model_config_paths: either a single model directory (str), or a
                dict mapping op node strings (as produced by
                ``OpMaker.create``) to model directories.

        Raises:
            Exception: if a single path is given but the workflow has no op
                with a default engine name, if the dict is empty, or if the
                argument is neither a str nor a dict.
        """
        # At present, Serving needs to configure the model path in
        # the resource.prototxt file to determine the input and output
        # format of the workflow, so the inputs and outputs of multiple
        # models have to be the same.
        workflow_oi_config_path = None
        if isinstance(model_config_paths, str):
            # If there is only one model path, use the default infer_op.
            # Because there are several infer_op type, we need to find
            # it from workflow_conf.
            default_engine_names = [
                'general_infer_0', 'general_dist_kv_infer_0',
                'general_dist_kv_quant_infer'
            ]
            # The nodes live on the individual workflows of the
            # WorkflowConf, not on the conf object itself.
            engine_name = next(
                (node.name
                 for workflow in self.workflow_conf.workflows
                 for node in workflow.nodes
                 if node.name in default_engine_names), None)
            if engine_name is None:
                raise Exception(
                    "You have set the engine_name of Op. Please use the form {op: model_path} to configure model path"
                )
            self.model_config_paths = {engine_name: model_config_paths}
            workflow_oi_config_path = self.model_config_paths[engine_name]
        elif isinstance(model_config_paths, dict):
            self.model_config_paths = {}
            for node_str, path in model_config_paths.items():
                node = server_sdk.DAGNode()
                google.protobuf.text_format.Parse(node_str, node)
                self.model_config_paths[node.name] = path
            if not self.model_config_paths:
                raise Exception("model_config_paths must not be empty.")
            print("You have specified multiple model paths, please ensure "
                  "that the input and output of multiple models are the same.")
            # dict views are not indexable on Python 3; take the first value.
            workflow_oi_config_path = next(
                iter(self.model_config_paths.values()))
        else:
            # Literal braces are doubled so that str.format does not treat
            # "{op: model_path}" as a replacement field.
            raise Exception("The type of model_config_paths must be str or "
                            "dict({{op: model_path}}), not {}.".format(
                                type(model_config_paths)))

        self.model_conf = m_config.GeneralModelConfig()
        config_file = "{}/serving_server_conf.prototxt".format(
            workflow_oi_config_path)
        with open(config_file, 'r') as f:
            self.model_conf = google.protobuf.text_format.Merge(
                str(f.read()), self.model_conf)
        # check config here
        # print config here

    def download_bin(self):
        """Download and unpack the serving binary next to this module.

        Sets ``self.server_path`` and ``self.bin_path``. The download is
        skipped when the ``.is_download`` marker or the extracted folder
        already exists.

        Raises:
            SystemExit: if the download or the extraction fails.
        """
        import shutil  # only needed to clean up after a failed extraction

        folder_name = "serving-gpu-" + serving_server_version
        tar_name = folder_name + ".tar.gz"
        bin_url = "https://paddle-serving.bj.bcebos.com/bin/" + tar_name
        self.server_path = os.path.join(self.module_path, folder_name)
        download_flag = "{}/{}.is_download".format(self.module_path,
                                                   folder_name)

        def discard_failed_download():
            # Remove the archive, the marker and any partially extracted
            # tree so that a later run retries instead of trusting a stale
            # marker and waiting for a folder that never appears.
            for leftover in (tar_name, download_flag):
                if os.path.exists(leftover):
                    os.remove(leftover)
            if os.path.isdir(self.server_path):
                shutil.rmtree(self.server_path, ignore_errors=True)

        # tar_name is relative, so work inside the module directory and
        # always switch back, even when the download aborts.
        os.chdir(self.module_path)
        try:
            if not os.path.exists(download_flag) and not os.path.exists(
                    self.server_path):
                # The marker is created before downloading; run_server waits
                # for server_path to appear when another process got here
                # first.
                os.system("touch {}".format(download_flag))
                print(
                    'Frist time run, downloading PaddleServing components ...')
                r = os.system('wget ' + bin_url + ' --no-check-certificate')
                if r != 0:
                    discard_failed_download()
                    raise SystemExit(
                        'Download failed, please check your network or permission of {}.'.
                        format(self.module_path))
                try:
                    print('Decompressing files ..')
                    with tarfile.open(tar_name) as tar:
                        tar.extractall()
                except BaseException:
                    # Same scope as the original bare ``except``; every
                    # failure is reported as SystemExit.
                    discard_failed_download()
                    raise SystemExit(
                        'Decompressing failed, please check your permission of {} or disk space left.'.
                        format(self.module_path))
                finally:
                    if os.path.exists(tar_name):
                        os.remove(tar_name)
        finally:
            os.chdir(self.cur_path)
        self.bin_path = self.server_path + "/serving"

    def prepare_server(self, workdir=None, port=9292, device="cpu"):
        """Create ``workdir`` and write all configuration files into it.

        Args:
            workdir: target directory; ``"./tmp"`` when None.
            port: port the service is configured with.
            device: ``"cpu"`` or ``"gpu"``; selects the engine type.

        Raises:
            SystemExit: if ``port`` is already in use.
        """
        if workdir is None:
            workdir = "./tmp"
        os.system("mkdir {}".format(workdir))
        os.system("touch {}/fluid_time_file".format(workdir))

        if not self.port_is_available(port):
            raise SystemExit("Prot {} is already used".format(port))

        self.set_port(port)
        self._prepare_resource(workdir)
        self._prepare_engine(self.model_config_paths, device)
        self._prepare_infer_service(port)
        self.workdir = workdir

        infer_service_fn = "{}/{}".format(workdir, self.infer_service_fn)
        workflow_fn = "{}/{}".format(workdir, self.workflow_fn)
        resource_fn = "{}/{}".format(workdir, self.resource_fn)
        model_toolkit_fn = "{}/{}".format(workdir, self.model_toolkit_fn)

        self._write_pb_str(infer_service_fn, self.infer_service_conf)
        self._write_pb_str(workflow_fn, self.workflow_conf)
        self._write_pb_str(resource_fn, self.resource_conf)
        self._write_pb_str(model_toolkit_fn, self.model_toolkit_conf)

    def port_is_available(self, port):
        """Return True when a TCP connect to ``port`` does not succeed."""
        with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
            sock.settimeout(2)
            result = sock.connect_ex(('0.0.0.0', port))
        return result != 0

    def run_server(self):
        """Start the serving binary with the prepared configuration."""
        # just run server with system command
        # currently we do not load cube
        self.check_local_bin()
        if not self.use_local_bin:
            self.download_bin()
            # wait for other process to download server bin
            while not os.path.exists(self.server_path):
                time.sleep(1)
        else:
            print("Use local bin : {}".format(self.bin_path))
        command = ("{} "
                   "-enable_model_toolkit "
                   "-inferservice_path {} "
                   "-inferservice_file {} "
                   "-max_concurrency {} "
                   "-num_threads {} "
                   "-port {} "
                   "-reload_interval_s {} "
                   "-resource_path {} "
                   "-resource_file {} "
                   "-workflow_path {} "
                   "-workflow_file {} "
                   "-bthread_concurrency {} "
                   "-gpuid {} ").format(
                       self.bin_path,
                       self.workdir,
                       self.infer_service_fn,
                       self.max_concurrency,
                       self.num_threads,
                       self.port,
                       self.reload_interval_s,
                       self.workdir,
                       self.resource_fn,
                       self.workdir,
                       self.workflow_fn,
                       self.num_threads,
                       self.gpuid)
        print("Going to Run Comand")
        print(command)

        os.system(command)