From 62a7a986640cf8e9e9f61774de3d771286a24603 Mon Sep 17 00:00:00 2001 From: MRXLT Date: Thu, 13 Feb 2020 11:35:06 +0800 Subject: [PATCH] add gpu whl --- python/CMakeLists.txt | 51 ++-- python/paddle_serving_server_gpu/__init__.py | 244 +++++++++++++++++++ python/paddle_serving_server_gpu/version.py | 17 ++ 3 files changed, 296 insertions(+), 16 deletions(-) create mode 100644 python/paddle_serving_server_gpu/__init__.py create mode 100644 python/paddle_serving_server_gpu/version.py diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index b7313d85..d6149590 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -1,15 +1,19 @@ if (CLIENT_ONLY) -file(GLOB_RECURSE SERVING_CLIENT_PY_FILES paddle_serving_client/*.py) -set(PY_FILES ${SERVING_CLIENT_PY_FILES}) -SET(PACKAGE_NAME "serving_client") -set(SETUP_LOG_FILE "setup.py.client.log") + file(GLOB_RECURSE SERVING_CLIENT_PY_FILES paddle_serving_client/*.py) + set(PY_FILES ${SERVING_CLIENT_PY_FILES}) + SET(PACKAGE_NAME "serving_client") + set(SETUP_LOG_FILE "setup.py.client.log") endif() if (NOT CLIENT_ONLY) -file(GLOB_RECURSE SERVING_SERVER_PY_FILES paddle_serving_server/*.py) -set(PY_FILES ${SERVING_SERVER_PY_FILES}) -SET(PACKAGE_NAME "serving_server") -set(SETUP_LOG_FILE "setup.py.server.log") + if (NOT WITH_GPU) + file(GLOB_RECURSE SERVING_SERVER_PY_FILES paddle_serving_server/*.py) + else() + file(GLOB_RECURSE SERVING_SERVER_PY_FILES paddle_serving_server_gpu/*.py) + endif() + set(PY_FILES ${SERVING_SERVER_PY_FILES}) + SET(PACKAGE_NAME "serving_server") + set(SETUP_LOG_FILE "setup.py.server.log") endif() if (CLIENT_ONLY) @@ -18,8 +22,13 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.client.in endif() if (NOT CLIENT_ONLY) -configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.server.in - ${CMAKE_CURRENT_BINARY_DIR}/setup.py) + if (NOT WITH_GPU) + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.server.in + ${CMAKE_CURRENT_BINARY_DIR}/setup.py) + else() + configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.server_gpu.in + ${CMAKE_CURRENT_BINARY_DIR}/setup.py) + endif() endif() set(SERVING_CLIENT_CORE ${PADDLE_SERVING_BINARY_DIR}/core/general-client/serving_client.so) @@ -37,12 +46,22 @@ add_custom_target(paddle_python ALL DEPENDS serving_client ${PADDLE_SERVING_BINA endif() if (NOT CLIENT_ONLY) -add_custom_command( - OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp - COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server/ ${PADDLE_SERVING_BINARY_DIR}/python/ - COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel - DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES}) -add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp) + if(NOT WITH_GPU) + add_custom_command( + OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp + COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server/ ${PADDLE_SERVING_BINARY_DIR}/python/ + COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel + DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES}) + add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp) + else() + add_custom_command( + OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp + COMMAND cp -r + ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server_gpu/ ${PADDLE_SERVING_BINARY_DIR}/python/ + COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel + DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES}) + add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp) + endif() endif() 
 set(SERVING_CLIENT_PYTHON_PACKAGE_DIR ${CMAKE_CURRENT_BINARY_DIR}/dist/)
diff --git a/python/paddle_serving_server_gpu/__init__.py b/python/paddle_serving_server_gpu/__init__.py
new file mode 100644
index 00000000..a5b584ba
--- /dev/null
+++ b/python/paddle_serving_server_gpu/__init__.py
@@ -0,0 +1,244 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+from .proto import server_configure_pb2 as server_sdk
+from .proto import general_model_config_pb2 as m_config
+import google.protobuf.text_format
+import tarfile
+import paddle_serving_server_gpu as paddle_serving_server  # resolve paths relative to this (GPU) package
+from .version import serving_server_version
+
+
+class OpMaker(object):
+    def __init__(self):
+        self.op_dict = {
+            "general_infer": "GeneralInferOp",
+            "general_reader": "GeneralReaderOp",
+            "general_single_kv": "GeneralSingleKVOp",
+            "general_dist_kv": "GeneralDistKVOp"
+        }
+
+    # currently, inputs and outputs are not used
+    # when we have OpGraphMaker, inputs and outputs are necessary
+    def create(self, name, inputs=[], outputs=[]):
+        if name not in self.op_dict:
+            raise Exception("Op name {} is not supported right now".format(
+                name))
+        node = server_sdk.DAGNode()
+        node.name = "{}_op".format(name)
+        node.type = self.op_dict[name]
+        return node
+
+
+class OpSeqMaker(object):
+    def __init__(self):
+        self.workflow = server_sdk.Workflow()
+        self.workflow.name = "workflow1"
+        self.workflow.workflow_type = "Sequence"
+
+    def add_op(self, node):
+        if len(self.workflow.nodes) >= 1:
+            dep = server_sdk.DAGNodeDependency()
+            dep.name = self.workflow.nodes[-1].name
+            dep.mode = "RO"
+            node.dependencies.extend([dep])
+        self.workflow.nodes.extend([node])
+
+    def get_op_sequence(self):
+        workflow_conf = server_sdk.WorkflowConf()
+        workflow_conf.workflows.extend([self.workflow])
+        return workflow_conf
+
+
+class Server(object):
+    def __init__(self):
+        self.server_handle_ = None
+        self.infer_service_conf = None
+        self.model_toolkit_conf = None
+        self.resource_conf = None
+        self.engine = None
+        self.memory_optimization = False
+        self.model_conf = None
+        self.workflow_fn = "workflow.prototxt"
+        self.resource_fn = "resource.prototxt"
+        self.infer_service_fn = "infer_service.prototxt"
+        self.model_toolkit_fn = "model_toolkit.prototxt"
+        self.general_model_config_fn = "general_model.prototxt"
+        self.workdir = ""
+        self.max_concurrency = 0
+        self.num_threads = 0
+        self.port = 8080
+        self.reload_interval_s = 10
+        self.module_path = os.path.dirname(paddle_serving_server.__file__)
+        self.cur_path = os.getcwd()
+
+    def set_max_concurrency(self, concurrency):
+        self.max_concurrency = concurrency
+
+    def set_num_threads(self, threads):
+        self.num_threads = threads
+
+    def set_port(self, port):
+        self.port = port
+
+    def set_reload_interval(self, interval):
+        self.reload_interval_s = interval
+
+    def set_op_sequence(self, op_seq):
+        self.workflow_conf = op_seq
+
+    def set_memory_optimize(self, flag=False):
+        self.memory_optimization = flag
+
+    def _prepare_engine(self,
model_config_path, device):
+        if self.model_toolkit_conf is None:
+            self.model_toolkit_conf = server_sdk.ModelToolkitConf()
+
+        if self.engine is None:
+            self.engine = server_sdk.EngineDesc()
+
+        self.model_config_path = model_config_path
+        self.engine.name = "general_model"
+        self.engine.reloadable_meta = model_config_path + "/fluid_time_file"
+        os.system("touch {}".format(self.engine.reloadable_meta))
+        self.engine.reloadable_type = "timestamp_ne"
+        self.engine.runtime_thread_num = 0
+        self.engine.batch_infer_size = 0
+        self.engine.enable_batch_align = 0
+        self.engine.model_data_path = model_config_path
+        self.engine.enable_memory_optimization = self.memory_optimization
+        self.engine.static_optimization = False
+        self.engine.force_update_static_cache = False
+
+        if device == "cpu":
+            self.engine.type = "FLUID_CPU_ANALYSIS_DIR"
+        elif device == "gpu":
+            self.engine.type = "FLUID_GPU_ANALYSIS_DIR"
+
+        self.model_toolkit_conf.engines.extend([self.engine])
+
+    def _prepare_infer_service(self, port):
+        if self.infer_service_conf is None:
+            self.infer_service_conf = server_sdk.InferServiceConf()
+            self.infer_service_conf.port = port
+            infer_service = server_sdk.InferService()
+            infer_service.name = "GeneralModelService"
+            infer_service.workflows.extend(["workflow1"])
+            self.infer_service_conf.services.extend([infer_service])
+
+    def _prepare_resource(self, workdir):
+        if self.resource_conf is None:
+            with open("{}/{}".format(workdir, self.general_model_config_fn),
+                      "w") as fout:
+                fout.write(str(self.model_conf))
+            self.resource_conf = server_sdk.ResourceConf()
+            self.resource_conf.model_toolkit_path = workdir
+            self.resource_conf.model_toolkit_file = self.model_toolkit_fn
+            self.resource_conf.general_model_path = workdir
+            self.resource_conf.general_model_file = self.general_model_config_fn
+
+    def _write_pb_str(self, filepath, pb_obj):
+        with open(filepath, "w") as fout:
+            fout.write(str(pb_obj))
+
+    def load_model_config(self, path):
+        self.model_config_path = path
+        self.model_conf = m_config.GeneralModelConfig()
+        f = open("{}/serving_server_conf.prototxt".format(path), 'r')
+        self.model_conf = google.protobuf.text_format.Merge(
+            str(f.read()), self.model_conf)
+        # check config here
+        # print config here
+
+    def download_bin(self):
+        os.chdir(self.module_path)
+        need_download = False
+        device_version = "serving-gpu-"
+        folder_name = device_version + serving_server_version
+        tar_name = folder_name + ".tar.gz"
+        bin_url = "https://paddle-serving.bj.bcebos.com/bin/" + tar_name
+        self.server_path = os.path.join(self.module_path, folder_name)
+        if not os.path.exists(self.server_path):
+            print('First time run, downloading PaddleServing components ...')
+            r = os.system('wget ' + bin_url + ' --no-check-certificate')
+            if r != 0:
+                print('Download failed')
+                if os.path.exists(tar_name):
+                    os.remove(tar_name)
+            else:
+                try:
+                    print('Decompressing files ...')
+                    tar = tarfile.open(tar_name)
+                    tar.extractall()
+                    tar.close()
+                except Exception:
+                    if os.path.exists(self.server_path):
+                        os.system("rm -rf {}".format(self.server_path))
+                finally:
+                    os.remove(tar_name)
+        os.chdir(self.cur_path)
+        self.bin_path = self.server_path + "/serving"
+
+    def prepare_server(self, workdir=None, port=9292, device="cpu"):
+        if workdir is None:
+            workdir = "./tmp"
+            os.system("mkdir {}".format(workdir))
+        else:
+            os.system("mkdir {}".format(workdir))
+        os.system("touch {}/fluid_time_file".format(workdir))
+
+        self._prepare_resource(workdir)
+        self._prepare_engine(self.model_config_path, device)
+        self._prepare_infer_service(port)
+        self.workdir = workdir
+
+        infer_service_fn = "{}/{}".format(workdir, self.infer_service_fn)
+        workflow_fn = "{}/{}".format(workdir, self.workflow_fn)
+        resource_fn = "{}/{}".format(workdir, self.resource_fn)
+        model_toolkit_fn = "{}/{}".format(workdir, self.model_toolkit_fn)
+
+        self._write_pb_str(infer_service_fn, self.infer_service_conf)
+        self._write_pb_str(workflow_fn, self.workflow_conf)
+        self._write_pb_str(resource_fn, self.resource_conf)
+        self._write_pb_str(model_toolkit_fn, self.model_toolkit_conf)
+
+    def run_server(self):
+        # just run server with system command
+        # currently we do not load cube
+        self.download_bin()
+        command = "{} " \
+                  "-enable_model_toolkit " \
+                  "-inferservice_path {} " \
+                  "-inferservice_file {} " \
+                  "-max_concurrency {} " \
+                  "-num_threads {} " \
+                  "-port {} " \
+                  "-reload_interval_s {} " \
+                  "-resource_path {} " \
+                  "-resource_file {} " \
+                  "-workflow_path {} " \
+                  "-workflow_file {} ".format(
+                      self.bin_path,
+                      self.workdir,
+                      self.infer_service_fn,
+                      self.max_concurrency,
+                      self.num_threads,
+                      self.port,
+                      self.reload_interval_s,
+                      self.workdir,
+                      self.resource_fn,
+                      self.workdir,
+                      self.workflow_fn)
+        os.system(command)
diff --git a/python/paddle_serving_server_gpu/version.py b/python/paddle_serving_server_gpu/version.py
new file mode 100644
index 00000000..d67c284b
--- /dev/null
+++ b/python/paddle_serving_server_gpu/version.py
@@ -0,0 +1,17 @@
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+""" Paddle Serving version strings """
+serving_client_version = "0.1.0"
+serving_server_version = "0.1.0"
+module_proto_version = "0.1.0"
-- 
GitLab
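
For context, a minimal launcher for the new paddle_serving_server_gpu package would look like the sketch below. It only illustrates the API added in this patch (OpMaker, OpSeqMaker, Server); the model directory "serving_server_model", the working directory, and the port are hypothetical placeholders, and the snippet assumes the GPU wheel built by this CMake change (with its compiled proto modules) is installed.

    # hypothetical launcher script -- usage sketch, not part of this patch
    from paddle_serving_server_gpu import OpMaker, OpSeqMaker, Server

    op_maker = OpMaker()
    read_op = op_maker.create('general_reader')   # deserializes client input tensors
    infer_op = op_maker.create('general_infer')   # runs the Paddle inference engine

    op_seq_maker = OpSeqMaker()
    op_seq_maker.add_op(read_op)
    op_seq_maker.add_op(infer_op)                 # infer_op gets an "RO" dependency on read_op

    server = Server()
    server.set_op_sequence(op_seq_maker.get_op_sequence())
    server.set_num_threads(4)
    server.load_model_config("serving_server_model")   # placeholder model dir with serving_server_conf.prototxt
    server.prepare_server(workdir="workdir", port=9393, device="gpu")
    server.run_server()   # first run downloads the serving-gpu binary, then launches it

The GPU-specific behavior is confined to two places in the new module: device="gpu" makes _prepare_engine select the FLUID_GPU_ANALYSIS_DIR engine type, and download_bin fetches the serving-gpu- build of the serving binary.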