add gpu whl

62a7a986 · MRXLT · 2ee7eb44 · 62a7a986 · 62a7a986 · 62a7a986
3 changed files
--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
 if (CLIENT_ONLY)
-file(GLOB_RECURSE SERVING_CLIENT_PY_FILES paddle_serving_client/*.py)
-set(PY_FILES ${SERVING_CLIENT_PY_FILES})
-SET(PACKAGE_NAME "serving_client")
-set(SETUP_LOG_FILE "setup.py.client.log")
+    file(GLOB_RECURSE SERVING_CLIENT_PY_FILES paddle_serving_client/*.py)  # client-only build: collect every .py file under paddle_serving_client/
+    set(PY_FILES ${SERVING_CLIENT_PY_FILES})  # PY_FILES holds the Python sources of whichever package this build produces
+    SET(PACKAGE_NAME "serving_client")  # NOTE(review): presumably consumed by the setup.py template - confirm
+    set(SETUP_LOG_FILE "setup.py.client.log")  # NOTE(review): not read anywhere in the lines shown here - confirm it is still used
 endif()

 if (NOT CLIENT_ONLY)
-file(GLOB_RECURSE SERVING_SERVER_PY_FILES paddle_serving_server/*.py)
-set(PY_FILES ${SERVING_SERVER_PY_FILES})
-SET(PACKAGE_NAME "serving_server")
-set(SETUP_LOG_FILE "setup.py.server.log")
+    # Server build: collect the Python sources of the server package.
+    # The GPU build ships a separate package directory.
+    if (NOT WITH_GPU)
+        file(GLOB_RECURSE SERVING_SERVER_PY_FILES paddle_serving_server/*.py)
+    else()
+        file(GLOB_RECURSE SERVING_SERVER_PY_FILES paddle_serving_server_gpu/*.py)
+    endif()
+    # The settings below apply to both the CPU and the GPU server build, so
+    # they sit at the level of the enclosing if(), not inside the else() branch.
+    # NOTE(review): PACKAGE_NAME stays "serving_server" for the GPU build as
+    # well - confirm that is intended for the GPU wheel.
+    set(PY_FILES ${SERVING_SERVER_PY_FILES})
+    set(PACKAGE_NAME "serving_server")
+    set(SETUP_LOG_FILE "setup.py.server.log")
 endif()

 if (CLIENT_ONLY)
@@ -18,8 +22,13 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.client.in
 endif()

 if (NOT CLIENT_ONLY)
-configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.server.in
+    if (NOT WITH_GPU)  # CPU server build: generate setup.py from the CPU template
+        configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.server.in
             ${CMAKE_CURRENT_BINARY_DIR}/setup.py)
+    else()  # GPU server build: same output path, generated from the GPU template
+        configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.server_gpu.in
+            ${CMAKE_CURRENT_BINARY_DIR}/setup.py)
+    endif()  # NOTE(review): setup.py.server_gpu.in is not among the files shown in this change - confirm it exists
 endif()

 set(SERVING_CLIENT_CORE ${PADDLE_SERVING_BINARY_DIR}/core/general-client/serving_client.so)
@@ -37,12 +46,22 @@ add_custom_target(paddle_python ALL DEPENDS serving_client ${PADDLE_SERVING_BINA
 endif()

 if (NOT CLIENT_ONLY)
-add_custom_command(
+    if(NOT WITH_GPU)  # CPU server wheel: copy paddle_serving_server/ next to setup.py, then run bdist_wheel
+        add_custom_command(
             OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
             COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server/ ${PADDLE_SERVING_BINARY_DIR}/python/
             COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
             DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
-add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
+        add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)  # ALL: the wheel is built as part of the default target
+    else()  # GPU server wheel: same steps, but copies paddle_serving_server_gpu/ instead
+        add_custom_command(
+            OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
+            COMMAND cp -r
+            ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server_gpu/ ${PADDLE_SERVING_BINARY_DIR}/python/
+            COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
+            DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
+        add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)  # same target name as the CPU branch; only one branch is ever configured
+    endif()  # NOTE(review): no command shown here creates .timestamp, so the rule presumably reruns on every build - confirm
 endif()

 set(SERVING_CLIENT_PYTHON_PACKAGE_DIR ${CMAKE_CURRENT_BINARY_DIR}/dist/)

--- a/python/paddle_serving_server_gpu/__init__.py
+++ b/python/paddle_serving_server_gpu/__init__.py
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+from .proto import server_configure_pb2 as server_sdk
+from .proto import general_model_config_pb2 as m_config
+import google.protobuf.text_format
+import tarfile
+import paddle_serving_server as paddle_serving_server
+from version import serving_server_version
+
+
+class OpMaker(object):
+    """Creates ``DAGNode`` messages for the ops listed in ``op_dict``."""
+
+    def __init__(self):
+        # Maps the short op name accepted by create() to the node type
+        # string stored on the generated DAGNode.
+        self.op_dict = {
+            "general_infer": "GeneralInferOp",
+            "general_reader": "GeneralReaderOp",
+            "general_single_kv": "GeneralSingleKVOp",
+            "general_dist_kv": "GeneralDistKVOp"
+        }
+
+    # currently, inputs and outputs are not used
+    # when we have OpGraphMaker, inputs and outputs are necessary
+    def create(self, name, inputs=[], outputs=[]):
+        """Return a DAGNode named ``<name>_op`` whose type is looked up in
+        ``op_dict``; raise ``Exception`` when ``name`` is not a known op.
+        """
+        if name in self.op_dict:
+            dag_node = server_sdk.DAGNode()
+            dag_node.name = "{}_op".format(name)
+            dag_node.type = self.op_dict[name]
+            return dag_node
+        raise Exception("Op name {} is not supported right now".format(
+            name))
+
+
+class OpSeqMaker(object):
+    """Accumulates ops into a single workflow named ``workflow1`` of type
+    ``Sequence``, chaining each op to the one added before it.
+    """
+
+    def __init__(self):
+        workflow = server_sdk.Workflow()
+        workflow.name = "workflow1"
+        workflow.workflow_type = "Sequence"
+        self.workflow = workflow
+
+    def add_op(self, node):
+        """Append ``node`` to the workflow.
+
+        Unless it is the first node, it receives an "RO" dependency on the
+        node that was appended last.
+        """
+        existing_nodes = self.workflow.nodes
+        if len(existing_nodes) > 0:
+            previous = server_sdk.DAGNodeDependency()
+            previous.name = existing_nodes[-1].name
+            previous.mode = "RO"
+            node.dependencies.extend([previous])
+        existing_nodes.extend([node])
+
+    def get_op_sequence(self):
+        """Return a new ``WorkflowConf`` that contains the workflow built so far."""
+        conf = server_sdk.WorkflowConf()
+        conf.workflows.extend([self.workflow])
+        return conf
+
+
+class Server(object):
+    """Writes the configuration files for, downloads and launches the GPU
+    serving binary.
+
+    Call order implied by the attributes each step reads:
+    ``set_op_sequence`` and ``load_model_config`` first, then
+    ``prepare_server``, and finally ``run_server``.
+    """
+
+    def __init__(self):
+        self.server_handle_ = None
+        self.infer_service_conf = None
+        self.model_toolkit_conf = None
+        self.resource_conf = None
+        self.engine = None
+        self.memory_optimization = False
+        self.model_conf = None
+        # File names of the generated configuration files inside workdir.
+        self.workflow_fn = "workflow.prototxt"
+        self.resource_fn = "resource.prototxt"
+        self.infer_service_fn = "infer_service.prototxt"
+        self.model_toolkit_fn = "model_toolkit.prototxt"
+        self.general_model_config_fn = "general_model.prototxt"
+        self.workdir = ""
+        self.max_concurrency = 0
+        self.num_threads = 0
+        self.port = 8080
+        self.reload_interval_s = 10
+        # Directory of this (GPU) package. The downloaded GPU binaries are
+        # stored here, so the location must not depend on the CPU package
+        # being installed. An absolute path is used because download_bin()
+        # changes the working directory.
+        self.module_path = os.path.dirname(os.path.abspath(__file__))
+        # Working directory at construction time; download_bin() returns to it.
+        self.cur_path = os.getcwd()
+
+    def set_max_concurrency(self, concurrency):
+        """Set the value passed to the binary as ``-max_concurrency``."""
+        self.max_concurrency = concurrency
+
+    def set_num_threads(self, threads):
+        """Set the value passed to the binary as ``-num_threads``."""
+        self.num_threads = threads
+
+    def set_port(self, port):
+        """Set the value passed to the binary as ``-port``."""
+        self.port = port
+
+    def set_reload_interval(self, interval):
+        """Set the value passed to the binary as ``-reload_interval_s``."""
+        self.reload_interval_s = interval
+
+    def set_op_sequence(self, op_seq):
+        """Store the workflow configuration that prepare_server() writes out.
+
+        Must be called before prepare_server(); ``workflow_conf`` does not
+        exist on the instance until then.
+        """
+        self.workflow_conf = op_seq
+
+    def set_memory_optimize(self, flag=False):
+        """Enable or disable memory optimization for the engine."""
+        self.memory_optimization = flag
+
+    def _prepare_engine(self, model_config_path, device):
+        """Fill in the engine description and add it to the model toolkit conf.
+
+        ``device`` selects the engine type: "cpu" or "gpu". Any other value
+        leaves ``engine.type`` untouched.
+        """
+        if self.model_toolkit_conf is None:
+            self.model_toolkit_conf = server_sdk.ModelToolkitConf()
+
+        if self.engine is None:
+            self.engine = server_sdk.EngineDesc()
+
+        self.model_config_path = model_config_path
+        self.engine.name = "general_model"
+        self.engine.reloadable_meta = model_config_path + "/fluid_time_file"
+        # Create the file (or refresh its timestamp) that is configured as
+        # the reload marker below.
+        os.system("touch {}".format(self.engine.reloadable_meta))
+        self.engine.reloadable_type = "timestamp_ne"
+        self.engine.runtime_thread_num = 0
+        self.engine.batch_infer_size = 0
+        self.engine.enable_batch_align = 0
+        self.engine.model_data_path = model_config_path
+        self.engine.enable_memory_optimization = self.memory_optimization
+        self.engine.static_optimization = False
+        self.engine.force_update_static_cache = False
+
+        if device == "cpu":
+            self.engine.type = "FLUID_CPU_ANALYSIS_DIR"
+        elif device == "gpu":
+            self.engine.type = "FLUID_GPU_ANALYSIS_DIR"
+
+        self.model_toolkit_conf.engines.extend([self.engine])
+
+    def _prepare_infer_service(self, port):
+        """Create the infer service configuration once; later calls are no-ops."""
+        if self.infer_service_conf is None:
+            self.infer_service_conf = server_sdk.InferServiceConf()
+            self.infer_service_conf.port = port
+            infer_service = server_sdk.InferService()
+            infer_service.name = "GeneralModelService"
+            # Must match the workflow name used by OpSeqMaker.
+            infer_service.workflows.extend(["workflow1"])
+            self.infer_service_conf.services.extend([infer_service])
+
+    def _prepare_resource(self, workdir):
+        """Write the general model config into ``workdir`` and create the
+        resource configuration that points at it. Runs only once.
+        """
+        if self.resource_conf is None:
+            with open("{}/{}".format(workdir, self.general_model_config_fn),
+                      "w") as fout:
+                fout.write(str(self.model_conf))
+            self.resource_conf = server_sdk.ResourceConf()
+            self.resource_conf.model_toolkit_path = workdir
+            self.resource_conf.model_toolkit_file = self.model_toolkit_fn
+            self.resource_conf.general_model_path = workdir
+            self.resource_conf.general_model_file = self.general_model_config_fn
+
+    def _write_pb_str(self, filepath, pb_obj):
+        """Write ``str(pb_obj)`` to ``filepath``, replacing any existing file."""
+        with open(filepath, "w") as fout:
+            fout.write(str(pb_obj))
+
+    def load_model_config(self, path):
+        """Parse ``<path>/serving_server_conf.prototxt`` into ``model_conf``
+        and remember ``path`` as the model directory.
+        """
+        self.model_config_path = path
+        self.model_conf = m_config.GeneralModelConfig()
+        # The context manager closes the file even if parsing fails.
+        with open("{}/serving_server_conf.prototxt".format(path), 'r') as f:
+            self.model_conf = google.protobuf.text_format.Merge(
+                str(f.read()), self.model_conf)
+        # check config here
+        # print config here
+
+    def download_bin(self):
+        """Make sure the GPU serving binaries are present and set ``bin_path``.
+
+        When the versioned folder is missing from the package directory, the
+        archive is fetched with ``wget`` and unpacked there. A failed
+        download is only reported by a printed message; a failed extraction
+        removes a partially written executable and re-raises the error. The
+        working directory is always switched back to ``cur_path``.
+        """
+        device_version = "serving-gpu-"
+        folder_name = device_version + serving_server_version
+        tar_name = folder_name + ".tar.gz"
+        bin_url = "https://paddle-serving.bj.bcebos.com/bin/" + tar_name
+        self.server_path = os.path.join(self.module_path, folder_name)
+        exe_path = os.path.join(self.server_path, "serving")
+        # wget and tarfile operate relative to the working directory.
+        os.chdir(self.module_path)
+        try:
+            if not os.path.exists(self.server_path):
+                print('Frist time run, downloading PaddleServing components ...')
+                r = os.system('wget ' + bin_url + ' --no-check-certificate')
+                if r != 0:
+                    print('Download failed')
+                    if os.path.exists(tar_name):
+                        os.remove(tar_name)
+                else:
+                    try:
+                        print('Decompressing files ..')
+                        with tarfile.open(tar_name) as tar:
+                            tar.extractall()
+                    except:
+                        # Do not leave a half-extracted executable behind,
+                        # then surface the real error to the caller.
+                        if os.path.exists(exe_path):
+                            os.remove(exe_path)
+                        raise
+                    finally:
+                        os.remove(tar_name)
+        finally:
+            os.chdir(self.cur_path)
+        self.bin_path = self.server_path + "/serving"
+
+    def prepare_server(self, workdir=None, port=9292, device="cpu"):
+        """Create ``workdir`` (default "./tmp") and write all configuration
+        files the serving binary reads into it.
+
+        Requires load_model_config() and set_op_sequence() to have been
+        called before.
+        """
+        if workdir is None:
+            workdir = "./tmp"
+        os.system("mkdir {}".format(workdir))
+        os.system("touch {}/fluid_time_file".format(workdir))
+
+        self._prepare_resource(workdir)
+        self._prepare_engine(self.model_config_path, device)
+        self._prepare_infer_service(port)
+        self.workdir = workdir
+
+        infer_service_fn = "{}/{}".format(workdir, self.infer_service_fn)
+        workflow_fn = "{}/{}".format(workdir, self.workflow_fn)
+        resource_fn = "{}/{}".format(workdir, self.resource_fn)
+        model_toolkit_fn = "{}/{}".format(workdir, self.model_toolkit_fn)
+
+        self._write_pb_str(infer_service_fn, self.infer_service_conf)
+        self._write_pb_str(workflow_fn, self.workflow_conf)
+        self._write_pb_str(resource_fn, self.resource_conf)
+        self._write_pb_str(model_toolkit_fn, self.model_toolkit_conf)
+
+    def run_server(self):
+        """Fetch the binaries if necessary and run the serving binary through
+        the shell; returns once that command has finished.
+        """
+        # just run server with system command
+        # currently we do not load cube
+        self.download_bin()
+        command = "{} " \
+                  "-enable_model_toolkit " \
+                  "-inferservice_path {} " \
+                  "-inferservice_file {} " \
+                  "-max_concurrency {} " \
+                  "-num_threads {} " \
+                  "-port {} " \
+                  "-reload_interval_s {} " \
+                  "-resource_path {} " \
+                  "-resource_file {} " \
+                  "-workflow_path {} " \
+                  "-workflow_file {} ".format(
+                      self.bin_path,
+                      self.workdir,
+                      self.infer_service_fn,
+                      self.max_concurrency,
+                      self.num_threads,
+                      self.port,
+                      self.reload_interval_s,
+                      self.workdir,
+                      self.resource_fn,
+                      self.workdir,
+                      self.workflow_fn)
+        os.system(command)
--- a/python/paddle_serving_server_gpu/version.py
+++ b/python/paddle_serving_server_gpu/version.py
+# Copyright (c) 2020  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+""" Paddle Serving Client version string """  # NOTE(review): this module also defines the server and proto versions below, not only the client one
+serving_client_version = "0.1.0"  # NOTE(review): not referenced by the GPU server package code shown in this change - confirm it is needed here
+serving_server_version = "0.1.0"  # appended to "serving-gpu-" to name the binary folder and archive used by Server.download_bin
+module_proto_version = "0.1.0"  # NOTE(review): presumably the version of the bundled proto definitions - confirm