Merge branch 'develop' into add_go

b7694543 · Dong Daxiang · GitHub · e7cdd6c9 · 24e6a0ab · b7694543
10 changed file
--- a/Dockerfile
+++ b/Dockerfile
 FROM centos:centos6.10
-RUN export http_proxy=http://172.19.56.199:3128 \
-    && export https_proxy=http://172.19.56.199:3128 \
-    && yum -y install wget \
+RUN yum -y install wget \
    && wget http://people.centos.org/tru/devtools-2/devtools-2.repo -O /etc/yum.repos.d/devtoolset-2.repo \
    && yum -y install devtoolset-2-gcc devtoolset-2-gcc-c++ devtoolset-2-binutils \
    && source /opt/rh/devtoolset-2/enable \

--- a/cmake/paddlepaddle.cmake
+++ b/cmake/paddlepaddle.cmake
@@ -49,7 +49,7 @@ endif()

 SET(PADDLE_LIB_PATH "http://paddle-inference-lib.bj.bcebos.com/${PADDLE_LIB_VERSION}/fluid_inference.tgz")
 MESSAGE(STATUS "PADDLE_LIB_PATH=${PADDLE_LIB_PATH}")
-
+if (WITH_GPU OR WITH_MKLML)
 ExternalProject_Add(
    "extern_paddle"
    ${EXTERNAL_PROJECT_LOG_ARGS}
@@ -62,11 +62,24 @@ ExternalProject_Add(
    INSTALL_COMMAND
        ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/paddle/include ${PADDLE_INSTALL_DIR}/include &&
        ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/paddle/lib ${PADDLE_INSTALL_DIR}/lib &&
-        ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/third_party ${PADDLE_INSTALL_DIR}/third_party 
+        ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/third_party ${PADDLE_INSTALL_DIR}/third_party &&
+        ${CMAKE_COMMAND} -E copy ${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib/libmkldnn.so.0 ${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib/libmkldnn.so 
+)
+else()
+ExternalProject_Add(
+    "extern_paddle"
+    ${EXTERNAL_PROJECT_LOG_ARGS}
+    URL                 "${PADDLE_LIB_PATH}"
+    PREFIX              "${PADDLE_SOURCES_DIR}"
+    DOWNLOAD_DIR        "${PADDLE_DOWNLOAD_DIR}"
+    CONFIGURE_COMMAND   ""
+    BUILD_COMMAND       ""
+    UPDATE_COMMAND      ""
+    INSTALL_COMMAND
+        ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/paddle/include ${PADDLE_INSTALL_DIR}/include &&
+        ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/paddle/lib ${PADDLE_INSTALL_DIR}/lib &&
+        ${CMAKE_COMMAND} -E copy_directory ${PADDLE_DOWNLOAD_DIR}/third_party ${PADDLE_INSTALL_DIR}/third_party
 )
-
-if (WITH_MKLML)
-   file(COPY ${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib/libmkldnn.so.0 DESTINATION ${PADDLE_INSTALL_DIR}/third_party/install/mkldnn/lib/libmkldnn.so FOLLOW_SYMLINK_CHAIN)
 endif()

 INCLUDE_DIRECTORIES(${PADDLE_INCLUDE_DIR})

--- a/core/configure/CMakeLists.txt
+++ b/core/configure/CMakeLists.txt
@@ -55,6 +55,7 @@ if (NOT CLIENT_ONLY)
 py_proto_compile(server_config_py_proto SRCS proto/server_configure.proto)
 add_custom_target(server_config_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py)
 add_dependencies(server_config_py_proto server_config_py_proto_init)
+if (NOT WITH_GPU)
 add_custom_command(TARGET server_config_py_proto POST_BUILD
 		COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
 		COMMAND cp *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
@@ -66,6 +67,24 @@ add_custom_command(TARGET general_model_config_py_proto POST_BUILD
 		COMMAND cp *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server/proto
 		COMMENT "Copy generated general_model_config proto file into directory paddle_serving_server/proto."
 		WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
+else()
+# GPU build: after server_config_py_proto is built, copy the generated *.py
+# files found in this directory's binary dir into the
+# paddle_serving_server_gpu/proto directory of the build tree.
+add_custom_command(TARGET server_config_py_proto POST_BUILD
+		COMMAND ${CMAKE_COMMAND} -E make_directory
+        ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/proto
+		COMMAND cp *.py
+        ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/proto
+		COMMENT "Copy generated python proto into directory
+        paddle_serving_server_gpu/proto."
+		WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})

+# GPU build: once general_model_config_py_proto is built, publish its
+# generated *.py files into the paddle_serving_server_gpu/proto directory.
+add_custom_command(
+    TARGET general_model_config_py_proto POST_BUILD
+    COMMAND ${CMAKE_COMMAND} -E make_directory ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/proto
+    COMMAND cp *.py ${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/proto
+    COMMENT "Copy generated general_model_config proto file into directory
+        paddle_serving_server_gpu/proto."
+    WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
+endif()
 endif()

--- a/core/general-server/CMakeLists.txt
+++ b/core/general-server/CMakeLists.txt
@@ -31,7 +31,7 @@ if(WITH_GPU)
    target_link_libraries(serving ${CUDA_LIBRARIES})
 endif()

-if(WITH_MKL)
+if(WITH_MKL OR WITH_GPU)
    target_link_libraries(serving -liomp5 -lmklml_intel -lmkldnn -lpthread -lcrypto -lm -lrt -lssl -ldl -lz -lbz2)
 else()
    target_link_libraries(serving openblas -lpthread -lcrypto -lm -lrt -lssl -ldl -lz -lbz2)

--- a/python/CMakeLists.txt
+++ b/python/CMakeLists.txt
 if (CLIENT_ONLY)
-file(GLOB_RECURSE SERVING_CLIENT_PY_FILES paddle_serving_client/*.py)
-set(PY_FILES ${SERVING_CLIENT_PY_FILES})
-SET(PACKAGE_NAME "serving_client")
-set(SETUP_LOG_FILE "setup.py.client.log")
+    file(GLOB_RECURSE SERVING_CLIENT_PY_FILES paddle_serving_client/*.py)
+    set(PY_FILES ${SERVING_CLIENT_PY_FILES})
+    SET(PACKAGE_NAME "serving_client")
+    set(SETUP_LOG_FILE "setup.py.client.log")
 endif()

 if (NOT CLIENT_ONLY)
-file(GLOB_RECURSE SERVING_SERVER_PY_FILES paddle_serving_server/*.py)
-set(PY_FILES ${SERVING_SERVER_PY_FILES})
-SET(PACKAGE_NAME "serving_server")
-set(SETUP_LOG_FILE "setup.py.server.log")
+    # Collect the Python sources of whichever server package is being built,
+    # then set the packaging variables shared by both flavours.
+    if (WITH_GPU)
+        file(GLOB_RECURSE SERVING_SERVER_PY_FILES paddle_serving_server_gpu/*.py)
+    else()
+        file(GLOB_RECURSE SERVING_SERVER_PY_FILES paddle_serving_server/*.py)
+    endif()
+    set(PY_FILES ${SERVING_SERVER_PY_FILES})
+    set(PACKAGE_NAME "serving_server")
+    set(SETUP_LOG_FILE "setup.py.server.log")
 endif()

 if (CLIENT_ONLY)
@@ -18,8 +22,13 @@ configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.client.in
 endif()

 if (NOT CLIENT_ONLY)
-configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.server.in
-    ${CMAKE_CURRENT_BINARY_DIR}/setup.py)
+    # Generate setup.py in the binary dir from the template that matches the
+    # server flavour being built.
+    if (WITH_GPU)
+        configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.server_gpu.in
+            ${CMAKE_CURRENT_BINARY_DIR}/setup.py)
+    else()
+        configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.server.in
+            ${CMAKE_CURRENT_BINARY_DIR}/setup.py)
+    endif()
 endif()

 set(SERVING_CLIENT_CORE ${PADDLE_SERVING_BINARY_DIR}/core/general-client/serving_client.so)
@@ -37,12 +46,22 @@ add_custom_target(paddle_python ALL DEPENDS serving_client ${PADDLE_SERVING_BINA
 endif()

 if (NOT CLIENT_ONLY)
-add_custom_command(
-   	OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
-	COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server/ ${PADDLE_SERVING_BINARY_DIR}/python/
-	COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
-	DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
-add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
+    # Copy the selected server package into the build tree and build its
+    # wheel; only the copied source directory differs between flavours.
+    if(WITH_GPU)
+        add_custom_command(
+            OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
+            COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server_gpu/ ${PADDLE_SERVING_BINARY_DIR}/python/
+            COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
+            DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
+    else()
+        add_custom_command(
+            OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp
+            COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_server/ ${PADDLE_SERVING_BINARY_DIR}/python/
+            COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel
+            DEPENDS ${SERVING_SERVER_CORE} server_config_py_proto ${PY_FILES})
+    endif()
+    # The driving target is identical for both flavours.
+    add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp)
 endif()

 set(SERVING_CLIENT_PYTHON_PACKAGE_DIR ${CMAKE_CURRENT_BINARY_DIR}/dist/)

--- a/python/examples/imdb/test_server.py
+++ b/python/examples/imdb/test_server.py
@@ -14,6 +14,8 @@ op_seq_maker.add_op(general_infer_op)

 server = Server()
 server.set_op_sequence(op_seq_maker.get_op_sequence())
+server.set_num_threads(12)
 server.load_model_config(sys.argv[1])
-server.prepare_server(workdir="work_dir1", port=9292, device="cpu")
+port = int(sys.argv[2])
+server.prepare_server(workdir="work_dir1", port=port, device="cpu")
 server.run_server()
--- a/python/paddle_serving_server/__init__.py
+++ b/python/paddle_serving_server/__init__.py
@@ -16,6 +16,10 @@ import os
 from .proto import server_configure_pb2 as server_sdk
 from .proto import general_model_config_pb2 as m_config
 import google.protobuf.text_format
+import tarfile
+import paddle_serving_server as paddle_serving_server
+from version import serving_server_version
+

 class OpMaker(object):
    def __init__(self):
@@ -30,12 +34,14 @@ class OpMaker(object):
    # when we have OpGraphMaker, inputs and outputs are necessary
    def create(self, name, inputs=[], outputs=[]):
        if name not in self.op_dict:
-            raise Exception("Op name {} is not supported right now".format(name))
+            raise Exception("Op name {} is not supported right now".format(
+                name))
        node = server_sdk.DAGNode()
        node.name = "{}_op".format(name)
        node.type = self.op_dict[name]
        return node

+
 class OpSeqMaker(object):
    def __init__(self):
        self.workflow = server_sdk.Workflow()
@@ -55,6 +61,7 @@ class OpSeqMaker(object):
        workflow_conf.workflows.extend([self.workflow])
        return workflow_conf

+
 class Server(object):
    def __init__(self):
        self.server_handle_ = None
@@ -74,6 +81,8 @@ class Server(object):
        self.num_threads = 0
        self.port = 8080
        self.reload_interval_s = 10
+        self.module_path = os.path.dirname(paddle_serving_server.__file__)
+        self.cur_path = os.getcwd()

    def set_max_concurrency(self, concurrency):
        self.max_concurrency = concurrency
@@ -131,7 +140,8 @@ class Server(object):

    def _prepare_resource(self, workdir):
        if self.resource_conf == None:
-            with open("{}/{}".format(workdir, self.general_model_config_fn), "w") as fout:
+            with open("{}/{}".format(workdir, self.general_model_config_fn),
+                      "w") as fout:
                fout.write(str(self.model_conf))
            self.resource_conf = server_sdk.ResourceConf()
            self.resource_conf.model_toolkit_path = workdir
@@ -152,6 +162,54 @@ class Server(object):
        # check config here
        # print config here

+    def get_device_version(self):
+        """Pick the name prefix of the prebuilt CPU serving package.
+
+        Probes the host with two shell commands: a grep for "avx" in
+        /proc/cpuinfo and ``which mkl``.  A zero exit status counts as
+        "present".
+
+        Returns:
+            "serving-cpu-avx-mkl-" when both probes succeed,
+            "serving-cpu-avx-openblas-" when only the AVX probe succeeds,
+            "serving-cpu-noavx-openblas-" otherwise.
+        """
+        avx_flag = os.system(
+            "cat /proc/cpuinfo | grep avx > /dev/null 2>&1") == 0
+        # Silenced like the AVX probe so nothing is echoed to the console;
+        # only the exit status is used.
+        mkl_flag = os.system("which mkl > /dev/null 2>&1") == 0
+        if not avx_flag:
+            return "serving-cpu-noavx-openblas-"
+        if mkl_flag:
+            return "serving-cpu-avx-mkl-"
+        return "serving-cpu-avx-openblas-"
+
+    def download_bin(self):
+        """Make sure the prebuilt CPU serving package is unpacked inside
+        self.module_path and record where it lives.
+
+        Sets self.server_path (the package directory) and self.bin_path (the
+        "serving" executable inside it).  When the directory is missing, the
+        tarball is fetched with wget and unpacked in self.module_path.
+        Download and unpack failures are reported on stdout and otherwise
+        ignored (best effort), so bin_path may then name a missing file.
+        The working directory is always switched back to self.cur_path.
+        """
+        # Local import: only needed to clean up after a failed extraction.
+        import shutil
+
+        os.chdir(self.module_path)
+        try:
+            device_version = self.get_device_version()
+            folder_name = device_version + serving_server_version
+            tar_name = folder_name + ".tar.gz"
+            bin_url = "https://paddle-serving.bj.bcebos.com/bin/" + tar_name
+            self.server_path = os.path.join(self.module_path, folder_name)
+            if not os.path.exists(self.server_path):
+                print('Frist time run, downloading PaddleServing components ...')
+                r = os.system('wget ' + bin_url + ' --no-check-certificate')
+                if r != 0:
+                    print('Download failed')
+                    if os.path.exists(tar_name):
+                        os.remove(tar_name)
+                else:
+                    try:
+                        print('Decompressing files ..')
+                        tar = tarfile.open(tar_name)
+                        try:
+                            tar.extractall()
+                        finally:
+                            tar.close()
+                    except Exception:
+                        # A half-extracted directory would make the next run
+                        # skip the download, so remove it.
+                        print('Decompress failed')
+                        if os.path.exists(self.server_path):
+                            shutil.rmtree(self.server_path)
+                    finally:
+                        os.remove(tar_name)
+        finally:
+            # Restore the caller's directory even if something above raised.
+            os.chdir(self.cur_path)
+        self.bin_path = self.server_path + "/serving"
+
    def prepare_server(self, workdir=None, port=9292, device="cpu"):
        if workdir == None:
            workdir = "./tmp"
@@ -178,8 +236,9 @@ class Server(object):
    def run_server(self):
        # just run server with system command
        # currently we do not load cube
-        command = "/home/users/dongdaxiang/github_develop/Serving/build_server/core/general-server/serving" \
-                  " -enable_model_toolkit " \
+        self.download_bin()
+        command = "{} " \
+                  "-enable_model_toolkit " \
                  "-inferservice_path {} " \
                  "-inferservice_file {} " \
                  "-max_concurrency {} " \
@@ -189,7 +248,9 @@ class Server(object):
                  "-resource_path {} " \
                  "-resource_file {} " \
                  "-workflow_path {} " \
-                  "-workflow_file {} ".format(
+                  "-workflow_file {} " \
+                  "-bthread_concurrency {} ".format(
+                      self.bin_path,
                      self.workdir,
                      self.infer_service_fn,
                      self.max_concurrency,
@@ -198,8 +259,7 @@ class Server(object):
                      self.reload_interval_s,
                      self.workdir,
                      self.resource_fn,
-                      self.workdir, 
-                      self.workflow_fn)
+                      self.workdir,
+                      self.workflow_fn,
+                      self.num_threads,)
        os.system(command)
-
-
--- a/python/paddle_serving_server_gpu/__init__.py
+++ b/python/paddle_serving_server_gpu/__init__.py
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import os
+from .proto import server_configure_pb2 as server_sdk
+from .proto import general_model_config_pb2 as m_config
+import google.protobuf.text_format
+import tarfile
+# Bind the name to THIS package rather than the separately distributed
+# "paddle_serving_server" package, so that code reading
+# paddle_serving_server.__file__ resolves to this package's directory.
+import paddle_serving_server_gpu as paddle_serving_server
+# Explicit relative import: the implicit form "from version import ..." is
+# not resolved against the enclosing package on Python 3.
+from .version import serving_server_version
+
+
+class OpMaker(object):
+    """Creates DAG nodes for the ops listed in ``op_dict``."""
+
+    def __init__(self):
+        # Short op name accepted by create() -> op type string written to
+        # the generated node.
+        self.op_dict = {
+            "general_infer": "GeneralInferOp",
+            "general_reader": "GeneralReaderOp",
+            "general_single_kv": "GeneralSingleKVOp",
+            "general_dist_kv": "GeneralDistKVOp"
+        }
+
+    # currently, inputs and outputs are not used
+    # when we have OpGraphMaker, inputs and outputs are necessary
+    def create(self, name, inputs=[], outputs=[]):
+        """Return a DAGNode named "<name>_op" whose type is looked up in
+        op_dict; raise Exception for a name that is not registered."""
+        op_type = self.op_dict.get(name)
+        if op_type is None:
+            raise Exception("Op name {} is not supported right now".format(
+                name))
+        dag_node = server_sdk.DAGNode()
+        dag_node.name = "{}_op".format(name)
+        dag_node.type = op_type
+        return dag_node
+
+
+class OpSeqMaker(object):
+    """Accumulates ops into a single workflow named "workflow1" of type
+    "Sequence", chaining each new op to the one added before it."""
+
+    def __init__(self):
+        wf = server_sdk.Workflow()
+        wf.name = "workflow1"
+        wf.workflow_type = "Sequence"
+        self.workflow = wf
+
+    def add_op(self, node):
+        """Append ``node``; unless it is the first op, give it an "RO"
+        dependency on the previously appended node."""
+        if len(self.workflow.nodes) >= 1:
+            previous = self.workflow.nodes[-1]
+            link = server_sdk.DAGNodeDependency()
+            link.name = previous.name
+            link.mode = "RO"
+            node.dependencies.extend([link])
+        self.workflow.nodes.extend([node])
+
+    def get_op_sequence(self):
+        """Return a WorkflowConf that contains the accumulated workflow."""
+        conf = server_sdk.WorkflowConf()
+        conf.workflows.extend([self.workflow])
+        return conf
+
+
+class Server(object):
+    """Writes the prototxt configuration files for a serving instance and
+    launches the prebuilt GPU "serving" binary with them.
+
+    prepare_server() reads state that is only set by set_op_sequence() and
+    load_model_config(), so call those first; run_server() in turn uses the
+    workdir recorded by prepare_server().
+    """
+
+    def __init__(self):
+        self.server_handle_ = None
+        self.infer_service_conf = None
+        self.model_toolkit_conf = None
+        self.resource_conf = None
+        self.engine = None
+        self.memory_optimization = False
+        self.model_conf = None
+        # File names of the configuration files written into the workdir.
+        self.workflow_fn = "workflow.prototxt"
+        self.resource_fn = "resource.prototxt"
+        self.infer_service_fn = "infer_service.prototxt"
+        self.model_toolkit_fn = "model_toolkit.prototxt"
+        self.general_model_config_fn = "general_model.prototxt"
+        self.workdir = ""
+        self.max_concurrency = 0
+        self.num_threads = 0
+        self.port = 8080
+        self.reload_interval_s = 10
+        # Directory of this package; the serving binary is downloaded and
+        # unpacked here.  Derived from this file so it does not depend on
+        # any other installed package.
+        self.module_path = os.path.dirname(os.path.abspath(__file__))
+        # Working directory at construction time; download_bin() returns
+        # to it after unpacking.
+        self.cur_path = os.getcwd()
+
+    def set_max_concurrency(self, concurrency):
+        """Value passed to the binary as -max_concurrency."""
+        self.max_concurrency = concurrency
+
+    def set_num_threads(self, threads):
+        """Value passed to the binary as -num_threads."""
+        self.num_threads = threads
+
+    def set_port(self, port):
+        """Value passed to the binary as -port."""
+        self.port = port
+
+    def set_reload_interval(self, interval):
+        """Value passed to the binary as -reload_interval_s."""
+        self.reload_interval_s = interval
+
+    def set_op_sequence(self, op_seq):
+        """Store the workflow configuration written out by prepare_server()."""
+        self.workflow_conf = op_seq
+
+    def set_memory_optimize(self, flag=False):
+        """Value copied to the engine's enable_memory_optimization field."""
+        self.memory_optimization = flag
+
+    def _prepare_engine(self, model_config_path, device):
+        """Fill in the engine description for the model stored under
+        ``model_config_path`` and append it to the model toolkit conf.
+
+        ``device`` selects the engine type: "cpu" or "gpu".  Any other value
+        leaves the type field unset.
+        """
+        if self.model_toolkit_conf == None:
+            self.model_toolkit_conf = server_sdk.ModelToolkitConf()
+
+        if self.engine == None:
+            self.engine = server_sdk.EngineDesc()
+
+        self.model_config_path = model_config_path
+        self.engine.name = "general_model"
+        self.engine.reloadable_meta = model_config_path + "/fluid_time_file"
+        # Create (or refresh the timestamp of) the reload marker file.
+        os.system("touch {}".format(self.engine.reloadable_meta))
+        self.engine.reloadable_type = "timestamp_ne"
+        self.engine.runtime_thread_num = 0
+        self.engine.batch_infer_size = 0
+        self.engine.enable_batch_align = 0
+        self.engine.model_data_path = model_config_path
+        self.engine.enable_memory_optimization = self.memory_optimization
+        self.engine.static_optimization = False
+        self.engine.force_update_static_cache = False
+
+        if device == "cpu":
+            self.engine.type = "FLUID_CPU_ANALYSIS_DIR"
+        elif device == "gpu":
+            self.engine.type = "FLUID_GPU_ANALYSIS_DIR"
+
+        self.model_toolkit_conf.engines.extend([self.engine])
+
+    def _prepare_infer_service(self, port):
+        """Create the infer-service conf (once) with a single
+        "GeneralModelService" bound to "workflow1" on ``port``."""
+        if self.infer_service_conf == None:
+            self.infer_service_conf = server_sdk.InferServiceConf()
+            self.infer_service_conf.port = port
+            infer_service = server_sdk.InferService()
+            infer_service.name = "GeneralModelService"
+            infer_service.workflows.extend(["workflow1"])
+            self.infer_service_conf.services.extend([infer_service])
+
+    def _prepare_resource(self, workdir):
+        """Write the general model config into ``workdir`` and create the
+        resource conf (once) pointing at it and at the model toolkit file."""
+        if self.resource_conf == None:
+            with open("{}/{}".format(workdir, self.general_model_config_fn),
+                      "w") as fout:
+                fout.write(str(self.model_conf))
+            self.resource_conf = server_sdk.ResourceConf()
+            self.resource_conf.model_toolkit_path = workdir
+            self.resource_conf.model_toolkit_file = self.model_toolkit_fn
+            self.resource_conf.general_model_path = workdir
+            self.resource_conf.general_model_file = self.general_model_config_fn
+
+    def _write_pb_str(self, filepath, pb_obj):
+        """Write ``str(pb_obj)`` to ``filepath``, replacing any old content."""
+        with open(filepath, "w") as fout:
+            fout.write(str(pb_obj))
+
+    def load_model_config(self, path):
+        """Parse ``<path>/serving_server_conf.prototxt`` into self.model_conf
+        and remember ``path`` as the model directory."""
+        self.model_config_path = path
+        self.model_conf = m_config.GeneralModelConfig()
+        # Context manager so the file handle is closed even if parsing fails.
+        with open("{}/serving_server_conf.prototxt".format(path), 'r') as f:
+            self.model_conf = google.protobuf.text_format.Merge(
+                str(f.read()), self.model_conf)
+        # check config here
+        # print config here
+
+    def download_bin(self):
+        """Make sure the prebuilt GPU serving package is unpacked inside
+        self.module_path and record where it lives.
+
+        Sets self.server_path (the package directory) and self.bin_path (the
+        "serving" executable inside it).  When the directory is missing, the
+        tarball is fetched with wget and unpacked in self.module_path.
+        Download and unpack failures are reported on stdout and otherwise
+        ignored (best effort), so bin_path may then name a missing file.
+        The working directory is always switched back to self.cur_path.
+        """
+        # Local import: only needed to clean up after a failed extraction.
+        import shutil
+
+        os.chdir(self.module_path)
+        try:
+            device_version = "serving-gpu-"
+            folder_name = device_version + serving_server_version
+            tar_name = folder_name + ".tar.gz"
+            bin_url = "https://paddle-serving.bj.bcebos.com/bin/" + tar_name
+            self.server_path = os.path.join(self.module_path, folder_name)
+            if not os.path.exists(self.server_path):
+                print('Frist time run, downloading PaddleServing components ...')
+                r = os.system('wget ' + bin_url + ' --no-check-certificate')
+                if r != 0:
+                    print('Download failed')
+                    if os.path.exists(tar_name):
+                        os.remove(tar_name)
+                else:
+                    try:
+                        print('Decompressing files ..')
+                        tar = tarfile.open(tar_name)
+                        try:
+                            tar.extractall()
+                        finally:
+                            tar.close()
+                    except Exception:
+                        # A half-extracted directory would make the next run
+                        # skip the download, so remove it.
+                        print('Decompress failed')
+                        if os.path.exists(self.server_path):
+                            shutil.rmtree(self.server_path)
+                    finally:
+                        os.remove(tar_name)
+        finally:
+            # Restore the caller's directory even if something above raised.
+            os.chdir(self.cur_path)
+        self.bin_path = self.server_path + "/serving"
+
+    def prepare_server(self, workdir=None, port=9292, device="cpu"):
+        """Create ``workdir`` (default "./tmp") and write the four prototxt
+        configuration files into it.
+
+        NOTE(review): ``port`` is stored in the infer-service conf only;
+        run_server() passes self.port (set via set_port) on the command
+        line.  Confirm which of the two the binary honours.
+        """
+        if workdir == None:
+            workdir = "./tmp"
+        os.system("mkdir {}".format(workdir))
+        os.system("touch {}/fluid_time_file".format(workdir))
+
+        self._prepare_resource(workdir)
+        self._prepare_engine(self.model_config_path, device)
+        self._prepare_infer_service(port)
+        self.workdir = workdir
+
+        infer_service_fn = "{}/{}".format(workdir, self.infer_service_fn)
+        workflow_fn = "{}/{}".format(workdir, self.workflow_fn)
+        resource_fn = "{}/{}".format(workdir, self.resource_fn)
+        model_toolkit_fn = "{}/{}".format(workdir, self.model_toolkit_fn)
+
+        self._write_pb_str(infer_service_fn, self.infer_service_conf)
+        self._write_pb_str(workflow_fn, self.workflow_conf)
+        self._write_pb_str(resource_fn, self.resource_conf)
+        self._write_pb_str(model_toolkit_fn, self.model_toolkit_conf)
+
+    def run_server(self):
+        """Fetch the serving binary if needed, then run it through the shell
+        with the configuration written by prepare_server().  The call
+        returns when the os.system() command returns."""
+        # just run server with system command
+        # currently we do not load cube
+        self.download_bin()
+        command = "{} " \
+                  "-enable_model_toolkit " \
+                  "-inferservice_path {} " \
+                  "-inferservice_file {} " \
+                  "-max_concurrency {} " \
+                  "-num_threads {} " \
+                  "-port {} " \
+                  "-reload_interval_s {} " \
+                  "-resource_path {} " \
+                  "-resource_file {} " \
+                  "-workflow_path {} " \
+                  "-workflow_file {} ".format(
+                      self.bin_path,
+                      self.workdir,
+                      self.infer_service_fn,
+                      self.max_concurrency,
+                      self.num_threads,
+                      self.port,
+                      self.reload_interval_s,
+                      self.workdir,
+                      self.resource_fn,
+                      self.workdir,
+                      self.workflow_fn)
+        os.system(command)
--- a/python/paddle_serving_server_gpu/version.py
+++ b/python/paddle_serving_server_gpu/version.py
+# Copyright (c) 2020  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+""" Paddle Serving Client version string """
+serving_client_version = "0.1.0"
+serving_server_version = "0.1.0"
+module_proto_version = "0.1.0"
--- a/python/setup.py.server_gpu.in
+++ b/python/setup.py.server_gpu.in
+#   Copyright (c) 2020  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+"""Setup for pip package."""
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+
+import platform
+
+from setuptools import setup, Distribution, Extension
+from setuptools import find_packages
+from setuptools import setup
+from paddle_serving_server_gpu.version import serving_server_version
+
+def python_version():
+    """Return the running interpreter's version as [major, minor, micro]."""
+    # sys.version_info is already numeric.  Splitting the string from
+    # platform.python_version() and calling int() on each part raises
+    # ValueError for versions such as "3.8.0+" or "3.9.0rc1".
+    import sys
+    return [int(v) for v in sys.version_info[:3]]
+
+max_version, mid_version, min_version = python_version()
+
+REQUIRED_PACKAGES = [
+    'six >= 1.10.0', 'protobuf >= 3.1.0','paddlepaddle'
+]
+
+packages=['paddle_serving_server_gpu',
+          'paddle_serving_server_gpu.proto']
+
+package_dir={'paddle_serving_server_gpu':
+             '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu',
+             'paddle_serving_server_gpu.proto':
+             '${PADDLE_SERVING_BINARY_DIR}/python/paddle_serving_server_gpu/proto'}
+
+setup(
+    name='paddle-serving-server-gpu',
+    version=serving_server_version.replace('-', ''),
+    description=
+    ('Paddle Serving Package for saved model with PaddlePaddle'),
+    url='https://github.com/PaddlePaddle/Serving',
+    author='PaddlePaddle Author',
+    author_email='guru4elephant@gmail.com',
+    install_requires=REQUIRED_PACKAGES,
+    packages=packages,
+    package_dir=package_dir,
+    # PyPI package information.
+    classifiers=[
+        'Development Status :: 4 - Beta',
+        'Intended Audience :: Developers',
+        'Intended Audience :: Education',
+        'Intended Audience :: Science/Research',
+        'License :: OSI Approved :: Apache Software License',
+        'Programming Language :: Python :: 2.7',
+        'Programming Language :: Python :: 3',
+        'Programming Language :: Python :: 3.4',
+        'Programming Language :: Python :: 3.5',
+        'Programming Language :: Python :: 3.6',
+        'Topic :: Scientific/Engineering',
+        'Topic :: Scientific/Engineering :: Mathematics',
+        'Topic :: Scientific/Engineering :: Artificial Intelligence',
+        'Topic :: Software Development',
+        'Topic :: Software Development :: Libraries',
+        'Topic :: Software Development :: Libraries :: Python Modules',
+    ],
+    license='Apache 2.0',
+    keywords=('paddle-serving serving-server deployment industrial easy-to-use'))