Merge branch 'jiawei/dist_kv_benchmark' of...

Merge branch 'jiawei/dist_kv_benchmark' of https://github.com/wangjiawei04/Serving into jiawei/dist_kv_benchmark test=serving

Merge branch 'jiawei/dist_kv_benchmark' of...
Merge branch 'jiawei/dist_kv_benchmark' of https://github.com/wangjiawei04/Serving into jiawei/dist_kv_benchmark test=serving
ea628a58 · wangjiawei04 · d1ecc172 · ed493b6b · ea628a58 · ea628a58
15 changed file
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -65,7 +65,7 @@ endif()

 if (NOT CLIENT_ONLY)
 include(external/jsoncpp)
-include(external/rocksdb)
+#include(external/rocksdb)
 endif()
 #include(external/gtest)


--- a/README.md
+++ b/README.md
@@ -53,8 +53,9 @@ Paddle Serving provides HTTP and RPC based service for users to access

 ### HTTP service

+Paddle Serving provides a built-in Python module called `paddle_serving_server.serve` that can start an RPC service or an HTTP service with a one-line command. If we specify the argument `--name uci`, we will have an HTTP service with a URL of `$IP:$PORT/uci/prediction`.
 ``` shell
-python -m paddle_serving_server.web_serve --model uci_housing_model --thread 10 --port 9292 --name uci
+python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9292 --name uci
 ```
 <center>

@@ -65,6 +66,7 @@ python -m paddle_serving_server.web_serve --model uci_housing_model --thread 10
 | `name` | str | `""` | Service name, can be used to generate HTTP request url |
 | `model` | str | `""` | Path of paddle model directory to be served |

+Here, we use `curl` to send an HTTP POST request to the service we just started. Users can also use any Python library to send an HTTP POST request, e.g., [requests](https://requests.readthedocs.io/en/master/).
 </center>

 ``` shell
@@ -73,6 +75,7 @@ curl -H "Content-Type:application/json" -X POST -d '{"x": [0.0137, -0.1136, 0.25

 ### RPC service

+A user can also start an RPC service with `paddle_serving_server.serve`. An RPC service is usually faster than an HTTP service, although the user needs to do some coding based on Paddle Serving's Python client API. Note that we do not specify `--name` here.
 ``` shell
 python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9292
 ```
@@ -91,17 +94,37 @@ print(fetch_map)

 ```

-<h2 align="center">Applications you can do with Paddle Serving</h2>
+<h2 align="center"> Pre-built services with Paddle Serving</h2>

-<center>
+<h3 align="center">Chinese Word Segmentation</h3>

-|      Model Name      	|              Resnet50              	|
-|:--------------------:	|:----------------------------------:	|
-|      Package URL     	|           To be released           	|
-|      Description     	| Get the representation of an image 	|
-| Training Data Source 	|              Imagenet              	|
+- **Description**: Chinese word segmentation HTTP service that can be deployed with a one-line command.
+
+- **Download**: 
+``` shell
+wget --no-check-certificate https://paddle-serving.bj.bcebos.com/lac/lac_model_jieba_web.tar.gz
+```
+- **Host web service**: 
+``` shell
+tar -xzf lac_model_jieba_web.tar.gz
+python lac_web_service.py jieba_server_model/ lac_workdir 9292
+```
+- **Request sample**: 
+``` shell
+curl -H "Content-Type:application/json" -X POST -d '{"words": "我爱北京天安门", "fetch":["word_seg"]}' http://127.0.0.1:9292/lac/prediction
+```
+- **Request result**: 
+``` shell
+{"word_seg":"我|爱|北京|天安门"}
+```
+
+
+<h3 align="center">Chinese Sentence To Vector</h3>
+
+<h3 align="center">Image To Vector</h3>
+
+<h3 align="center">Image Classification</h3>

-</center>


 <h2 align="center">Document</h2>
@@ -119,6 +142,7 @@ print(fetch_map)

 ### About Efficiency
 - [How profile serving efficiency?(Chinese)](https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/util)
+- [Benchmarks](doc/BENCHMARK.md)

 ### FAQ
 - [FAQ(Chinese)](doc/FAQ.md)

--- a/core/CMakeLists.txt
+++ b/core/CMakeLists.txt
@@ -14,7 +14,7 @@

 if(NOT CLIENT_ONLY)
 add_subdirectory(cube)
-add_subdirectory(kvdb)
+#add_subdirectory(kvdb)
 endif()
 add_subdirectory(configure)
 add_subdirectory(pdcodegen)

--- a/core/general-server/CMakeLists.txt
+++ b/core/general-server/CMakeLists.txt
-include_directories(SYSTEM  ${CMAKE_CURRENT_LIST_DIR}/../kvdb/include)
 include_directories(SYSTEM  ${CMAKE_CURRENT_LIST_DIR}/../../)
 include(op/CMakeLists.txt)
 include(proto/CMakeLists.txt)
@@ -25,8 +24,6 @@ target_link_libraries(serving pdserving)
 target_link_libraries(serving cube-api)
 target_link_libraries(serving utils)

-target_link_libraries(serving kvdb rocksdb)
-
 if(WITH_GPU)
    target_link_libraries(serving ${CUDA_LIBRARIES})
 endif()

--- a/core/predictor/CMakeLists.txt
+++ b/core/predictor/CMakeLists.txt
@@ -6,17 +6,16 @@ include(framework/CMakeLists.txt)
 include(tools/CMakeLists.txt)
 include(src/CMakeLists.txt)

-include_directories(SYSTEM  ${CMAKE_CURRENT_LIST_DIR}/../kvdb/include)

 add_library(pdserving ${pdserving_srcs})
 set_source_files_properties(
        ${pdserving_srcs}
        PROPERTIES
        COMPILE_FLAGS  "-Wno-strict-aliasing -Wno-unused-variable -Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
-add_dependencies(pdserving protobuf kvdb boost brpc leveldb pdcodegen configure)
+add_dependencies(pdserving protobuf boost brpc leveldb pdcodegen configure)

 target_link_libraries(pdserving
-        brpc protobuf boost leveldb configure kvdb -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
+        brpc protobuf boost leveldb configure -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)

 # install
 install(TARGETS pdserving

--- a/core/predictor/framework/resource.cpp
+++ b/core/predictor/framework/resource.cpp
@@ -39,8 +39,6 @@ DynamicResource::~DynamicResource() {}

 int DynamicResource::initialize() { return 0; }

-std::shared_ptr<RocksDBWrapper> Resource::getDB() { return db; }
-
 std::shared_ptr<PaddleGeneralModelConfig> Resource::get_general_model_config() {
  return _config;
 }
@@ -155,9 +153,6 @@ int Resource::initialize(const std::string& path, const std::string& file) {
    this->cube_config_fullpath = cube_config_fullpath;
  }

-  if (db.get() == nullptr) {
-    db = RocksDBWrapper::RocksDBWrapperFactory("kvdb");
-  }

  THREAD_SETSPECIFIC(_tls_bspec_key, NULL);
  return 0;

--- a/core/predictor/framework/resource.h
+++ b/core/predictor/framework/resource.h
@@ -18,7 +18,6 @@
 #include <string>
 #include <vector>
 #include "core/cube/cube-api/include/cube_api.h"
-#include "core/kvdb/include/kvdb/paddle_rocksdb.h"
 #include "core/predictor/common/inner_common.h"
 #include "core/predictor/framework/infer.h"
 #include "core/predictor/framework/memory.h"
@@ -101,8 +100,6 @@ class Resource {
  void print_general_model_config(
      const std::shared_ptr<PaddleGeneralModelConfig>& config);

-  std::shared_ptr<RocksDBWrapper> getDB();
-
  DynamicResource* get_dynamic_resource() {
    return reinterpret_cast<DynamicResource*>(
        THREAD_GETSPECIFIC(_tls_bspec_key));
@@ -110,7 +107,6 @@ class Resource {

 private:
  int thread_finalize() { return 0; }
-  std::shared_ptr<RocksDBWrapper> db;
  std::shared_ptr<PaddleGeneralModelConfig> _config;
  std::string cube_config_fullpath;


--- a/paddle_inference/inferencer-fluid-cpu/CMakeLists.txt
+++ b/paddle_inference/inferencer-fluid-cpu/CMakeLists.txt
@@ -2,7 +2,7 @@ FILE(GLOB fluid_cpu_engine_srcs ${CMAKE_CURRENT_LIST_DIR}/src/*.cpp)
 add_library(fluid_cpu_engine ${fluid_cpu_engine_srcs})
 target_include_directories(fluid_cpu_engine PUBLIC
        ${CMAKE_BINARY_DIR}/Paddle/fluid_install_dir/)
-add_dependencies(fluid_cpu_engine pdserving extern_paddle configure kvdb)
+add_dependencies(fluid_cpu_engine pdserving extern_paddle configure)
 target_link_libraries(fluid_cpu_engine pdserving paddle_fluid -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)

 install(TARGETS fluid_cpu_engine 

--- a/paddle_inference/inferencer-fluid-gpu/CMakeLists.txt
+++ b/paddle_inference/inferencer-fluid-gpu/CMakeLists.txt
@@ -2,7 +2,7 @@ FILE(GLOB fluid_gpu_engine_srcs ${CMAKE_CURRENT_LIST_DIR}/src/*.cpp)
 add_library(fluid_gpu_engine ${fluid_gpu_engine_srcs})
 target_include_directories(fluid_gpu_engine PUBLIC
        ${CMAKE_BINARY_DIR}/Paddle/fluid_install_dir/)
-add_dependencies(fluid_gpu_engine pdserving extern_paddle configure kvdb)
+add_dependencies(fluid_gpu_engine pdserving extern_paddle configure)
 target_link_libraries(fluid_gpu_engine pdserving paddle_fluid iomp5 mklml_intel -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)

 install(TARGETS fluid_gpu_engine 

--- a/python/paddle_serving_server/serve.py
+++ b/python/paddle_serving_server/serve.py
@@ -18,6 +18,7 @@ Usage:
        python -m paddle_serving_server.serve --model ./serving_server_model --port 9292
 """
 import argparse
+from .web_service import WebService


 def parse_args():  # pylint: disable=doc-string-missing
@@ -28,6 +29,8 @@ def parse_args():  # pylint: disable=doc-string-missing
        "--model", type=str, default="", help="Model for serving")
    parser.add_argument(
        "--port", type=int, default=9292, help="Port the server")
+    parser.add_argument(
+        "--name", type=str, default="None", help="Web service name")
    parser.add_argument(
        "--workdir",
        type=str,
@@ -71,4 +74,13 @@ def start_standard_model():  # pylint: disable=doc-string-missing


 if __name__ == "__main__":
+
+    args = parse_args()
+    if args.name == "None":  # "None" is the --name default: no service name was given
        start_standard_model()
+    else:  # a service name was given: serve the model through WebService
+        service = WebService(name=args.name)
+        service.load_model_config(args.model)
+        service.prepare_server(
+            workdir=args.workdir, port=args.port, device=args.device)
+        service.run_server()
--- a/python/paddle_serving_server/web_serve.py
+++ b/python/paddle_serving_server/web_serve.py
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-Usage:
-    Host a trained paddle model with one line command
-    Example:
-        python -m paddle_serving_server.web_serve --model ./serving_server_model --port 9292
-"""
-import argparse
-from multiprocessing import Pool, Process
-from .web_service import WebService
-
-
-def parse_args():  # pylint: disable=doc-string-missing
-    parser = argparse.ArgumentParser("web_serve")
-    parser.add_argument(
-        "--thread", type=int, default=10, help="Concurrency of server")
-    parser.add_argument(
-        "--model", type=str, default="", help="Model for serving")
-    parser.add_argument(
-        "--port", type=int, default=9292, help="Port the server")
-    parser.add_argument(
-        "--workdir",
-        type=str,
-        default="workdir",
-        help="Working dir of current service")
-    parser.add_argument(
-        "--device", type=str, default="cpu", help="Type of device")
-    parser.add_argument(
-        "--name", type=str, default="default", help="Default service name")
-    return parser.parse_args()
-
-
-if __name__ == "__main__":
-    args = parse_args()
-    service = WebService(name=args.name)
-    service.load_model_config(args.model)
-    service.prepare_server(
-        workdir=args.workdir, port=args.port, device=args.device)
-    service.run_server()
--- a/python/paddle_serving_server_gpu/__init__.py
+++ b/python/paddle_serving_server_gpu/__init__.py
@@ -42,7 +42,7 @@ def serve_args():
        "--device", type=str, default="gpu", help="Type of device")
    parser.add_argument("--gpu_ids", type=str, default="", help="gpu ids")
    parser.add_argument(
-        "--name", type=str, default="default", help="Default service name")
+        "--name", type=str, default="None", help="Default service name")
    return parser.parse_args()



--- a/python/paddle_serving_server_gpu/serve.py
+++ b/python/paddle_serving_server_gpu/serve.py
@@ -88,4 +88,18 @@ def start_multi_card(args):  # pylint: disable=doc-string-missing

 if __name__ == "__main__":
    args = serve_args()
+    if args.name == "None":  # "None" is the --name default: no service name was given
        start_multi_card(args)
+    else:
+        web_service = WebService(name=args.name)
+        web_service.load_model_config(args.model)
+        gpu_ids = args.gpu_ids  # an explicit --gpu_ids value takes precedence
+        if gpu_ids == "":  # --gpu_ids not given: fall back to the environment
+            if "CUDA_VISIBLE_DEVICES" in os.environ:
+                gpu_ids = os.environ["CUDA_VISIBLE_DEVICES"]
+        if len(gpu_ids) > 0:  # gpu_ids is a comma-separated string such as "0,1"
+            gpus = [int(x) for x in gpu_ids.split(",")]
+            web_service.set_gpus(gpus)
+        web_service.prepare_server(
+            workdir=args.workdir, port=args.port, device=args.device)
+        web_service.run_server()
--- a/python/paddle_serving_server_gpu/web_serve.py
+++ b/python/paddle_serving_server_gpu/web_serve.py
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-Usage:
-    Host a trained paddle model with one line command
-    Example:
-        python -m paddle_serving_server.web_serve --model ./serving_server_model --port 9292
-"""
-import os
-from multiprocessing import Pool, Process
-from .web_service import WebService
-import paddle_serving_server_gpu as serving
-from paddle_serving_server_gpu import serve_args
-
-if __name__ == "__main__":
-    args = serve_args()
-    web_service = WebService(name=args.name)
-    web_service.load_model_config(args.model)
-    gpu_ids = []
-    if args.gpu_ids == "":
-        if "CUDA_VISIBLE_DEVICES" in os.environ:
-            gpu_ids = os.environ["CUDA_VISIBLE_DEVICES"]
-    if len(gpu_ids) > 0:
-        gpus = [int(x) for x in gpu_ids.split(",")]
-        web_service.set_gpus(gpus)
-    web_service.prepare_server(
-        workdir=args.workdir, port=args.port, device=args.device)
-    web_service.run_server()
--- a/tools/serving_build.sh
+++ b/tools/serving_build.sh
@@ -83,7 +83,7 @@ function python_test_fit_a_line() {
            check_cmd "python test_client.py uci_housing_client/serving_client_conf.prototxt > /dev/null"
            ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill
            # test web
-            check_cmd "python -m paddle_serving_server.web_serve --model uci_housing_model/ --name uci --port 9399 --name uci > /dev/null &"
+            check_cmd "python -m paddle_serving_server.serve --model uci_housing_model/ --name uci --port 9399 --name uci > /dev/null &"
            sleep 5
            check_cmd "curl -H \"Content-Type:application/json\" -X POST -d '{\"x\": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332], \"fetch\":[\"price\"]}' http://127.0.0.1:9399/uci/prediction"
            ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill