diff --git a/CMakeLists.txt b/CMakeLists.txt
index cf67e6127fabfe998bedefda174fa38e2e358e39..f4e8c64c4ff73d0a417c35159901c2e67d0ca4ef 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -65,7 +65,7 @@ endif()
 
 if (NOT CLIENT_ONLY)
 include(external/jsoncpp)
-include(external/rocksdb)
+#include(external/rocksdb)
 endif()
 #include(external/gtest)
 
diff --git a/README.md b/README.md
index 8b0ab325b0b85b8fbcad3b22c07f42041340394d..bae5bb3834035a0b786e963d4e6fe4c72acdd72b 100644
--- a/README.md
+++ b/README.md
@@ -53,8 +53,9 @@ Paddle Serving provides HTTP and RPC based service for users to access
 
 ### HTTP service
 
+Paddle Serving provides a built-in Python module called `paddle_serving_server.serve` that can start an RPC service or an HTTP service with a one-line command. If we specify the argument `--name uci`, we get an HTTP service whose URL is `$IP:$PORT/uci/prediction`.
 ``` shell
-python -m paddle_serving_server.web_serve --model uci_housing_model --thread 10 --port 9292 --name uci
+python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9292 --name uci
 ```
 
 
@@ -65,6 +66,7 @@ python -m paddle_serving_server.web_serve --model uci_housing_model --thread 10
 | `name` | str | `""` | Service name, can be used to generate HTTP request url |
 | `model` | str | `""` | Path of paddle model directory to be served |
 
+Here, we send an HTTP POST request to the service we just started. You can use `curl`, or any Python HTTP library such as [requests](https://requests.readthedocs.io/en/master/).
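+
+A minimal sketch with `requests` (illustrative only; it assumes the service above is running locally on port 9292 and that `requests` is installed):
+
+``` python
+import requests
+
+# One uci_housing sample: 13 input features, fetching the predicted price.
+payload = {
+    "x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727,
+          -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332],
+    "fetch": ["price"],
+}
+resp = requests.post("http://127.0.0.1:9292/uci/prediction", json=payload)
+print(resp.json())
+```
+
+The equivalent `curl` request: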
 
 
 ``` shell
@@ -73,6 +75,7 @@ curl -H "Content-Type:application/json" -X POST -d '{"x": [0.0137, -0.1136, 0.25
 
 ### RPC service
 
+A user can also start an RPC service with `paddle_serving_server.serve`. An RPC service is usually faster than an HTTP service, although it requires some coding against Paddle Serving's Python client API, as shown in the client example below. Note that we do not specify `--name` here.
 ``` shell
 python -m paddle_serving_server.serve --model uci_housing_model --thread 10 --port 9292
 ```
@@ -91,17 +94,37 @@ print(fetch_map)
 
 ```
 
-Applications you can do with Paddle Serving
+### Pre-built services with Paddle Serving
 
-
+#### Chinese Word Segmentation
 
-|      Model Name      	|              Resnet50              	|
-|:--------------------:	|:----------------------------------:	|
-|      Package URL     	|           To be released           	|
-|      Description     	| Get the representation of an image 	|
-| Training Data Source 	|              Imagenet              	|
+- **Description**: A Chinese word segmentation HTTP service that can be deployed with a one-line command.
+
+- **Download**: 
+``` shell
+wget --no-check-certificate https://paddle-serving.bj.bcebos.com/lac/lac_model_jieba_web.tar.gz
+```
+- **Host web service**: 
+``` shell
+tar -xzf lac_model_jieba_web.tar.gz
+python lac_web_service.py jieba_server_model/ lac_workdir 9292
+```
+- **Request sample**: 
+``` shell
+curl -H "Content-Type:application/json" -X POST -d '{"words": "我爱北京天安门", "fetch":["word_seg"]}' http://127.0.0.1:9292/lac/prediction
+```
+- **Request result**: 
+``` shell
+{"word_seg":"我|爱|北京|天安门"}
+```
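+- **Request from Python** (an illustrative sketch with `requests`, assuming the service above is running):
+``` python
+import requests
+
+resp = requests.post(
+    "http://127.0.0.1:9292/lac/prediction",
+    json={"words": "我爱北京天安门", "fetch": ["word_seg"]},
+)
+print(resp.json())  # {"word_seg": "我|爱|北京|天安门"}
+```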
+
+
+#### Chinese Sentence To Vector
+
+#### Image To Vector
+
+#### Image Classification
 
-
 
 
 Document
@@ -119,6 +142,7 @@ print(fetch_map)
 
 ### About Efficiency
 - [How profile serving efficiency?(Chinese)](https://github.com/PaddlePaddle/Serving/tree/develop/python/examples/util)
+- [Benchmarks](doc/BENCHMARK.md)
 
 ### FAQ
 - [FAQ(Chinese)](doc/FAQ.md)
diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt
index 3c775a9acb2ac17a99557bfd28c4e8a3cf20a8c5..7226a69a87163cf625f49f54fe2a7df996a98efd 100644
--- a/core/CMakeLists.txt
+++ b/core/CMakeLists.txt
@@ -14,7 +14,7 @@
 
 if(NOT CLIENT_ONLY)
 add_subdirectory(cube)
-add_subdirectory(kvdb)
+#add_subdirectory(kvdb)
 endif()
 add_subdirectory(configure)
 add_subdirectory(pdcodegen)
diff --git a/core/general-server/CMakeLists.txt b/core/general-server/CMakeLists.txt
index 7cf0a04d837c992dfc2557f442ea4c33e494ed28..9056e229a51f56463dc2eec5629f219d00dc6a38 100644
--- a/core/general-server/CMakeLists.txt
+++ b/core/general-server/CMakeLists.txt
@@ -1,4 +1,3 @@
-include_directories(SYSTEM  ${CMAKE_CURRENT_LIST_DIR}/../kvdb/include)
 include_directories(SYSTEM  ${CMAKE_CURRENT_LIST_DIR}/../../)
 include(op/CMakeLists.txt)
 include(proto/CMakeLists.txt)
@@ -25,8 +24,6 @@ target_link_libraries(serving pdserving)
 target_link_libraries(serving cube-api)
 target_link_libraries(serving utils)
 
-target_link_libraries(serving kvdb rocksdb)
-
 if(WITH_GPU)
     target_link_libraries(serving ${CUDA_LIBRARIES})
 endif()
diff --git a/core/predictor/CMakeLists.txt b/core/predictor/CMakeLists.txt
index 3a0724fe25c83804a2d594bc47b0ac4d263bb2bd..1b9dc7b29845a2b8c7f958c1d8e836cb57e91d41 100644
--- a/core/predictor/CMakeLists.txt
+++ b/core/predictor/CMakeLists.txt
@@ -6,17 +6,16 @@ include(framework/CMakeLists.txt)
 include(tools/CMakeLists.txt)
 include(src/CMakeLists.txt)
 
-include_directories(SYSTEM  ${CMAKE_CURRENT_LIST_DIR}/../kvdb/include)
 
 add_library(pdserving ${pdserving_srcs})
 set_source_files_properties(
         ${pdserving_srcs}
         PROPERTIES
         COMPILE_FLAGS  "-Wno-strict-aliasing -Wno-unused-variable -Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
-add_dependencies(pdserving protobuf kvdb boost brpc leveldb pdcodegen configure)
+add_dependencies(pdserving protobuf boost brpc leveldb pdcodegen configure)
 
 target_link_libraries(pdserving
-        brpc protobuf boost leveldb configure kvdb -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
+        brpc protobuf boost leveldb configure -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
 
 # install
 install(TARGETS pdserving
diff --git a/core/predictor/framework/resource.cpp b/core/predictor/framework/resource.cpp
index 61a132d13f28350b70e03d8113f537d393ee6282..647cd58d041802de27defb6033ba05b5234e0563 100644
--- a/core/predictor/framework/resource.cpp
+++ b/core/predictor/framework/resource.cpp
@@ -39,8 +39,6 @@ DynamicResource::~DynamicResource() {}
 
 int DynamicResource::initialize() { return 0; }
 
-std::shared_ptr Resource::getDB() { return db; }
-
 std::shared_ptr Resource::get_general_model_config() {
   return _config;
 }
@@ -155,9 +153,6 @@ int Resource::initialize(const std::string& path, const std::string& file) {
     this->cube_config_fullpath = cube_config_fullpath;
   }
 
-  if (db.get() == nullptr) {
-    db = RocksDBWrapper::RocksDBWrapperFactory("kvdb");
-  }
 
   THREAD_SETSPECIFIC(_tls_bspec_key, NULL);
   return 0;
diff --git a/core/predictor/framework/resource.h b/core/predictor/framework/resource.h
index 70461f48446cd2e41e0531d1863298cf48d2cf0a..1a648f0fa363efa4eb915a64553949206ec96153 100644
--- a/core/predictor/framework/resource.h
+++ b/core/predictor/framework/resource.h
@@ -18,7 +18,6 @@
 #include 
 #include 
 #include "core/cube/cube-api/include/cube_api.h"
-#include "core/kvdb/include/kvdb/paddle_rocksdb.h"
 #include "core/predictor/common/inner_common.h"
 #include "core/predictor/framework/infer.h"
 #include "core/predictor/framework/memory.h"
@@ -101,8 +100,6 @@ class Resource {
   void print_general_model_config(
       const std::shared_ptr& config);
 
-  std::shared_ptr getDB();
-
   DynamicResource* get_dynamic_resource() {
     return reinterpret_cast(
         THREAD_GETSPECIFIC(_tls_bspec_key));
@@ -110,7 +107,6 @@ class Resource {
 
  private:
   int thread_finalize() { return 0; }
-  std::shared_ptr db;
   std::shared_ptr _config;
   std::string cube_config_fullpath;
 
diff --git a/paddle_inference/inferencer-fluid-cpu/CMakeLists.txt b/paddle_inference/inferencer-fluid-cpu/CMakeLists.txt
index b3183eca1a0c2bb9524db7d7eb19f924079e0f0d..fe7cd91c534d900f3afb466e48599f60e64e4f9d 100644
--- a/paddle_inference/inferencer-fluid-cpu/CMakeLists.txt
+++ b/paddle_inference/inferencer-fluid-cpu/CMakeLists.txt
@@ -2,7 +2,7 @@ FILE(GLOB fluid_cpu_engine_srcs ${CMAKE_CURRENT_LIST_DIR}/src/*.cpp)
 add_library(fluid_cpu_engine ${fluid_cpu_engine_srcs})
 target_include_directories(fluid_cpu_engine PUBLIC
         ${CMAKE_BINARY_DIR}/Paddle/fluid_install_dir/)
-add_dependencies(fluid_cpu_engine pdserving extern_paddle configure kvdb)
+add_dependencies(fluid_cpu_engine pdserving extern_paddle configure)
 target_link_libraries(fluid_cpu_engine pdserving paddle_fluid -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
 
 install(TARGETS fluid_cpu_engine 
diff --git a/paddle_inference/inferencer-fluid-gpu/CMakeLists.txt b/paddle_inference/inferencer-fluid-gpu/CMakeLists.txt
index 6c980d6d94ef99585cf956072d8181d0d98cda42..725da85b45ca1070badf5343f340e49dce6b936f 100644
--- a/paddle_inference/inferencer-fluid-gpu/CMakeLists.txt
+++ b/paddle_inference/inferencer-fluid-gpu/CMakeLists.txt
@@ -2,7 +2,7 @@ FILE(GLOB fluid_gpu_engine_srcs ${CMAKE_CURRENT_LIST_DIR}/src/*.cpp)
 add_library(fluid_gpu_engine ${fluid_gpu_engine_srcs})
 target_include_directories(fluid_gpu_engine PUBLIC
         ${CMAKE_BINARY_DIR}/Paddle/fluid_install_dir/)
-add_dependencies(fluid_gpu_engine pdserving extern_paddle configure kvdb)
+add_dependencies(fluid_gpu_engine pdserving extern_paddle configure)
 target_link_libraries(fluid_gpu_engine pdserving paddle_fluid iomp5 mklml_intel -lpthread -lcrypto -lm -lrt -lssl -ldl -lz)
 
 install(TARGETS fluid_gpu_engine 
diff --git a/python/paddle_serving_server/serve.py b/python/paddle_serving_server/serve.py
index c86c3f46b3b3ef83fb5fe630031cf28a95c52649..279e3a895e975473fc5569c4716368c3dda1d9f1 100644
--- a/python/paddle_serving_server/serve.py
+++ b/python/paddle_serving_server/serve.py
@@ -18,6 +18,7 @@ Usage:
         python -m paddle_serving_server.serve --model ./serving_server_model --port 9292
 """
 import argparse
+from .web_service import WebService
 
 
 def parse_args():  # pylint: disable=doc-string-missing
@@ -28,6 +29,8 @@ def parse_args():  # pylint: disable=doc-string-missing
         "--model", type=str, default="", help="Model for serving")
     parser.add_argument(
         "--port", type=int, default=9292, help="Port the server")
+    parser.add_argument(
+        "--name", type=str, default="None", help="Web service name")
     parser.add_argument(
         "--workdir",
         type=str,
@@ -71,4 +74,13 @@ def start_standard_model():  # pylint: disable=doc-string-missing
 
 
 if __name__ == "__main__":
-    start_standard_model()
+
+    args = parse_args()
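+    # "None" is the string default of --name (a sentinel): without --name we
+    # start the plain RPC server; with it, an HTTP service at /<name>/prediction.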
+    if args.name == "None":
+        start_standard_model()
+    else:
+        service = WebService(name=args.name)
+        service.load_model_config(args.model)
+        service.prepare_server(
+            workdir=args.workdir, port=args.port, device=args.device)
+        service.run_server()
diff --git a/python/paddle_serving_server/web_serve.py b/python/paddle_serving_server/web_serve.py
deleted file mode 100644
index 46437ad5e53288c6ab03b32ea8882e1b3cfa66a3..0000000000000000000000000000000000000000
--- a/python/paddle_serving_server/web_serve.py
+++ /dev/null
@@ -1,51 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-Usage:
-    Host a trained paddle model with one line command
-    Example:
-        python -m paddle_serving_server.web_serve --model ./serving_server_model --port 9292
-"""
-import argparse
-from multiprocessing import Pool, Process
-from .web_service import WebService
-
-
-def parse_args():  # pylint: disable=doc-string-missing
-    parser = argparse.ArgumentParser("web_serve")
-    parser.add_argument(
-        "--thread", type=int, default=10, help="Concurrency of server")
-    parser.add_argument(
-        "--model", type=str, default="", help="Model for serving")
-    parser.add_argument(
-        "--port", type=int, default=9292, help="Port the server")
-    parser.add_argument(
-        "--workdir",
-        type=str,
-        default="workdir",
-        help="Working dir of current service")
-    parser.add_argument(
-        "--device", type=str, default="cpu", help="Type of device")
-    parser.add_argument(
-        "--name", type=str, default="default", help="Default service name")
-    return parser.parse_args()
-
-
-if __name__ == "__main__":
-    args = parse_args()
-    service = WebService(name=args.name)
-    service.load_model_config(args.model)
-    service.prepare_server(
-        workdir=args.workdir, port=args.port, device=args.device)
-    service.run_server()
diff --git a/python/paddle_serving_server_gpu/__init__.py b/python/paddle_serving_server_gpu/__init__.py
index 8ee1e137fb8fe282d26bda95e4b4bffa6f670f11..02b55801c35fb5d1ed7e35c249ac07e4d3eb45ab 100644
--- a/python/paddle_serving_server_gpu/__init__.py
+++ b/python/paddle_serving_server_gpu/__init__.py
@@ -42,7 +42,7 @@ def serve_args():
         "--device", type=str, default="gpu", help="Type of device")
     parser.add_argument("--gpu_ids", type=str, default="", help="gpu ids")
     parser.add_argument(
-        "--name", type=str, default="default", help="Default service name")
+        "--name", type=str, default="None", help="Default service name")
     return parser.parse_args()
 
 
diff --git a/python/paddle_serving_server_gpu/serve.py b/python/paddle_serving_server_gpu/serve.py
index cc9b18f6920c46c5d0119e8adfaf8f76ecf2ad26..5d9d96d517d64b21313fda0b44a83b34142b014b 100644
--- a/python/paddle_serving_server_gpu/serve.py
+++ b/python/paddle_serving_server_gpu/serve.py
@@ -88,4 +88,18 @@ def start_multi_card(args):  # pylint: disable=doc-string-missing
 
 if __name__ == "__main__":
     args = serve_args()
-    start_multi_card(args)
+    if args.name == "None":
+        start_multi_card(args)
+    else:
+        web_service = WebService(name=args.name)
+        web_service.load_model_config(args.model)
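+        # Prefer --gpu_ids; fall back to CUDA_VISIBLE_DEVICES when it is unset.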
+        gpu_ids = args.gpu_ids
+        if gpu_ids == "":
+            if "CUDA_VISIBLE_DEVICES" in os.environ:
+                gpu_ids = os.environ["CUDA_VISIBLE_DEVICES"]
+        if len(gpu_ids) > 0:
+            gpus = [int(x) for x in gpu_ids.split(",")]
+            web_service.set_gpus(gpus)
+        web_service.prepare_server(
+            workdir=args.workdir, port=args.port, device=args.device)
+        web_service.run_server()
diff --git a/python/paddle_serving_server_gpu/web_serve.py b/python/paddle_serving_server_gpu/web_serve.py
deleted file mode 100644
index 734e6d7b93b4f3ad22f330b1545b63c6ac6f2838..0000000000000000000000000000000000000000
--- a/python/paddle_serving_server_gpu/web_serve.py
+++ /dev/null
@@ -1,39 +0,0 @@
-# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
-#
-# Licensed under the Apache License, Version 2.0 (the "License");
-# you may not use this file except in compliance with the License.
-# You may obtain a copy of the License at
-#
-#     http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-"""
-Usage:
-    Host a trained paddle model with one line command
-    Example:
-        python -m paddle_serving_server.web_serve --model ./serving_server_model --port 9292
-"""
-import os
-from multiprocessing import Pool, Process
-from .web_service import WebService
-import paddle_serving_server_gpu as serving
-from paddle_serving_server_gpu import serve_args
-
-if __name__ == "__main__":
-    args = serve_args()
-    web_service = WebService(name=args.name)
-    web_service.load_model_config(args.model)
-    gpu_ids = []
-    if args.gpu_ids == "":
-        if "CUDA_VISIBLE_DEVICES" in os.environ:
-            gpu_ids = os.environ["CUDA_VISIBLE_DEVICES"]
-    if len(gpu_ids) > 0:
-        gpus = [int(x) for x in gpu_ids.split(",")]
-        web_service.set_gpus(gpus)
-    web_service.prepare_server(
-        workdir=args.workdir, port=args.port, device=args.device)
-    web_service.run_server()
diff --git a/tools/serving_build.sh b/tools/serving_build.sh
index 163f4f74429066581aa17cc78b3ab00947ba4d77..b810e3139803bd363c771c6f655cef6595177dc8 100644
--- a/tools/serving_build.sh
+++ b/tools/serving_build.sh
@@ -83,7 +83,7 @@ function python_test_fit_a_line() {
             check_cmd "python test_client.py uci_housing_client/serving_client_conf.prototxt > /dev/null"
             ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill
             # test web
-            check_cmd "python -m paddle_serving_server.web_serve --model uci_housing_model/ --name uci --port 9399 --name uci > /dev/null &"
+            check_cmd "python -m paddle_serving_server.serve --model uci_housing_model/ --name uci --port 9399 --name uci > /dev/null &"
             sleep 5
             check_cmd "curl -H \"Content-Type:application/json\" -X POST -d '{\"x\": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332], \"fetch\":[\"price\"]}' http://127.0.0.1:9399/uci/prediction"
             ps -ef | grep "paddle_serving_server" | grep -v grep | awk '{print $2}' | xargs kill