diff --git a/CMakeLists.txt b/CMakeLists.txt index f4e8c64c4ff73d0a417c35159901c2e67d0ca4ef..af065158699199af61aca02f563dda1b1cddf2b1 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -49,7 +49,9 @@ set(THIRD_PARTY_BUILD_TYPE Release) option(WITH_AVX "Compile Paddle Serving with AVX intrinsics" OFF) option(WITH_MKL "Compile Paddle Serving with MKL support." OFF) option(WITH_GPU "Compile Paddle Serving with NVIDIA GPU" OFF) -option(CLIENT_ONLY "Compile client libraries and demos only" OFF) +option(CLIENT "Compile Paddle Serving Client" OFF) +option(SERVER "Compile Paddle Serving Server" OFF) +option(APP "Compile Paddle Serving App package" OFF) option(WITH_ELASTIC_CTR "Compile ELASITC-CTR solution" OFF) option(PACK "Compile for whl" OFF) @@ -63,12 +65,12 @@ if (NOT DEFINED WITH_MKLDNN) endif() endif() -if (NOT CLIENT_ONLY) +if (SERVER) include(external/jsoncpp) #include(external/rocksdb) endif() -#include(external/gtest) +if (SERVER OR CLIENT) include(external/snappy) include(external/leveldb) include(external/zlib) @@ -81,8 +83,9 @@ include(external/pybind11) include(external/python) include(generic) include(flags) +endif() -if (NOT CLIENT_ONLY) +if (SERVER) include(external/cudnn) include(paddlepaddle) endif() @@ -91,7 +94,7 @@ message("paddle serving source dir: " ${PADDLE_SERVING_SOURCE_DIR}) include_directories(${PADDLE_SERVING_SOURCE_DIR}) include_directories(${PADDLE_SERVING_BINARY_DIR}) -if(NOT CLIENT_ONLY) +if(SERVER) set(EXTERNAL_LIBS jsoncpp gflags @@ -109,28 +112,27 @@ set(EXTERNAL_LIBS brpc ) -if(NOT CLIENT_ONLY) +if(SERVER) if(WITH_MKLML) list(APPEND EXTERNAL_LIBS ${MKLML_IOMP_LIB}) endif() endif() -if(NOT CLIENT_ONLY) +if(SERVER) if(WITH_MKLDNN) list(APPEND EXTERNAL_LIBS ${MKLDNN_LIB}) endif() endif() -if (NOT CLIENT_ONLY) +if (SERVER) list(APPEND EXTERNAL_LIBS paddlepaddle) endif() add_subdirectory(core) -if(NOT CLIENT_ONLY) +if(SERVER) add_subdirectory(paddle_inference) endif() add_subdirectory(python) -#add_subdirectory(examples) diff --git a/core/CMakeLists.txt b/core/CMakeLists.txt index 7226a69a87163cf625f49f54fe2a7df996a98efd..ce2e5e3814ae1e585976c5d9c8848b506293ee67 100644 --- a/core/CMakeLists.txt +++ b/core/CMakeLists.txt @@ -12,19 +12,26 @@ # See the License for the specific language governing permissions and # limitations under the License -if(NOT CLIENT_ONLY) +if(SERVER) add_subdirectory(cube) #add_subdirectory(kvdb) endif() + +if (CLIENT OR SERVER) add_subdirectory(configure) add_subdirectory(pdcodegen) add_subdirectory(sdk-cpp) -if(CLIENT_ONLY) +endif() + +if(CLIENT) add_subdirectory(general-client) endif() -if (NOT CLIENT_ONLY) +if (SERVER) add_subdirectory(predictor) add_subdirectory(general-server) endif() + +if (CLIENT OR SERVER) add_subdirectory(util) +endif() diff --git a/core/configure/CMakeLists.txt b/core/configure/CMakeLists.txt index 685eff01837cbc86dab33524b14e796f6f7573f2..b6384fc99ea3df6d71a61865e3aabf5b39b510dd 100644 --- a/core/configure/CMakeLists.txt +++ b/core/configure/CMakeLists.txt @@ -33,7 +33,7 @@ py_proto_compile(general_model_config_py_proto SRCS proto/general_model_config.p add_custom_target(general_model_config_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) add_dependencies(general_model_config_py_proto general_model_config_py_proto_init) -if (CLIENT_ONLY) +if (CLIENT) py_proto_compile(sdk_configure_py_proto SRCS proto/sdk_configure.proto) add_custom_target(sdk_configure_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) add_dependencies(sdk_configure_py_proto sdk_configure_py_proto_init) @@ -51,7 
+51,7 @@ add_custom_command(TARGET general_model_config_py_proto POST_BUILD endif() -if (NOT CLIENT_ONLY) +if (SERVER) py_proto_compile(server_config_py_proto SRCS proto/server_configure.proto) add_custom_target(server_config_py_proto_init ALL COMMAND ${CMAKE_COMMAND} -E touch __init__.py) add_dependencies(server_config_py_proto server_config_py_proto_init) diff --git a/core/general-client/CMakeLists.txt b/core/general-client/CMakeLists.txt index f3bb666a3a0eb114463e941fdaa1ba8134337bb1..88abcbcb776ae999cbf9123d1dad0864a987ecf4 100644 --- a/core/general-client/CMakeLists.txt +++ b/core/general-client/CMakeLists.txt @@ -1,4 +1,4 @@ -if(CLIENT_ONLY) +if(CLIENT) add_subdirectory(pybind11) pybind11_add_module(serving_client src/general_model.cpp src/pybind_general_model.cpp) target_link_libraries(serving_client PRIVATE -Wl,--whole-archive utils sdk-cpp pybind python -Wl,--no-whole-archive -lpthread -lcrypto -lm -lrt -lssl -ldl -lz) diff --git a/python/CMakeLists.txt b/python/CMakeLists.txt index 855b006949e8b621260e7ed86c4a8c86e2332104..c1590fb1b36de669f89711f95c4d49aedadb0c91 100644 --- a/python/CMakeLists.txt +++ b/python/CMakeLists.txt @@ -1,11 +1,11 @@ -if (CLIENT_ONLY) +if (CLIENT) file(GLOB_RECURSE SERVING_CLIENT_PY_FILES paddle_serving_client/*.py) set(PY_FILES ${SERVING_CLIENT_PY_FILES}) SET(PACKAGE_NAME "serving_client") set(SETUP_LOG_FILE "setup.py.client.log") endif() -if (NOT CLIENT_ONLY) +if (SERVER) if (NOT WITH_GPU) file(GLOB_RECURSE SERVING_SERVER_PY_FILES paddle_serving_server/*.py) else() @@ -16,12 +16,17 @@ if (NOT CLIENT_ONLY) set(SETUP_LOG_FILE "setup.py.server.log") endif() -if (CLIENT_ONLY) +if (CLIENT) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.client.in ${CMAKE_CURRENT_BINARY_DIR}/setup.py) endif() -if (NOT CLIENT_ONLY) +if (APP) +configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.app.in + ${CMAKE_CURRENT_BINARY_DIR}/setup.py) +endif() + +if (SERVER) if (NOT WITH_GPU) configure_file(${CMAKE_CURRENT_SOURCE_DIR}/setup.py.server.in ${CMAKE_CURRENT_BINARY_DIR}/setup.py) @@ -34,7 +39,15 @@ endif() set (SERVING_CLIENT_CORE ${PADDLE_SERVING_BINARY_DIR}/core/general-client/*.so) message("python env: " ${py_env}) -if (CLIENT_ONLY) +if (APP) +add_custom_command( + OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp + COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_app/ ${PADDLE_SERVING_BINARY_DIR}/python/ + COMMAND env ${py_env} ${PYTHON_EXECUTABLE} setup.py bdist_wheel) +add_custom_target(paddle_python ALL DEPENDS ${PADDLE_SERVING_BINARY_DIR}/.timestamp) +endif() + +if (CLIENT) add_custom_command( OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp COMMAND cp -r ${CMAKE_CURRENT_SOURCE_DIR}/paddle_serving_client/ ${PADDLE_SERVING_BINARY_DIR}/python/ @@ -44,7 +57,7 @@ add_custom_command( add_custom_target(paddle_python ALL DEPENDS serving_client ${PADDLE_SERVING_BINARY_DIR}/.timestamp) endif() -if (NOT CLIENT_ONLY) +if (SERVER) if(NOT WITH_GPU) add_custom_command( OUTPUT ${PADDLE_SERVING_BINARY_DIR}/.timestamp @@ -66,20 +79,22 @@ endif() set(SERVING_CLIENT_PYTHON_PACKAGE_DIR ${CMAKE_CURRENT_BINARY_DIR}/dist/) set(SERVING_SERVER_PYTHON_PACKAGE_DIR ${CMAKE_CURRENT_BINARY_DIR}/dist/) -if (CLIENT_ONLY) +if (CLIENT) install(DIRECTORY ${SERVING_CLIENT_PYTHON_PACKAGE_DIR} DESTINATION opt/serving_client/share/wheels ) endif() -if (NOT CLIENT_ONLY) +if (SERVER) install(DIRECTORY ${SERVING_SERVER_PYTHON_PACKAGE_DIR} DESTINATION opt/serving_server/share/wheels ) endif() +if (CLIENT OR SERVER) find_program(PATCHELF_EXECUTABLE patchelf) -if(NOT PATCHELF_EXECUTABLE) +if (NOT 
PATCHELF_EXECUTABLE) message(FATAL_ERROR "patchelf not found, please install it.\n" "For Ubuntu, the command is: apt-get install -y patchelf.") endif() +endif() diff --git a/python/examples/bert/benchmark_batch.py b/python/examples/bert/benchmark_batch.py index b4d13c7db6b3c32c7e8ccd75c33ce25a196e0ea8..e0f677146a47c0366a1bbafe9eff049e2671a617 100644 --- a/python/examples/bert/benchmark_batch.py +++ b/python/examples/bert/benchmark_batch.py @@ -41,13 +41,13 @@ def single_func(idx, resource): client = Client() client.load_client_config(args.model) client.connect([resource["endpoint"][idx % len(resource["endpoint"])]]) + feed_batch = [] + for bi in range(args.batch_size): + feed_batch.append(reader.process(dataset[bi])) start = time.time() for i in range(1000): if args.batch_size >= 1: - feed_batch = [] - for bi in range(args.batch_size): - feed_batch.append(reader.process(dataset[i])) result = client.batch_predict( feed_batch=feed_batch, fetch=fetch) else: @@ -61,7 +61,9 @@ def single_func(idx, resource): if __name__ == '__main__': multi_thread_runner = MultiThreadRunner() - endpoint_list = ["127.0.0.1:9292"] + endpoint_list = [ + "127.0.0.1:9295", "127.0.0.1:9296", "127.0.0.1:9297", "127.0.0.1:9298" + ] result = multi_thread_runner.run(single_func, args.thread, {"endpoint": endpoint_list}) avg_cost = 0 diff --git a/python/examples/bert/benchmark_batch.sh b/python/examples/bert/benchmark_batch.sh index 46ba451d0ade36c24151e260d5c9b3cc3666a548..272923776d6640880175745920a8fad9e84972fd 100644 --- a/python/examples/bert/benchmark_batch.sh +++ b/python/examples/bert/benchmark_batch.sh @@ -1,10 +1,17 @@ rm profile_log +export CUDA_VISIBLE_DEVICES=0,1,2,3 +python -m paddle_serving_server_gpu.serve --model bert_seq20_model/ --port 9295 --thread 4 --gpu_ids 0,1,2,3 2> elog > stdlog & + +sleep 5 + for thread_num in 1 2 4 8 16 do for batch_size in 1 2 4 8 16 32 64 128 256 512 do $PYTHONROOT/bin/python benchmark_batch.py --thread $thread_num --batch_size $batch_size --model serving_client_conf/serving_client_conf.prototxt --request rpc > profile 2>&1 echo "========================================" + echo "thread num: ", $thread_num + echo "batch size: ", $batch_size echo "batch size : $batch_size" >> profile_log $PYTHONROOT/bin/python ../util/show_profile.py profile $thread_num >> profile_log tail -n 1 profile >> profile_log diff --git a/python/examples/bert/benchmark_with_profile.sh b/python/examples/bert/benchmark_with_profile.sh new file mode 100644 index 0000000000000000000000000000000000000000..8102e30d5c794d5e21d34e2f4ffd88a1af791b5e --- /dev/null +++ b/python/examples/bert/benchmark_with_profile.sh @@ -0,0 +1,10 @@ +export CUDA_VISIBLE_DEVICES=0,1,2,3 +python -m paddle_serving_server_gpu.serve --model bert_seq20_model/ --port 9295 --thread 4 --gpu_ids 0,1,2,3 2> elog > stdlog & +export FLAGS_profile_client=1 +export FLAGS_profile_server=1 +sleep 5 +thread_num=4 +python benchmark_batch.py --thread ${thread_num} --batch_size 64 --model serving_client_conf/serving_client_conf.prototxt 2> profile + +python show_profile.py profile ${thread_num} +python timeline_trace.py profile trace diff --git a/python/paddle_serving_app/__init__.py b/python/paddle_serving_app/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..968e5582cc286455d5200e154033087b71ac86de --- /dev/null +++ b/python/paddle_serving_app/__init__.py @@ -0,0 +1,14 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +from .reader.chinese_bert_reader import ChineseBertReader diff --git a/python/paddle_serving_app/reader/__init__.py b/python/paddle_serving_app/reader/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..847ddc47ac89114f2012bc6b9990a69abfe39fb3 --- /dev/null +++ b/python/paddle_serving_app/reader/__init__.py @@ -0,0 +1,13 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/python/paddle_serving_app/reader/batching.py b/python/paddle_serving_app/reader/batching.py new file mode 100644 index 0000000000000000000000000000000000000000..5ec5f320cf5ec7bd0ab4624d9b39ef936553c774 --- /dev/null +++ b/python/paddle_serving_app/reader/batching.py @@ -0,0 +1,126 @@ +#coding:utf-8 +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +"""Mask, padding and batching.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import numpy as np + + +def prepare_batch_data(insts, + total_token_num, + max_seq_len=128, + pad_id=None, + cls_id=None, + sep_id=None, + mask_id=None, + return_input_mask=True, + return_max_len=True, + return_num_token=False): + """ + 1. generate Tensor of data + 2. generate Tensor of position + 3. 
generate self attention mask, [shape: batch_size * max_len * max_len] + """ + + batch_src_ids = [inst[0] for inst in insts] + batch_sent_ids = [inst[1] for inst in insts] + batch_pos_ids = [inst[2] for inst in insts] + labels_list = [] + # compatible with squad, whose example includes start/end positions, + # or unique id + + for i in range(3, len(insts[0]), 1): + labels = [inst[i] for inst in insts] + labels = np.array(labels).astype("int64").reshape([-1, 1]) + labels_list.append(labels) + + out = batch_src_ids + # Second step: padding + src_id, self_input_mask = pad_batch_data( + out, pad_idx=pad_id, max_seq_len=max_seq_len, return_input_mask=True) + pos_id = pad_batch_data( + batch_pos_ids, + pad_idx=pad_id, + max_seq_len=max_seq_len, + return_pos=False, + return_input_mask=False) + sent_id = pad_batch_data( + batch_sent_ids, + pad_idx=pad_id, + max_seq_len=max_seq_len, + return_pos=False, + return_input_mask=False) + + return_list = [src_id, pos_id, sent_id, self_input_mask] + labels_list + + return return_list if len(return_list) > 1 else return_list[0] + + +def pad_batch_data(insts, + pad_idx=0, + max_seq_len=128, + return_pos=False, + return_input_mask=False, + return_max_len=False, + return_num_token=False, + return_seq_lens=False): + """ + Pad the instances to the max sequence length in batch, and generate the + corresponding position data and input mask. + """ + return_list = [] + #max_len = max(len(inst) for inst in insts) + max_len = max_seq_len + # Any token included in dict can be used to pad, since the paddings' loss + # will be masked out by weights and make no effect on parameter gradients. + + inst_data = np.array([ + list(inst) + list([pad_idx] * (max_len - len(inst))) for inst in insts + ]) + return_list += [inst_data.astype("int64").reshape([-1, max_len, 1])] + + # position data + if return_pos: + inst_pos = np.array([ + list(range(0, len(inst))) + [pad_idx] * (max_len - len(inst)) + for inst in insts + ]) + + return_list += [inst_pos.astype("int64").reshape([-1, max_len, 1])] + + if return_input_mask: + # This is used to avoid attention on paddings. + input_mask_data = np.array( + [[1] * len(inst) + [0] * (max_len - len(inst)) for inst in insts]) + input_mask_data = np.expand_dims(input_mask_data, axis=-1) + return_list += [input_mask_data.astype("float32")] + + if return_max_len: + return_list += [max_len] + + if return_num_token: + num_token = 0 + for inst in insts: + num_token += len(inst) + return_list += [num_token] + + if return_seq_lens: + seq_lens = np.array([len(inst) for inst in insts]) + return_list += [seq_lens.astype("int64").reshape([-1, 1])] + + return return_list if len(return_list) > 1 else return_list[0] diff --git a/python/paddle_serving_app/reader/bert_base_reader.py b/python/paddle_serving_app/reader/bert_base_reader.py new file mode 100644 index 0000000000000000000000000000000000000000..9888dbe82675f5353b881ba528acf8b3af504ddb --- /dev/null +++ b/python/paddle_serving_app/reader/bert_base_reader.py @@ -0,0 +1,24 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. +from .reader import ReaderBase + + +class BertBaseReader(ReaderBase): + def __init__(self): + super(BertBaseReader, self).__init__() + pass + + def process(self, line): + super(BertBaseReader, self).process(line) + pass diff --git a/python/paddle_serving_app/reader/chinese_bert_reader.py b/python/paddle_serving_app/reader/chinese_bert_reader.py new file mode 100644 index 0000000000000000000000000000000000000000..6c884c2aa8f9efa07bb1f13d17709a78921989ca --- /dev/null +++ b/python/paddle_serving_app/reader/chinese_bert_reader.py @@ -0,0 +1,128 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +# coding=utf-8 +from .bert_base_reader import BertBaseReader +from .batching import pad_batch_data +from .tokenization import FullTokenizer, convert_to_unicode + + +class ChineseBertReader(BertBaseReader): + """ + ChineseBertReader handles the most traditional Chinese Bert + preprocessing, a user can define the vocab file through initialization + + Examples: + from paddle_serving_app import ChineseBertReader + + line = ["this is China"] + reader = ChineseBertReader() + reader.process(line[0]) + + """ + + def __init__(self, args={}): + super(ChineseBertReader, self).__init__() + vocab_file = "" + if "vocab_file" in args: + vocab_file = args["vocab_file"] + else: + vocab_file = self._download_or_not() + + self.tokenizer = FullTokenizer(vocab_file=vocab_file) + if "max_seq_len" in args: + self.max_seq_len = args["max_seq_len"] + else: + self.max_seq_len = 20 + self.vocab = self.tokenizer.vocab + self.pad_id = self.vocab["[PAD]"] + self.cls_id = self.vocab["[CLS]"] + self.sep_id = self.vocab["[SEP]"] + self.mask_id = self.vocab["[MASK]"] + self.feed_keys = [ + "input_ids", "position_ids", "segment_ids", "input_mask" + ] + + """ + inner function + """ + + def _download_or_not(self): + import os + import paddle_serving_app + module_path = os.path.dirname(paddle_serving_app.__file__) + full_path = "{}/tmp/chinese_bert".format(module_path) + os.system("mkdir -p {}".format(full_path)) + if os.path.exists("{}/vocab.txt".format(full_path)): + pass + else: + url = "https://paddle-serving.bj.bcebos.com/reader/chinese_bert/vocab.txt" + r = os.system("wget --no-check-certificate " + url) + os.system("mv vocab.txt {}".format(full_path)) + if r != 0: + raise SystemExit('Download failed, please check your network') + return "{}/vocab.txt".format(full_path) + + """ + inner function + """ + + def _pad_batch(self, token_ids, text_type_ids, position_ids): + batch_token_ids = [token_ids] + batch_text_type_ids = [text_type_ids] + batch_position_ids = [position_ids] + + padded_token_ids, input_mask = pad_batch_data( + batch_token_ids, + max_seq_len=self.max_seq_len, + pad_idx=self.pad_id, + return_input_mask=True) + padded_text_type_ids = pad_batch_data( + batch_text_type_ids, + max_seq_len=self.max_seq_len, + pad_idx=self.pad_id) + padded_position_ids = 
pad_batch_data( + batch_position_ids, + max_seq_len=self.max_seq_len, + pad_idx=self.pad_id) + return padded_token_ids, padded_position_ids, padded_text_type_ids, input_mask + + """ + process function deals with a raw Chinese string as a sentence + this funtion returns a feed_dict + default key of the returned feed_dict: input_ids, position_ids, segment_ids, input_mask + """ + + def process(self, line): + text_a = convert_to_unicode(line) + tokens_a = self.tokenizer.tokenize(text_a) + if len(tokens_a) > self.max_seq_len - 2: + tokens_a = tokens_a[0:(self.max_seq_len - 2)] + tokens = [] + text_type_ids = [] + tokens.append("[CLS]") + text_type_ids.append(0) + for token in tokens_a: + tokens.append(token) + text_type_ids.append(0) + token_ids = self.tokenizer.convert_tokens_to_ids(tokens) + position_ids = list(range(len(token_ids))) + p_token_ids, p_pos_ids, p_text_type_ids, input_mask = \ + self._pad_batch(token_ids, text_type_ids, position_ids) + feed_result = { + self.feed_keys[0]: p_token_ids.reshape(-1).tolist(), + self.feed_keys[1]: p_pos_ids.reshape(-1).tolist(), + self.feed_keys[2]: p_text_type_ids.reshape(-1).tolist(), + self.feed_keys[3]: input_mask.reshape(-1).tolist() + } + return feed_result diff --git a/python/paddle_serving_app/reader/reader.py b/python/paddle_serving_app/reader/reader.py new file mode 100644 index 0000000000000000000000000000000000000000..0a0fa97b02abd5e8952f8d900e39ca7e30ec5028 --- /dev/null +++ b/python/paddle_serving_app/reader/reader.py @@ -0,0 +1,24 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +class ReaderBase(object): + def __init__(self): + self.feed_keys = [] + + def set_feed_keys(self, keys): + self.feed_keys = keys + + def get_feed_keys(self): + return self.feed_keys diff --git a/python/paddle_serving_app/reader/tokenization.py b/python/paddle_serving_app/reader/tokenization.py new file mode 100644 index 0000000000000000000000000000000000000000..0d84ed38468207e853e5270a59179b4274900cb0 --- /dev/null +++ b/python/paddle_serving_app/reader/tokenization.py @@ -0,0 +1,441 @@ +# coding=utf-8 +# Copyright 2018 The Google AI Language Team Authors. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+"""Tokenization classes.""" + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + +import collections +import io +import unicodedata +import six +import sentencepiece as spm +import pickle + + +def convert_to_unicode(text): # pylint: disable=doc-string-with-all-args + """Converts `text` to Unicode (if it's not already), assuming utf-8 input.""" + if six.PY3: + if isinstance(text, str): + return text + elif isinstance(text, bytes): + return text.decode("utf-8", "ignore") + else: + raise ValueError("Unsupported string type: %s" % (type(text))) + elif six.PY2: + if isinstance(text, str): + return text.decode("utf-8", "ignore") + elif isinstance(text, unicode): # noqa + return text + else: + raise ValueError("Unsupported string type: %s" % (type(text))) + else: + raise ValueError("Not running on Python2 or Python 3?") + + +def printable_text(text): # pylint: disable=doc-string-with-all-args + """Returns text encoded in a way suitable for print or `tf.logging`.""" + + # These functions want `str` for both Python2 and Python3, but in one case + # it's a Unicode string and in the other it's a byte string. + if six.PY3: + if isinstance(text, str): + return text + elif isinstance(text, bytes): + return text.decode("utf-8", "ignore") + else: + raise ValueError("Unsupported string type: %s" % (type(text))) + elif six.PY2: + if isinstance(text, str): + return text + elif isinstance(text, unicode): # noqa + return text.encode("utf-8") + else: + raise ValueError("Unsupported string type: %s" % (type(text))) + else: + raise ValueError("Not running on Python2 or Python 3?") + + +def load_vocab(vocab_file): # pylint: disable=doc-string-with-all-args, doc-string-with-returns + """Loads a vocabulary file into a dictionary.""" + vocab = collections.OrderedDict() + fin = io.open(vocab_file, "r", encoding="UTF-8") + for num, line in enumerate(fin): + items = convert_to_unicode(line.strip()).split("\t") + if len(items) > 2: + break + token = items[0] + index = items[1] if len(items) == 2 else num + token = token.strip() + vocab[token] = int(index) + fin.close() + return vocab + + +def convert_by_vocab(vocab, items): + """Converts a sequence of [tokens|ids] using the vocab.""" + output = [] + for item in items: + output.append(vocab[item]) + return output + + +def convert_tokens_to_ids(vocab, tokens): + return convert_by_vocab(vocab, tokens) + + +def convert_ids_to_tokens(inv_vocab, ids): + return convert_by_vocab(inv_vocab, ids) + + +def whitespace_tokenize(text): + """Runs basic whitespace cleaning and splitting on a peice of text.""" + text = text.strip() + if not text: + return [] + tokens = text.split() + return tokens + + +class FullTokenizer(object): + """Runs end-to-end tokenziation.""" + + def __init__(self, + vocab_file, + do_lower_case=True, + use_sentence_piece_vocab=False): + self.vocab = load_vocab(vocab_file) + self.inv_vocab = {v: k for k, v in self.vocab.items()} + self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case) + self.use_sentence_piece_vocab = use_sentence_piece_vocab + self.wordpiece_tokenizer = WordpieceTokenizer( + vocab=self.vocab, + use_sentence_piece_vocab=self.use_sentence_piece_vocab) + + def tokenize(self, text): + split_tokens = [] + for token in self.basic_tokenizer.tokenize(text): + for sub_token in self.wordpiece_tokenizer.tokenize(token): + split_tokens.append(sub_token) + + return split_tokens + + def convert_tokens_to_ids(self, tokens): + return convert_by_vocab(self.vocab, tokens) + + def 
convert_ids_to_tokens(self, ids): + return convert_by_vocab(self.inv_vocab, ids) + + +class CharTokenizer(object): + """Runs end-to-end tokenziation.""" + + def __init__(self, vocab_file, do_lower_case=True): + self.vocab = load_vocab(vocab_file) + self.inv_vocab = {v: k for k, v in self.vocab.items()} + self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab) + + def tokenize(self, text): + split_tokens = [] + for token in text.lower().split(" "): + for sub_token in self.wordpiece_tokenizer.tokenize(token): + split_tokens.append(sub_token) + + return split_tokens + + def convert_tokens_to_ids(self, tokens): + return convert_by_vocab(self.vocab, tokens) + + def convert_ids_to_tokens(self, ids): + return convert_by_vocab(self.inv_vocab, ids) + + +class WSSPTokenizer(object): # pylint: disable=doc-string-missing + def __init__(self, vocab_file, sp_model_dir, word_dict, ws=True, + lower=True): + self.vocab = load_vocab(vocab_file) + self.inv_vocab = {v: k for k, v in self.vocab.items()} + self.ws = ws + self.lower = lower + self.dict = pickle.load(open(word_dict, 'rb')) + self.sp_model = spm.SentencePieceProcessor() + self.window_size = 5 + self.sp_model.Load(sp_model_dir) + + def cut(self, chars): # pylint: disable=doc-string-missing + words = [] + idx = 0 + while idx < len(chars): + matched = False + for i in range(self.window_size, 0, -1): + cand = chars[idx:idx + i] + if cand in self.dict: + words.append(cand) + matched = True + break + if not matched: + i = 1 + words.append(chars[idx]) + idx += i + return words + + def tokenize(self, text, unk_token="[UNK]"): # pylint: disable=doc-string-missing + text = convert_to_unicode(text) + if self.ws: + text = [s for s in self.cut(text) if s != ' '] + else: + text = text.split(' ') + if self.lower: + text = [s.lower() for s in text] + text = ' '.join(text) + tokens = self.sp_model.EncodeAsPieces(text) + in_vocab_tokens = [] + for token in tokens: + if token in self.vocab: + in_vocab_tokens.append(token) + else: + in_vocab_tokens.append(unk_token) + return in_vocab_tokens + + def convert_tokens_to_ids(self, tokens): + return convert_by_vocab(self.vocab, tokens) + + def convert_ids_to_tokens(self, ids): + return convert_by_vocab(self.inv_vocab, ids) + + +class BasicTokenizer(object): + """Runs basic tokenization (punctuation splitting, lower casing, etc.).""" + + def __init__(self, do_lower_case=True): + """Constructs a BasicTokenizer. + + Args: + do_lower_case: Whether to lower case the input. + """ + self.do_lower_case = do_lower_case + + def tokenize(self, text): # pylint: disable=doc-string-with-all-args, doc-string-with-returns + """Tokenizes a piece of text.""" + text = convert_to_unicode(text) + text = self._clean_text(text) + + # This was added on November 1st, 2018 for the multilingual and Chinese + # models. This is also applied to the English models now, but it doesn't + # matter since the English models were not trained on any Chinese data + # and generally don't have any Chinese data in them (there are Chinese + # characters in the vocabulary because Wikipedia does have some Chinese + # words in the English Wikipedia.). 
+ text = self._tokenize_chinese_chars(text) + + orig_tokens = whitespace_tokenize(text) + split_tokens = [] + for token in orig_tokens: + if self.do_lower_case: + token = token.lower() + token = self._run_strip_accents(token) + split_tokens.extend(self._run_split_on_punc(token)) + + output_tokens = whitespace_tokenize(" ".join(split_tokens)) + return output_tokens + + def _run_strip_accents(self, text): + """Strips accents from a piece of text.""" + text = unicodedata.normalize("NFD", text) + output = [] + for char in text: + cat = unicodedata.category(char) + if cat == "Mn": + continue + output.append(char) + return "".join(output) + + def _run_split_on_punc(self, text): + """Splits punctuation on a piece of text.""" + chars = list(text) + i = 0 + start_new_word = True + output = [] + while i < len(chars): + char = chars[i] + if _is_punctuation(char): + output.append([char]) + start_new_word = True + else: + if start_new_word: + output.append([]) + start_new_word = False + output[-1].append(char) + i += 1 + + return ["".join(x) for x in output] + + def _tokenize_chinese_chars(self, text): + """Adds whitespace around any CJK character.""" + output = [] + for char in text: + cp = ord(char) + if self._is_chinese_char(cp): + output.append(" ") + output.append(char) + output.append(" ") + else: + output.append(char) + return "".join(output) + + def _is_chinese_char(self, cp): + """Checks whether CP is the codepoint of a CJK character.""" + # This defines a "chinese character" as anything in the CJK Unicode block: + # https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block) + # + # Note that the CJK Unicode block is NOT all Japanese and Korean characters, + # despite its name. The modern Korean Hangul alphabet is a different block, + # as is Japanese Hiragana and Katakana. Those alphabets are used to write + # space-separated words, so they are not treated specially and handled + # like the all of the other languages. + if ((cp >= 0x4E00 and cp <= 0x9FFF) or # + (cp >= 0x3400 and cp <= 0x4DBF) or # + (cp >= 0x20000 and cp <= 0x2A6DF) or # + (cp >= 0x2A700 and cp <= 0x2B73F) or # + (cp >= 0x2B740 and cp <= 0x2B81F) or # + (cp >= 0x2B820 and cp <= 0x2CEAF) or + (cp >= 0xF900 and cp <= 0xFAFF) or # + (cp >= 0x2F800 and cp <= 0x2FA1F)): # + return True + + return False + + def _clean_text(self, text): + """Performs invalid character removal and whitespace cleanup on text.""" + output = [] + for char in text: + cp = ord(char) + if cp == 0 or cp == 0xfffd or _is_control(char): + continue + if _is_whitespace(char): + output.append(" ") + else: + output.append(char) + return "".join(output) + + +class WordpieceTokenizer(object): + """Runs WordPiece tokenziation.""" + + def __init__(self, + vocab, + unk_token="[UNK]", + max_input_chars_per_word=100, + use_sentence_piece_vocab=False): + self.vocab = vocab + self.unk_token = unk_token + self.max_input_chars_per_word = max_input_chars_per_word + self.use_sentence_piece_vocab = use_sentence_piece_vocab + + def tokenize(self, text): # pylint: disable=doc-string-with-all-args + """Tokenizes a piece of text into its word pieces. + + This uses a greedy longest-match-first algorithm to perform tokenization + using the given vocabulary. + + For example: + input = "unaffable" + output = ["un", "##aff", "##able"] + + Args: + text: A single token or whitespace separated tokens. This should have + already been passed through `BasicTokenizer. + + Returns: + A list of wordpiece tokens. 
+ """ + + text = convert_to_unicode(text) + + output_tokens = [] + for token in whitespace_tokenize(text): + chars = list(token) + if len(chars) > self.max_input_chars_per_word: + output_tokens.append(self.unk_token) + continue + + is_bad = False + start = 0 + sub_tokens = [] + while start < len(chars): + end = len(chars) + cur_substr = None + while start < end: + substr = "".join(chars[start:end]) + if start == 0 and self.use_sentence_piece_vocab: + substr = u'\u2581' + substr + if start > 0 and not self.use_sentence_piece_vocab: + substr = "##" + substr + if substr in self.vocab: + cur_substr = substr + break + end -= 1 + if cur_substr is None: + is_bad = True + break + sub_tokens.append(cur_substr) + start = end + + if is_bad: + output_tokens.append(self.unk_token) + else: + output_tokens.extend(sub_tokens) + return output_tokens + + +def _is_whitespace(char): + """Checks whether `chars` is a whitespace character.""" + # \t, \n, and \r are technically contorl characters but we treat them + # as whitespace since they are generally considered as such. + if char == " " or char == "\t" or char == "\n" or char == "\r": + return True + cat = unicodedata.category(char) + if cat == "Zs": + return True + return False + + +def _is_control(char): + """Checks whether `chars` is a control character.""" + # These are technically control characters but we count them as whitespace + # characters. + if char == "\t" or char == "\n" or char == "\r": + return False + cat = unicodedata.category(char) + if cat.startswith("C"): + return True + return False + + +def _is_punctuation(char): + """Checks whether `chars` is a punctuation character.""" + cp = ord(char) + # We treat all non-letter/number ASCII as punctuation. + # Characters such as "^", "$", and "`" are not in the Unicode + # Punctuation class but we treat them as punctuation anyways, for + # consistency. + if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or + (cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)): + return True + cat = unicodedata.category(char) + if cat.startswith("P"): + return True + return False diff --git a/python/paddle_serving_app/version.py b/python/paddle_serving_app/version.py new file mode 100644 index 0000000000000000000000000000000000000000..80f647be56d09740adfb9d68dd47bb0b1fa2c985 --- /dev/null +++ b/python/paddle_serving_app/version.py @@ -0,0 +1,15 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" Paddle Serving App version string """ +serving_app_version = "0.0.1" diff --git a/tools/serving_build.sh b/tools/serving_build.sh index 8bb305c750b7b6a60eaeb44bcbfa87746f7f25dc..93c11012108fbc8ed32503e96ff1422e0844c041 100644 --- a/tools/serving_build.sh +++ b/tools/serving_build.sh @@ -58,7 +58,7 @@ function build_client() { cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \ -DPYTHON_LIBRARIES=$PYTHONROOT/lib64/libpython2.7.so \ -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \ - -DCLIENT_ONLY=ON .. + -DCLIENT=ON .. 
rerun "make -j2 >/dev/null" 3 # due to some network reasons, compilation may fail pip install -U python/dist/paddle_serving_client* >/dev/null ;; @@ -82,7 +82,7 @@ function build_server() { cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \ -DPYTHON_LIBRARIES=$PYTHONROOT/lib64/libpython2.7.so \ -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \ - -DCLIENT_ONLY=OFF .. + -DSERVER=ON .. rerun "make -j2 >/dev/null" 3 # due to some network reasons, compilation may fail check_cmd "make install -j2 >/dev/null" pip install -U python/dist/paddle_serving_server* >/dev/null @@ -91,7 +91,7 @@ function build_server() { cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ \ -DPYTHON_LIBRARIES=$PYTHONROOT/lib64/libpython2.7.so \ -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python \ - -DCLIENT_ONLY=OFF \ + -DSERVER=ON \ -DWITH_GPU=ON .. rerun "make -j2 >/dev/null" 3 # due to some network reasons, compilation may fail check_cmd "make install -j2 >/dev/null" @@ -111,6 +111,7 @@ function kill_server_process() { ps -ef | grep "serving" | grep -v serving_build | grep -v grep | awk '{print $2}' | xargs kill } + function python_test_fit_a_line() { # pwd: /Serving/python/examples cd fit_a_line # pwd: /Serving/python/examples/fit_a_line