diff --git a/README.md b/README.md
index c8b6816857ab82da7074afbe800354c605c2fd78..c158a77920c001b6123ac719e8bb32d93d9b9b5e 100644
--- a/README.md
+++ b/README.md
@@ -55,7 +55,7 @@
 pip install paddle-serving-server-gpu # GPU
 ```
 You may need to use a domestic mirror source (in China, you can use the Tsinghua mirror source, add `-i https://pypi.tuna.tsinghua.edu.cn/simple` to pip command) to speed up the download.
-
+The client package supports CentOS 7 and Ubuntu 18; alternatively, you can use the HTTP service without installing the client.
 
 Quick Start Example
 
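The line added above notes that the HTTP service can be called without installing the client package. As a minimal sketch (the service name, port, and feed/fetch keys below are assumed values for a locally started demo, not taken from this repository), an HTTP prediction request can be issued with plain `requests`, following the JSON format documented in doc/NEW_WEB_SERVICE.md later in this diff:

```python
# Hypothetical HTTP call to a locally started Paddle Serving web service;
# adjust the URL, feed fields, and fetch names to the model you deployed.
import json
import requests

url = "http://127.0.0.1:9292/uci/prediction"          # service name "uci" is assumed
payload = {
    "x": [0.0137, -0.1136, 0.2553, -0.0692],          # model feed field (assumed)
    "fetch": ["price"],                               # output names to fetch (assumed)
}
resp = requests.post(url, data=json.dumps(payload),
                     headers={"Content-Type": "application/json"})
print(resp.json())  # on success the body is wrapped as {"result": {...}}
```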
@@ -256,6 +256,7 @@ curl -H "Content-Type:application/json" -X POST -d '{"url": "https://paddle-serv ### Developers - [How to config Serving native operators on server side?](doc/SERVER_DAG.md) - [How to develop a new Serving operator?](doc/NEW_OPERATOR.md) +- [How to develop a new Web Service?](doc/NEW_WEB_SERVICE.md) - [Golang client](doc/IMDB_GO_CLIENT.md) - [Compile from source code](doc/COMPILE.md) diff --git a/README_CN.md b/README_CN.md index bd21f7c2b9bb1bd1a10486418efe554192782f48..4df883a7ccb6e5e9ed5cb0e9f26f147ef0cdb846 100644 --- a/README_CN.md +++ b/README_CN.md @@ -262,6 +262,7 @@ curl -H "Content-Type:application/json" -X POST -d '{"url": "https://paddle-serv ### 开发者教程 - [如何配置Server端的计算图?](doc/SERVER_DAG_CN.md) - [如何开发一个新的General Op?](doc/NEW_OPERATOR_CN.md) +- [如何开发一个新的Web Service?](doc/NEW_WEB_SERVICE_CN.md) - [如何在Paddle Serving使用Go Client?](doc/IMDB_GO_CLIENT_CN.md) - [如何编译PaddleServing?](doc/COMPILE_CN.md) diff --git a/core/general-client/src/general_model.cpp b/core/general-client/src/general_model.cpp index 65fa6587ecb68f18b72a03c7f54433252ea1608a..92dca7eeba53c2fa23020526faa83a19a38633b6 100644 --- a/core/general-client/src/general_model.cpp +++ b/core/general-client/src/general_model.cpp @@ -119,7 +119,7 @@ int PredictorClient::create_predictor_by_desc(const std::string &sdk_desc) { LOG(ERROR) << "Predictor Creation Failed"; return -1; } - _api.thrd_initialize(); + // _api.thrd_initialize(); return 0; } @@ -130,7 +130,7 @@ int PredictorClient::create_predictor() { LOG(ERROR) << "Predictor Creation Failed"; return -1; } - _api.thrd_initialize(); + // _api.thrd_initialize(); return 0; } @@ -152,7 +152,7 @@ int PredictorClient::batch_predict( int fetch_name_num = fetch_name.size(); - _api.thrd_clear(); + _api.thrd_initialize(); std::string variant_tag; _predictor = _api.fetch_predictor("general_model", &variant_tag); predict_res_batch.set_variant_tag(variant_tag); @@ -247,8 +247,9 @@ int PredictorClient::batch_predict( } else { client_infer_end = timeline.TimeStampUS(); postprocess_start = client_infer_end; - + VLOG(2) << "get model output num"; uint32_t model_num = res.outputs_size(); + VLOG(2) << "model num: " << model_num; for (uint32_t m_idx = 0; m_idx < model_num; ++m_idx) { VLOG(2) << "process model output index: " << m_idx; auto output = res.outputs(m_idx); @@ -326,6 +327,8 @@ int PredictorClient::batch_predict( fprintf(stderr, "%s\n", oss.str().c_str()); } + + _api.thrd_clear(); return 0; } diff --git a/core/predictor/common/macros.h b/core/predictor/common/macros.h index fa4a068668cb1a37c37a2726634c24be26a3fb40..ba3ac0dae3b22e68198c9ca9995c56a3ba31a55c 100644 --- a/core/predictor/common/macros.h +++ b/core/predictor/common/macros.h @@ -27,9 +27,9 @@ namespace predictor { } #endif -#ifdef WITH_GPU -#define USE_PTHREAD -#endif +// #ifdef WITH_GPU +// #define USE_PTHREAD +// #endif #ifdef USE_PTHREAD diff --git a/core/sdk-cpp/include/macros.h b/core/sdk-cpp/include/macros.h new file mode 100644 index 0000000000000000000000000000000000000000..66eaef445f3b54f7d0209c11667aafaed5522569 --- /dev/null +++ b/core/sdk-cpp/include/macros.h @@ -0,0 +1,80 @@ +// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +// +// Licensed under the Apache License, Version 2.0 (the "License"); +// you may not use this file except in compliance with the License. 
+// You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, software +// distributed under the License is distributed on an "AS IS" BASIS, +// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +// See the License for the specific language governing permissions and +// limitations under the License. + +#pragma once +#include "core/sdk-cpp/include/common.h" + +namespace baidu { +namespace paddle_serving { +namespace sdk_cpp { + +#ifndef CATCH_ANY_AND_RET +#define CATCH_ANY_AND_RET(errno) \ + catch (...) { \ + LOG(ERROR) << "exception catched"; \ + return errno; \ + } +#endif + +#define USE_PTHREAD + +#ifdef USE_PTHREAD + +#define THREAD_T pthread_t +#define THREAD_KEY_T pthread_key_t +#define THREAD_MUTEX_T pthread_mutex_t +#define THREAD_KEY_CREATE pthread_key_create +#define THREAD_SETSPECIFIC pthread_setspecific +#define THREAD_GETSPECIFIC pthread_getspecific +#define THREAD_CREATE pthread_create +#define THREAD_CANCEL pthread_cancel +#define THREAD_JOIN pthread_join +#define THREAD_KEY_DELETE pthread_key_delete +#define THREAD_MUTEX_INIT pthread_mutex_init +#define THREAD_MUTEX_LOCK pthread_mutex_lock +#define THREAD_MUTEX_UNLOCK pthread_mutex_unlock +#define THREAD_MUTEX_DESTROY pthread_mutex_destroy +#define THREAD_COND_T pthread_cond_t +#define THREAD_COND_INIT pthread_cond_init +#define THREAD_COND_SIGNAL pthread_cond_signal +#define THREAD_COND_WAIT pthread_cond_wait +#define THREAD_COND_DESTROY pthread_cond_destroy + +#else + +#define THREAD_T bthread_t +#define THREAD_KEY_T bthread_key_t +#define THREAD_MUTEX_T bthread_mutex_t +#define THREAD_KEY_CREATE bthread_key_create +#define THREAD_SETSPECIFIC bthread_setspecific +#define THREAD_GETSPECIFIC bthread_getspecific +#define THREAD_CREATE bthread_start_background +#define THREAD_CANCEL bthread_stop +#define THREAD_JOIN bthread_join +#define THREAD_KEY_DELETE bthread_key_delete +#define THREAD_MUTEX_INIT bthread_mutex_init +#define THREAD_MUTEX_LOCK bthread_mutex_lock +#define THREAD_MUTEX_UNLOCK bthread_mutex_unlock +#define THREAD_MUTEX_DESTROY bthread_mutex_destroy +#define THREAD_COND_T bthread_cond_t +#define THREAD_COND_INIT bthread_cond_init +#define THREAD_COND_SIGNAL bthread_cond_signal +#define THREAD_COND_WAIT bthread_cond_wait +#define THREAD_COND_DESTROY bthread_cond_destroy + +#endif + +} // namespace sdk_cpp +} // namespace paddle_serving +} // namespace baidu diff --git a/core/sdk-cpp/include/stub_impl.h b/core/sdk-cpp/include/stub_impl.h index dc3c16ca6414b915ba3fd5d4feaac501bbe07cba..a112ddf25a2451e1bcffd62654bc0c6d043c9d80 100644 --- a/core/sdk-cpp/include/stub_impl.h +++ b/core/sdk-cpp/include/stub_impl.h @@ -19,6 +19,7 @@ #include #include "core/sdk-cpp/include/common.h" #include "core/sdk-cpp/include/endpoint_config.h" +#include "core/sdk-cpp/include/macros.h" #include "core/sdk-cpp/include/predictor.h" #include "core/sdk-cpp/include/stub.h" @@ -245,7 +246,7 @@ class StubImpl : public Stub { const brpc::ChannelOptions& options); StubTLS* get_tls() { - return static_cast(bthread_getspecific(_bthread_key)); + return static_cast(THREAD_GETSPECIFIC(_bthread_key)); } private: @@ -262,7 +263,8 @@ class StubImpl : public Stub { uint32_t _package_size; // tls handlers - bthread_key_t _bthread_key; + // bthread_key_t _bthread_key; + THREAD_KEY_T _bthread_key; // bvar variables std::map _ltc_bvars; diff --git a/core/sdk-cpp/include/stub_impl.hpp b/core/sdk-cpp/include/stub_impl.hpp index 
6fad5b5e2c702652126bc159333046790fcefc69..756c12893393f10a1c2ebfa83bf3a94adac7a4bc 100644 --- a/core/sdk-cpp/include/stub_impl.hpp +++ b/core/sdk-cpp/include/stub_impl.hpp @@ -70,7 +70,7 @@ int StubImpl::initialize(const VariantInfo& var, _endpoint = ep; - if (bthread_key_create(&_bthread_key, NULL) != 0) { + if (THREAD_KEY_CREATE(&_bthread_key, NULL) != 0) { LOG(FATAL) << "Failed create key for stub tls"; return -1; } @@ -132,13 +132,13 @@ int StubImpl::initialize(const VariantInfo& var, template int StubImpl::thrd_initialize() { - if (bthread_getspecific(_bthread_key) != NULL) { + if (THREAD_GETSPECIFIC(_bthread_key) != NULL) { LOG(WARNING) << "Already thread initialized for stub"; return 0; } StubTLS* tls = new (std::nothrow) StubTLS(); - if (!tls || bthread_setspecific(_bthread_key, tls) != 0) { + if (!tls || THREAD_SETSPECIFIC(_bthread_key, tls) != 0) { LOG(FATAL) << "Failed binding tls data to bthread_key"; return -1; } diff --git a/doc/DESIGN_DOC.md b/doc/DESIGN_DOC.md index 2e7baaeb885c732bb723979e90edae529e7cbc74..b8169b43b63c9f2548a29d454eb24f8577b755ef 100644 --- a/doc/DESIGN_DOC.md +++ b/doc/DESIGN_DOC.md @@ -164,12 +164,26 @@ Distributed Sparse Parameter Indexing is commonly seen in advertising and recomm

 - Why do we need to support distributed sparse parameter indexing in Paddle Serving? 1) In some recommendation scenarios, the number of features can be up to hundreds of billions that a single node can not hold the parameters within random access memory. 2) Paddle Serving supports distributed sparse parameter indexing that can couple with paddle inference. Users do not need to do extra work to have a low latency inference engine with hundreds of billions of parameters.
-
-### 3.2 Model Management, online A/B test, Model Online Reloading
-Paddle Serving's C++ engine supports model management, online A/B test and model online reloading. Currently, python API is not released yet, please wait for the next release.
+### 3.2 Online A/B test
+
+After sufficient offline evaluation of the model, an online A/B test is usually needed to decide whether to enable the service on a large scale. The following figure shows the basic structure of an A/B test with Paddle Serving. Once the client is configured accordingly, traffic is automatically distributed to different servers to achieve the A/B test. Please refer to [ABTEST in Paddle Serving](ABTEST_IN_PADDLE_SERVING.md) for specific examples.
+
+![A/B test with Paddle Serving](abtest.png)
+
+### 3.3 Model Online Reloading
+
+In order to ensure the availability of services, the model needs to be hot loaded without service interruption. Paddle Serving supports this feature and provides a tool that monitors newly produced models and updates the local models accordingly. Please refer to [Hot loading in Paddle Serving](HOT_LOADING_IN_SERVING.md) for specific examples.
+
+### 3.4 Model Management
+
+Paddle Serving's C++ engine supports model management. Currently, the Python API for this feature is not released yet; please wait for the next release.
 
 ## 4. User Types
 Paddle Serving provides RPC and HTTP protocol for users. For HTTP service, we recommend users with median or small traffic services to use, and the latency is not a strict requirement. For RPC protocol, we recommend high traffic services and low latency required services to use. For users who use distributed sparse parameter indexing built-in service, it is not necessary to care about the underlying details of communication. The following figure gives out several scenarios that user may want to use Paddle Serving.
diff --git a/doc/DESIGN_DOC_CN.md b/doc/DESIGN_DOC_CN.md
index 2a63d56593dc47a5ca69f9c5c324710ee6dc3fc6..c068ac35bb6beebe70a6f873318c6d5059fc51e7 100644
--- a/doc/DESIGN_DOC_CN.md
+++ b/doc/DESIGN_DOC_CN.md
@@ -159,14 +159,30 @@ Paddle Serving的核心执行引擎是一个有向无环图,图中的每个节

-
+ 为什么要使用Paddle Serving提供的分布式稀疏参数索引服务?1)在一些推荐场景中,模型的输入特征规模通常可以达到上千亿,单台机器无法支撑T级别模型在内存的保存,因此需要进行分布式存储。2)Paddle Serving提供的分布式稀疏参数索引服务,具有并发请求多个节点的能力,从而以较低的延时完成预估服务。
-### 3.2 模型管理、在线A/B流量测试、模型热加载
+### 3.2 在线A/B流量测试
+
+在对模型进行充分的离线评估后,通常需要进行在线A/B测试,来决定是否大规模上线服务。下图为使用Paddle Serving做A/B测试的基本结构,Client端做好相应的配置后,自动将流量分发给不同的Server,从而完成A/B测试。具体例子请参考[如何使用Paddle Serving做ABTEST](ABTEST_IN_PADDLE_SERVING_CN.md)。
+
+![使用Paddle Serving做A/B测试](abtest.png)
+
+
+### 3.3 模型热加载
-Paddle Serving的C++引擎支持模型管理、在线A/B流量测试、模型热加载等功能,当前在Python API还有没完全开放这部分功能的配置,敬请期待。
+为了保证服务的可用性,需要在服务不中断的情况下对模型进行热加载。Paddle Serving对该特性进行了支持,并提供了一个监控产出模型更新本地模型的工具,具体例子请参考[Paddle Serving中的模型热加载](HOT_LOADING_IN_SERVING_CN.md)。
+
+### 3.4 模型管理
+
+Paddle Serving的C++引擎支持模型管理功能,当前Python API还没有完全开放这部分功能的配置,敬请期待。
 
 ## 4. 用户类型
+
 Paddle Serving面向的用户提供RPC和HTTP两种访问协议。对于HTTP协议,我们更倾向于流量中小型的服务使用,并且对延时没有严格要求的AI服务开发者。对于RPC协议,我们面向流量较大,对延时要求更高的用户,此外RPC的客户端可能也处在一个大系统的服务中,这种情况下非常适合使用Paddle Serving提供的RPC服务。对于使用分布式稀疏参数索引服务而言,Paddle Serving的用户不需要关心底层的细节,其调用本质也是通过RPC服务再调用RPC服务。下图给出了当前设计的Paddle Serving可能会使用Serving服务的几种场景。

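Section 4 of both design documents above contrasts the RPC and HTTP access paths. A minimal RPC-side sketch is shown below; it mirrors the `paddle_serving_client.Client` calls that appear elsewhere in this diff (for example in `python/examples/imagenet/image_rpc_client.py`), while the config path, endpoint, and feed/fetch names are placeholder assumptions:

```python
# Minimal RPC client sketch; mirrors the Client usage shown in
# python/examples/imagenet/image_rpc_client.py in this diff.
# The config path, endpoint, and feed/fetch names are placeholders.
from paddle_serving_client import Client

client = Client()
client.load_client_config("serving_client_conf/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9393"])

# `img` would normally come from a reader such as ImageReader().process_image(...)
img = [0.0] * (3 * 224 * 224)  # placeholder flattened input tensor
fetch_map = client.predict(feed={"image": img}, fetch=["score"])
print(fetch_map["score"])
```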
diff --git a/doc/NEW_WEB_SERVICE.md b/doc/NEW_WEB_SERVICE.md
new file mode 100644
index 0000000000000000000000000000000000000000..63f62a774d914c7271bfed1508881e04f74f2ca8
--- /dev/null
+++ b/doc/NEW_WEB_SERVICE.md
@@ -0,0 +1,64 @@
+# How to develop a new Web Service?
+
+([简体中文](NEW_WEB_SERVICE_CN.md)|English)
+
+This document takes the image classification service based on the ImageNet dataset as an example to introduce how to develop a new web service. The complete code can be found [here](https://github.com/PaddlePaddle/Serving/blob/develop/python/examples/imagenet/image_classification_service.py).
+
+## WebService base class
+
+Paddle Serving implements the [WebService](https://github.com/PaddlePaddle/Serving/blob/develop/python/paddle_serving_server/web_service.py#L23) base class. You need to override its `preprocess` and `postprocess` methods. The default implementation is as follows:
+
+```python
+class WebService(object):
+
+    def preprocess(self, feed={}, fetch=[]):
+        return feed, fetch
+
+    def postprocess(self, feed={}, fetch=[], fetch_map=None):
+        return fetch_map
+```
+
+### preprocess
+
+The preprocess method has two input parameters, `feed` and `fetch`. For an HTTP request `request`:
+
+- The value of `feed` is the request data `request.json`
+- The value of `fetch` is the fetch part `request.json["fetch"]` of the request data
+
+The return values are the feed and fetch values used in the prediction.
+
+### postprocess
+
+The postprocess method has three input parameters, `feed`, `fetch` and `fetch_map`:
+
+- The value of `feed` is the request data `request.json`
+- The value of `fetch` is the fetch part `request.json["fetch"]` of the request data
+- The value of `fetch_map` is the model output
+
+The return value will be wrapped as `{"result": fetch_map}` and used as the response to the HTTP request.
+
+## Develop the ImageService class
+
+```python
+class ImageService(WebService):
+    def preprocess(self, feed={}, fetch=[]):
+        reader = ImageReader()
+        if "image" not in feed:
+            raise ("feed data error!")
+        if isinstance(feed["image"], list):
+            feed_batch = []
+            for image in feed["image"]:
+                sample = base64.b64decode(image)
+                img = reader.process_image(sample)
+                res_feed = {}
+                res_feed["image"] = img
+                feed_batch.append(res_feed)
+            return feed_batch, fetch
+        else:
+            sample = base64.b64decode(feed["image"])
+            img = reader.process_image(sample)
+            res_feed = {}
+            res_feed["image"] = img
+            return res_feed, fetch
+```
+
+For the above `ImageService`, only the `preprocess` method is overridden, converting the Base64-encoded image data into the format required by prediction.
diff --git a/doc/NEW_WEB_SERVICE_CN.md b/doc/NEW_WEB_SERVICE_CN.md
new file mode 100644
index 0000000000000000000000000000000000000000..e1a21d8a0e91a114c9d94b09ef0afa9a0d29de89
--- /dev/null
+++ b/doc/NEW_WEB_SERVICE_CN.md
@@ -0,0 +1,64 @@
+# 如何开发一个新的Web Service?
+ +(简体中文|[English](NEW_WEB_SERVICE.md)) + +本文档将以Imagenet图像分类服务为例,来介绍如何开发一个新的Web Service。您可以在[这里](https://github.com/PaddlePaddle/Serving/blob/develop/python/examples/imagenet/image_classification_service.py)查阅完整的代码。 + +## WebService基类 + +Paddle Serving实现了[WebService](https://github.com/PaddlePaddle/Serving/blob/develop/python/paddle_serving_server/web_service.py#L23)基类,您需要重写它的`preprocess`方法和`postprocess`方法,默认实现如下: + +```python +class WebService(object): + + def preprocess(self, feed={}, fetch=[]): + return feed, fetch + def postprocess(self, feed={}, fetch=[], fetch_map=None): + return fetch_map +``` + +### preprocess方法 + +preprocess方法有两个输入参数,`feed`和`fetch`。对于一个HTTP请求`request`: + +- `feed`的值为请求数据`request.json` +- `fetch`的值为请求数据中的fetch部分`request.json["fetch"]` + +返回值分别是预测过程中用到的feed和fetch值。 + +### postprocess方法 + +postprocess方法有三个输入参数,`feed`、`fetch`和`fetch_map`: + +- `feed`的值为请求数据`request.json` +- `fetch`的值为请求数据中的fetch部分`request.json["fetch"]` +- `fetch_map`的值为fetch到的模型输出值 + +返回值将会被处理成`{"reslut": fetch_map}`作为HTTP请求的返回。 + +## 开发ImageService类 + +```python +class ImageService(WebService): + def preprocess(self, feed={}, fetch=[]): + reader = ImageReader() + if "image" not in feed: + raise ("feed data error!") + if isinstance(feed["image"], list): + feed_batch = [] + for image in feed["image"]: + sample = base64.b64decode(image) + img = reader.process_image(sample) + res_feed = {} + res_feed["image"] = img.reshape(-1) + feed_batch.append(res_feed) + return feed_batch, fetch + else: + sample = base64.b64decode(feed["image"]) + img = reader.process_image(sample) + res_feed = {} + res_feed["image"] = img.reshape(-1) + return res_feed, fetch +``` + +对于上述的`ImageService`,只重写了前处理方法,将base64格式的图片数据处理成模型预测需要的数据格式。 diff --git a/doc/abtest.png b/doc/abtest.png index 3a33c4b30b96b32645d84291133cff0f0b79fcca..5e8f8980dffb46f4960390e6edb281968ae8bd83 100644 Binary files a/doc/abtest.png and b/doc/abtest.png differ diff --git a/python/examples/bert/bert_web_service.py b/python/examples/bert/bert_web_service.py index e22e379d67e076d4712c8971b6d342b4eaceadb2..f72694c0e8c5bb7ab2778278d3fc79f13516dc12 100644 --- a/python/examples/bert/bert_web_service.py +++ b/python/examples/bert/bert_web_service.py @@ -36,3 +36,4 @@ bert_service.set_gpus(gpu_ids) bert_service.prepare_server( workdir="workdir", port=int(sys.argv[2]), device="gpu") bert_service.run_server() +bert_service.run_flask() diff --git a/python/examples/faster_rcnn_model/000000570688.jpg b/python/examples/faster_rcnn_model/000000570688.jpg old mode 100755 new mode 100644 diff --git a/python/examples/faster_rcnn_model/README.md b/python/examples/faster_rcnn_model/README.md index aca6e5183daf9a46096587fab8276b4e7346f746..66f65b5ad77186dd3dd08acaddc85356277fe6fd 100644 --- a/python/examples/faster_rcnn_model/README.md +++ b/python/examples/faster_rcnn_model/README.md @@ -2,55 +2,23 @@ ([简体中文](./README_CN.md)|English) -This article requires [Paddle Detection](https://github.com/PaddlePaddle/PaddleDetection) trained models and configuration files. If users want to quickly deploy on Paddle Serving, please read the Chapter 2 directly. - -## 1. Train an object detection model - -Users can read [Paddle Detection Getting Started](https://github.com/PaddlePaddle/PaddleDetection/blob/release/0.2/docs/tutorials/GETTING_STARTED_cn.md) to understand the background of Paddle Detection. The purpose of PaddleDetection is to provide a rich and easy-to-use object detection model for industry and academia. 
Not only is it superior in performance and easy to deploy, but it can also flexibly meet the needs of algorithm research. - -### Environmental requirements - -CPU version: No special requirements - -GPU version: CUDA 9.0 and above - -``` -git clone https://github.com/PaddlePaddle/PaddleDetection -cd PaddleDetection -``` -Next, you can train the faster rcnn model -``` -python tools/train.py -c configs/faster_rcnn_r50_1x.yml -``` -The time for training the model depends on the situation and is related to the computing power of the training equipment and the number of iterations. -In the training process, `faster_rcnn_r50_1x.yml` defines the snapshot of the saved model. After the final training, the model with the best effect will be saved as `best_model.pdmodel`, which is a compressed PaddleDetection Exclusive model files. - -**If we want the model to be used by Paddle Serving, we must do export_model.** - -Output model -``` -python export_model.py -``` -## 2. Start the model and predict -If users do not use the Paddle Detection project to train models, we are here to provide you with sample model downloads. If you trained the model with Paddle Detection, you can skip the ** Download Model ** section. - -### Download model +### Get The Faster RCNN Model ``` wget https://paddle-serving.bj.bcebos.com/pddet_demo/faster_rcnn_model.tar.gz -wget https://paddle-serving.bj.bcebos.com/pddet_demo/paddle_serving_app-0.0.1-py2-none-any.whl wget https://paddle-serving.bj.bcebos.com/pddet_demo/infer_cfg.yml -tar xf faster_rcnn_model.tar.gz -mv faster_rcnn_model/pddet *. ``` +If you want to have more detection models, please refer to [Paddle Detection Model Zoo](https://github.com/PaddlePaddle/PaddleDetection/blob/release/0.2/docs/MODEL_ZOO_cn.md) ### Start the service ``` -GLOG_v = 2 python -m paddle_serving_server_gpu.serve --model pddet_serving_model --port 9494 --gpu_id 0 +tar xf faster_rcnn_model.tar.gz +mv faster_rcnn_model/pddet *. +GLOG_v=2 python -m paddle_serving_server_gpu.serve --model pddet_serving_model --port 9494 --gpu_id 0 ``` ### Perform prediction ``` -python test_client.py --config_path = infer_cfg.yml --infer_img = 000000570688.jpg --dump_result --visualize +python test_client.py pddet_client_conf/serving_client_conf.prototxt infer_cfg.yml 000000570688.jpg ``` ## 3. Result analysis diff --git a/python/examples/faster_rcnn_model/README_CN.md b/python/examples/faster_rcnn_model/README_CN.md index a1ac36ff93f5d75a4d8874b89f3cb1509589c4d0..7aa4d343f05df92068d36499b48d9aa5ad7b2a2e 100644 --- a/python/examples/faster_rcnn_model/README_CN.md +++ b/python/examples/faster_rcnn_model/README_CN.md @@ -1,57 +1,24 @@ -# Faster RCNN模型 +# 使用Paddle Serving部署Faster RCNN模型 (简体中文|[English](./README.md)) -本文需要[Paddle Detection](https://github.com/PaddlePaddle/PaddleDetection)训练的模型和配置文件。如果用户想要快速部署在Paddle Serving上,请直接阅读第二章节。 - -## 1. 
训练物体检测模型 - -用户可以阅读 [Paddle Detection入门使用](https://github.com/PaddlePaddle/PaddleDetection/blob/release/0.2/docs/tutorials/GETTING_STARTED_cn.md)来了解Paddle Detection的背景。PaddleDetection的目的是为工业界和学术界提供丰富、易用的目标检测模型。不仅性能优越、易于部署,而且能够灵活的满足算法研究的需求。 - -### 环境要求 - -CPU版: 没有特别要求 - -GPU版: CUDA 9.0及以上 - -``` -git clone https://github.com/PaddlePaddle/PaddleDetection -cd PaddleDetection -``` -接下来可以训练faster rcnn模型 -``` -python tools/train.py -c configs/faster_rcnn_r50_1x.yml -``` -训练模型的时间视情况而定,与训练的设备算力和迭代轮数相关。 -在训练的过程中,`faster_rcnn_r50_1x.yml`当中定义了保存模型的`snapshot`,在最终训练完成后,效果最好的模型,会被保存为`best_model.pdmodel`,这是一个经过压缩的PaddleDetection的专属模型文件。 - -**如果我们要让模型可被Paddle Serving所使用,必须做export_model。** - -输出模型 -``` -python export_model.py -``` - -## 2. 启动模型并预测 -如果用户没有用Paddle Detection项目训练模型,我们也在此为您提供示例模型下载。如果您用Paddle Detection训练了模型,可以跳过 **下载模型** 部分。 - -### 下载模型 +## 获得Faster RCNN模型 ``` wget https://paddle-serving.bj.bcebos.com/pddet_demo/faster_rcnn_model.tar.gz -wget https://paddle-serving.bj.bcebos.com/pddet_demo/paddle_serving_app-0.0.1-py2-none-any.whl wget https://paddle-serving.bj.bcebos.com/pddet_demo/infer_cfg.yml -tar xf faster_rcnn_model.tar.gz -mv faster_rcnn_model/pddet* . ``` +如果你想要更多的检测模型,请参考[Paddle检测模型库](https://github.com/PaddlePaddle/PaddleDetection/blob/release/0.2/docs/MODEL_ZOO_cn.md) ### 启动服务 ``` +tar xf faster_rcnn_model.tar.gz +mv faster_rcnn_model/pddet* . GLOG_v=2 python -m paddle_serving_server_gpu.serve --model pddet_serving_model --port 9494 --gpu_id 0 ``` ### 执行预测 ``` -python test_client.py --config_path=infer_cfg.yml --infer_img=000000570688.jpg --dump_result --visualize +python test_client.py pddet_client_conf/serving_client_conf.prototxt infer_cfg.yml 000000570688.jpg ``` ## 3. 结果分析 diff --git a/python/examples/imagenet/image_classification_service.py b/python/examples/imagenet/image_classification_service.py index 2776eb1bc7126fab32dbb05774fb0060506b61af..ee3ae6dd1c64bda154bbadabe8d1e91da734fb5a 100644 --- a/python/examples/imagenet/image_classification_service.py +++ b/python/examples/imagenet/image_classification_service.py @@ -31,14 +31,14 @@ class ImageService(WebService): sample = base64.b64decode(image) img = reader.process_image(sample) res_feed = {} - res_feed["image"] = img.reshape(-1) + res_feed["image"] = img feed_batch.append(res_feed) return feed_batch, fetch else: sample = base64.b64decode(feed["image"]) img = reader.process_image(sample) res_feed = {} - res_feed["image"] = img.reshape(-1) + res_feed["image"] = img return res_feed, fetch @@ -47,3 +47,4 @@ image_service.load_model_config(sys.argv[1]) image_service.prepare_server( workdir=sys.argv[2], port=int(sys.argv[3]), device="cpu") image_service.run_server() +image_service.run_flask() diff --git a/python/examples/imagenet/image_classification_service_gpu.py b/python/examples/imagenet/image_classification_service_gpu.py index 287392e4f3ea922686cb03a032ba0b8e13d39709..d8ba4ed8cda9f600fb6d33441b90accdf5ecc532 100644 --- a/python/examples/imagenet/image_classification_service_gpu.py +++ b/python/examples/imagenet/image_classification_service_gpu.py @@ -12,12 +12,12 @@ # See the License for the specific language governing permissions and # limitations under the License. 
-from paddle_serving_server_gpu.web_service import WebService import sys import cv2 import base64 import numpy as np from image_reader import ImageReader +from paddle_serving_server_gpu.web_service import WebService class ImageService(WebService): @@ -32,14 +32,14 @@ class ImageService(WebService): sample = base64.b64decode(image) img = reader.process_image(sample) res_feed = {} - res_feed["image"] = img.reshape(-1) + res_feed["image"] = img feed_batch.append(res_feed) return feed_batch, fetch else: sample = base64.b64decode(feed["image"]) img = reader.process_image(sample) res_feed = {} - res_feed["image"] = img.reshape(-1) + res_feed["image"] = img return res_feed, fetch @@ -49,3 +49,4 @@ image_service.set_gpus("0,1") image_service.prepare_server( workdir=sys.argv[2], port=int(sys.argv[3]), device="gpu") image_service.run_server() +image_service.run_flask() diff --git a/python/examples/imagenet/image_http_client.py b/python/examples/imagenet/image_http_client.py index b72f6b02f4e6065430edcdd58a2eefc4336a10a5..61b021be246dc4b843e608dcea21418419731b49 100644 --- a/python/examples/imagenet/image_http_client.py +++ b/python/examples/imagenet/image_http_client.py @@ -31,7 +31,7 @@ def predict(image_path, server): r = requests.post( server, data=req, headers={"Content-Type": "application/json"}) try: - print(r.json()["score"][0]) + print(r.json()["result"]["score"]) except ValueError: print(r.text) return r diff --git a/python/examples/imagenet/image_rpc_client.py b/python/examples/imagenet/image_rpc_client.py index 76f3a043474bf75e1e96a44f18ac7dfe3da11f78..f905179629f0dfc8c9da09b0cae90bae7be3687e 100644 --- a/python/examples/imagenet/image_rpc_client.py +++ b/python/examples/imagenet/image_rpc_client.py @@ -26,7 +26,7 @@ start = time.time() for i in range(1000): with open("./data/n01440764_10026.JPEG", "rb") as f: img = f.read() - img = reader.process_image(img).reshape(-1) + img = reader.process_image(img) fetch_map = client.predict(feed={"image": img}, fetch=["score"]) end = time.time() print(end - start) diff --git a/python/examples/imdb/text_classify_service.py b/python/examples/imdb/text_classify_service.py index 50d0d1aebba34a630c16442c6e3d00460bb1bc6a..5ff919ebb44b9a2590b148e4ccf8b91ce85f3f53 100755 --- a/python/examples/imdb/text_classify_service.py +++ b/python/examples/imdb/text_classify_service.py @@ -39,3 +39,4 @@ imdb_service.prepare_server( workdir=sys.argv[2], port=int(sys.argv[3]), device="cpu") imdb_service.prepare_dict({"dict_file_path": sys.argv[4]}) imdb_service.run_server() +imdb_service.run_flask() diff --git a/python/paddle_serving_client/__init__.py b/python/paddle_serving_client/__init__.py index 8aeb22c92c781a4fb27b70403537f7016f05940d..053062ee508b33e7602dea5a53b4868a662452cd 100644 --- a/python/paddle_serving_client/__init__.py +++ b/python/paddle_serving_client/__init__.py @@ -273,7 +273,7 @@ class Client(object): if self.fetch_names_to_type_[name] == int_type: result_map[name] = result_batch.get_int64_by_name(mi, name) shape = result_batch.get_shape(mi, name) - result_map[name] = np.array(result_map[name]) + result_map[name] = np.array(result_map[name], dtype='int64') result_map[name].shape = shape if name in self.lod_tensor_set: result_map["{}.lod".format( @@ -281,7 +281,8 @@ class Client(object): elif self.fetch_names_to_type_[name] == float_type: result_map[name] = result_batch.get_float_by_name(mi, name) shape = result_batch.get_shape(mi, name) - result_map[name] = np.array(result_map[name]) + result_map[name] = np.array( + result_map[name], dtype='float32') 
result_map[name].shape = shape if name in self.lod_tensor_set: result_map["{}.lod".format( diff --git a/python/paddle_serving_server/__init__.py b/python/paddle_serving_server/__init__.py index 8062a7c83d99c0bed712ff46840b81f4557a353d..a58fb11ac3ee1fbe5086ae4381f6d6208c0c73ec 100644 --- a/python/paddle_serving_server/__init__.py +++ b/python/paddle_serving_server/__init__.py @@ -351,6 +351,7 @@ class Server(object): self._prepare_resource(workdir) self._prepare_engine(self.model_config_paths, device) self._prepare_infer_service(port) + self.port = port self.workdir = workdir infer_service_fn = "{}/{}".format(workdir, self.infer_service_fn) diff --git a/python/paddle_serving_server/web_service.py b/python/paddle_serving_server/web_service.py index 56cbd976e49bdd0743b4354a9c6a31872657f168..7e69b241f50255aa69d34c1405b72eacb675be04 100755 --- a/python/paddle_serving_server/web_service.py +++ b/python/paddle_serving_server/web_service.py @@ -18,6 +18,8 @@ from flask import Flask, request, abort from multiprocessing import Pool, Process from paddle_serving_server import OpMaker, OpSeqMaker, Server from paddle_serving_client import Client +from contextlib import closing +import socket class WebService(object): @@ -41,19 +43,34 @@ class WebService(object): server.set_num_threads(16) server.load_model_config(self.model_config) server.prepare_server( - workdir=self.workdir, port=self.port + 1, device=self.device) + workdir=self.workdir, port=self.port_list[0], device=self.device) server.run_server() + def port_is_available(self, port): + with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: + sock.settimeout(2) + result = sock.connect_ex(('0.0.0.0', port)) + if result != 0: + return True + else: + return False + def prepare_server(self, workdir="", port=9393, device="cpu"): self.workdir = workdir self.port = port self.device = device + default_port = 12000 + self.port_list = [] + for i in range(1000): + if self.port_is_available(default_port + i): + self.port_list.append(default_port + i) + break def _launch_web_service(self): - self.client_service = Client() - self.client_service.load_client_config( - "{}/serving_server_conf.prototxt".format(self.model_config)) - self.client_service.connect(["0.0.0.0:{}".format(self.port + 1)]) + self.client = Client() + self.client.load_client_config("{}/serving_server_conf.prototxt".format( + self.model_config)) + self.client.connect(["0.0.0.0:{}".format(self.port_list[0])]) def get_prediction(self, request): if not request.json: @@ -65,12 +82,12 @@ class WebService(object): request.json["fetch"]) if isinstance(feed, dict) and "fetch" in feed: del feed["fetch"] - fetch_map = self.client_service.predict(feed=feed, fetch=fetch) + fetch_map = self.client.predict(feed=feed, fetch=fetch) for key in fetch_map: fetch_map[key] = fetch_map[key].tolist() - result = self.postprocess( + fetch_map = self.postprocess( feed=feed, fetch=fetch, fetch_map=fetch_map) - result = {"result": result} + result = {"result": fetch_map} except ValueError: result = {"result": "Request Value Error"} return result @@ -84,6 +101,24 @@ class WebService(object): p_rpc = Process(target=self._launch_rpc_service) p_rpc.start() + def run_flask(self): + app_instance = Flask(__name__) + + @app_instance.before_first_request + def init(): + self._launch_web_service() + + service_name = "/" + self.name + "/prediction" + + @app_instance.route(service_name, methods=["POST"]) + def run(): + return self.get_prediction(request) + + app_instance.run(host="0.0.0.0", + port=self.port, + 
threaded=False, + processes=4) + def preprocess(self, feed=[], fetch=[]): return feed, fetch diff --git a/python/paddle_serving_server_gpu/web_service.py b/python/paddle_serving_server_gpu/web_service.py index 6482f9b252ca5520bdde31d6aa8e1069364e82a0..2ec996b1db89bdff3c4550caa566bec5af2d9506 100644 --- a/python/paddle_serving_server_gpu/web_service.py +++ b/python/paddle_serving_server_gpu/web_service.py @@ -14,14 +14,15 @@ # pylint: disable=doc-string-missing from flask import Flask, request, abort -from paddle_serving_server_gpu import OpMaker, OpSeqMaker, Server -import paddle_serving_server_gpu as serving +from contextlib import closing from multiprocessing import Pool, Process, Queue from paddle_serving_client import Client +from paddle_serving_server_gpu import OpMaker, OpSeqMaker, Server from paddle_serving_server_gpu.serve import start_multi_card - +import socket import sys import numpy as np +import paddle_serving_server_gpu as serving class WebService(object): @@ -67,22 +68,39 @@ class WebService(object): def _launch_rpc_service(self, service_idx): self.rpc_service_list[service_idx].run_server() + def port_is_available(self, port): + with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock: + sock.settimeout(2) + result = sock.connect_ex(('0.0.0.0', port)) + if result != 0: + return True + else: + return False + def prepare_server(self, workdir="", port=9393, device="gpu", gpuid=0): self.workdir = workdir self.port = port self.device = device self.gpuid = gpuid + self.port_list = [] + default_port = 12000 + for i in range(1000): + if self.port_is_available(default_port + i): + self.port_list.append(default_port + i) + if len(self.port_list) > len(self.gpus): + break + if len(self.gpus) == 0: # init cpu service self.rpc_service_list.append( self.default_rpc_service( - self.workdir, self.port + 1, -1, thread_num=10)) + self.workdir, self.port_list[0], -1, thread_num=10)) else: for i, gpuid in enumerate(self.gpus): self.rpc_service_list.append( self.default_rpc_service( "{}_{}".format(self.workdir, i), - self.port + 1 + i, + self.port_list[i], gpuid, thread_num=10)) @@ -94,9 +112,9 @@ class WebService(object): endpoints = "" if gpu_num > 0: for i in range(gpu_num): - endpoints += "127.0.0.1:{},".format(self.port + i + 1) + endpoints += "127.0.0.1:{},".format(self.port_list[i]) else: - endpoints = "127.0.0.1:{}".format(self.port + 1) + endpoints = "127.0.0.1:{}".format(self.port_list[0]) self.client.connect([endpoints]) def get_prediction(self, request): @@ -115,6 +133,7 @@ class WebService(object): result = self.postprocess( feed=feed, fetch=fetch, fetch_map=fetch_map) result = {"result": result} + result = {"result": fetch_map} except ValueError: result = {"result": "Request Value Error"} return result @@ -132,6 +151,24 @@ class WebService(object): for p in server_pros: p.start() + def run_flask(self): + app_instance = Flask(__name__) + + @app_instance.before_first_request + def init(): + self._launch_web_service() + + service_name = "/" + self.name + "/prediction" + + @app_instance.route(service_name, methods=["POST"]) + def run(): + return self.get_prediction(request) + + app_instance.run(host="0.0.0.0", + port=self.port, + threaded=False, + processes=4) + def preprocess(self, feed=[], fetch=[]): return feed, fetch
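
The web_service.py changes above split serving into two steps: `run_server()` launches the backing RPC workers on a free port chosen from `port_list` (starting at 12000 and probed via `port_is_available`), and the new `run_flask()` starts the Flask app that exposes `/{name}/prediction` and wraps the model output as `{"result": fetch_map}`. A minimal usage sketch, mirroring the updated example scripts in this diff (the model directory, workdir, and port are illustrative values):

```python
# Minimal sketch of launching a web service with the updated API in this diff:
# run_server() starts the backing RPC workers, run_flask() serves HTTP requests.
from paddle_serving_server.web_service import WebService


class TextService(WebService):
    def preprocess(self, feed={}, fetch=[]):
        # Map the raw request JSON onto the model's feed variables here.
        return feed, fetch

    def postprocess(self, feed={}, fetch=[], fetch_map=None):
        # fetch_map is wrapped by the framework as {"result": fetch_map}.
        return fetch_map


service = TextService(name="text")
service.load_model_config("serving_server_model")   # hypothetical model directory
service.prepare_server(workdir="workdir", port=9393, device="cpu")
service.run_server()   # internal RPC service on a port taken from port_list
service.run_flask()    # Flask HTTP endpoint /text/prediction on port 9393
```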