Commit 47ae3ad2 authored by MRXLT

bug fix

language: generic
sudo: required
dist: trusty
os:
- linux
env:
- COMPILE_TYPE=CPU DOCKERFILE_CPU=$PWD/tools/Dockerfile.ci
services:
- docker
before_install:
- docker build -f ${DOCKERFILE_CPU} -t serving-img:${COMPILE_TYPE} .
install:
- if [ $COMPILE_TYPE == "CPU" ]; then docker run -it -v $PWD:/Serving serving-img:${COMPILE_TYPE} /bin/bash Serving/tools/serving_check_style.sh ; fi;
- docker run -it -v $PWD:/Serving serving-img:${COMPILE_TYPE} /bin/bash Serving/tools/serving_build.sh $COMPILE_TYPE
......@@ -87,4 +87,3 @@ add_custom_command(TARGET general_model_config_py_proto POST_BUILD
WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR})
endif()
endif()
......@@ -20,16 +20,16 @@ namespace baidu {
namespace paddle_serving {
namespace configure {
int read_proto_conf(const std::string &conf_full_path,
google::protobuf::Message *conf);
int read_proto_conf(const std::string &conf_path,
const std::string &conf_file,
google::protobuf::Message *conf);
int write_proto_conf(google::protobuf::Message *message,
const std::string &output_path,
const std::string &output_file);
int read_proto_conf(const std::string &conf_full_path,
google::protobuf::Message *conf);
int read_proto_conf(const std::string &conf_path,
const std::string &conf_file,
google::protobuf::Message *conf);
int write_proto_conf(google::protobuf::Message *message,
const std::string &output_path,
const std::string &output_file);
} // namespace configure
} // namespace paddle_serving
......
......@@ -12,9 +12,9 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "core/cube/cube-server/include/cube/server.h"
#include <brpc/server.h>
#include "core/cube/cube-server/include/cube/framework.h"
#include "core/cube/cube-server/include/cube/server.h"
namespace rec {
namespace mcube {
......
......@@ -84,19 +84,14 @@ class PredictorClient {
PredictorRes& predict_res, // NOLINT
const int& pid);
std::vector<std::vector<float>> predict(
const std::vector<std::vector<float>>& float_feed,
const std::vector<std::string>& float_feed_name,
const std::vector<std::vector<int64_t>>& int_feed,
const std::vector<std::string>& int_feed_name,
const std::vector<std::string>& fetch_name);
std::vector<std::vector<std::vector<float>>> batch_predict(
int batch_predict(
const std::vector<std::vector<std::vector<float>>>& float_feed_batch,
const std::vector<std::string>& float_feed_name,
const std::vector<std::vector<std::vector<int64_t>>>& int_feed_batch,
const std::vector<std::string>& int_feed_name,
const std::vector<std::string>& fetch_name);
const std::vector<std::string>& fetch_name,
PredictorRes& predict_res_batch, // NOLINT
const int& pid);
private:
PredictorApi _api;
......
......@@ -264,26 +264,22 @@ int PredictorClient::predict(const std::vector<std::vector<float>> &float_feed,
return 0;
}
std::vector<std::vector<std::vector<float>>> PredictorClient::batch_predict(
int PredictorClient::batch_predict(
const std::vector<std::vector<std::vector<float>>> &float_feed_batch,
const std::vector<std::string> &float_feed_name,
const std::vector<std::vector<std::vector<int64_t>>> &int_feed_batch,
const std::vector<std::string> &int_feed_name,
const std::vector<std::string> &fetch_name) {
const std::vector<std::string> &fetch_name,
PredictorRes &predict_res_batch,
const int &pid) {
int batch_size = std::max(float_feed_batch.size(), int_feed_batch.size());
std::vector<std::vector<std::vector<float>>> fetch_result_batch;
if (fetch_name.size() == 0) {
return fetch_result_batch;
}
predict_res_batch._int64_map.clear();
predict_res_batch._float_map.clear();
Timer timeline;
int64_t preprocess_start = timeline.TimeStampUS();
fetch_result_batch.resize(batch_size);
int fetch_name_num = fetch_name.size();
for (int bi = 0; bi < batch_size; bi++) {
fetch_result_batch[bi].resize(fetch_name_num);
}
_api.thrd_clear();
_predictor = _api.fetch_predictor("general_model");
......@@ -371,20 +367,36 @@ std::vector<std::vector<std::vector<float>>> PredictorClient::batch_predict(
} else {
client_infer_end = timeline.TimeStampUS();
postprocess_start = client_infer_end;
for (auto &name : fetch_name) {
predict_res_batch._int64_map[name].resize(batch_size);
predict_res_batch._float_map[name].resize(batch_size);
}
for (int bi = 0; bi < batch_size; bi++) {
for (auto &name : fetch_name) {
int idx = _fetch_name_to_idx[name];
int len = res.insts(bi).tensor_array(idx).data_size();
VLOG(2) << "fetch name: " << name;
VLOG(2) << "tensor data size: " << len;
fetch_result_batch[bi][idx].resize(len);
VLOG(2)
<< "fetch name " << name << " index " << idx << " first data "
<< *(const float *)res.insts(bi).tensor_array(idx).data(0).c_str();
/*
TBA
*/
if (_fetch_name_to_type[name] == 0) {
int len = res.insts(bi).tensor_array(idx).int64_data_size();
VLOG(2) << "fetch tensor : " << name << " type: int64 len : " << len;
predict_res_batch._int64_map[name][bi].resize(len);
VLOG(2) << "fetch name " << name << " index " << idx << " first data "
<< res.insts(bi).tensor_array(idx).int64_data(0);
for (int i = 0; i < len; ++i) {
predict_res_batch._int64_map[name][bi][i] =
res.insts(bi).tensor_array(idx).int64_data(i);
}
} else if (_fetch_name_to_type[name] == 1) {
int len = res.insts(bi).tensor_array(idx).float_data_size();
VLOG(2) << "fetch tensor : " << name
<< " type: float32 len : " << len;
predict_res_batch._float_map[name][bi].resize(len);
VLOG(2) << "fetch name " << name << " index " << idx << " first data "
<< res.insts(bi).tensor_array(idx).float_data(0);
for (int i = 0; i < len; ++i) {
predict_res_batch._float_map[name][bi][i] =
res.insts(bi).tensor_array(idx).float_data(i);
}
}
}
}
postprocess_end = timeline.TimeStampUS();
......@@ -393,6 +405,7 @@ std::vector<std::vector<std::vector<float>>> PredictorClient::batch_predict(
if (FLAGS_profile_client) {
std::ostringstream oss;
oss << "PROFILE\t"
<< "pid:" << pid << "\t"
<< "prepro_0:" << preprocess_start << " "
<< "prepro_1:" << preprocess_end << " "
<< "client_infer_0:" << client_infer_start << " "
......@@ -411,7 +424,7 @@ std::vector<std::vector<std::vector<float>>> PredictorClient::batch_predict(
fprintf(stderr, "%s\n", oss.str().c_str());
}
return fetch_result_batch;
return 0;
}
} // namespace general_model
......
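For orientation, a minimal sketch (not part of this commit) of how the per-fetch-name results that batch_predict now writes into PredictorRes are consumed from the Python side; the endpoint, config path, and feed names below are assumptions for illustration only.

``` python
# Hypothetical usage of the Python client wrapper that sits on top of
# PredictorClient; feed names must match the model's client config.
from paddle_serving_client import Client

client = Client()
client.load_client_config("serving_client_conf/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9292"])

# One fetch_map entry per fetch name, mirroring _int64_map / _float_map.
fetch_map = client.predict(feed={"words": [1, 2, 3]}, fetch=["prediction"])
print(fetch_map["prediction"])
```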
......@@ -17,18 +17,18 @@
#include "core/general-client/include/general_model.h"
using namespace std;
using namespace std; // NOLINT
using baidu::paddle_serving::general_model::PredictorClient;
using baidu::paddle_serving::general_model::FetchedMap;
int main(int argc, char * argv[]) {
PredictorClient * client = new PredictorClient();
int main(int argc, char* argv[]) {
PredictorClient* client = new PredictorClient();
client->init("inference.conf");
client->set_predictor_conf("./", "predictor.conf");
client->create_predictor();
std::vector<std::vector<float> > float_feed;
std::vector<std::vector<int64_t> > int_feed;
std::vector<std::vector<float>> float_feed;
std::vector<std::vector<int64_t>> int_feed;
std::vector<std::string> float_feed_name;
std::vector<std::string> int_feed_name = {"words", "label"};
std::vector<std::string> fetch_name = {"cost", "acc", "prediction"};
......@@ -53,13 +53,14 @@ int main(int argc, char * argv[]) {
cin >> label;
int_feed.push_back({label});
FetchedMap result;
client->predict(
float_feed, float_feed_name,
int_feed, int_feed_name, fetch_name,
&result);
client->predict(float_feed,
float_feed_name,
int_feed,
int_feed_name,
fetch_name,
&result);
cout << label << "\t" << result["prediction"][1] << endl;
......
......@@ -90,12 +90,16 @@ PYBIND11_MODULE(serving_client, m) {
const std::vector<std::vector<std::vector<int64_t>>>
&int_feed_batch,
const std::vector<std::string> &int_feed_name,
const std::vector<std::string> &fetch_name) {
const std::vector<std::string> &fetch_name,
PredictorRes &predict_res_batch,
const int &pid) {
return self.batch_predict(float_feed_batch,
float_feed_name,
int_feed_batch,
int_feed_name,
fetch_name);
fetch_name,
predict_res_batch,
pid);
});
}
......
......@@ -69,8 +69,7 @@ int GeneralCopyOp::inference() {
for (int i = 0; i < out->size(); ++i) {
int64_t *src_ptr = static_cast<int64_t *>(in->at(i).data.data());
out->at(i).data.Resize(
out->at(i).lod[0].back() * sizeof(int64_t));
out->at(i).data.Resize(out->at(i).lod[0].back() * sizeof(int64_t));
out->at(i).shape = {out->at(i).lod[0].back(), 1};
int64_t *tgt_ptr = static_cast<int64_t *>(out->at(i).data.data());
for (int j = 0; j < out->at(i).lod[0].back(); ++j) {
......
......@@ -24,23 +24,22 @@
#include "paddle_inference_api.h" // NOLINT
#endif
#include <string>
#include "core/predictor/framework/resource.h"
#include "core/general-server/op/general_infer_helper.h"
#include "core/general-server/general_model_service.pb.h"
#include "core/general-server/op/general_infer_helper.h"
#include "core/predictor/framework/resource.h"
namespace baidu {
namespace paddle_serving {
namespace serving {
class GeneralCopyOp :
public baidu::paddle_serving::predictor::OpWithChannel<GeneralBlob> {
class GeneralCopyOp
: public baidu::paddle_serving::predictor::OpWithChannel<GeneralBlob> {
public:
typedef std::vector<paddle::PaddleTensor> TensorVector;
DECLARE_OP(GeneralCopyOp);
int inference();
};
} // namespace serving
......
......@@ -39,7 +39,6 @@ class GeneralInferOp
DECLARE_OP(GeneralInferOp);
int inference();
};
} // namespace serving
......
......@@ -188,8 +188,7 @@ int GeneralReaderOp::inference() {
for (int j = 0; j < batch_size; ++j) {
int elem_num = req->insts(j).tensor_array(i).int64_data_size();
for (int k = 0; k < elem_num; ++k) {
dst_ptr[offset + k] =
req->insts(j).tensor_array(i).int64_data(k);
dst_ptr[offset + k] = req->insts(j).tensor_array(i).int64_data(k);
}
if (out->at(i).lod.size() == 1) {
offset = out->at(i).lod[0][j + 1];
......@@ -203,8 +202,7 @@ int GeneralReaderOp::inference() {
for (int j = 0; j < batch_size; ++j) {
int elem_num = req->insts(j).tensor_array(i).float_data_size();
for (int k = 0; k < elem_num; ++k) {
dst_ptr[offset + k] =
req->insts(j).tensor_array(i).float_data(k);
dst_ptr[offset + k] = req->insts(j).tensor_array(i).float_data(k);
}
if (out->at(i).lod.size() == 1) {
offset = out->at(i).lod[0][j + 1];
......
......@@ -24,24 +24,23 @@
#include "paddle_inference_api.h" // NOLINT
#endif
#include <string>
#include "core/predictor/framework/resource.h"
#include "core/general-server/op/general_infer_helper.h"
#include "core/general-server/general_model_service.pb.h"
#include "core/general-server/load_general_model_service.pb.h"
#include "core/general-server/op/general_infer_helper.h"
#include "core/predictor/framework/resource.h"
namespace baidu {
namespace paddle_serving {
namespace serving {
class GeneralReaderOp : public baidu::paddle_serving::predictor::OpWithChannel<
GeneralBlob> {
class GeneralReaderOp
: public baidu::paddle_serving::predictor::OpWithChannel<GeneralBlob> {
public:
typedef std::vector<paddle::PaddleTensor> TensorVector;
DECLARE_OP(GeneralReaderOp);
int inference();
};
} // namespace serving
......
......@@ -122,8 +122,7 @@ int GeneralResponseOp::inference() {
} else {
for (int j = 0; j < batch_size; ++j) {
FetchInst *fetch_p = res->mutable_insts(j);
fetch_p->mutable_tensor_array(var_idx)->add_int64_data(
data_ptr[0]);
fetch_p->mutable_tensor_array(var_idx)->add_int64_data(data_ptr[0]);
}
}
}
......@@ -143,16 +142,15 @@ int GeneralResponseOp::inference() {
if (var_size == batch_size) {
for (int j = 0; j < batch_size; ++j) {
for (int k = j * cap; k < (j + 1) * cap; ++k) {
FetchInst * fetch_p = res->mutable_insts(j);
FetchInst *fetch_p = res->mutable_insts(j);
fetch_p->mutable_tensor_array(var_idx)->add_float_data(
data_ptr[k]);
}
}
} else {
for (int j = 0; j < batch_size; ++j) {
FetchInst * fetch_p = res->mutable_insts(j);
fetch_p->mutable_tensor_array(var_idx)->add_float_data(
data_ptr[0]);
FetchInst *fetch_p = res->mutable_insts(j);
fetch_p->mutable_tensor_array(var_idx)->add_float_data(data_ptr[0]);
}
}
}
......
......@@ -39,7 +39,6 @@ class GeneralResponseOp
DECLARE_OP(GeneralResponseOp);
int inference();
};
} // namespace serving
......
......@@ -12,11 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "core/general-server/op/general_text_reader_op.h"
#include <algorithm>
#include <iostream>
#include <memory>
#include <sstream>
#include "core/general-server/op/general_text_reader_op.h"
#include "core/predictor/framework/infer.h"
#include "core/predictor/framework/memory.h"
#include "core/util/include/timer.h"
......@@ -32,7 +32,6 @@ using baidu::paddle_serving::predictor::general_model::Request;
using baidu::paddle_serving::predictor::general_model::FeedInst;
using baidu::paddle_serving::predictor::PaddleGeneralModelConfig;
int GeneralTextReaderOp::inference() {
// read request from client
const Request *req = dynamic_cast<const Request *>(get_request_message());
......@@ -132,11 +131,9 @@ int GeneralTextReaderOp::inference() {
int64_t *dst_ptr = static_cast<int64_t *>(out->at(i).data.data());
int offset = 0;
for (int j = 0; j < batch_size; ++j) {
for (int k = 0;
k < req->insts(j).tensor_array(i).int_data_size();
for (int k = 0; k < req->insts(j).tensor_array(i).int_data_size();
++k) {
dst_ptr[offset + k] =
req->insts(j).tensor_array(i).int_data(k);
dst_ptr[offset + k] = req->insts(j).tensor_array(i).int_data(k);
}
if (out->at(i).lod.size() == 1) {
offset = out->at(i).lod[0][j + 1];
......@@ -148,11 +145,9 @@ int GeneralTextReaderOp::inference() {
float *dst_ptr = static_cast<float *>(out->at(i).data.data());
int offset = 0;
for (int j = 0; j < batch_size; ++j) {
for (int k = 0;
k < req->insts(j).tensor_array(i).int_data_size();
for (int k = 0; k < req->insts(j).tensor_array(i).int_data_size();
++k) {
dst_ptr[offset + k] =
req->insts(j).tensor_array(i).int_data(k);
dst_ptr[offset + k] = req->insts(j).tensor_array(i).int_data(k);
}
if (out->at(i).lod.size() == 1) {
offset = out->at(i).lod[0][j + 1];
......
......@@ -24,17 +24,17 @@
#include "paddle_inference_api.h" // NOLINT
#endif
#include <string>
#include "core/predictor/framework/resource.h"
#include "core/general-server/op/general_infer_helper.h"
#include "core/general-server/general_model_service.pb.h"
#include "core/general-server/load_general_model_service.pb.h"
#include "core/general-server/op/general_infer_helper.h"
#include "core/predictor/framework/resource.h"
namespace baidu {
namespace paddle_serving {
namespace serving {
class GeneralTextReaderOp :
public baidu::paddle_serving::predictor::OpWithChannel<GeneralBlob> {
class GeneralTextReaderOp
: public baidu::paddle_serving::predictor::OpWithChannel<GeneralBlob> {
public:
typedef std::vector<paddle::PaddleTensor> TensorVector;
......
......@@ -40,7 +40,6 @@ class GeneralTextResponseOp
DECLARE_OP(GeneralTextResponseOp);
int inference();
};
} // namespace serving
......
......@@ -28,13 +28,9 @@ message Tensor {
repeated int32 shape = 6;
};
message FeedInst {
repeated Tensor tensor_array = 1;
};
message FeedInst { repeated Tensor tensor_array = 1; };
message FetchInst {
repeated Tensor tensor_array = 1;
};
message FetchInst { repeated Tensor tensor_array = 1; };
message Request {
repeated FeedInst insts = 1;
......
......@@ -27,11 +27,11 @@
// limitations under the License.
#pragma once
#include <chrono>
#include <chrono> // NOLINT
#include <functional>
#include <memory>
#include <unordered_map>
#include <vector>
#include <functional>
class AbstractKVDB;
class FileReader;
class ParamDict;
......@@ -65,7 +65,7 @@ class FileReader {
std::string data;
FILE *stream = nullptr;
const int max_buffer = 256;
char buffer[max_buffer];
char buffer[max_buffer]; // NOLINT
cmd.append(" 2>&1");
stream = popen(cmd.c_str(), "r");
if (stream) {
......@@ -76,7 +76,8 @@ class FileReader {
return data;
};
std::string cmd = "md5sum " + this->filename_;
// TODO: throw exception if error occurs during execution of shell command
// NOLINT TODO: throw exception if error occurs during execution of shell
// command
std::string md5val = getCmdOut(cmd);
this->time_stamp_ = md5val == this->last_md5_val_
? this->time_stamp_
......@@ -93,7 +94,7 @@ class FileReader {
return this->time_stamp_;
}
inline virtual ~FileReader(){};
inline virtual ~FileReader() {}
private:
std::string filename_;
......@@ -128,7 +129,7 @@ class ParamDict {
virtual ~ParamDict();
private:
std::function<std::pair<Key, Value>(std::string)> read_func_;
std::function<std::pair<Key, Value>(std::string)> read_func_; // NOLINT
std::vector<FileReaderPtr> file_reader_lst_;
AbsKVDBPtr front_db, back_db;
};
......@@ -139,5 +140,5 @@ class ParamDictMgr {
void InsertParamDict(std::string, ParamDictPtr);
private:
std::unordered_map<std::string, ParamDictPtr> ParamDictMap;
std::unordered_map<std::string, ParamDictPtr> ParamDictMap; // NOLINT
};
......@@ -25,7 +25,7 @@
class RocksDBWrapper {
public:
RocksDBWrapper(std::string db_name);
RocksDBWrapper(std::string db_name); // NOLINT
std::string Get(std::string key);
bool Put(std::string key, std::string value);
......@@ -33,6 +33,7 @@ class RocksDBWrapper {
static std::shared_ptr<RocksDBWrapper> RocksDBWrapperFactory(
std::string db_name = "SparseMatrix");
void Close();
private:
rocksdb::DB *db_;
std::string db_name_;
......
......@@ -16,7 +16,7 @@
#include <fstream>
#include <iterator>
#include <sstream>
#include <thread>
#include <thread> // NOLINT
#include "core/kvdb/include/kvdb/rocksdb_impl.h"
std::vector<FileReaderPtr> ParamDict::GetDictReaderLst() {
......@@ -33,8 +33,10 @@ void ParamDict::SetFileReaderLst(std::vector<std::string> lst) {
std::vector<float> ParamDict::GetSparseValue(std::string feasign,
std::string slot) {
auto BytesToFloat = [](uint8_t* byte_array) { return *((float*)byte_array); };
// TODO: the concatation of feasign and slot is TBD.
auto BytesToFloat = [](uint8_t* byte_array) {
return *((float*)byte_array); // NOLINT
};
// NOLINT TODO: the concatenation of feasign and slot is TBD.
std::string result = front_db->Get(feasign + slot);
std::vector<float> value;
if (result == "NOT_FOUND") return value;
......@@ -87,7 +89,7 @@ bool ParamDict::InsertSparseValue(std::string feasign,
value.push_back(raw_values_ptr[i]);
}
back_db->Set(key, value);
// TODO: change stateless to stateful
// NOLINT TODO: change stateless to stateful
return true;
}
......@@ -140,5 +142,4 @@ void ParamDict::CreateKVDB() {
this->back_db->CreateDB();
}
ParamDict::~ParamDict() {
}
ParamDict::~ParamDict() {}
......@@ -51,7 +51,7 @@ void RocksDBWrapper::SetDBName(std::string db_name) {
void RocksDBWrapper::Close() {
if (db_ != nullptr) {
db_->Close();
delete(db_);
delete (db_);
db_ = nullptr;
}
}
......
......@@ -32,12 +32,8 @@ void RocksKVDB::Set(std::string key, std::string value) {
return;
}
void RocksKVDB::Close() {
this->db_->Close();
}
void RocksKVDB::Close() { this->db_->Close(); }
std::string RocksKVDB::Get(std::string key) { return this->db_->Get(key); }
RocksKVDB::~RocksKVDB() {
this->db_->Close();
}
RocksKVDB::~RocksKVDB() { this->db_->Close(); }
......@@ -15,14 +15,14 @@
#include <list>
#include "boost/algorithm/string.hpp"
#include "boost/scoped_ptr.hpp"
#include "core/pdcodegen/pds_option.pb.h"
#include "core/pdcodegen/plugin/strutil.h"
#include "core/pdcodegen/plugin/substitute.h"
#include "google/protobuf/compiler/code_generator.h"
#include "google/protobuf/compiler/plugin.h"
#include "google/protobuf/descriptor.h"
#include "google/protobuf/io/printer.h"
#include "google/protobuf/io/zero_copy_stream.h"
#include "core/pdcodegen/pds_option.pb.h"
#include "core/pdcodegen/plugin/strutil.h"
#include "core/pdcodegen/plugin/substitute.h"
using std::string;
using google::protobuf::Descriptor;
using google::protobuf::FileDescriptor;
......@@ -115,7 +115,8 @@ class PdsCodeGenerator : public CodeGenerator {
printer.Print("#include \"core/predictor/common/inner_common.h\"\n");
printer.Print("#include \"core/predictor/framework/service.h\"\n");
printer.Print("#include \"core/predictor/framework/manager.h\"\n");
printer.Print("#include \"core/predictor/framework/service_manager.h\"\n");
printer.Print(
"#include \"core/predictor/framework/service_manager.h\"\n");
}
if (generate_stub) {
printer.Print("#include <baidu/rpc/parallel_channel.h>\n");
......@@ -845,7 +846,8 @@ class PdsCodeGenerator : public CodeGenerator {
printer.Print("#include \"core/predictor/common/inner_common.h\"\n");
printer.Print("#include \"core/predictor/framework/service.h\"\n");
printer.Print("#include \"core/predictor/framework/manager.h\"\n");
printer.Print("#include \"core/predictor/framework/service_manager.h\"\n");
printer.Print(
"#include \"core/predictor/framework/service_manager.h\"\n");
}
if (generate_stub) {
printer.Print("#include <brpc/parallel_channel.h>\n");
......
......@@ -52,9 +52,9 @@
#include "glog/raw_logging.h"
#include "core/configure/general_model_config.pb.h"
#include "core/configure/include/configure_parser.h"
#include "core/configure/server_configure.pb.h"
#include "core/configure/general_model_config.pb.h"
#include "core/predictor/common/constant.h"
#include "core/predictor/common/types.h"
......
......@@ -45,7 +45,7 @@ int DagView::init(Dag* dag, const std::string& service_name) {
<< "at:" << si;
return ERR_MEM_ALLOC_FAILURE;
}
VLOG(2) << "stage[" << si << "] name: " << stage->full_name;
VLOG(2) << "stage[" << si << "] name: " << stage->full_name;
VLOG(2) << "stage[" << si << "] node size: " << stage->nodes.size();
vstage->full_name = service_name + NAME_DELIMITER + stage->full_name;
uint32_t node_size = stage->nodes.size();
......@@ -74,7 +74,7 @@ int DagView::init(Dag* dag, const std::string& service_name) {
LOG(WARNING) << "Failed init op, type:" << node->type;
return ERR_INTERNAL_FAILURE;
}
op->set_full_name(service_name + NAME_DELIMITER + node->full_name);
vnode->conf = node;
vnode->op = op;
......@@ -85,9 +85,9 @@ int DagView::init(Dag* dag, const std::string& service_name) {
VLOG(2) << "set op pre name: \n"
<< "current op name: " << vstage->nodes.back()->op->op_name()
<< " previous op name: "
<< _view[si-1]->nodes.back()->op->op_name();
<< _view[si - 1]->nodes.back()->op->op_name();
vstage->nodes.back()->op->set_pre_node_name(
_view[si-1]->nodes.back()->op->op_name());
_view[si - 1]->nodes.back()->op->op_name());
}
_view.push_back(vstage);
}
......
......@@ -16,8 +16,8 @@
#include <map>
#include <string>
#include <utility>
#include "glog/raw_logging.h"
#include "core/predictor/common/inner_common.h"
#include "glog/raw_logging.h"
namespace baidu {
namespace paddle_serving {
namespace predictor {
......
......@@ -197,13 +197,10 @@ int Resource::general_model_initialize(const std::string& path,
for (int i = 0; i < feed_var_num; ++i) {
_config->_feed_name[i] = model_config.feed_var(i).name();
_config->_feed_alias_name[i] = model_config.feed_var(i).alias_name();
VLOG(2) << "feed var[" << i << "]: "
<< _config->_feed_name[i];
VLOG(2) << "feed var[" << i << "]: "
<< _config->_feed_alias_name[i];
VLOG(2) << "feed var[" << i << "]: " << _config->_feed_name[i];
VLOG(2) << "feed var[" << i << "]: " << _config->_feed_alias_name[i];
_config->_feed_type[i] = model_config.feed_var(i).feed_type();
VLOG(2) << "feed type[" << i << "]: "
<< _config->_feed_type[i];
VLOG(2) << "feed type[" << i << "]: " << _config->_feed_type[i];
if (model_config.feed_var(i).is_lod_tensor()) {
VLOG(2) << "var[" << i << "] is lod tensor";
......
......@@ -13,10 +13,10 @@
// limitations under the License.
#pragma once
#include <map>
#include <memory>
#include <string>
#include <vector>
#include <map>
#include "core/cube/cube-api/include/cube_api.h"
#include "core/kvdb/include/kvdb/paddle_rocksdb.h"
#include "core/predictor/common/inner_common.h"
......@@ -36,15 +36,15 @@ class PaddleGeneralModelConfig {
public:
std::vector<std::string> _feed_name;
std::vector<std::string> _feed_alias_name;
std::vector<int> _feed_type; // 0 int64, 1 float
std::vector<bool> _is_lod_feed; // true lod tensor
std::vector<int> _feed_type; // 0 int64, 1 float
std::vector<bool> _is_lod_feed; // true lod tensor
std::vector<bool> _is_lod_fetch; // whether a fetch var is lod_tensor
std::vector<int> _capacity; // capacity for each tensor
/*
feed_shape_ for feeded variable
feed_shape_[i][j] represents the jth dim for ith input Tensor
if is_lod_feed_[i] == False, feed_shape_[i][0] = -1
*/
std::vector<int> _capacity; // capacity for each tensor
/*
feed_shape_ for each fed variable
feed_shape_[i][j] represents the jth dim for ith input Tensor
if is_lod_feed_[i] == False, feed_shape_[i][0] = -1
*/
std::vector<std::vector<int>> _feed_shape;
std::vector<std::string> _fetch_name;
......
......@@ -99,8 +99,8 @@ static void g_change_server_port() {
if (read_proto_conf(FLAGS_inferservice_path.c_str(),
FLAGS_inferservice_file.c_str(),
&conf) != 0) {
VLOG(2) << "failed to load configure[" << FLAGS_inferservice_path
<< "," << FLAGS_inferservice_file << "].";
VLOG(2) << "failed to load configure[" << FLAGS_inferservice_path << ","
<< FLAGS_inferservice_file << "].";
return;
}
uint32_t port = conf.port();
......@@ -157,8 +157,7 @@ int main(int argc, char** argv) {
mkdir(FLAGS_log_dir.c_str(), 0777);
ret = stat(FLAGS_log_dir.c_str(), &st_buf);
if (ret != 0) {
VLOG(2) << "Log path " << FLAGS_log_dir
<< " not exist, and create fail";
VLOG(2) << "Log path " << FLAGS_log_dir << " not exist, and create fail";
return -1;
}
}
......
......@@ -15,8 +15,8 @@
#pragma once
#include <gtest/gtest.h>
#include "core/predictor/framework/channel.h"
#include "core/predictor/op/op.h"
#include "core/predictor/msg_data.pb.h"
#include "core/predictor/op/op.h"
namespace baidu {
namespace paddle_serving {
......
......@@ -13,7 +13,7 @@
// limitations under the License.
#include "core/predictor/unittest/test_server_manager.h" // TestServerManager
#include <gflags/gflags.h> // FLAGS
#include <gflags/gflags.h> // FLAGS
#include <string>
#include "core/predictor/framework/server.h" // ServerManager
......
......@@ -53,9 +53,9 @@
#include "json2pb/json_to_pb.h"
#endif
#include "core/configure/general_model_config.pb.h"
#include "core/configure/include/configure_parser.h"
#include "core/configure/sdk_configure.pb.h"
#include "core/configure/general_model_config.pb.h"
#include "core/sdk-cpp/include/utils.h"
......
......@@ -32,9 +32,9 @@ class EndpointConfigManager {
EndpointConfigManager()
: _last_update_timestamp(0), _current_endpointmap_id(1) {}
int create(const std::string & sdk_desc_str);
int create(const std::string& sdk_desc_str);
int load(const std::string & sdk_desc_str);
int load(const std::string& sdk_desc_str);
int create(const char* path, const char* file);
......
......@@ -16,9 +16,9 @@
#include <map>
#include <string>
#include <utility>
#include "glog/raw_logging.h"
#include "core/sdk-cpp/include/common.h"
#include "core/sdk-cpp/include/stub_impl.h"
#include "glog/raw_logging.h"
namespace baidu {
namespace paddle_serving {
......
......@@ -31,7 +31,7 @@ class PredictorApi {
int register_all();
int create(const std::string & sdk_desc_str);
int create(const std::string& sdk_desc_str);
int create(const char* path, const char* file);
......
......@@ -28,13 +28,9 @@ message Tensor {
repeated int32 shape = 6;
};
message FeedInst {
repeated Tensor tensor_array = 1;
};
message FeedInst { repeated Tensor tensor_array = 1; };
message FetchInst {
repeated Tensor tensor_array = 1;
};
message FetchInst { repeated Tensor tensor_array = 1; };
message Request {
repeated FeedInst insts = 1;
......
......@@ -35,8 +35,7 @@ int Endpoint::initialize(const EndpointInfo& ep_info) {
return -1;
}
_variant_list.push_back(var);
VLOG(2) << "Succ create variant: " << vi
<< ", endpoint:" << _endpoint_name;
VLOG(2) << "Succ create variant: " << vi << ", endpoint:" << _endpoint_name;
}
return 0;
......
......@@ -30,7 +30,7 @@ int PredictorApi::register_all() {
return 0;
}
int PredictorApi::create(const std::string & api_desc_str) {
int PredictorApi::create(const std::string& api_desc_str) {
VLOG(2) << api_desc_str;
if (register_all() != 0) {
LOG(ERROR) << "Failed do register all!";
......
......@@ -54,7 +54,7 @@ int Variant::initialize(const EndpointInfo& ep_info,
if (_stub_map.size() > 0) {
VLOG(2) << "Initialize variants from VariantInfo"
<< ", stubs count: " << _stub_map.size();
<< ", stubs count: " << _stub_map.size();
return 0;
}
......
include(src/CMakeLists.txt)
add_library(utils ${util_srcs})
......@@ -15,7 +15,6 @@ limitations under the License. */
#pragma once
#include <stdlib.h>
namespace baidu {
namespace paddle_serving {
......
FILE(GLOB srcs ${CMAKE_CURRENT_LIST_DIR}/*.cc)
LIST(APPEND util_srcs ${srcs})
# How to compile Paddle Serving
### Build environment requirements
- OS: CentOS 6u3
- GCC: 4.8.2 or later
- Go: 1.9.2 or later
- Git: 2.17.1 or later
- CMake: 3.2.2 or later
- Python: 2.7.2 or later
### Get the code
``` shell
......@@ -9,6 +17,12 @@ git submodule update --init --recursive
### Compile the server
#### Set PYTHONROOT
``` shell
# For example, if the Python binary is /usr/bin/python, set PYTHONROOT as follows
export PYTHONROOT=/usr/
```
#### Build with the CPU version of the Paddle Inference Library
``` shell
cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PYTHONROOT/lib/libpython2.7.so -DPYTHON_EXECUTABLE=$PYTHONROOT/bin/python -DCLIENT_ONLY=OFF ..
......@@ -28,3 +42,5 @@ cmake -DPYTHON_INCLUDE_DIR=$PYTHONROOT/include/python2.7/ -DPYTHON_LIBRARIES=$PY
make -j10
```
### Install the wheel package
For both the client and the server, install the .whl package under python/dist/ once the build finishes.
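A minimal sketch of that install step in the same shell style (the exact wheel filename depends on the package and version produced by the build, so the glob below is illustrative):

``` shell
# illustrative only: the real filename carries the package name and version
pip install python/dist/*.whl
```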
......@@ -152,4 +152,3 @@ GLOG_minloglevel=1 bin/serving
2 - ERROR
3 - FATAL (Be careful, as a FATAL log will generate a coredump)
......@@ -193,6 +193,3 @@ total num: 25000
acc num: 22014
acc: 0.88056
```
......@@ -143,6 +143,3 @@ self.op_dict = {
"general_dist_kv": "GeneralDistKVOp"
}
```
......@@ -54,10 +54,3 @@ op_seq_maker.add_op(dist_kv_op)
op_seq_maker.add_op(general_infer_op)
op_seq_maker.add_op(general_response_op)
```
......@@ -21,8 +21,8 @@
#include <vector>
#include "core/configure/include/configure_parser.h"
#include "core/configure/inferencer_configure.pb.h"
#include "paddle_inference_api.h" // NOLINT
#include "core/predictor/framework/infer.h"
#include "paddle_inference_api.h" // NOLINT
namespace baidu {
namespace paddle_serving {
......@@ -336,7 +336,7 @@ class SigmoidModel {
return -1;
}
VLOG(2) << "load sigmoid_b [" << _sigmoid_b._params[0] << "] ["
<< _sigmoid_b._params[1] << "].";
<< _sigmoid_b._params[1] << "].";
_exp_max_input = exp_max;
_exp_min_input = exp_min;
return 0;
......@@ -373,7 +373,7 @@ class SigmoidFluidModel {
clone_model.reset(new SigmoidFluidModel());
clone_model->_sigmoid_core = _sigmoid_core;
clone_model->_fluid_core = _fluid_core->Clone();
return std::move(clone_model);
return std::move(clone_model); // NOLINT
}
public:
......@@ -459,7 +459,7 @@ class FluidCpuWithSigmoidCore : public FluidFamilyCore {
}
protected:
std::unique_ptr<SigmoidFluidModel> _core;
std::unique_ptr<SigmoidFluidModel> _core; // NOLINT
};
class FluidCpuNativeDirWithSigmoidCore : public FluidCpuWithSigmoidCore {
......
......@@ -23,8 +23,8 @@
#include <vector>
#include "core/configure/include/configure_parser.h"
#include "core/configure/inferencer_configure.pb.h"
#include "paddle_inference_api.h" // NOLINT
#include "core/predictor/framework/infer.h"
#include "paddle_inference_api.h" // NOLINT
DECLARE_int32(gpuid);
......@@ -334,13 +334,13 @@ class SigmoidModel {
return -1;
}
VLOG(2) << "load sigmoid_w [" << _sigmoid_w._params[0] << "] ["
<< _sigmoid_w._params[1] << "].";
<< _sigmoid_w._params[1] << "].";
if (0 != _sigmoid_b.init(2, 1, sigmoid_b_file) || 0 != _sigmoid_b.load()) {
LOG(ERROR) << "load params sigmoid_b failed.";
return -1;
}
VLOG(2) << "load sigmoid_b [" << _sigmoid_b._params[0] << "] ["
<< _sigmoid_b._params[1] << "].";
<< _sigmoid_b._params[1] << "].";
_exp_max_input = exp_max;
_exp_min_input = exp_min;
return 0;
......
#coding:utf-8
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Mask, padding and batching."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
def prepare_batch_data(insts,
total_token_num,
max_seq_len=128,
pad_id=None,
cls_id=None,
sep_id=None,
mask_id=None,
return_input_mask=True,
return_max_len=True,
return_num_token=False):
"""
1. generate Tensor of data
2. generate Tensor of position
3. generate self attention mask, [shape: batch_size * max_len * max_len]
"""
batch_src_ids = [inst[0] for inst in insts]
batch_sent_ids = [inst[1] for inst in insts]
batch_pos_ids = [inst[2] for inst in insts]
labels_list = []
# compatible with squad, whose example includes start/end positions,
# or unique id
for i in range(3, len(insts[0]), 1):
labels = [inst[i] for inst in insts]
labels = np.array(labels).astype("int64").reshape([-1, 1])
labels_list.append(labels)
out = batch_src_ids
# Second step: padding
src_id, self_input_mask = pad_batch_data(
out, pad_idx=pad_id, max_seq_len=max_seq_len, return_input_mask=True)
pos_id = pad_batch_data(
batch_pos_ids,
pad_idx=pad_id,
max_seq_len=max_seq_len,
return_pos=False,
return_input_mask=False)
sent_id = pad_batch_data(
batch_sent_ids,
pad_idx=pad_id,
max_seq_len=max_seq_len,
return_pos=False,
return_input_mask=False)
return_list = [src_id, pos_id, sent_id, self_input_mask] + labels_list
return return_list if len(return_list) > 1 else return_list[0]
def pad_batch_data(insts,
pad_idx=0,
max_seq_len=128,
return_pos=False,
return_input_mask=False,
return_max_len=False,
return_num_token=False,
return_seq_lens=False):
"""
Pad the instances to the max sequence length in batch, and generate the
corresponding position data and input mask.
"""
return_list = []
#max_len = max(len(inst) for inst in insts)
max_len = max_seq_len
# Any token included in dict can be used to pad, since the paddings' loss
# will be masked out by weights and have no effect on parameter gradients.
inst_data = np.array([
list(inst) + list([pad_idx] * (max_len - len(inst))) for inst in insts
])
return_list += [inst_data.astype("int64").reshape([-1, max_len, 1])]
# position data
if return_pos:
inst_pos = np.array([
list(range(0, len(inst))) + [pad_idx] * (max_len - len(inst))
for inst in insts
])
return_list += [inst_pos.astype("int64").reshape([-1, max_len, 1])]
if return_input_mask:
# This is used to avoid attention on paddings.
input_mask_data = np.array(
[[1] * len(inst) + [0] * (max_len - len(inst)) for inst in insts])
input_mask_data = np.expand_dims(input_mask_data, axis=-1)
return_list += [input_mask_data.astype("float32")]
if return_max_len:
return_list += [max_len]
if return_num_token:
num_token = 0
for inst in insts:
num_token += len(inst)
return_list += [num_token]
if return_seq_lens:
seq_lens = np.array([len(inst) for inst in insts])
return_list += [seq_lens.astype("int64").reshape([-1, 1])]
return return_list if len(return_list) > 1 else return_list[0]
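A small usage sketch (not part of the original file) showing how pad_batch_data pads a toy batch and returns the matching input mask; the token ids are made up.

``` python
# Hypothetical example: two variable-length instances padded to max_seq_len=8.
toy_batch = [[101, 7592, 102], [101, 2088, 2003, 102]]
padded_ids, input_mask = pad_batch_data(
    toy_batch, pad_idx=0, max_seq_len=8, return_input_mask=True)
print(padded_ids.shape)  # (2, 8, 1): int64 ids padded with pad_idx
print(input_mask.shape)  # (2, 8, 1): 1.0 for real tokens, 0.0 for padding
```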
# -*- coding: utf-8 -*-
#
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
......@@ -11,55 +13,59 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from __future__ import unicode_literals, absolute_import
import os
import sys
import time
from paddle_serving_client import Client
from paddle_serving_client.metric import auc
from paddle_serving_client.utils import MultiThreadRunner
import time
from bert_client import BertService
from paddle_serving_client.utils import benchmark_args
from batching import pad_batch_data
import tokenization
import requests
import json
from bert_reader import BertReader
args = benchmark_args()
def predict(thr_id, resource):
bc = BertService(
model_name="bert_chinese_L-12_H-768_A-12",
max_seq_len=20,
do_lower_case=True)
bc.load_client(resource["conf_file"], resource["server_endpoint"])
thread_num = resource["thread_num"]
file_list = resource["filelist"]
line_id = 0
result = []
label_list = []
dataset = []
for fn in file_list:
fin = open(fn)
for line in fin:
if line_id % thread_num == thr_id - 1:
dataset.append(line.strip())
line_id += 1
fin.close()
start = time.time()
fetch = ["pooled_output"]
for inst in dataset:
fetch_map = bc.run_general([[inst]], fetch)
result.append(fetch_map["pooled_output"])
end = time.time()
return [result, label_list, [end - start]]
def single_func(idx, resource):
fin = open("data-c.txt")
if args.request == "rpc":
reader = BertReader(vocab_file="vocab.txt", max_seq_len=20)
config_file = './serving_client_conf/serving_client_conf.prototxt'
fetch = ["pooled_output"]
client = Client()
client.load_client_config(args.model)
client.connect([resource["endpoint"][idx % 4]])
if __name__ == '__main__':
conf_file = sys.argv[1]
data_file = sys.argv[2]
thread_num = sys.argv[3]
resource = {}
resource["conf_file"] = conf_file
resource["server_endpoint"] = ["127.0.0.1:9292"]
resource["filelist"] = [data_file]
resource["thread_num"] = int(thread_num)
start = time.time()
for line in fin:
feed_dict = reader.process(line)
result = client.predict(feed=feed_dict, fetch=fetch)
end = time.time()
elif args.request == "http":
start = time.time()
header = {"Content-Type": "application/json"}
for line in fin:
#dict_data = {"words": "this is for output ", "fetch": ["pooled_output"]}
dict_data = {"words": line, "fetch": ["pooled_output"]}
r = requests.post(
'http://{}/bert/prediction'.format(resource["endpoint"][0]),
data=json.dumps(dict_data),
headers=header)
end = time.time()
return [[end - start]]
thread_runner = MultiThreadRunner()
result = thread_runner.run(predict, int(sys.argv[3]), resource)
print("total time {} s".format(sum(result[-1]) / len(result[-1])))
if __name__ == '__main__':
multi_thread_runner = MultiThreadRunner()
endpoint_list = [
"127.0.0.1:9494", "127.0.0.1:9495", "127.0.0.1:9496", "127.0.0.1:9497"
]
#endpoint_list = endpoint_list + endpoint_list + endpoint_list
#result = multi_thread_runner.run(single_func, args.thread, {"endpoint":endpoint_list})
result = single_func(0, {"endpoint": endpoint_list})
print(result)
# coding:utf-8
# pylint: disable=doc-string-missing
import os
import sys
import numpy as np
......@@ -9,6 +10,9 @@ import time
from paddlehub.common.logger import logger
import socket
from paddle_serving_client import Client
from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args
args = benchmark_args()
_ver = sys.version_info
is_py2 = (_ver[0] == 2)
......@@ -122,36 +126,30 @@ class BertService():
return fetch_map_batch
def test():
def single_func(idx, resource):
bc = BertService(
model_name='bert_chinese_L-12_H-768_A-12',
max_seq_len=20,
show_ids=False,
do_lower_case=True)
server_addr = ["127.0.0.1:9292"]
config_file = './serving_client_conf/serving_client_conf.prototxt'
fetch = ["pooled_output"]
server_addr = [resource["endpoint"][idx]]
bc.load_client(config_file, server_addr)
batch_size = 1
batch = []
for line in sys.stdin:
if batch_size == 1:
result = bc.run_general([[line.strip()]], fetch)
print(result)
else:
if len(batch) < batch_size:
batch.append([line.strip()])
else:
result = bc.run_batch_general(batch, fetch)
batch = []
for r in result:
print(r)
if len(batch) > 0:
result = bc.run_batch_general(batch, fetch)
batch = []
for r in result:
print(r)
start = time.time()
fin = open("data-c.txt")
for line in fin:
result = bc.run_general([[line.strip()]], fetch)
end = time.time()
return [[end - start]]
if __name__ == '__main__':
test()
multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(single_func, args.thread, {
"endpoint": [
"127.0.0.1:9494", "127.0.0.1:9495", "127.0.0.1:9496",
"127.0.0.1:9497"
]
})
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from batching import pad_batch_data
import tokenization
class BertReader():
def __init__(self, vocab_file="", max_seq_len=128):
self.vocab_file = vocab_file
self.tokenizer = tokenization.FullTokenizer(vocab_file=vocab_file)
self.max_seq_len = max_seq_len
self.vocab = self.tokenizer.vocab
self.pad_id = self.vocab["[PAD]"]
self.cls_id = self.vocab["[CLS]"]
self.sep_id = self.vocab["[SEP]"]
self.mask_id = self.vocab["[MASK]"]
def pad_batch(self, token_ids, text_type_ids, position_ids):
batch_token_ids = [token_ids]
batch_text_type_ids = [text_type_ids]
batch_position_ids = [position_ids]
padded_token_ids, input_mask = pad_batch_data(
batch_token_ids,
max_seq_len=self.max_seq_len,
pad_idx=self.pad_id,
return_input_mask=True)
padded_text_type_ids = pad_batch_data(
batch_text_type_ids,
max_seq_len=self.max_seq_len,
pad_idx=self.pad_id)
padded_position_ids = pad_batch_data(
batch_position_ids,
max_seq_len=self.max_seq_len,
pad_idx=self.pad_id)
return padded_token_ids, padded_position_ids, padded_text_type_ids, input_mask
def process(self, sent):
text_a = tokenization.convert_to_unicode(sent)
tokens_a = self.tokenizer.tokenize(text_a)
if len(tokens_a) > self.max_seq_len - 2:
tokens_a = tokens_a[0:(self.max_seq_len - 2)]
tokens = []
text_type_ids = []
tokens.append("[CLS]")
text_type_ids.append(0)
for token in tokens_a:
tokens.append(token)
text_type_ids.append(0)
token_ids = self.tokenizer.convert_tokens_to_ids(tokens)
position_ids = list(range(len(token_ids)))
p_token_ids, p_pos_ids, p_text_type_ids, input_mask = \
self.pad_batch(token_ids, text_type_ids, position_ids)
feed_result = {
"input_ids": p_token_ids.reshape(-1).tolist(),
"position_ids": p_pos_ids.reshape(-1).tolist(),
"segment_ids": p_text_type_ids.reshape(-1).tolist(),
"input_mask": input_mask.reshape(-1).tolist()
}
return feed_result
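A hedged usage sketch (not in the original file): turn one sentence into a feed dict with BertReader and send it to a serving endpoint; the vocab path, endpoint, and client config path are illustrative.

``` python
# Hypothetical end-to-end use of BertReader with the RPC client.
from paddle_serving_client import Client

reader = BertReader(vocab_file="vocab.txt", max_seq_len=20)
feed_dict = reader.process("hello paddle serving")

client = Client()
client.load_client_config("serving_client_conf/serving_client_conf.prototxt")
client.connect(["127.0.0.1:9292"])
fetch_map = client.predict(feed=feed_dict, fetch=["pooled_output"])
print(len(fetch_map["pooled_output"]))
```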
# coding=utf-8
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_server_gpu.web_service import WebService
from bert_reader import BertReader
import sys
import os
class BertService(WebService):
def load(self):
self.reader = BertReader(vocab_file="vocab.txt", max_seq_len=20)
def preprocess(self, feed={}, fetch=[]):
feed_res = self.reader.process(feed["words"].encode("utf-8"))
return feed_res, fetch
bert_service = BertService(name="bert")
bert_service.load()
bert_service.load_model_config(sys.argv[1])
gpu_ids = os.environ["CUDA_VISIBLE_DEVICES"]
gpus = [int(x) for x in gpu_ids.split(",")]
bert_service.set_gpus(gpus)
bert_service.prepare_server(workdir="workdir", port=9494, device="gpu")
bert_service.run_server()
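For reference, a hedged sketch (not part of the original file) of querying the web service started above over HTTP; it mirrors the request format used by the benchmark client, and the host/port assume the prepare_server settings shown here.

``` python
# Hypothetical HTTP request against the "bert" web service on port 9494.
import json
import requests

header = {"Content-Type": "application/json"}
data = {"words": "hello paddle serving", "fetch": ["pooled_output"]}
r = requests.post(
    "http://127.0.0.1:9494/bert/prediction",
    data=json.dumps(data),
    headers=header)
print(r.json())
```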
......@@ -11,14 +11,16 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
import paddlehub as hub
import paddle.fluid as fluid
import sys
import paddle_serving_client.io as serving_io
model_name = "bert_chinese_L-12_H-768_A-12"
module = hub.Module(model_name)
inputs, outputs, program = module.context(trainable=True, max_seq_len=20)
inputs, outputs, program = module.context(
trainable=True, max_seq_len=int(sys.argv[1]))
place = fluid.core_avx.CPUPlace()
exe = fluid.Executor(place)
input_ids = inputs["input_ids"]
......@@ -34,10 +36,12 @@ feed_var_names = [
target_vars = [pooled_output, sequence_output]
serving_io.save_model("serving_server_model", "serving_client_conf", {
"input_ids": input_ids,
"position_ids": position_ids,
"segment_ids": segment_ids,
"input_mask": input_mask,
}, {"pooled_output": pooled_output,
"sequence_output": sequence_output}, program)
serving_io.save_model(
"bert_seq{}_model".format(sys.argv[1]),
"bert_seq{}_client".format(sys.argv[1]), {
"input_ids": input_ids,
"position_ids": position_ids,
"segment_ids": segment_ids,
"input_mask": input_mask,
}, {"pooled_output": pooled_output,
"sequence_output": sequence_output}, program)
# coding=utf-8
# Copyright 2018 The Google AI Language Team Authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Tokenization classes."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import io
import unicodedata
import six
import sentencepiece as spm
import pickle
def convert_to_unicode(text): # pylint: disable=doc-string-with-all-args
"""Converts `text` to Unicode (if it's not already), assuming utf-8 input."""
if six.PY3:
if isinstance(text, str):
return text
elif isinstance(text, bytes):
return text.decode("utf-8", "ignore")
else:
raise ValueError("Unsupported string type: %s" % (type(text)))
elif six.PY2:
if isinstance(text, str):
return text.decode("utf-8", "ignore")
elif isinstance(text, unicode): # noqa
return text
else:
raise ValueError("Unsupported string type: %s" % (type(text)))
else:
raise ValueError("Not running on Python2 or Python 3?")
def printable_text(text): # pylint: disable=doc-string-with-all-args
"""Returns text encoded in a way suitable for print or `tf.logging`."""
# These functions want `str` for both Python2 and Python3, but in one case
# it's a Unicode string and in the other it's a byte string.
if six.PY3:
if isinstance(text, str):
return text
elif isinstance(text, bytes):
return text.decode("utf-8", "ignore")
else:
raise ValueError("Unsupported string type: %s" % (type(text)))
elif six.PY2:
if isinstance(text, str):
return text
elif isinstance(text, unicode): # noqa
return text.encode("utf-8")
else:
raise ValueError("Unsupported string type: %s" % (type(text)))
else:
raise ValueError("Not running on Python2 or Python 3?")
def load_vocab(vocab_file): # pylint: disable=doc-string-with-all-args, doc-string-with-returns
"""Loads a vocabulary file into a dictionary."""
vocab = collections.OrderedDict()
fin = io.open(vocab_file, "r", encoding="UTF-8")
for num, line in enumerate(fin):
items = convert_to_unicode(line.strip()).split("\t")
if len(items) > 2:
break
token = items[0]
index = items[1] if len(items) == 2 else num
token = token.strip()
vocab[token] = int(index)
fin.close()
return vocab
def convert_by_vocab(vocab, items):
"""Converts a sequence of [tokens|ids] using the vocab."""
output = []
for item in items:
output.append(vocab[item])
return output
def convert_tokens_to_ids(vocab, tokens):
return convert_by_vocab(vocab, tokens)
def convert_ids_to_tokens(inv_vocab, ids):
return convert_by_vocab(inv_vocab, ids)
def whitespace_tokenize(text):
"""Runs basic whitespace cleaning and splitting on a peice of text."""
text = text.strip()
if not text:
return []
tokens = text.split()
return tokens
class FullTokenizer(object):
"""Runs end-to-end tokenziation."""
def __init__(self,
vocab_file,
do_lower_case=True,
use_sentence_piece_vocab=False):
self.vocab = load_vocab(vocab_file)
self.inv_vocab = {v: k for k, v in self.vocab.items()}
self.basic_tokenizer = BasicTokenizer(do_lower_case=do_lower_case)
self.use_sentence_piece_vocab = use_sentence_piece_vocab
self.wordpiece_tokenizer = WordpieceTokenizer(
vocab=self.vocab,
use_sentence_piece_vocab=self.use_sentence_piece_vocab)
def tokenize(self, text):
split_tokens = []
for token in self.basic_tokenizer.tokenize(text):
for sub_token in self.wordpiece_tokenizer.tokenize(token):
split_tokens.append(sub_token)
return split_tokens
def convert_tokens_to_ids(self, tokens):
return convert_by_vocab(self.vocab, tokens)
def convert_ids_to_tokens(self, ids):
return convert_by_vocab(self.inv_vocab, ids)
class CharTokenizer(object):
"""Runs end-to-end tokenziation."""
def __init__(self, vocab_file, do_lower_case=True):
self.vocab = load_vocab(vocab_file)
self.inv_vocab = {v: k for k, v in self.vocab.items()}
self.wordpiece_tokenizer = WordpieceTokenizer(vocab=self.vocab)
def tokenize(self, text):
split_tokens = []
for token in text.lower().split(" "):
for sub_token in self.wordpiece_tokenizer.tokenize(token):
split_tokens.append(sub_token)
return split_tokens
def convert_tokens_to_ids(self, tokens):
return convert_by_vocab(self.vocab, tokens)
def convert_ids_to_tokens(self, ids):
return convert_by_vocab(self.inv_vocab, ids)
class WSSPTokenizer(object): # pylint: disable=doc-string-missing
def __init__(self, vocab_file, sp_model_dir, word_dict, ws=True,
lower=True):
self.vocab = load_vocab(vocab_file)
self.inv_vocab = {v: k for k, v in self.vocab.items()}
self.ws = ws
self.lower = lower
self.dict = pickle.load(open(word_dict, 'rb'))
self.sp_model = spm.SentencePieceProcessor()
self.window_size = 5
self.sp_model.Load(sp_model_dir)
def cut(self, chars): # pylint: disable=doc-string-missing
words = []
idx = 0
while idx < len(chars):
matched = False
for i in range(self.window_size, 0, -1):
cand = chars[idx:idx + i]
if cand in self.dict:
words.append(cand)
matched = True
break
if not matched:
i = 1
words.append(chars[idx])
idx += i
return words
def tokenize(self, text, unk_token="[UNK]"): # pylint: disable=doc-string-missing
text = convert_to_unicode(text)
if self.ws:
text = [s for s in self.cut(text) if s != ' ']
else:
text = text.split(' ')
if self.lower:
text = [s.lower() for s in text]
text = ' '.join(text)
tokens = self.sp_model.EncodeAsPieces(text)
in_vocab_tokens = []
for token in tokens:
if token in self.vocab:
in_vocab_tokens.append(token)
else:
in_vocab_tokens.append(unk_token)
return in_vocab_tokens
def convert_tokens_to_ids(self, tokens):
return convert_by_vocab(self.vocab, tokens)
def convert_ids_to_tokens(self, ids):
return convert_by_vocab(self.inv_vocab, ids)
class BasicTokenizer(object):
"""Runs basic tokenization (punctuation splitting, lower casing, etc.)."""
def __init__(self, do_lower_case=True):
"""Constructs a BasicTokenizer.
Args:
do_lower_case: Whether to lower case the input.
"""
self.do_lower_case = do_lower_case
def tokenize(self, text): # pylint: disable=doc-string-with-all-args, doc-string-with-returns
"""Tokenizes a piece of text."""
text = convert_to_unicode(text)
text = self._clean_text(text)
# This was added on November 1st, 2018 for the multilingual and Chinese
# models. This is also applied to the English models now, but it doesn't
# matter since the English models were not trained on any Chinese data
# and generally don't have any Chinese data in them (there are Chinese
# characters in the vocabulary because Wikipedia does have some Chinese
# words in the English Wikipedia.).
text = self._tokenize_chinese_chars(text)
orig_tokens = whitespace_tokenize(text)
split_tokens = []
for token in orig_tokens:
if self.do_lower_case:
token = token.lower()
token = self._run_strip_accents(token)
split_tokens.extend(self._run_split_on_punc(token))
output_tokens = whitespace_tokenize(" ".join(split_tokens))
return output_tokens
def _run_strip_accents(self, text):
"""Strips accents from a piece of text."""
text = unicodedata.normalize("NFD", text)
output = []
for char in text:
cat = unicodedata.category(char)
if cat == "Mn":
continue
output.append(char)
return "".join(output)
def _run_split_on_punc(self, text):
"""Splits punctuation on a piece of text."""
chars = list(text)
i = 0
start_new_word = True
output = []
while i < len(chars):
char = chars[i]
if _is_punctuation(char):
output.append([char])
start_new_word = True
else:
if start_new_word:
output.append([])
start_new_word = False
output[-1].append(char)
i += 1
return ["".join(x) for x in output]
def _tokenize_chinese_chars(self, text):
"""Adds whitespace around any CJK character."""
output = []
for char in text:
cp = ord(char)
if self._is_chinese_char(cp):
output.append(" ")
output.append(char)
output.append(" ")
else:
output.append(char)
return "".join(output)
def _is_chinese_char(self, cp):
"""Checks whether CP is the codepoint of a CJK character."""
# This defines a "chinese character" as anything in the CJK Unicode block:
# https://en.wikipedia.org/wiki/CJK_Unified_Ideographs_(Unicode_block)
#
# Note that the CJK Unicode block is NOT all Japanese and Korean characters,
# despite its name. The modern Korean Hangul alphabet is a different block,
# as is Japanese Hiragana and Katakana. Those alphabets are used to write
# space-separated words, so they are not treated specially and handled
# like all of the other languages.
if ((cp >= 0x4E00 and cp <= 0x9FFF) or #
(cp >= 0x3400 and cp <= 0x4DBF) or #
(cp >= 0x20000 and cp <= 0x2A6DF) or #
(cp >= 0x2A700 and cp <= 0x2B73F) or #
(cp >= 0x2B740 and cp <= 0x2B81F) or #
(cp >= 0x2B820 and cp <= 0x2CEAF) or
(cp >= 0xF900 and cp <= 0xFAFF) or #
(cp >= 0x2F800 and cp <= 0x2FA1F)): #
return True
return False
def _clean_text(self, text):
"""Performs invalid character removal and whitespace cleanup on text."""
output = []
for char in text:
cp = ord(char)
if cp == 0 or cp == 0xfffd or _is_control(char):
continue
if _is_whitespace(char):
output.append(" ")
else:
output.append(char)
return "".join(output)
class WordpieceTokenizer(object):
"""Runs WordPiece tokenziation."""
def __init__(self,
vocab,
unk_token="[UNK]",
max_input_chars_per_word=100,
use_sentence_piece_vocab=False):
self.vocab = vocab
self.unk_token = unk_token
self.max_input_chars_per_word = max_input_chars_per_word
self.use_sentence_piece_vocab = use_sentence_piece_vocab
def tokenize(self, text): # pylint: disable=doc-string-with-all-args
"""Tokenizes a piece of text into its word pieces.
This uses a greedy longest-match-first algorithm to perform tokenization
using the given vocabulary.
For example:
input = "unaffable"
output = ["un", "##aff", "##able"]
Args:
text: A single token or whitespace separated tokens. This should have
already been passed through `BasicTokenizer.
Returns:
A list of wordpiece tokens.
"""
text = convert_to_unicode(text)
output_tokens = []
for token in whitespace_tokenize(text):
chars = list(token)
if len(chars) > self.max_input_chars_per_word:
output_tokens.append(self.unk_token)
continue
is_bad = False
start = 0
sub_tokens = []
while start < len(chars):
end = len(chars)
cur_substr = None
while start < end:
substr = "".join(chars[start:end])
if start == 0 and self.use_sentence_piece_vocab:
substr = u'\u2581' + substr
if start > 0 and not self.use_sentence_piece_vocab:
substr = "##" + substr
if substr in self.vocab:
cur_substr = substr
break
end -= 1
if cur_substr is None:
is_bad = True
break
sub_tokens.append(cur_substr)
start = end
if is_bad:
output_tokens.append(self.unk_token)
else:
output_tokens.extend(sub_tokens)
return output_tokens
def _is_whitespace(char):
"""Checks whether `chars` is a whitespace character."""
# \t, \n, and \r are technically control characters but we treat them
# as whitespace since they are generally considered as such.
if char == " " or char == "\t" or char == "\n" or char == "\r":
return True
cat = unicodedata.category(char)
if cat == "Zs":
return True
return False
def _is_control(char):
"""Checks whether `chars` is a control character."""
# These are technically control characters but we count them as whitespace
# characters.
if char == "\t" or char == "\n" or char == "\r":
return False
cat = unicodedata.category(char)
if cat.startswith("C"):
return True
return False
def _is_punctuation(char):
"""Checks whether `chars` is a punctuation character."""
cp = ord(char)
# We treat all non-letter/number ASCII as punctuation.
# Characters such as "^", "$", and "`" are not in the Unicode
# Punctuation class but we treat them as punctuation anyways, for
# consistency.
if ((cp >= 33 and cp <= 47) or (cp >= 58 and cp <= 64) or
(cp >= 91 and cp <= 96) or (cp >= 123 and cp <= 126)):
return True
cat = unicodedata.category(char)
if cat.startswith("P"):
return True
return False
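# Illustrative sanity checks (not part of the original file) for the helper
# predicates above; each value follows directly from the rules in the code.
assert _is_whitespace("\t") is True   # tab is explicitly treated as whitespace
assert _is_control("\t") is False     # tab is excluded from control characters
assert _is_punctuation("$") is True   # "$" (cp 36) falls in the ASCII 33-47 range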
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
import argparse
def parse_args():
parser = argparse.ArgumentParser(description="PaddlePaddle CTR example")
parser.add_argument(
'--train_data_path',
type=str,
default='./data/raw/train.txt',
help="The path of training dataset")
parser.add_argument(
'--sparse_only',
type=bool,
default=False,
help="Whether we use sparse features only")
parser.add_argument(
'--test_data_path',
type=str,
default='./data/raw/valid.txt',
help="The path of testing dataset")
parser.add_argument(
'--batch_size',
type=int,
default=1000,
help="The size of mini-batch (default:1000)")
parser.add_argument(
'--embedding_size',
type=int,
default=10,
help="The size for embedding layer (default:10)")
parser.add_argument(
'--num_passes',
type=int,
default=10,
help="The number of passes to train (default: 10)")
parser.add_argument(
'--model_output_dir',
type=str,
default='models',
help='The path for model to store (default: models)')
parser.add_argument(
'--sparse_feature_dim',
type=int,
default=1000001,
help='sparse feature hashing space for index processing')
parser.add_argument(
'--is_local',
type=int,
default=1,
help='Local train or distributed train (default: 1)')
parser.add_argument(
'--cloud_train',
type=int,
default=0,
help='Local train or distributed train on paddlecloud (default: 0)')
parser.add_argument(
'--async_mode',
action='store_true',
default=False,
help='Whether to start pserver in async mode to support ASGD')
parser.add_argument(
'--no_split_var',
action='store_true',
default=False,
help='Whether to split variables into blocks when update_method is pserver')
parser.add_argument(
'--role',
type=str,
default='pserver', # trainer or pserver
help='The role of this node: trainer or pserver (default: pserver)')
parser.add_argument(
'--endpoints',
type=str,
default='127.0.0.1:6000',
help='The pserver endpoints, like: 127.0.0.1:6000,127.0.0.1:6001')
parser.add_argument(
'--current_endpoint',
type=str,
default='127.0.0.1:6000',
help='The current pserver endpoint (default: 127.0.0.1:6000)')
parser.add_argument(
'--trainer_id',
type=int,
default=0,
help='The id of this trainer (default: 0)')
parser.add_argument(
'--trainers',
type=int,
default=1,
help='The number of trainers (default: 1)')
return parser.parse_args()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
import sys
import paddle.fluid.incubate.data_generator as dg
class CriteoDataset(dg.MultiSlotDataGenerator):
def setup(self, sparse_feature_dim):
self.cont_min_ = [0, -3, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0]
self.cont_max_ = [20, 600, 100, 50, 64000, 500, 100, 50, 500, 10, 10, 10, 50]
self.cont_diff_ = [20, 603, 100, 50, 64000, 500, 100, 50, 500, 10, 10, 10, 50]
self.cont_max_ = [
20, 600, 100, 50, 64000, 500, 100, 50, 500, 10, 10, 10, 50
]
self.cont_diff_ = [
20, 603, 100, 50, 64000, 500, 100, 50, 500, 10, 10, 10, 50
]
self.hash_dim_ = sparse_feature_dim
# here, training data are lines with line_index < train_idx_
self.train_idx_ = 41256555
......@@ -23,8 +43,9 @@ class CriteoDataset(dg.MultiSlotDataGenerator):
dense_feature.append((float(features[idx]) - self.cont_min_[idx - 1]) / \
self.cont_diff_[idx - 1])
for idx in self.categorical_range_:
sparse_feature.append([hash(str(idx) + features[idx]) % self.hash_dim_])
sparse_feature.append(
[hash(str(idx) + features[idx]) % self.hash_dim_])
return dense_feature, sparse_feature, [int(features[0])]
def infer_reader(self, filelist, batch, buf_size):
......@@ -32,16 +53,17 @@ class CriteoDataset(dg.MultiSlotDataGenerator):
for fname in filelist:
with open(fname.strip(), "r") as fin:
for line in fin:
dense_feature, sparse_feature, label = self._process_line(line)
dense_feature, sparse_feature, label = self._process_line(
line)
#yield dense_feature, sparse_feature, label
yield [dense_feature] + sparse_feature + [label]
import paddle
batch_iter = paddle.batch(
paddle.reader.shuffle(
local_iter, buf_size=buf_size),
batch_size=batch)
return batch_iter
def generate_sample(self, line):
def data_iter():
......@@ -54,6 +76,7 @@ class CriteoDataset(dg.MultiSlotDataGenerator):
return data_iter
if __name__ == "__main__":
criteo_dataset = CriteoDataset()
criteo_dataset.setup(int(sys.argv[1]))
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from __future__ import print_function
from args import parse_args
......@@ -17,15 +32,17 @@ def train():
dense_input = fluid.layers.data(
name="dense_input", shape=[dense_feature_dim], dtype='float32')
sparse_input_ids = [
fluid.layers.data(name="C" + str(i), shape=[1], lod_level=1, dtype="int64")
for i in range(1, 27)]
fluid.layers.data(
name="C" + str(i), shape=[1], lod_level=1, dtype="int64")
for i in range(1, 27)
]
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
#nn_input = None if sparse_only else dense_input
nn_input = dense_input
predict_y, loss, auc_var, batch_auc_var = dnn_model(
nn_input, sparse_input_ids, label,
args.embedding_size, args.sparse_feature_dim)
nn_input, sparse_input_ids, label, args.embedding_size,
args.sparse_feature_dim)
optimizer = fluid.optimizer.SGD(learning_rate=1e-4)
optimizer.minimize(loss)
......@@ -36,16 +53,17 @@ def train():
dataset.set_use_var([dense_input] + sparse_input_ids + [label])
python_executable = "python"
pipe_command = "{} criteo_reader.py {}".format(
python_executable, args.sparse_feature_dim)
pipe_command = "{} criteo_reader.py {}".format(python_executable,
args.sparse_feature_dim)
dataset.set_pipe_command(pipe_command)
dataset.set_batch_size(128)
thread_num = 10
dataset.set_thread(thread_num)
whole_filelist = ["raw_data/part-%d" % x for x in
range(len(os.listdir("raw_data")))]
whole_filelist = [
"raw_data/part-%d" % x for x in range(len(os.listdir("raw_data")))
]
dataset.set_filelist(whole_filelist[:thread_num])
dataset.load_into_memory()
......@@ -53,8 +71,7 @@ def train():
epochs = 1
for i in range(epochs):
exe.train_from_dataset(
program=fluid.default_main_program(),
dataset=dataset, debug=True)
program=fluid.default_main_program(), dataset=dataset, debug=True)
print("epoch {} finished".format(i))
import paddle_serving_client.io as server_io
......@@ -63,9 +80,9 @@ def train():
feed_var_dict["sparse_{}".format(i)] = sparse
fetch_var_dict = {"prob": predict_y}
server_io.save_model(
"ctr_serving_model", "ctr_client_conf",
feed_var_dict, fetch_var_dict, fluid.default_main_program())
server_io.save_model("ctr_serving_model", "ctr_client_conf", feed_var_dict,
fetch_var_dict, fluid.default_main_program())
if __name__ == '__main__':
train()
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
import paddle.fluid as fluid
import math
def dnn_model(dense_input, sparse_inputs, label,
embedding_size, sparse_feature_dim):
def dnn_model(dense_input, sparse_inputs, label, embedding_size,
sparse_feature_dim):
def embedding_layer(input):
emb = fluid.layers.embedding(
input=input,
is_sparse=True,
is_distributed=False,
size=[sparse_feature_dim, embedding_size],
param_attr=fluid.ParamAttr(name="SparseFeatFactors",
initializer=fluid.initializer.Uniform()))
param_attr=fluid.ParamAttr(
name="SparseFeatFactors",
initializer=fluid.initializer.Uniform()))
return fluid.layers.sequence_pool(input=emb, pool_type='sum')
def mlp_input_tensor(emb_sums, dense_tensor):
......@@ -21,18 +37,30 @@ def dnn_model(dense_input, sparse_inputs, label,
return fluid.layers.concat(emb_sums + [dense_tensor], axis=1)
def mlp(mlp_input):
fc1 = fluid.layers.fc(input=mlp_input, size=400, act='relu',
param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
scale=1 / math.sqrt(mlp_input.shape[1]))))
fc2 = fluid.layers.fc(input=fc1, size=400, act='relu',
param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
scale=1 / math.sqrt(fc1.shape[1]))))
fc3 = fluid.layers.fc(input=fc2, size=400, act='relu',
param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
scale=1 / math.sqrt(fc2.shape[1]))))
pre = fluid.layers.fc(input=fc3, size=2, act='softmax',
param_attr=fluid.ParamAttr(initializer=fluid.initializer.Normal(
scale=1 / math.sqrt(fc3.shape[1]))))
fc1 = fluid.layers.fc(input=mlp_input,
size=400,
act='relu',
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Normal(
scale=1 / math.sqrt(mlp_input.shape[1]))))
fc2 = fluid.layers.fc(input=fc1,
size=400,
act='relu',
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Normal(
scale=1 / math.sqrt(fc1.shape[1]))))
fc3 = fluid.layers.fc(input=fc2,
size=400,
act='relu',
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Normal(
scale=1 / math.sqrt(fc2.shape[1]))))
pre = fluid.layers.fc(input=fc3,
size=2,
act='softmax',
param_attr=fluid.ParamAttr(
initializer=fluid.initializer.Normal(
scale=1 / math.sqrt(fc3.shape[1]))))
return pre
emb_sums = list(map(embedding_layer, sparse_inputs))
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_client import Client
import paddle
import sys
......@@ -13,9 +28,12 @@ batch = 1
buf_size = 100
dataset = criteo.CriteoDataset()
dataset.setup(1000001)
test_filelists = ["{}/part-%d".format(sys.argv[2]) % x
for x in range(len(os.listdir(sys.argv[2])))]
reader = dataset.infer_reader(test_filelists[len(test_filelists)-40:], batch, buf_size)
test_filelists = [
"{}/part-%d".format(sys.argv[2]) % x
for x in range(len(os.listdir(sys.argv[2])))
]
reader = dataset.infer_reader(test_filelists[len(test_filelists) - 40:], batch,
buf_size)
label_list = []
prob_list = []
......@@ -25,4 +43,3 @@ for data in reader():
feed_dict["sparse_{}".format(i - 1)] = data[0][i]
fetch_map = client.predict(feed=feed_dict, fetch=["prob"])
print(fetch_map)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
import os
import sys
from paddle_serving_server import OpMaker
......
......@@ -19,4 +19,4 @@ python -m paddle_serving_server.web_serve --model uci_housing_model/ --thread 10
Prediction through HTTP POST
``` shell
curl -H "Content-Type:application/json" -X POST -d '{"x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583, -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332], "fetch":["price"]}' http://127.0.0.1:9393/uci/prediction
```
\ No newline at end of file
```
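The same prediction can be issued from Python; a minimal sketch using the `requests` package, mirroring the curl payload above:
``` python
import requests

# Same 13-dimensional feature vector and fetch list as the curl example above.
payload = {
    "x": [0.0137, -0.1136, 0.2553, -0.0692, 0.0582, -0.0727, -0.1583,
          -0.0584, 0.6283, 0.4919, 0.1856, 0.0795, -0.0332],
    "fetch": ["price"],
}
r = requests.post("http://127.0.0.1:9393/uci/prediction", json=payload)
print(r.json())
```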
......@@ -11,6 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_client import Client
from paddle_serving_client.utils import MultiThreadRunner
from paddle_serving_client.utils import benchmark_args
......@@ -21,28 +23,35 @@ import requests
args = benchmark_args()
def single_func(idx, resource):
if args.request == "rpc":
client = Client()
client.load_client_config(args.model)
client.connect([args.endpoint])
train_reader = paddle.batch(paddle.reader.shuffle(
paddle.dataset.uci_housing.train(), buf_size=500), batch_size=1)
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.uci_housing.train(), buf_size=500),
batch_size=1)
start = time.time()
for data in train_reader():
fetch_map = client.predict(feed={"x": data[0][0]}, fetch=["price"])
end = time.time()
return [[end - start]]
elif args.request == "http":
train_reader = paddle.batch(paddle.reader.shuffle(
paddle.dataset.uci_housing.train(), buf_size=500), batch_size=1)
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.uci_housing.train(), buf_size=500),
batch_size=1)
start = time.time()
for data in train_reader():
r = requests.post('http://{}/uci/prediction'.format(args.endpoint),
data = {"x": data[0]})
r = requests.post(
'http://{}/uci/prediction'.format(args.endpoint),
data={"x": data[0]})
end = time.time()
return [[end - start]]
multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(single_func, args.thread, {})
print(result)
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
import sys
import paddle
import paddle.fluid as fluid
train_reader = paddle.batch(paddle.reader.shuffle(
paddle.dataset.uci_housing.train(), buf_size=500), batch_size=16)
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.uci_housing.train(), buf_size=500),
batch_size=16)
test_reader = paddle.batch(paddle.reader.shuffle(
paddle.dataset.uci_housing.test(), buf_size=500), batch_size=16)
test_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.uci_housing.test(), buf_size=500),
batch_size=16)
x = fluid.data(name='x', shape=[None, 13], dtype='float32')
y = fluid.data(name='y', shape=[None, 1], dtype='float32')
......@@ -26,11 +45,9 @@ import paddle_serving_client.io as serving_io
for pass_id in range(30):
for data_train in train_reader():
avg_loss_value, = exe.run(
fluid.default_main_program(),
feed=feeder.feed(data_train),
fetch_list=[avg_loss])
serving_io.save_model(
"uci_housing_model", "uci_housing_client",
{"x": x}, {"price": y_predict}, fluid.default_main_program())
avg_loss_value, = exe.run(fluid.default_main_program(),
feed=feeder.feed(data_train),
fetch_list=[avg_loss])
serving_io.save_model("uci_housing_model", "uci_housing_client", {"x": x},
{"price": y_predict}, fluid.default_main_program())
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_client import Client
import paddle
import sys
client = Client()
client.load_client_config(sys.argv[1])
client.connect(["127.0.0.1:9393"])
test_reader = paddle.batch(paddle.reader.shuffle(
paddle.dataset.uci_housing.test(), buf_size=500), batch_size=1)
import paddle
test_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.uci_housing.test(), buf_size=500),
batch_size=1)
for data in test_reader():
fetch_map = client.predict(feed={"x": data[0][0]}, fetch=["price"])
print("{} {}".format(fetch_map["price"][0], data[0][1][0]))
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
import os
import sys
from paddle_serving_server import OpMaker
......
......@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
import sys
import time
......@@ -22,6 +23,7 @@ from paddle_serving_client.utils import benchmark_args
args = benchmark_args()
def single_func(idx, resource):
imdb_dataset = IMDBDataset()
imdb_dataset.load_resource(args.vocab)
......@@ -40,18 +42,21 @@ def single_func(idx, resource):
fin = open(fn)
for line in fin:
word_ids, label = imdb_dataset.get_words_and_label(line)
fetch_map = client.predict(feed={"words": word_ids},
fetch=["prediction"])
fetch_map = client.predict(
feed={"words": word_ids}, fetch=["prediction"])
elif args.request == "http":
for fn in filelist:
fin = open(fn)
for line in fin:
word_ids, label = imdb_dataset.get_words_and_label(line)
r = requests.post("http://{}/imdb/prediction".format(args.endpoint),
data={"words": word_ids})
r = requests.post(
"http://{}/imdb/prediction".format(args.endpoint),
data={"words": word_ids,
"fetch": ["prediction"]})
end = time.time()
return [[end - start]]
multi_thread_runner = MultiThreadRunner()
result = multi_thread_runner.run(single_func, args.thread, {})
print(result)
......@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
import sys
import os
......@@ -18,6 +19,7 @@ import paddle
import re
import paddle.fluid.incubate.data_generator as dg
class IMDBDataset(dg.MultiSlotDataGenerator):
def load_resource(self, dictfile):
self._vocab = {}
......@@ -42,7 +44,7 @@ class IMDBDataset(dg.MultiSlotDataGenerator):
send = '|'.join(line.split('|')[:-1]).lower().replace("<br />",
" ").strip()
label = [int(line.split('|')[-1])]
words = [x for x in self._pattern.split(send) if x and x != " "]
feas = [
self._vocab[x] if x in self._vocab else self._unk_id for x in words
......@@ -56,9 +58,11 @@ class IMDBDataset(dg.MultiSlotDataGenerator):
for line in fin:
feas, label = self.get_words_and_label(line)
yield feas, label
import paddle
batch_iter = paddle.batch(
paddle.reader.shuffle(local_iter, buf_size=buf_size),
paddle.reader.shuffle(
local_iter, buf_size=buf_size),
batch_size=batch)
return batch_iter
......@@ -66,13 +70,15 @@ class IMDBDataset(dg.MultiSlotDataGenerator):
def memory_iter():
for i in range(1000):
yield self.return_value
def data_iter():
feas, label = self.get_words_and_label(line)
yield ("words", feas), ("label", label)
return data_iter
if __name__ == "__main__":
imdb = IMDBDataset()
imdb.load_resource("imdb.vocab")
imdb.run_from_stdin()
......@@ -3,4 +3,3 @@ tar -xzf imdb_service.tar.gz
wget --no-check-certificate https://fleet.bj.bcebos.com/text_classification_data.tar.gz
tar -zxvf text_classification_data.tar.gz
python text_classify_service.py serving_server_model/ workdir imdb.vocab
......@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
import os
import sys
import paddle
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing, doc-string-with-all-args, doc-string-with-returns
import sys
import time
import numpy as np
......@@ -13,10 +28,9 @@ def bow_net(data,
hid_dim=128,
hid_dim2=96,
class_dim=2):
"""
bow net
"""
emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim], is_sparse=True)
""" bow net. """
emb = fluid.layers.embedding(
input=data, size=[dict_dim, emb_dim], is_sparse=True)
bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')
bow_tanh = fluid.layers.tanh(bow)
fc_1 = fluid.layers.fc(input=bow_tanh, size=hid_dim, act="tanh")
......@@ -37,10 +51,9 @@ def cnn_net(data,
hid_dim2=96,
class_dim=2,
win_size=3):
"""
conv net
"""
emb = fluid.layers.embedding(input=data, size=[dict_dim, emb_dim], is_sparse=True)
""" conv net. """
emb = fluid.layers.embedding(
input=data, size=[dict_dim, emb_dim], is_sparse=True)
conv_3 = fluid.nets.sequence_conv_pool(
input=emb,
......@@ -67,9 +80,7 @@ def lstm_net(data,
hid_dim2=96,
class_dim=2,
emb_lr=30.0):
"""
lstm net
"""
""" lstm net. """
emb = fluid.layers.embedding(
input=data,
size=[dict_dim, emb_dim],
......@@ -103,9 +114,7 @@ def gru_net(data,
hid_dim2=96,
class_dim=2,
emb_lr=400.0):
"""
gru net
"""
""" gru net. """
emb = fluid.layers.embedding(
input=data,
size=[dict_dim, emb_dim],
......
......@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_client import Client
from imdb_reader import IMDBDataset
import sys
......@@ -31,4 +32,3 @@ for line in sys.stdin:
fetch = ["acc", "cost", "prediction"]
fetch_map = client.predict(feed=feed, fetch=fetch)
print("{} {}".format(fetch_map["prediction"][1], label[0]))
......@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_client import Client
import sys
......
......@@ -11,17 +11,20 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle_serving_server.web_service import WebService
from imdb_reader import IMDBDataset
import sys
class IMDBService(WebService):
def prepare_dict(self, args={}):
if len(args) == 0:
exit(-1)
self.dataset = IMDBDataset()
self.dataset.load_resource(args["dict_file_path"])
def preprocess(self, feed={}, fetch=[]):
if "words" not in feed:
exit(-1)
......@@ -29,8 +32,9 @@ class IMDBService(WebService):
res_feed["words"] = self.dataset.get_words_only(feed["words"])[0]
return res_feed, fetch
imdb_service = IMDBService(name="imdb")
imdb_service.load_model_config(sys.argv[1])
imdb_service.prepare_server(workdir=sys.argv[2], port=9393, device="cpu")
imdb_service.prepare_dict({"dict_file_path":sys.argv[3]})
imdb_service.prepare_dict({"dict_file_path": sys.argv[3]})
imdb_service.run_server()
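Once the IMDB service above is running, a prediction request can be issued over HTTP. A minimal sketch using the `requests` package (the payload keys follow the `preprocess` method above; the sample sentence is hypothetical):
``` python
import requests

# "words" is required by IMDBService.preprocess; "fetch" is required by the
# web service's request handler.
payload = {"words": "i am very sad | 0", "fetch": ["prediction"]}
r = requests.post("http://127.0.0.1:9393/imdb/prediction", json=payload)
print(r.json())
```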
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
import sys
import os
......
......@@ -199,6 +199,7 @@ class Client(object):
float_feed_names = []
fetch_names = []
counter = 0
batch_size = len(feed_batch)
for feed in feed_batch:
int_slot = []
float_slot = []
......@@ -221,15 +222,21 @@ class Client(object):
if key in self.fetch_names_:
fetch_names.append(key)
result_batch = self.client_handle_.batch_predict(
result_batch = self.result_handle_
res = self.client_handle_.batch_predict(
float_slot_batch, float_feed_names, int_slot_batch, int_feed_names,
fetch_names)
fetch_names, result_batch, self.pid)
result_map_batch = []
for result in result_batch:
for index in range(batch_size):
result_map = {}
for i, name in enumerate(fetch_names):
result_map[name] = result[i]
if self.fetch_names_to_type_[name] == int_type:
result_map[name] = result_batch.get_int64_by_name(name)[
index]
elif self.fetch_names_to_type_[name] == float_type:
result_map[name] = result_batch.get_float_by_name(name)[
index]
result_map_batch.append(result_map)
return result_map_batch
......
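# A hedged sketch (not part of the diff) of how the reworked batch path above
# might be consumed from the Python client. The method and keyword names are
# assumed from the snippet ("feed_batch", "fetch"); "words"/"prediction" are
# placeholder feed and fetch names rather than a specific model config.
from paddle_serving_client import Client

client = Client()
client.load_client_config("serving_client_conf.prototxt")
client.connect(["127.0.0.1:9292"])
feed_batch = [{"words": [1, 2, 3]}, {"words": [4, 5, 6]}]
result_map_batch = client.batch_predict(feed_batch=feed_batch, fetch=["prediction"])
for result_map in result_map_batch:
    print(result_map["prediction"])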
......@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
from paddle.fluid import Executor
from paddle.fluid.compiler import CompiledProgram
......@@ -22,6 +23,7 @@ from paddle.fluid.io import save_inference_model
from ..proto import general_model_config_pb2 as model_conf
import os
def save_model(server_model_folder,
client_config_folder,
feed_var_dict,
......@@ -32,8 +34,12 @@ def save_model(server_model_folder,
feed_var_names = [feed_var_dict[x].name for x in feed_var_dict]
target_vars = fetch_var_dict.values()
save_inference_model(server_model_folder, feed_var_names,
target_vars, executor, main_program=main_program)
save_inference_model(
server_model_folder,
feed_var_names,
target_vars,
executor,
main_program=main_program)
config = model_conf.GeneralModelConfig()
......@@ -67,7 +73,7 @@ def save_model(server_model_folder,
fetch_var.fetch_type = 0
if fetch_var_dict[key].dtype == core.VarDesc.VarType.FP32:
fetch_var.fetch_type = 1
fetch_var.fetch_type = 1
if fetch_var.is_lod_tensor:
fetch_var.shape.extend([-1])
......@@ -82,15 +88,15 @@ def save_model(server_model_folder,
cmd = "mkdir -p {}".format(client_config_folder)
os.system(cmd)
with open("{}/serving_client_conf.prototxt".format(client_config_folder), "w") as fout:
with open("{}/serving_client_conf.prototxt".format(client_config_folder),
"w") as fout:
fout.write(str(config))
with open("{}/serving_server_conf.prototxt".format(server_model_folder), "w") as fout:
with open("{}/serving_server_conf.prototxt".format(server_model_folder),
"w") as fout:
fout.write(str(config))
with open("{}/serving_client_conf.stream.prototxt".format(client_config_folder), "wb") as fout:
with open("{}/serving_client_conf.stream.prototxt".format(
client_config_folder), "wb") as fout:
fout.write(config.SerializeToString())
with open("{}/serving_server_conf.stream.prototxt".format(server_model_folder), "wb") as fout:
with open("{}/serving_server_conf.stream.prototxt".format(
server_model_folder), "wb") as fout:
fout.write(config.SerializeToString())
......@@ -11,6 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
def acc(prob, label, threshold):
# we support prob is the probability for label to be one
......@@ -21,5 +23,3 @@ def acc(prob, label, threshold):
if (prob - threshold) * (label - prob) > 0:
right += 1
return float(right) / total
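# Worked example of the decision rule above (assuming the elided loop walks
# prob/label pairs and `total` is the number of examples): with threshold 0.5,
# prob=0.8, label=1 gives (0.8-0.5)*(1-0.8) = 0.06 > 0 and counts as right,
# while prob=0.6, label=0 gives (0.6-0.5)*(0-0.6) = -0.06 < 0 and does not,
# so two such examples would score an accuracy of 0.5.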
......@@ -11,6 +11,8 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing, doc-string-with-all-args, doc-string-with-returns
def tied_rank(x):
"""
......@@ -24,21 +26,22 @@ def tied_rank(x):
score : list of numbers
The tied rank of each element in x
"""
sorted_x = sorted(zip(x,range(len(x))))
sorted_x = sorted(zip(x, range(len(x))))
r = [0 for k in x]
cur_val = sorted_x[0][0]
last_rank = 0
for i in range(len(sorted_x)):
if cur_val != sorted_x[i][0]:
cur_val = sorted_x[i][0]
for j in range(last_rank, i):
r[sorted_x[j][1]] = float(last_rank+1+i)/2.0
for j in range(last_rank, i):
r[sorted_x[j][1]] = float(last_rank + 1 + i) / 2.0
last_rank = i
if i==len(sorted_x)-1:
for j in range(last_rank, i+1):
r[sorted_x[j][1]] = float(last_rank+i+2)/2.0
if i == len(sorted_x) - 1:
for j in range(last_rank, i + 1):
r[sorted_x[j][1]] = float(last_rank + i + 2) / 2.0
return r
def auc(actual, posterior):
"""
Computes the area under the receiver operating characteristic curve (AUC)
......@@ -56,10 +59,9 @@ def auc(actual, posterior):
The area under the ROC curve (AUC) for actual and posterior
"""
r = tied_rank(posterior)
num_positive = len([0 for x in actual if x==1])
num_negative = len(actual)-num_positive
sum_positive = sum([r[i] for i in range(len(r)) if actual[i]==1])
auc = ((sum_positive - num_positive*(num_positive+1)/2.0) /
(num_negative*num_positive))
num_positive = len([0 for x in actual if x == 1])
num_negative = len(actual) - num_positive
sum_positive = sum([r[i] for i in range(len(r)) if actual[i] == 1])
auc = ((sum_positive - num_positive * (num_positive + 1) / 2.0) /
(num_negative * num_positive))
return auc
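# Illustrative check (not part of the original file): scores that perfectly
# separate the classes give the positives the highest tied ranks, so the
# formula above yields an AUC of exactly 1.0.
assert auc([1, 0, 1], [0.9, 0.2, 0.7]) == 1.0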
......@@ -11,18 +11,26 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
import os
import sys
import subprocess
import argparse
from multiprocessing import Pool
def benchmark_args():
parser = argparse.ArgumentParser("benchmark")
parser.add_argument("--thread", type=int, default=10, help="concurrecy")
parser.add_argument("--model", type=str, default="", help="model for evaluation")
parser.add_argument("--endpoint", type=str, default="127.0.0.1:9292", help="endpoint of server")
parser.add_argument("--request", type=str, default="rpc", help="mode of service")
parser.add_argument(
"--model", type=str, default="", help="model for evaluation")
parser.add_argument(
"--endpoint",
type=str,
default="127.0.0.1:9292",
help="endpoint of server")
parser.add_argument(
"--request", type=str, default="rpc", help="mode of service")
return parser.parse_args()
......
......@@ -19,16 +19,26 @@ Usage:
"""
import argparse
def parse_args():
def parse_args(): # pylint: disable=doc-string-missing
parser = argparse.ArgumentParser("serve")
parser.add_argument("--thread", type=int, default=10, help="Concurrency of server")
parser.add_argument("--model", type=str, default="", help="Model for serving")
parser.add_argument("--port", type=int, default=9292, help="Port the server")
parser.add_argument("--workdir", type=str, default="workdir", help="Working dir of current service")
parser.add_argument("--device", type=str, default="cpu", help="Type of device")
parser.add_argument(
"--thread", type=int, default=10, help="Concurrency of server")
parser.add_argument(
"--model", type=str, default="", help="Model for serving")
parser.add_argument(
"--port", type=int, default=9292, help="Port the server")
parser.add_argument(
"--workdir",
type=str,
default="workdir",
help="Working dir of current service")
parser.add_argument(
"--device", type=str, default="cpu", help="Type of device")
return parser.parse_args()
def start_standard_model():
def start_standard_model(): # pylint: disable=doc-string-missing
args = parse_args()
thread_num = args.thread
model = args.model
......@@ -59,5 +69,6 @@ def start_standard_model():
server.prepare_server(workdir=workdir, port=port, device=device)
server.run_server()
if __name__ == "__main__":
start_standard_model()
......@@ -21,19 +21,31 @@ import argparse
from multiprocessing import Pool, Process
from .web_service import WebService
def parse_args():
def parse_args(): # pylint: disable=doc-string-missing
parser = argparse.ArgumentParser("web_serve")
parser.add_argument("--thread", type=int, default=10, help="Concurrency of server")
parser.add_argument("--model", type=str, default="", help="Model for serving")
parser.add_argument("--port", type=int, default=9292, help="Port the server")
parser.add_argument("--workdir", type=str, default="workdir", help="Working dir of current service")
parser.add_argument("--device", type=str, default="cpu", help="Type of device")
parser.add_argument("--name", type=str, default="default", help="Default service name")
parser.add_argument(
"--thread", type=int, default=10, help="Concurrency of server")
parser.add_argument(
"--model", type=str, default="", help="Model for serving")
parser.add_argument(
"--port", type=int, default=9292, help="Port the server")
parser.add_argument(
"--workdir",
type=str,
default="workdir",
help="Working dir of current service")
parser.add_argument(
"--device", type=str, default="cpu", help="Type of device")
parser.add_argument(
"--name", type=str, default="default", help="Default service name")
return parser.parse_args()
if __name__ == "__main__":
args = parse_args()
service = WebService(name=args.name)
service.load_model_config(args.model)
service.prepare_server(workdir=args.workdir, port=args.port, device=args.device)
service.prepare_server(
workdir=args.workdir, port=args.port, device=args.device)
service.run_server()
......@@ -12,11 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#!flask/bin/python
# pylint: disable=doc-string-missing
from flask import Flask, request, abort
from multiprocessing import Pool, Process
from paddle_serving_server import OpMaker, OpSeqMaker, Server
from paddle_serving_client import Client
class WebService(object):
def __init__(self, name="default_service"):
self.name = name
......@@ -38,7 +41,7 @@ class WebService(object):
server.set_num_threads(16)
server.load_model_config(self.model_config)
server.prepare_server(
workdir=self.workdir, port=self.port+1, device=self.device)
workdir=self.workdir, port=self.port + 1, device=self.device)
server.run_server()
def prepare_server(self, workdir="", port=9393, device="cpu"):
......@@ -51,8 +54,9 @@ class WebService(object):
client_service = Client()
client_service.load_client_config(
"{}/serving_server_conf.prototxt".format(self.model_config))
client_service.connect(["127.0.0.1:{}".format(self.port+1)])
client_service.connect(["127.0.0.1:{}".format(self.port + 1)])
service_name = "/" + self.name + "/prediction"
@app_instance.route(service_name, methods=['POST'])
def get_prediction():
if not request.json:
......@@ -60,16 +64,24 @@ class WebService(object):
if "fetch" not in request.json:
abort(400)
feed, fetch = self.preprocess(request.json, request.json["fetch"])
if "fetch" in feed:
del feed["fetch"]
fetch_map = client_service.predict(feed=feed, fetch=fetch)
fetch_map = self.postprocess(feed=request.json, fetch=fetch, fetch_map=fetch_map)
fetch_map = self.postprocess(
feed=request.json, fetch=fetch, fetch_map=fetch_map)
return fetch_map
app_instance.run(host="127.0.0.1", port=self.port, threaded=False, processes=1)
app_instance.run(host="0.0.0.0",
port=self.port,
threaded=False,
processes=1)
def run_server(self):
import socket
localIP = socket.gethostbyname(socket.gethostname())
print("web service address:")
print("http://{}:{}/{}/prediction".format(localIP, self.port, self.name))
print("http://{}:{}/{}/prediction".format(localIP, self.port,
self.name))
p_rpc = Process(target=self._launch_rpc_service)
p_web = Process(target=self._launch_web_service)
p_rpc.start()
......@@ -82,4 +94,3 @@ class WebService(object):
def postprocess(self, feed={}, fetch=[], fetch_map={}):
return fetch_map
......@@ -11,6 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# pylint: disable=doc-string-missing
import os
from .proto import server_configure_pb2 as server_sdk
......@@ -21,6 +22,28 @@ import socket
import paddle_serving_server_gpu as paddle_serving_server
from version import serving_server_version
from contextlib import closing
import argparse
def serve_args():
parser = argparse.ArgumentParser("serve")
parser.add_argument(
"--thread", type=int, default=10, help="Concurrency of server")
parser.add_argument(
"--model", type=str, default="", help="Model for serving")
parser.add_argument(
"--port", type=int, default=9292, help="Port of the starting gpu")
parser.add_argument(
"--workdir",
type=str,
default="workdir",
help="Working dir of current service")
parser.add_argument(
"--device", type=str, default="gpu", help="Type of device")
parser.add_argument("--gpu_ids", type=str, default="", help="gpu ids")
parser.add_argument(
"--name", type=str, default="default", help="Default service name")
return parser.parse_args()
class OpMaker(object):
......@@ -126,7 +149,8 @@ class Server(object):
self.model_config_path = model_config_path
self.engine.name = "general_model"
self.engine.reloadable_meta = model_config_path + "/fluid_time_file"
#self.engine.reloadable_meta = model_config_path + "/fluid_time_file"
self.engine.reloadable_meta = self.workdir + "/fluid_time_file"
os.system("touch {}".format(self.engine.reloadable_meta))
self.engine.reloadable_type = "timestamp_ne"
self.engine.runtime_thread_num = 0
......@@ -154,6 +178,7 @@ class Server(object):
self.infer_service_conf.services.extend([infer_service])
def _prepare_resource(self, workdir):
self.workdir = workdir
if self.resource_conf == None:
with open("{}/{}".format(workdir, self.general_model_config_fn),
"w") as fout:
......@@ -217,6 +242,7 @@ class Server(object):
if not self.check_port(port):
raise SystemExit("Prot {} is already used".format(port))
self.set_port(port)
self._prepare_resource(workdir)
self._prepare_engine(self.model_config_path, device)
self._prepare_infer_service(port)
......
......@@ -17,36 +17,23 @@ Usage:
Example:
python -m paddle_serving_server.serve --model ./serving_server_model --port 9292
"""
import os
import argparse
from multiprocessing import Pool, Process
from paddle_serving_server_gpu import serve_args
def parse_args():
parser = argparse.ArgumentParser("serve")
parser.add_argument(
"--thread", type=int, default=10, help="Concurrency of server")
parser.add_argument(
"--model", type=str, default="", help="Model for serving")
parser.add_argument(
"--port", type=int, default=9292, help="Port the server")
parser.add_argument(
"--workdir",
type=str,
default="workdir",
help="Working dir of current service")
parser.add_argument(
"--device", type=str, default="gpu", help="Type of device")
parser.add_argument("--gpuid", type=int, default=0, help="Index of GPU")
return parser.parse_args()
def start_standard_model():
args = parse_args()
def start_gpu_card_model(gpuid, args): # pylint: disable=doc-string-missing
gpuid = int(gpuid)
device = "gpu"
port = args.port
if gpuid == -1:
device = "cpu"
elif gpuid >= 0:
port = args.port + gpuid
thread_num = args.thread
model = args.model
port = args.port
workdir = args.workdir
device = args.device
gpuid = args.gpuid
workdir = "{}_{}".format(args.workdir, gpuid)
if model == "":
print("You must specify your serving model")
......@@ -69,9 +56,33 @@ def start_standard_model():
server.load_model_config(model)
server.prepare_server(workdir=workdir, port=port, device=device)
server.set_gpuid(gpuid)
if gpuid >= 0:
server.set_gpuid(gpuid)
server.run_server()
def start_multi_card(args): # pylint: disable=doc-string-missing
gpus = ""
if args.gpu_ids == "":
gpus = os.environ["CUDA_VISIBLE_DEVICES"]
else:
gpus = args.gpu_ids.split(",")
if len(gpus) <= 0:
start_gpu_card_model(-1, args)
else:
gpu_processes = []
for i, gpu_id in enumerate(gpus):
p = Process(
target=start_gpu_card_model, args=(
i,
args, ))
gpu_processes.append(p)
for p in gpu_processes:
p.start()
for p in gpu_processes:
p.join()
if __name__ == "__main__":
start_standard_model()
args = serve_args()
start_multi_card(args)
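# Example launch (illustrative; flags come from serve_args above, and the
# module path is assumed from the paddle_serving_server_gpu imports):
#   python -m paddle_serving_server_gpu.serve --model ./serving_server_model \
#       --port 9292 --gpu_ids 0,1
# With --gpu_ids 0,1 one server process is started per card, listening on
# ports 9292 and 9293 (args.port plus the enumeration index).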
......@@ -17,35 +17,23 @@ Usage:
Example:
python -m paddle_serving_server.web_serve --model ./serving_server_model --port 9292
"""
import argparse
import os
from multiprocessing import Pool, Process
from .web_service import WebService
def parse_args():
parser = argparse.ArgumentParser("web_serve")
parser.add_argument(
"--thread", type=int, default=10, help="Concurrency of server")
parser.add_argument(
"--model", type=str, default="", help="Model for serving")
parser.add_argument(
"--port", type=int, default=9292, help="Port the server")
parser.add_argument(
"--workdir",
type=str,
default="workdir",
help="Working dir of current service")
parser.add_argument(
"--device", type=str, default="cpu", help="Type of device")
parser.add_argument(
"--name", type=str, default="default", help="Default service name")
return parser.parse_args()
import paddle_serving_server_gpu as serving
from paddle_serving_server_gpu import serve_args
if __name__ == "__main__":
args = parse_args()
service = WebService(name=args.name)
service.load_model_config(args.model)
service.prepare_server(
args = serve_args()
web_service = WebService(name=args.name)
web_service.load_model_config(args.model)
gpu_ids = []
if args.gpu_ids == "":
if "CUDA_VISIBLE_DEVICES" in os.environ:
gpu_ids = os.environ["CUDA_VISIBLE_DEVICES"]
if len(gpu_ids) > 0:
gpus = [int(x) for x in gpu_ids.split(",")]
web_service.set_gpus(gpus)
web_service.prepare_server(
workdir=args.workdir, port=args.port, device=args.device)
service.run_server()
......@@ -12,49 +12,92 @@
# See the License for the specific language governing permissions and
# limitations under the License.
#!flask/bin/python
# pylint: disable=doc-string-missing
from flask import Flask, request, abort
from multiprocessing import Pool, Process
from paddle_serving_server_gpu import OpMaker, OpSeqMaker, Server
import paddle_serving_server_gpu as serving
from paddle_serving_client import Client
from .serve import start_multi_card
import time
import random
class WebService(object):
def __init__(self, name="default_service"):
self.name = name
self.gpus = []
self.rpc_service_list = []
def load_model_config(self, model_config):
self.model_config = model_config
def _launch_rpc_service(self):
op_maker = OpMaker()
def set_gpus(self, gpus):
self.gpus = gpus
def default_rpc_service(self,
workdir="conf",
port=9292,
gpuid=0,
thread_num=10):
device = "gpu"
if gpuid == -1:
device = "cpu"
op_maker = serving.OpMaker()
read_op = op_maker.create('general_reader')
general_infer_op = op_maker.create('general_infer')
general_response_op = op_maker.create('general_response')
op_seq_maker = OpSeqMaker()
op_seq_maker = serving.OpSeqMaker()
op_seq_maker.add_op(read_op)
op_seq_maker.add_op(general_infer_op)
op_seq_maker.add_op(general_response_op)
server = Server()
server = serving.Server()
server.set_op_sequence(op_seq_maker.get_op_sequence())
server.set_num_threads(16)
server.set_gpuid = self.gpuid
server.set_num_threads(thread_num)
server.load_model_config(self.model_config)
server.prepare_server(
workdir=self.workdir, port=self.port + 1, device=self.device)
server.run_server()
if gpuid >= 0:
server.set_gpuid(gpuid)
server.prepare_server(workdir=workdir, port=port, device=device)
return server
def _launch_rpc_service(self, service_idx):
self.rpc_service_list[service_idx].run_server()
def prepare_server(self, workdir="", port=9393, device="gpu", gpuid=0):
self.workdir = workdir
self.port = port
self.device = device
self.gpuid = gpuid
if len(self.gpus) == 0:
# init cpu service
self.rpc_service_list.append(
self.default_rpc_service(
self.workdir, self.port + 1, -1, thread_num=10))
else:
for i, gpuid in enumerate(self.gpus):
self.rpc_service_list.append(
self.default_rpc_service(
"{}_{}".format(self.workdir, i),
self.port + 1 + i,
gpuid,
thread_num=10))
def _launch_web_service(self):
def _launch_web_service(self, gpu_num):
app_instance = Flask(__name__)
client_service = Client()
client_service.load_client_config(
"{}/serving_server_conf.prototxt".format(self.model_config))
client_service.connect(["127.0.0.1:{}".format(self.port + 1)])
client_list = []
if gpu_num > 1:
gpu_num = 0
for i in range(gpu_num):
client_service = Client()
client_service.load_client_config(
"{}/serving_server_conf.prototxt".format(self.model_config))
client_service.connect(["127.0.0.1:{}".format(self.port + i + 1)])
client_list.append(client_service)
time.sleep(1)
service_name = "/" + self.name + "/prediction"
@app_instance.route(service_name, methods=['POST'])
......@@ -64,12 +107,12 @@ class WebService(object):
if "fetch" not in request.json:
abort(400)
feed, fetch = self.preprocess(request.json, request.json["fetch"])
fetch_map = client_service.predict(feed=feed, fetch=fetch)
fetch_map = client_list[0].predict(feed=feed, fetch=fetch)
fetch_map = self.postprocess(
feed=request.json, fetch=fetch, fetch_map=fetch_map)
return fetch_map
app_instance.run(host="127.0.0.1",
app_instance.run(host="0.0.0.0",
port=self.port,
threaded=False,
processes=1)
......@@ -80,12 +123,21 @@ class WebService(object):
print("web service address:")
print("http://{}:{}/{}/prediction".format(localIP, self.port,
self.name))
p_rpc = Process(target=self._launch_rpc_service)
p_web = Process(target=self._launch_web_service)
p_rpc.start()
rpc_processes = []
for idx in range(len(self.rpc_service_list)):
p_rpc = Process(target=self._launch_rpc_service, args=(idx, ))
rpc_processes.append(p_rpc)
for p in rpc_processes:
p.start()
p_web = Process(
target=self._launch_web_service, args=(len(self.gpus), ))
p_web.start()
for p in rpc_processes:
p.join()
p_web.join()
p_rpc.join()
def preprocess(self, feed={}, fetch=[]):
return feed, fetch
......
......@@ -77,4 +77,3 @@ setup(
],
license='Apache 2.0',
keywords=('paddle-serving serving-client deployment industrial easy-to-use'))
......@@ -38,7 +38,7 @@ max_version, mid_version, min_version = python_version()
REQUIRED_PACKAGES = [
'six >= 1.10.0', 'protobuf >= 3.1.0',
'paddle_serving_client', 'flask'
'paddle_serving_client', 'flask >= 1.1.1'
]
if not find_package("paddlepaddle") and not find_package("paddlepaddle-gpu"):
......
FROM centos:7.3.1611
RUN yum -y install wget >/dev/null \
&& yum -y install gcc gcc-c++ make glibc-static which >/dev/null \
&& yum -y install git openssl-devel curl-devel bzip2-devel python-devel >/dev/null \
&& wget https://cmake.org/files/v3.2/cmake-3.2.0-Linux-x86_64.tar.gz >/dev/null \
&& tar xzf cmake-3.2.0-Linux-x86_64.tar.gz \
&& mv cmake-3.2.0-Linux-x86_64 /usr/local/cmake3.2.0 \
&& echo 'export PATH=/usr/local/cmake3.2.0/bin:$PATH' >> /root/.bashrc \
&& rm cmake-3.2.0-Linux-x86_64.tar.gz \
&& wget https://dl.google.com/go/go1.14.linux-amd64.tar.gz >/dev/null \
&& tar xzf go1.14.linux-amd64.tar.gz \
&& mv go /usr/local/go \
&& echo 'export GOROOT=/usr/local/go' >> /root/.bashrc \
&& echo 'export PATH=/usr/local/go/bin:$PATH' >> /root/.bashrc \
&& rm go1.14.linux-amd64.tar.gz \
&& yum -y install python-devel sqlite-devel >/dev/null \
&& curl https://bootstrap.pypa.io/get-pip.py -o get-pip.py >/dev/null \
&& python get-pip.py >/dev/null \
&& pip install google protobuf setuptools wheel flask >/dev/null \
&& rm get-pip.py \
&& wget http://nixos.org/releases/patchelf/patchelf-0.10/patchelf-0.10.tar.bz2 \
&& yum -y install bzip2 >/dev/null \
&& tar -jxf patchelf-0.10.tar.bz2 \
&& cd patchelf-0.10 \
&& ./configure --prefix=/usr \
&& make >/dev/null && make install >/dev/null \
&& cd .. \
&& rm -rf patchelf-0.10* \
&& yum -y update >/dev/null \
&& yum -y install dnf >/dev/null \
&& yum -y install dnf-plugins-core >/dev/null \
&& dnf copr enable alonid/llvm-3.8.0 -y \
&& dnf install llvm-3.8.0 clang-3.8.0 compiler-rt-3.8.0 -y \
&& echo 'export PATH=/opt/llvm-3.8.0/bin:$PATH' >> /root/.bashrc
......@@ -3,4 +3,4 @@ paddle_serving_client.egg-info/PKG-INFO
paddle_serving_client.egg-info/SOURCES.txt
paddle_serving_client.egg-info/dependency_links.txt
paddle_serving_client.egg-info/not-zip-safe
paddle_serving_client.egg-info/top_level.txt
\ No newline at end of file
paddle_serving_client.egg-info/top_level.txt
......@@ -12,8 +12,8 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "general_model.h" // NOLINT
#include <fstream>
#include "general_model.h"
#include "sdk-cpp/builtin_format.pb.h"
#include "sdk-cpp/include/common.h"
#include "sdk-cpp/include/predictor_sdk.h"
......@@ -28,7 +28,7 @@ namespace baidu {
namespace paddle_serving {
namespace general_model {
void PredictorClient::init(const std::string & conf_file) {
void PredictorClient::init(const std::string &conf_file) {
_conf_file = conf_file;
std::ifstream fin(conf_file);
if (!fin) {
......@@ -65,9 +65,8 @@ void PredictorClient::init(const std::string & conf_file) {
}
}
void PredictorClient::set_predictor_conf(
const std::string & conf_path,
const std::string & conf_file) {
void PredictorClient::set_predictor_conf(const std::string &conf_path,
const std::string &conf_file) {
_predictor_path = conf_path;
_predictor_conf = conf_file;
}
......@@ -80,53 +79,51 @@ int PredictorClient::create_predictor() {
_api.thrd_initialize();
}
void PredictorClient::predict(
const std::vector<std::vector<float> > & float_feed,
const std::vector<std::string> & float_feed_name,
const std::vector<std::vector<int64_t> > & int_feed,
const std::vector<std::string> & int_feed_name,
const std::vector<std::string> & fetch_name,
FetchedMap * fetch_result) {
void PredictorClient::predict(const std::vector<std::vector<float>> &float_feed,
const std::vector<std::string> &float_feed_name,
const std::vector<std::vector<int64_t>> &int_feed,
const std::vector<std::string> &int_feed_name,
const std::vector<std::string> &fetch_name,
FetchedMap *fetch_result) {
_api.thrd_clear();
_predictor = _api.fetch_predictor("general_model");
Request req;
std::vector<Tensor *> tensor_vec;
FeedInst * inst = req.add_insts();
for (auto & name : float_feed_name) {
FeedInst *inst = req.add_insts();
for (auto &name : float_feed_name) {
tensor_vec.push_back(inst->add_tensor_array());
}
for (auto & name : int_feed_name) {
for (auto &name : int_feed_name) {
tensor_vec.push_back(inst->add_tensor_array());
}
int vec_idx = 0;
for (auto & name : float_feed_name) {
for (auto &name : float_feed_name) {
int idx = _feed_name_to_idx[name];
Tensor * tensor = tensor_vec[idx];
Tensor *tensor = tensor_vec[idx];
for (int j = 0; j < _shape[idx].size(); ++j) {
tensor->add_shape(_shape[idx][j]);
}
tensor->set_elem_type(1);
for (int j = 0; j < float_feed[vec_idx].size(); ++j) {
tensor->add_data(
(char *)(&(float_feed[vec_idx][j])), sizeof(float));
tensor->add_data((char *)(&(float_feed[vec_idx][j])), // NOLINT
sizeof(float));
}
vec_idx++;
}
vec_idx = 0;
for (auto & name : int_feed_name) {
for (auto &name : int_feed_name) {
int idx = _feed_name_to_idx[name];
Tensor * tensor = tensor_vec[idx];
Tensor *tensor = tensor_vec[idx];
for (int j = 0; j < _shape[idx].size(); ++j) {
tensor->add_shape(_shape[idx][j]);
}
tensor->set_elem_type(0);
for (int j = 0; j < int_feed[vec_idx].size(); ++j) {
tensor->add_data(
(char *)(&(int_feed[vec_idx][j])), sizeof(int64_t));
tensor->add_data((char *)(&(int_feed[vec_idx][j])), // NOLINT
sizeof(int64_t));
}
vec_idx++;
}
......@@ -139,13 +136,13 @@ void PredictorClient::predict(
LOG(ERROR) << "failed call predictor with req: " << req.ShortDebugString();
exit(-1);
} else {
for (auto & name : fetch_name) {
for (auto &name : fetch_name) {
int idx = _fetch_name_to_idx[name];
int len = res.insts(0).tensor_array(idx).data_size();
(*fetch_result)[name].resize(len);
for (int i = 0; i < len; ++i) {
(*fetch_result)[name][i] = *(const float *)
res.insts(0).tensor_array(idx).data(i).c_str();
(*fetch_result)[name][i] =
*(const float *)res.insts(0).tensor_array(idx).data(i).c_str();
}
}
}
......@@ -154,12 +151,12 @@ void PredictorClient::predict(
}
void PredictorClient::predict_with_profile(
const std::vector<std::vector<float> > & float_feed,
const std::vector<std::string> & float_feed_name,
const std::vector<std::vector<int64_t> > & int_feed,
const std::vector<std::string> & int_feed_name,
const std::vector<std::string> & fetch_name,
FetchedMap * fetch_result) {
const std::vector<std::vector<float>> &float_feed,
const std::vector<std::string> &float_feed_name,
const std::vector<std::vector<int64_t>> &int_feed,
const std::vector<std::string> &int_feed_name,
const std::vector<std::string> &fetch_name,
FetchedMap *fetch_result) {
return;
}
......
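Aside (not part of the commit): the client code above packs every scalar as raw bytes in a protobuf bytes field — floats as 4-byte chunks with elem_type 1, int64 feeds as 8-byte chunks with elem_type 0 — and the fetch loop recovers values by reinterpreting those bytes as float. A minimal, self-contained sketch of that round trip, with a plain std::string standing in for the repeated bytes field used by Tensor::add_data()/data(i):

// Sketch only: mimics how predict() packs a float into a bytes field and how
// the fetch loop reads it back; no Paddle Serving types are involved.
#include <cstring>
#include <iostream>
#include <string>

int main() {
  float value = 0.25f;

  // Pack: copy the raw 4 bytes of the float, as tensor->add_data(...) does.
  std::string packed(reinterpret_cast<const char*>(&value), sizeof(float));

  // Unpack: read the stored bytes back as a float. The diff does this with a
  // reinterpreting cast on data(i).c_str(); memcpy is the well-defined
  // equivalent for this sketch.
  float decoded = 0.0f;
  std::memcpy(&decoded, packed.data(), sizeof(float));

  std::cout << decoded << std::endl;  // prints 0.25
  return 0;
}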
......@@ -18,9 +18,9 @@
#include <unistd.h>
#include <fstream>
#include <map>
#include <string>
#include <vector>
#include <map>
#include "sdk-cpp/builtin_format.pb.h"
#include "sdk-cpp/general_model_service.pb.h"
......@@ -37,44 +37,40 @@ namespace general_model {
typedef std::map<std::string, std::vector<float>> FetchedMap;
class PredictorClient {
public:
PredictorClient() {}
~PredictorClient() {}
void init(const std::string & client_conf);
void set_predictor_conf(
const std::string& conf_path,
const std::string& conf_file);
void init(const std::string& client_conf);
void set_predictor_conf(const std::string& conf_path,
const std::string& conf_file);
int create_predictor();
void predict(
const std::vector<std::vector<float> > & float_feed,
const std::vector<std::string> & float_feed_name,
const std::vector<std::vector<int64_t> > & int_feed,
const std::vector<std::string> & int_feed_name,
const std::vector<std::string> & fetch_name,
FetchedMap * result_map);
void predict(const std::vector<std::vector<float>>& float_feed,
const std::vector<std::string>& float_feed_name,
const std::vector<std::vector<int64_t>>& int_feed,
const std::vector<std::string>& int_feed_name,
const std::vector<std::string>& fetch_name,
FetchedMap* result_map);
void predict_with_profile(
const std::vector<std::vector<float> > & float_feed,
const std::vector<std::string> & float_feed_name,
const std::vector<std::vector<int64_t> > & int_feed,
const std::vector<std::string> & int_feed_name,
const std::vector<std::string> & fetch_name,
FetchedMap * result_map);
void predict_with_profile(const std::vector<std::vector<float>>& float_feed,
const std::vector<std::string>& float_feed_name,
const std::vector<std::vector<int64_t>>& int_feed,
const std::vector<std::string>& int_feed_name,
const std::vector<std::string>& fetch_name,
FetchedMap* result_map);
private:
PredictorApi _api;
Predictor * _predictor;
Predictor* _predictor;
std::string _predictor_conf;
std::string _predictor_path;
std::string _conf_file;
std::map<std::string, int> _feed_name_to_idx;
std::map<std::string, int> _fetch_name_to_idx;
std::map<std::string, std::string> _fetch_name_to_var_name;
std::vector<std::vector<int> > _shape;
std::vector<std::vector<int>> _shape;
};
} // namespace general_model
......
......@@ -15,20 +15,20 @@
#include <fstream>
#include <vector>
#include "general_model.h"
#include "general_model.h" // NOLINT
using namespace std;
using namespace std; // NOLINT
using baidu::paddle_serving::general_model::PredictorClient;
using baidu::paddle_serving::general_model::FetchedMap;
int main(int argc, char * argv[]) {
PredictorClient * client = new PredictorClient();
int main(int argc, char* argv[]) {
PredictorClient* client = new PredictorClient();
client->init("inference.conf");
client->set_predictor_conf("./", "predictor.conf");
client->create_predictor();
std::vector<std::vector<float> > float_feed;
std::vector<std::vector<int64_t> > int_feed;
std::vector<std::vector<float>> float_feed;
std::vector<std::vector<int64_t>> int_feed;
std::vector<std::string> float_feed_name;
std::vector<std::string> int_feed_name = {"words", "label"};
std::vector<std::string> fetch_name = {"cost", "acc", "prediction"};
......@@ -53,13 +53,14 @@ int main(int argc, char * argv[]) {
cin >> label;
int_feed.push_back({label});
FetchedMap result;
client->predict(
float_feed, float_feed_name,
int_feed, int_feed_name, fetch_name,
&result);
client->predict(float_feed,
float_feed_name,
int_feed,
int_feed_name,
fetch_name,
&result);
cout << label << "\t" << result["prediction"][1] << endl;
......
......@@ -18,14 +18,14 @@
#include <fstream>
#include "core/sdk-cpp/builtin_format.pb.h"
#include "core/sdk-cpp/load_general_model_service.pb.h"
#include "core/sdk-cpp/include/common.h"
#include "core/sdk-cpp/include/predictor_sdk.h"
#include "core/sdk-cpp/load_general_model_service.pb.h"
using baidu::paddle_serving::sdk_cpp::Predictor;
using baidu::paddle_serving::sdk_cpp::PredictorApi;
using baidu::paddle_serving::predictor::
load_general_model_service::RequestAndResponse;
using baidu::paddle_serving::predictor::load_general_model_service::
RequestAndResponse;
int create_req(RequestAndResponse& req) { // NOLINT
req.set_a(1);
......
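Aside (illustrative only, not part of the commit): a rough sketch of how the create_req() helper above might be driven on its own, assuming it returns 0 on success — the truncated hunk only shows it setting field `a` — and using only the generated message type and header already referenced in this diff:

// Hypothetical driver for the test helper shown above.
#include <iostream>
#include "core/sdk-cpp/load_general_model_service.pb.h"

using baidu::paddle_serving::predictor::load_general_model_service::
    RequestAndResponse;

int create_req(RequestAndResponse& req);  // NOLINT, defined in the test above

int main() {
  RequestAndResponse req;
  if (create_req(req) != 0) {  // assumed 0-on-success convention
    return -1;
  }
  std::cout << req.ShortDebugString() << std::endl;
  return 0;
}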