diff --git a/demo-client/CMakeLists.txt b/demo-client/CMakeLists.txt
index 6095849284a5055a42011c2f403329eefbaaa7df..4f24237fe922695d1eab095e96baabf76b002f93 100644
--- a/demo-client/CMakeLists.txt
+++ b/demo-client/CMakeLists.txt
@@ -31,10 +31,14 @@ target_link_libraries(load_general_model -Wl,--whole-archive sdk-cpp -Wl,--no-wh
                       -lz)
 
 add_library(paddle_serving_client SHARED src/general_model.cpp src/pybind_general_model.cpp)
-add_dependencies(paddle_serving_client pybind)
-target_link_libraries(paddle_serving_client brpc configure protobuf leveldb -lcrypto
+target_link_libraries(paddle_serving_client brpc configure protobuf leveldb pybind python -lcrypto
                       -lssl -lz -lrt)
 
+add_executable(general_model_main ${CMAKE_CURRENT_LIST_DIR}/src/general_model_main.cpp)
+target_link_libraries(general_model_main -Wl,--whole-archive sdk-cpp paddle_serving_client -Wl,--no-whole-archive
+                      -lpthread -lcrypto -lm -lrt -lssl -ldl
+                      -lz)
+
 add_executable(echo ${CMAKE_CURRENT_LIST_DIR}/src/echo.cpp)
 target_link_libraries(echo -Wl,--whole-archive sdk-cpp -Wl,--no-whole-archive
                       -lpthread -lcrypto -lm -lrt -lssl -ldl
diff --git a/demo-client/inference.conf b/demo-client/inference.conf
new file mode 100644
index 0000000000000000000000000000000000000000..fe6710ddad0d498b3d1aebfd7efd3368c874d353
--- /dev/null
+++ b/demo-client/inference.conf
@@ -0,0 +1,6 @@
+2 3
+words 1 -1
+label 1 1
+cost mean_0.tmp_0
+acc accuracy_0.tmp_0
+prediction fc_1.tmp_2
diff --git a/demo-client/predictor.conf b/demo-client/predictor.conf
new file mode 100644
index 0000000000000000000000000000000000000000..33d0a84418b24b3ba590f4acfe7dab876c004694
--- /dev/null
+++ b/demo-client/predictor.conf
@@ -0,0 +1,37 @@
+default_variant_conf {
+  tag: "default"
+  connection_conf {
+    connect_timeout_ms: 2000
+    rpc_timeout_ms: 20000
+    connect_retry_count: 2
+    max_connection_per_host: 100
+    hedge_request_timeout_ms: -1
+    hedge_fetch_retry_count: 2
+    connection_type: "pooled"
+  }
+  naming_conf {
+    cluster_filter_strategy: "Default"
+    load_balance_strategy: "la"
+  }
+  rpc_parameter {
+    compress_type: 0
+    package_size: 20
+    protocol: "baidu_std"
+    max_channel_per_request: 3
+  }
+}
+
+predictors {
+  name: "general_model"
+  service_name: "baidu.paddle_serving.predictor.general_model.GeneralModelService"
+  endpoint_router: "WeightedRandomRender"
+  weighted_random_render_conf {
+    variant_weight_list: "50"
+  }
+  variants {
+    tag: "var1"
+    naming_conf {
+      cluster: "list://127.0.0.1:9292"
+    }
+  }
+}
diff --git a/demo-client/setup.py b/demo-client/setup.py
new file mode 100644
index 0000000000000000000000000000000000000000..983ac4b5c2e0303db41f1950df93d53daf2dccd7
--- /dev/null
+++ b/demo-client/setup.py
@@ -0,0 +1,69 @@
+import os
+import re
+import sys
+import platform
+import subprocess
+
+from setuptools import setup, Extension
+from setuptools.command.build_ext import build_ext
+from distutils.version import LooseVersion
+
+
+class CMakeExtension(Extension):
+    def __init__(self, name, sourcedir=''):
+        Extension.__init__(self, name, sources=[])
+        self.sourcedir = os.path.abspath(sourcedir)
+
+
+class CMakeBuild(build_ext):
+    def run(self):
+        try:
+            out = subprocess.check_output(['cmake', '--version'])
+        except OSError:
+            raise RuntimeError("CMake must be installed to build the following extensions: " +
+                               ", ".join(e.name for e in self.extensions))
+
+        if platform.system() == "Windows":
+            cmake_version = LooseVersion(re.search(r'version\s*([\d.]+)', out.decode()).group(1))
+            if cmake_version < '3.1.0':
+                raise RuntimeError("CMake >= 3.1.0 is required on Windows")
Windows") + + for ext in self.extensions: + self.build_extension(ext) + + def build_extension(self, ext): + extdir = os.path.abspath(os.path.dirname(self.get_ext_fullpath(ext.name))) + cmake_args = ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY=' + extdir, + '-DPYTHON_EXECUTABLE=' + sys.executable] + + cfg = 'Debug' if self.debug else 'Release' + build_args = ['--config', cfg] + + if platform.system() == "Windows": + cmake_args += ['-DCMAKE_LIBRARY_OUTPUT_DIRECTORY_{}={}'.format(cfg.upper(), extdir)] + if sys.maxsize > 2**32: + cmake_args += ['-A', 'x64'] + build_args += ['--', '/m'] + else: + cmake_args += ['-DCMAKE_BUILD_TYPE=' + cfg] + build_args += ['--', '-j2'] + + env = os.environ.copy() + env['CXXFLAGS'] = '{} -DVERSION_INFO=\\"{}\\"'.format(env.get('CXXFLAGS', ''), + self.distribution.get_version()) + if not os.path.exists(self.build_temp): + os.makedirs(self.build_temp) + subprocess.check_call(['cmake', ext.sourcedir] + cmake_args, cwd=self.build_temp, env=env) + subprocess.check_call(['cmake', '--build', '.'] + build_args, cwd=self.build_temp) + +setup( + name='paddle_serving_client', + version='0.0.1', + author='PaddlePaddle Authors', + author_email='guru4elephant@gmail.com', + description='Paddle Serving Python Client API', + long_description='', + ext_modules=[CMakeExtension('paddle_serving_client')], + cmdclass=dict(build_ext=CMakeBuild), + zip_safe=False, +) diff --git a/demo-client/src/general_model.cpp b/demo-client/src/general_model.cpp index 1ff604d329dd1cbfd1db102f1bb4d843549c20d4..6ce7e6c0b7315c2acf2f96b64a5ffc0e9fcce17b 100644 --- a/demo-client/src/general_model.cpp +++ b/demo-client/src/general_model.cpp @@ -29,16 +29,68 @@ namespace baidu { namespace paddle_serving { namespace general_model { -void PredictorClient::connect(const std::vector & ep_list) { - _eplist = ep_list; +void PredictorClient::init(const std::string & conf_file) { + _conf_file = conf_file; + std::ifstream fin(conf_file); + if (!fin) { + LOG(ERROR) << "Your inference conf file can not be found"; + exit(-1); + } + _feed_name_to_idx.clear(); + _fetch_name_to_idx.clear(); + _shape.clear(); + int feed_var_num = 0; + int fetch_var_num = 0; + fin >> feed_var_num >> fetch_var_num; + std::string name; + std::string fetch_var_name; + int shape_num = 0; + int dim = 0; + for (int i = 0; i < feed_var_num; ++i) { + fin >> name; + _feed_name_to_idx[name] = i; + fin >> shape_num; + std::vector tmp_feed_shape; + for (int j = 0; j < shape_num; ++j) { + fin >> dim; + tmp_feed_shape.push_back(dim); + } + _shape.push_back(tmp_feed_shape); + } + + for (int i = 0; i < fetch_var_num; ++i) { + fin >> name; + fin >> fetch_var_name; + _fetch_name_to_idx[name] = i; + _fetch_name_to_var_name[name] = fetch_var_name; + } +} + +void PredictorClient::set_predictor_conf( + const std::string & conf_path, + const std::string & conf_file) { + _predictor_path = conf_path; + _predictor_conf = conf_file; } -FetchedMap & PredictorClient::predict( +int PredictorClient::create_predictor() { + if (_api.create(_predictor_path.c_str(), _predictor_conf.c_str()) != 0) { + LOG(ERROR) << "Predictor Creation Failed"; + return -1; + } + _api.thrd_initialize(); +} + +void PredictorClient::predict( const std::vector > & float_feed, const std::vector & float_feed_name, const std::vector > & int_feed, const std::vector & int_feed_name, - const std::vector & fetch_name) { + const std::vector & fetch_name, + FetchedMap * fetch_result) { + + _api.thrd_clear(); + _predictor = _api.fetch_predictor("general_model"); Request req; std::vector tensor_vec; FeedInst * inst 
@@ -58,11 +111,10 @@ FetchedMap & PredictorClient::predict(
       tensor->add_shape(_shape[idx][j]);
     }
     tensor->set_elem_type(1);
-    tensor->mutable_data()->Reserve(
-        float_feed[vec_idx].size() * sizeof(float));
-    void * dst_ptr = tensor->mutable_data()->mutable_data();
-    memcpy(dst_ptr, float_feed[vec_idx].data(),
-        float_feed[vec_idx].size() * sizeof(float));
+    for (int j = 0; j < float_feed[vec_idx].size(); ++j) {
+      tensor->add_data(
+          (char *)(&(float_feed[vec_idx][j])), sizeof(float));
+    }
     vec_idx++;
   }
 
@@ -74,37 +126,43 @@ FetchedMap & PredictorClient::predict(
       tensor->add_shape(_shape[idx][j]);
     }
     tensor->set_elem_type(0);
-    tensor->mutable_data()->Reserve(
-        int_feed[vec_idx].size() * sizeof(int64_t));
-    void * dst_ptr = tensor->mutable_data()->mutable_data();
-    memcpy(dst_ptr, int_feed[vec_idx].data(),
-        int_feed[idx].size() * sizeof(int64_t));
+    for (int j = 0; j < int_feed[vec_idx].size(); ++j) {
+      tensor->add_data(
+          (char *)(&(int_feed[vec_idx][j])), sizeof(int64_t));
+    }
+    vec_idx++;
   }
 
-  std::map<std::string, std::vector<float> > result;
+  // std::map<std::string, std::vector<float> > result;
   Response res;
+
+  res.Clear();
   if (_predictor->inference(&req, &res) != 0) {
-    FetchInst * inst = res.add_insts();
+    LOG(ERROR) << "failed call predictor with req: " << req.ShortDebugString();
+    exit(-1);
+  } else {
     for (auto & name : fetch_name) {
       int idx = _fetch_name_to_idx[name];
-      result[name].resize(inst->tensor_array(idx).data_size() / sizeof(float));
-      memcpy(result[name].data(),
-          inst->mutable_tensor_array(idx)->mutable_data(),
-          inst->tensor_array(idx).data_size() / sizeof(float));
+      int len = res.insts(0).tensor_array(idx).data_size();
+      (*fetch_result)[name].resize(len);
+      for (int i = 0; i < len; ++i) {
+        (*fetch_result)[name][i] = *(const float *)
+            res.insts(0).tensor_array(idx).data(i).c_str();
+      }
     }
   }
 
-  return result;
+  return;
 }
 
-FetchedMap & PredictorClient::predict_with_profile(
+void PredictorClient::predict_with_profile(
     const std::vector<std::vector<float> > & float_feed,
     const std::vector<std::string> & float_feed_name,
     const std::vector<std::vector<int64_t> > & int_feed,
     const std::vector<std::string> & int_feed_name,
-    const std::vector<std::string> & fetch_name) {
-  FetchedMap res;
-  return res;
+    const std::vector<std::string> & fetch_name,
+    FetchedMap * fetch_result) {
+  return;
 }
 
 }  // namespace general_model
diff --git a/demo-client/src/general_model.h b/demo-client/src/general_model.h
index 2f3658364cd7b98b95bad6bae2bffa40bcd5bf59..267e10da0136b311ba0b109553c5b9fd086485e2 100644
--- a/demo-client/src/general_model.h
+++ b/demo-client/src/general_model.h
@@ -43,28 +43,36 @@ class PredictorClient {
   ~PredictorClient() {}
 
   void init(const std::string & client_conf);
-  void connect(const std::vector<std::string> & ep_list);
-
-  FetchedMap & predict(
+  void set_predictor_conf(
+      const std::string& conf_path,
+      const std::string& conf_file);
+  int create_predictor();
+
+  void predict(
       const std::vector<std::vector<float> > & float_feed,
      const std::vector<std::string> & float_feed_name,
       const std::vector<std::vector<int64_t> > & int_feed,
       const std::vector<std::string> & int_feed_name,
-      const std::vector<std::string> & fetch_name);
+      const std::vector<std::string> & fetch_name,
+      FetchedMap * result_map);
 
-  FetchedMap & predict_with_profile(
+  void predict_with_profile(
      const std::vector<std::vector<float> > & float_feed,
       const std::vector<std::string> & float_feed_name,
       const std::vector<std::vector<int64_t> > & int_feed,
       const std::vector<std::string> & int_feed_name,
-      const std::vector<std::string> & fetch_name);
+      const std::vector<std::string> & fetch_name,
+      FetchedMap * result_map);
 
  private:
   PredictorApi _api;
   Predictor * _predictor;
-  std::vector<std::string> _eplist;
+  std::string _predictor_conf;
+  std::string _predictor_path;
+  std::string _conf_file;
   std::map<std::string, int> _feed_name_to_idx;
   std::map<std::string, int> _fetch_name_to_idx;
+  std::map<std::string, std::string> _fetch_name_to_var_name;
   std::vector<std::vector<int> > _shape;
 };
diff --git a/demo-client/src/general_model_main.cpp b/demo-client/src/general_model_main.cpp
new file mode 100644
index 0000000000000000000000000000000000000000..8bf2e1793c3ac77ddff1fce5de1630b78be88318
--- /dev/null
+++ b/demo-client/src/general_model_main.cpp
@@ -0,0 +1,71 @@
+// Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include <iostream>
+#include <vector>
+
+#include "general_model.h"
+
+using namespace std;
+
+using baidu::paddle_serving::general_model::PredictorClient;
+using baidu::paddle_serving::general_model::FetchedMap;
+
+int main(int argc, char * argv[]) {
+  PredictorClient * client = new PredictorClient();
+  client->init("inference.conf");
+  client->set_predictor_conf("./", "predictor.conf");
+  client->create_predictor();
+  std::vector<std::vector<float> > float_feed;
+  std::vector<std::vector<int64_t> > int_feed;
+  std::vector<std::string> float_feed_name;
+  std::vector<std::string> int_feed_name = {"words", "label"};
+  std::vector<std::string> fetch_name = {"cost", "acc", "prediction"};
+
+  std::string line;
+  int64_t text_id = 0;
+  int64_t label = 0;
+  int text_id_num = 0;
+  int label_num = 0;
+  int line_num = 0;
+  while (cin >> text_id_num) {
+    int_feed.clear();
+    float_feed.clear();
+    std::vector<int64_t> ids;
+    ids.reserve(text_id_num);
+    for (int i = 0; i < text_id_num; ++i) {
+      cin >> text_id;
+      ids.push_back(text_id);
+    }
+    int_feed.push_back(ids);
+    cin >> label_num;
+    cin >> label;
+    int_feed.push_back({label});
+
+
+    FetchedMap result;
+
+    client->predict(
+        float_feed, float_feed_name,
+        int_feed, int_feed_name, fetch_name,
+        &result);
+
+    cout << label << "\t" << result["prediction"][1] << endl;
+
+    line_num++;
+    if (line_num % 100 == 0) {
+      cerr << "line num: " << line_num << endl;
+    }
+  }
+}
diff --git a/demo-client/src/pybind_general_model.cpp b/demo-client/src/pybind_general_model.cpp
index c8b4150dd5385e923451a67ade99d46132f2fd6f..0ce939ebe0547089773ccdacaa707c1df048ffe5 100644
--- a/demo-client/src/pybind_general_model.cpp
+++ b/demo-client/src/pybind_general_model.cpp
@@ -5,6 +5,8 @@
 
 namespace py = pybind11;
 
+using baidu::paddle_serving::general_model::FetchedMap;
+
 namespace baidu {
 namespace paddle_serving {
 namespace general_model {
@@ -18,9 +20,14 @@ PYBIND11_MODULE(paddle_serving_client, m) {
            [](PredictorClient &self, const std::string & conf) {
              self.init(conf);
            })
-      .def("connect",
-           [](PredictorClient &self, const std::vector<std::string> & ep_list) {
-             self.connect(ep_list);
+      .def("set_predictor_conf",
+           [](PredictorClient &self, const std::string & conf_path,
+              const std::string & conf_file) {
+             self.set_predictor_conf(conf_path, conf_file);
+           })
+      .def("create_predictor",
+           [](PredictorClient & self) {
+             self.create_predictor();
            })
       .def("predict",
            [](PredictorClient &self,
@@ -28,9 +35,11 @@ PYBIND11_MODULE(paddle_serving_client, m) {
             const std::vector<std::string> & float_feed_name,
             const std::vector<std::vector<int64_t> > & int_feed,
             const std::vector<std::string> & int_feed_name,
-            const std::vector<std::string> & fetch_name) {
+            const std::vector<std::string> & fetch_name,
+            FetchedMap * fetch_result) {
              return self.predict(float_feed, float_feed_name,
-                                 int_feed, int_feed_name, fetch_name);
+                                 int_feed, int_feed_name, fetch_name,
+                                 fetch_result);
            });
 }
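
Note on the new predict binding: pybind11's default STL casters handle a FetchedMap * argument by converting the Python object into a temporary C++ map, so anything PredictorClient::predict writes into fetch_result is discarded rather than reflected back into a Python dict passed by the caller. One way to make the out-parameter observable from Python is to bind FetchedMap as an opaque type. The sketch below is an illustration of that option, not part of this diff, and it assumes FetchedMap is the std::map<std::string, std::vector<float> > alias used in general_model.h:

    #include <pybind11/pybind11.h>
    #include <pybind11/stl_bind.h>

    // Declaring FetchedMap opaque disables the copying type caster, so Python
    // holds a handle to the very map object the C++ side fills in.
    PYBIND11_MAKE_OPAQUE(FetchedMap);

    // Inside PYBIND11_MODULE(paddle_serving_client, m):
    py::bind_map<FetchedMap>(m, "FetchedMap");

    // A Python caller could then do:
    //   result = paddle_serving_client.FetchedMap()
    //   client.predict(float_feed, float_feed_name,
    //                  int_feed, int_feed_name, fetch_name, result)

An alternative with the same effect would be to drop the out-parameter and have the lambda return the map by value, which the default casters already convert to a fresh Python dict on every call.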