Unverified · Commit eeafa32c authored by TeslaZhao, committed by GitHub

Merge pull request #38 from PaddlePaddle/develop

Sync codes
......@@ -68,7 +68,9 @@ class InferEngine {
virtual int thrd_initialize() { return thrd_initialize_impl(); }
virtual int thrd_clear() { return thrd_clear_impl(); }
virtual int thrd_finalize() { return thrd_finalize_impl(); }
virtual int infer(const void* in, void* out, uint32_t batch_size = -1) { return infer_impl(in, out, batch_size); }
virtual int infer(const void* in, void* out, uint32_t batch_size = -1) {
return infer_impl(in, out, batch_size);
}
virtual int reload() = 0;
......@@ -208,7 +210,6 @@ class ReloadableInferEngine : public InferEngine {
}
uint64_t version() const { return _version; }
uint32_t thread_num() const { return _infer_thread_num; }
private:
......@@ -335,7 +336,7 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
md->cores[next_idx] = new (std::nothrow) EngineCore;
//params.dump();
// params.dump();
if (!md->cores[next_idx] || md->cores[next_idx]->create(conf) != 0) {
LOG(ERROR) << "Failed create model, path: " << conf.model_dir();
return -1;
......@@ -491,71 +492,86 @@ class CloneDBReloadableInferEngine
_pd; // Process-level EngineCore; multiple thread-level EngineCores share this object's model data
};
template <typename PaddleInferenceCore>
template <typename EngineCore>
#ifdef WITH_TRT
class FluidInferEngine : public DBReloadableInferEngine<PaddleInferenceCore> {
class FluidInferEngine : public DBReloadableInferEngine<EngineCore> {
#else
class FluidInferEngine : public CloneDBReloadableInferEngine<PaddleInferenceCore> {
class FluidInferEngine : public CloneDBReloadableInferEngine<EngineCore> {
#endif
public: // NOLINT
FluidInferEngine() {}
~FluidInferEngine() {}
typedef std::vector<paddle::PaddleTensor> TensorVector;
int infer_impl(const void* in, void* out, uint32_t batch_size = -1) {
//First of all, get the real core acording to the template parameter 'PaddleInferenceCore'.
PaddleInferenceCore* core =DBReloadableInferEngine<PaddleInferenceCore>::get_core();
// First of all, get the real core according to the
// template parameter <EngineCore>.
EngineCore* core = DBReloadableInferEngine<EngineCore>::get_core();
if (!core || !core->get()) {
LOG(ERROR) << "Failed get fluid core in infer_impl()";
return -1;
}
//We use the for loop to process the input data.
//Inside each for loop, use the in[i]->name as inputName and call 'core->GetInputHandle(inputName)' to get the pointer of InputData.
//Set the lod and shape information of InputData first. then copy data from cpu to the core.
const TensorVector* tensorVector_in_pointer = reinterpret_cast<const TensorVector*>(in);
// We use the for loop to process the input data.
// Inside each for loop, use the in[i]->name as inputName and call
// 'core->GetInputHandle(inputName)' to get the pointer of InputData.
// Set the lod and shape information of InputData first.
// Then copy data from cpu to the core.
const TensorVector* tensorVector_in_pointer =
reinterpret_cast<const TensorVector*>(in);
for (int i=0; i < tensorVector_in_pointer->size(); ++i) {
auto lod_tensor_in = core->GetInputHandle((*tensorVector_in_pointer)[i].name);
auto lod_tensor_in =
core->GetInputHandle((*tensorVector_in_pointer)[i].name);
lod_tensor_in->SetLoD((*tensorVector_in_pointer)[i].lod);
lod_tensor_in->Reshape((*tensorVector_in_pointer)[i].shape);
void* origin_data = (*tensorVector_in_pointer)[i].data.data();
//Because the core needs to determine the size of memory space according to the data type passed in.
//The pointer type of data must be one of float *,int64_t*,int32_t* instead void*.
// Because the core determines the size of the memory space from the
// data type passed in, the pointer type of data must be one of
// float*, int64_t*, int32_t* instead of void*.
if ((*tensorVector_in_pointer)[i].dtype == paddle::PaddleDType::FLOAT32) {
float* data = static_cast<float*>(origin_data);
lod_tensor_in->CopyFromCpu(data);
}else if ((*tensorVector_in_pointer)[i].dtype == paddle::PaddleDType::INT64) {
} else if ((*tensorVector_in_pointer)[i].dtype ==
paddle::PaddleDType::INT64) {
int64_t* data = static_cast<int64_t*>(origin_data);
lod_tensor_in->CopyFromCpu(data);
}else if ((*tensorVector_in_pointer)[i].dtype == paddle::PaddleDType::INT32) {
} else if ((*tensorVector_in_pointer)[i].dtype ==
paddle::PaddleDType::INT32) {
int32_t* data = static_cast<int32_t*>(origin_data);
lod_tensor_in->CopyFromCpu(data);
}
}
//After the input data is passed in, call 'core->Run()' perform the prediction process.
// After the input data is passed in,
// call 'core->Run()' to perform the prediction.
if (!core->Run()) {
LOG(ERROR) << "Failed run fluid family core";
return -1;
}
//In order to get the results, first, call the 'core->GetOutputNames()' to get the name of output(which is a dict like {OutputName:pointer of OutputValue}).
//Then, use for-loop to get OutputValue by calling 'core->GetOutputHandle'.
// In order to get the results, first call 'core->GetOutputNames()'
// to get the output names (a dict like {OutputName: pointer of OutputValue}).
// Then use a for-loop to get each OutputValue by calling
// 'core->GetOutputHandle'.
std::vector<std::string> outnames = core->GetOutputNames();
std::vector<int> output_shape;
int out_num =0;
int dataType =0;
int out_num = 0;
int dataType = 0;
void* databuf_data = NULL;
char* databuf_char = NULL;
size_t databuf_size = 0;
TensorVector* tensorVector_out_pointer = reinterpret_cast<TensorVector*>(out);
TensorVector* tensorVector_out_pointer =
reinterpret_cast<TensorVector*>(out);
if (!tensorVector_out_pointer) {
LOG(ERROR) << "tensorVector_out_pointer is nullptr,error";
return -1;
}
//Get the type and shape information of OutputData first. then copy data to cpu from the core.
//The pointer type of data_out must be one of float *,int64_t*,int32_t* instead void*.
// Get the type and shape information of OutputData first,
// then copy data to the cpu from the core.
// The pointer type of data_out must be one of
// float*, int64_t*, int32_t* instead of void*.
for (int i=0; i < outnames.size(); ++i) {
auto lod_tensor_out = core->GetOutputHandle(outnames[i]);
output_shape = lod_tensor_out->shape();
out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1, std::multiplies<int>());
out_num = std::accumulate(
output_shape.begin(), output_shape.end(), 1, std::multiplies<int>());
dataType = lod_tensor_out->type();
if (dataType == paddle::PaddleDType::FLOAT32) {
databuf_size = out_num*sizeof(float);
......@@ -567,7 +583,7 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<PaddleInferenceCore
float* data_out = reinterpret_cast<float*>(databuf_data);
lod_tensor_out->CopyToCpu(data_out);
databuf_char = reinterpret_cast<char*>(data_out);
}else if (dataType == paddle::PaddleDType::INT64) {
} else if (dataType == paddle::PaddleDType::INT64) {
databuf_size = out_num*sizeof(int64_t);
databuf_data = MempoolWrapper::instance().malloc(databuf_size);
if (!databuf_data) {
......@@ -577,7 +593,7 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<PaddleInferenceCore
int64_t* data_out = reinterpret_cast<int64_t*>(databuf_data);
lod_tensor_out->CopyToCpu(data_out);
databuf_char = reinterpret_cast<char*>(data_out);
}else if (dataType == paddle::PaddleDType::INT32) {
} else if (dataType == paddle::PaddleDType::INT32) {
databuf_size = out_num*sizeof(int32_t);
databuf_data = MempoolWrapper::instance().malloc(databuf_size);
if (!databuf_data) {
......@@ -588,9 +604,11 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<PaddleInferenceCore
lod_tensor_out->CopyToCpu(data_out);
databuf_char = reinterpret_cast<char*>(data_out);
}
//Because task scheduling requires OPs to use 'Channel'(which is a data structure) to transfer data between OPs.
//We need to copy the processed data to the 'Channel' for the next OP.
//In this function, it means we should copy the 'databuf_char' to the pointer 'void* out'.(which is also called ‘tensorVector_out_pointer’)
// Because task scheduling requires OPs to use a 'Channel' (a data
// structure) to transfer data between OPs, we need to copy the processed
// data into the 'Channel' for the next OP. In this function, that means
// copying 'databuf_char' into 'void* out' (also known as
// 'tensorVector_out_pointer').
paddle::PaddleTensor tensor_out;
tensor_out.name = outnames[i];
tensor_out.dtype = paddle::PaddleDType(dataType);
......@@ -611,8 +629,6 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<PaddleInferenceCore
int task_infer_impl(const BatchTensor& in, BatchTensor& out) { // NOLINT
return infer_impl(&in, &out);
}
};
typedef FactoryPool<InferEngine> StaticInferFactory;
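
A minimal usage sketch of the interface above, assuming a single FLOAT32 input named "x" with shape {1, 13} (both placeholders, not taken from this commit); it only shows how a caller packages a TensorVector and hands it to infer(), which forwards to infer_impl().

// Hedged sketch: the tensor name, shape, and data values are assumptions.
#include <vector>
#include "paddle_inference_api.h"  // paddle::PaddleTensor, paddle::PaddleBuf

int run_example(FluidInferEngine<PaddleInferenceEngine>* engine) {
  std::vector<float> x(13, 0.5f);                   // placeholder input data
  paddle::PaddleTensor tensor_in;
  tensor_in.name = "x";                             // assumed feed name
  tensor_in.dtype = paddle::PaddleDType::FLOAT32;   // takes the float* branch
  tensor_in.shape = {1, 13};                        // assumed shape
  tensor_in.data =
      paddle::PaddleBuf(x.data(), x.size() * sizeof(float));

  std::vector<paddle::PaddleTensor> in_vec = {tensor_in};
  std::vector<paddle::PaddleTensor> out_vec;
  // infer() forwards to infer_impl(), which copies each input into the
  // EngineCore, runs the predictor, and fills out_vec from mempool buffers.
  return engine->infer(&in_vec, &out_vec, 1);
}
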
......@@ -797,7 +813,9 @@ class VersionedInferEngine : public InferEngine {
int thrd_finalize_impl() { return -1; }
int thrd_clear_impl() { return -1; }
int proc_finalize_impl() { return -1; }
int infer_impl(const void* in, void* out, uint32_t batch_size = -1) { return -1; }
int infer_impl(const void* in, void* out, uint32_t batch_size = -1) {
return -1;
}
int task_infer_impl(const BatchTensor& in, BatchTensor& out) { // NOLINT
return -1;
} // NOLINT
......@@ -927,7 +945,7 @@ class InferManager {
}
// Versioned inference interface
int infer(const char* model_name,
int infer(const char* model_name,
const void* in,
void* out,
uint32_t batch_size,
......
......@@ -57,9 +57,9 @@ PrecisionType GetPrecision(const std::string& precision_data) {
}
// Engine Base
class PaddleEngineBase {
class EngineCore {
public:
virtual ~PaddleEngineBase() {}
virtual ~EngineCore() {}
virtual std::vector<std::string> GetInputNames() {
return _predictor->GetInputNames();
}
......@@ -107,7 +107,7 @@ class PaddleEngineBase {
};
// Paddle Inference Engine
class PaddleInferenceEngine : public PaddleEngineBase {
class PaddleInferenceEngine : public EngineCore {
public:
int create(const configure::EngineDesc& engine_conf) {
std::string model_path = engine_conf.model_dir();
......
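
The body of create() is truncated above; as a rough sketch under assumptions (the exact configuration flags used by this repo are not shown here), an EngineCore implementation such as PaddleInferenceEngine essentially builds a paddle_infer::Config from the EngineDesc and creates the predictor that GetInputNames()/GetInputHandle() forward to.

// Rough sketch with assumed configuration; not this repo's exact create() body.
#include <memory>
#include <string>
#include "paddle_inference_api.h"  // paddle_infer::Config, CreatePredictor

std::shared_ptr<paddle_infer::Predictor> build_predictor(
    const std::string& model_dir) {
  paddle_infer::Config config;
  config.SetModel(model_dir);   // assumed model layout
  config.SwitchIrOptim(true);   // assumed: enable IR graph optimization
  // The resulting predictor is what EngineCore stores in _predictor and what
  // FluidInferEngine::infer_impl() drives via GetInputHandle()/Run()/etc.
  return paddle_infer::CreatePredictor(config);
}
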
......@@ -25,19 +25,24 @@ args = benchmark_args()
def single_func(idx, resource):
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.uci_housing.train(), buf_size=500),
batch_size=1)
total_number = sum(1 for _ in train_reader())
if args.request == "rpc":
client = Client()
client.load_client_config(args.model)
client.connect([args.endpoint])
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.uci_housing.train(), buf_size=500),
batch_size=1)
start = time.time()
for data in train_reader():
#new_data = np.zeros((1, 13)).astype("float32")
#new_data[0] = data[0][0]
#fetch_map = client.predict(feed={"x": new_data}, fetch=["price"], batch=True)
fetch_map = client.predict(feed={"x": data[0][0]}, fetch=["price"])
end = time.time()
return [[end - start]]
return [[end - start], [total_number]]
elif args.request == "http":
train_reader = paddle.batch(
paddle.reader.shuffle(
......@@ -49,7 +54,7 @@ def single_func(idx, resource):
'http://{}/uci/prediction'.format(args.endpoint),
data={"x": data[0]})
end = time.time()
return [[end - start]]
return [[end - start], [total_number]]
multi_thread_runner = MultiThreadRunner()
......
......@@ -155,7 +155,7 @@ class Client(object):
file_path_list = []
for single_model_config in model_config_path_list:
if os.path.isdir(single_model_config):
file_path_list.append("{}/serving_server_conf.prototxt".format(
file_path_list.append("{}/serving_client_conf.prototxt".format(
single_model_config))
elif os.path.isfile(single_model_config):
file_path_list.append(single_model_config)
......@@ -574,7 +574,7 @@ class MultiLangClient(object):
file_path_list = []
for single_model_config in model_config_path_list:
if os.path.isdir(single_model_config):
file_path_list.append("{}/serving_server_conf.prototxt".format(
file_path_list.append("{}/serving_client_conf.prototxt".format(
single_model_config))
elif os.path.isfile(single_model_config):
file_path_list.append(single_model_config)
......
......@@ -103,7 +103,7 @@ def serve_args():
def start_standard_model(serving_port): # pylint: disable=doc-string-missing
args = parse_args()
args = serve_args()
thread_num = args.thread
model = args.model
port = serving_port
......@@ -410,6 +410,7 @@ if __name__ == "__main__":
use_lite=args.use_lite,
use_xpu=args.use_xpu,
ir_optim=args.ir_optim,
thread_num=args.thread,
precision=args.precision,
use_calib=args.use_calib)
web_service.run_rpc_service()
......
......@@ -176,10 +176,12 @@ class WebService(object):
use_xpu=False,
ir_optim=False,
gpuid=0,
thread_num=2,
mem_optim=True):
print("This API will be deprecated later. Please do not use it")
self.workdir = workdir
self.port = port
self.thread_num = thread_num
self.device = device
self.gpuid = gpuid
self.port_list = []
......@@ -197,7 +199,7 @@ class WebService(object):
self.workdir,
self.port_list[0],
-1,
thread_num=2,
thread_num=self.thread_num,
mem_optim=mem_optim,
use_lite=use_lite,
use_xpu=use_xpu,
......@@ -211,7 +213,7 @@ class WebService(object):
"{}_{}".format(self.workdir, i),
self.port_list[i],
gpuid,
thread_num=2,
thread_num=self.thread_num,
mem_optim=mem_optim,
use_lite=use_lite,
use_xpu=use_xpu,
......