Commit 07eadb82 authored by HexToString

fix bug and names

Parent 9256114c
......@@ -21,6 +21,7 @@
#include <utility>
#include <vector>
#include <numeric>
#include <functional>
#include "core/predictor/common/inner_common.h"
#include "core/predictor/framework/bsf.h"
#include "core/predictor/framework/factory.h"
......@@ -68,7 +69,9 @@ class InferEngine {
virtual int thrd_initialize() { return thrd_initialize_impl(); }
virtual int thrd_clear() { return thrd_clear_impl(); }
virtual int thrd_finalize() { return thrd_finalize_impl(); }
virtual int infer(const void* in, void* out, uint32_t batch_size = -1) { return infer_impl(in, out, batch_size); }
virtual int infer(const void* in, void* out, uint32_t batch_size = -1) {
return infer_impl(in, out, batch_size);
}
virtual int reload() = 0;
......@@ -208,7 +211,6 @@ class ReloadableInferEngine : public InferEngine {
}
uint64_t version() const { return _version; }
uint32_t thread_num() const { return _infer_thread_num; }
private:
......@@ -335,7 +337,7 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
md->cores[next_idx] = new (std::nothrow) EngineCore;
//params.dump();
// params.dump();
if (!md->cores[next_idx] || md->cores[next_idx]->create(conf) != 0) {
LOG(ERROR) << "Failed create model, path: " << conf.model_dir();
return -1;
......@@ -491,71 +493,86 @@ class CloneDBReloadableInferEngine
_pd;  // Process-level EngineCore; multiple thread-level EngineCores share this object's model data
};
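The translated comment above describes the clone pattern used by CloneDBReloadableInferEngine: one process-level EngineCore loads the model data once, and every thread-level EngineCore shares it. A minimal, framework-independent sketch of that ownership layout is given below; SharedModel and ThreadCore are hypothetical names, not Serving classes.

// Sketch only: illustrates the process-level / thread-level split described
// in the comment above. SharedModel and ThreadCore are hypothetical names.
#include <memory>
#include <utility>
#include <vector>

struct SharedModel {           // stands in for the process-level EngineCore
  std::vector<char> weights;   // model data loaded once per process
};

class ThreadCore {             // stands in for a thread-level EngineCore
 public:
  explicit ThreadCore(std::shared_ptr<const SharedModel> model)
      : model_(std::move(model)) {}
  // Per-thread scratch state would live here; the weights stay shared.

 private:
  std::shared_ptr<const SharedModel> model_;
};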
template <typename PaddleInferenceCore>
template <typename EngineCore>
#ifdef WITH_TRT
class FluidInferEngine : public DBReloadableInferEngine<PaddleInferenceCore> {
class FluidInferEngine : public DBReloadableInferEngine<EngineCore> {
#else
class FluidInferEngine : public CloneDBReloadableInferEngine<PaddleInferenceCore> {
class FluidInferEngine : public CloneDBReloadableInferEngine<EngineCore> {
#endif
public: // NOLINT
FluidInferEngine() {}
~FluidInferEngine() {}
typedef std::vector<paddle::PaddleTensor> TensorVector;
int infer_impl(const void* in, void* out, uint32_t batch_size = -1) {
//First of all, get the real core acording to the template parameter 'PaddleInferenceCore'.
PaddleInferenceCore* core =DBReloadableInferEngine<PaddleInferenceCore>::get_core();
// First of all, get the real core according to the
// template parameter <EngineCore>.
EngineCore* core = DBReloadableInferEngine<EngineCore>::get_core();
if (!core || !core->get()) {
LOG(ERROR) << "Failed get fluid core in infer_impl()";
return -1;
}
//We use the for loop to process the input data.
//Inside each for loop, use the in[i]->name as inputName and call 'core->GetInputHandle(inputName)' to get the pointer of InputData.
//Set the lod and shape information of InputData first. then copy data from cpu to the core.
const TensorVector* tensorVector_in_pointer = reinterpret_cast<const TensorVector*>(in);
// We use a for loop to process the input data.
// In each iteration, use in[i]->name as inputName and call
// 'core->GetInputHandle(inputName)' to get a pointer to the InputData.
// Set the lod and shape information of the InputData first,
// then copy the data from CPU to the core.
const TensorVector* tensorVector_in_pointer =
reinterpret_cast<const TensorVector*>(in);
for (int i=0; i < tensorVector_in_pointer->size(); ++i) {
auto lod_tensor_in = core->GetInputHandle((*tensorVector_in_pointer)[i].name);
auto lod_tensor_in =
core->GetInputHandle((*tensorVector_in_pointer)[i].name);
lod_tensor_in->SetLoD((*tensorVector_in_pointer)[i].lod);
lod_tensor_in->Reshape((*tensorVector_in_pointer)[i].shape);
void* origin_data = (*tensorVector_in_pointer)[i].data.data();
//Because the core needs to determine the size of memory space according to the data type passed in.
//The pointer type of data must be one of float *,int64_t*,int32_t* instead void*.
// The core determines the size of the memory space according to
// the data type passed in, so the pointer type of the data must be
// one of float*, int64_t*, or int32_t* instead of void*.
if ((*tensorVector_in_pointer)[i].dtype == paddle::PaddleDType::FLOAT32) {
float* data = static_cast<float*>(origin_data);
lod_tensor_in->CopyFromCpu(data);
}else if ((*tensorVector_in_pointer)[i].dtype == paddle::PaddleDType::INT64) {
} else if ((*tensorVector_in_pointer)[i].dtype ==
paddle::PaddleDType::INT64) {
int64_t* data = static_cast<int64_t*>(origin_data);
lod_tensor_in->CopyFromCpu(data);
}else if ((*tensorVector_in_pointer)[i].dtype == paddle::PaddleDType::INT32) {
} else if ((*tensorVector_in_pointer)[i].dtype ==
paddle::PaddleDType::INT32) {
int32_t* data = static_cast<int32_t*>(origin_data);
lod_tensor_in->CopyFromCpu(data);
}
}
//After the input data is passed in, call 'core->Run()' perform the prediction process.
// After the input data is passed in, call 'core->Run()'
// to perform the prediction.
if (!core->Run()) {
LOG(ERROR) << "Failed run fluid family core";
return -1;
}
//In order to get the results, first, call the 'core->GetOutputNames()' to get the name of output(which is a dict like {OutputName:pointer of OutputValue}).
//Then, use for-loop to get OutputValue by calling 'core->GetOutputHandle'.
// To get the results, first call 'core->GetOutputNames()' to get the
// output names (a dict like {OutputName: pointer to OutputValue}).
// Then use a for loop with 'core->GetOutputHandle' to get each OutputValue.
std::vector<std::string> outnames = core->GetOutputNames();
std::vector<int> output_shape;
int out_num =0;
int dataType =0;
int out_num = 0;
int dataType = 0;
void* databuf_data = NULL;
char* databuf_char = NULL;
size_t databuf_size = 0;
TensorVector* tensorVector_out_pointer = reinterpret_cast<TensorVector*>(out);
TensorVector* tensorVector_out_pointer =
reinterpret_cast<TensorVector*>(out);
if (!tensorVector_out_pointer) {
LOG(ERROR) << "tensorVector_out_pointer is nullptr,error";
return -1;
}
//Get the type and shape information of OutputData first. then copy data to cpu from the core.
//The pointer type of data_out must be one of float *,int64_t*,int32_t* instead void*.
// Get the type and shape information of the OutputData first,
// then copy the data from the core to the CPU.
// The pointer type of data_out must be one of
// float*, int64_t*, or int32_t* instead of void*.
for (int i=0; i < outnames.size(); ++i) {
auto lod_tensor_out = core->GetOutputHandle(outnames[i]);
output_shape = lod_tensor_out->shape();
out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1, std::multiplies<int>());
out_num = std::accumulate(
output_shape.begin(), output_shape.end(), 1, std::multiplies<int>());
dataType = lod_tensor_out->type();
if (dataType == paddle::PaddleDType::FLOAT32) {
databuf_size = out_num*sizeof(float);
......@@ -567,7 +584,7 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<PaddleInferenceCore
float* data_out = reinterpret_cast<float*>(databuf_data);
lod_tensor_out->CopyToCpu(data_out);
databuf_char = reinterpret_cast<char*>(data_out);
}else if (dataType == paddle::PaddleDType::INT64) {
} else if (dataType == paddle::PaddleDType::INT64) {
databuf_size = out_num*sizeof(int64_t);
databuf_data = MempoolWrapper::instance().malloc(databuf_size);
if (!databuf_data) {
......@@ -577,7 +594,7 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<PaddleInferenceCore
int64_t* data_out = reinterpret_cast<int64_t*>(databuf_data);
lod_tensor_out->CopyToCpu(data_out);
databuf_char = reinterpret_cast<char*>(data_out);
}else if (dataType == paddle::PaddleDType::INT32) {
} else if (dataType == paddle::PaddleDType::INT32) {
databuf_size = out_num*sizeof(int32_t);
databuf_data = MempoolWrapper::instance().malloc(databuf_size);
if (!databuf_data) {
......@@ -588,9 +605,11 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<PaddleInferenceCore
lod_tensor_out->CopyToCpu(data_out);
databuf_char = reinterpret_cast<char*>(data_out);
}
//Because task scheduling requires OPs to use 'Channel'(which is a data structure) to transfer data between OPs.
//We need to copy the processed data to the 'Channel' for the next OP.
//In this function, it means we should copy the 'databuf_char' to the pointer 'void* out'.(which is also called ‘tensorVector_out_pointer’)
// Task scheduling requires OPs to use a 'Channel' (a data structure)
// to transfer data between OPs, so we need to copy the processed data
// to the 'Channel' for the next OP.
// In this function, that means copying 'databuf_char' to
// 'void* out' (also known as 'tensorVector_out_pointer').
paddle::PaddleTensor tensor_out;
tensor_out.name = outnames[i];
tensor_out.dtype = paddle::PaddleDType(dataType);
......@@ -611,8 +630,6 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<PaddleInferenceCore
int task_infer_impl(const BatchTensor& in, BatchTensor& out) { // NOLINT
return infer_impl(&in, &out);
}
};
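Every dtype branch in infer_impl() above repeats the same steps: compute the element count from the output shape, allocate element_count * sizeof(T) bytes, and copy the tensor into that buffer. Below is a self-contained sketch of that size calculation and dtype dispatch, using plain std::malloc in place of MempoolWrapper; the DType enum is only a stand-in for paddle::PaddleDType.

#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <numeric>
#include <vector>

// Hypothetical stand-in for paddle::PaddleDType, used only for this sketch.
enum class DType { FLOAT32, INT64, INT32 };

inline std::size_t element_size(DType t) {
  switch (t) {
    case DType::FLOAT32: return sizeof(float);
    case DType::INT64:   return sizeof(int64_t);
    case DType::INT32:   return sizeof(int32_t);
  }
  return 0;
}

// out_num in infer_impl() is the product of all output dims.
inline int element_count(const std::vector<int>& shape) {
  return std::accumulate(
      shape.begin(), shape.end(), 1, std::multiplies<int>());
}

// databuf_size = out_num * sizeof(T); infer_impl() draws this buffer from
// MempoolWrapper, which is simplified to malloc here.
inline void* alloc_output_buffer(const std::vector<int>& shape, DType t) {
  return std::malloc(element_count(shape) * element_size(t));
}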
typedef FactoryPool<InferEngine> StaticInferFactory;
......@@ -797,7 +814,9 @@ class VersionedInferEngine : public InferEngine {
int thrd_finalize_impl() { return -1; }
int thrd_clear_impl() { return -1; }
int proc_finalize_impl() { return -1; }
int infer_impl(const void* in, void* out, uint32_t batch_size = -1) { return -1; }
int infer_impl(const void* in, void* out, uint32_t batch_size = -1) {
return -1;
}
int task_infer_impl(const BatchTensor& in, BatchTensor& out) { // NOLINT
return -1;
} // NOLINT
......@@ -927,7 +946,7 @@ class InferManager {
}
// Versioned inference interface
int infer(const char* model_name,
int infer(const char* model_name,
const void* in,
void* out,
uint32_t batch_size,
......
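FluidInferEngine in the hunk above derives from DBReloadableInferEngine<EngineCore> when WITH_TRT is defined and from CloneDBReloadableInferEngine<EngineCore> otherwise. The same conditional-base idiom in isolation looks roughly like this; the class bodies are elided and the FluidBase alias is hypothetical.

template <typename EngineCore>
class DBReloadableInferEngine {
  // roughly: each thread builds its own EngineCore from the model files
};

template <typename EngineCore>
class CloneDBReloadableInferEngine
    : public DBReloadableInferEngine<EngineCore> {
  // roughly: thread-level cores share the process-level core's model data
};

#ifdef WITH_TRT
template <typename EngineCore>
using FluidBase = DBReloadableInferEngine<EngineCore>;       // TensorRT builds
#else
template <typename EngineCore>
using FluidBase = CloneDBReloadableInferEngine<EngineCore>;  // all other builds
#endif

template <typename EngineCore>
class FluidInferEngine : public FluidBase<EngineCore> {};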
......@@ -42,9 +42,9 @@ static const int max_batch = 32;
static const int min_subgraph_size = 3;
// Engine Base
class PaddleEngineBase {
class EngineCore {
public:
virtual ~PaddleEngineBase() {}
virtual ~EngineCore() {}
virtual std::vector<std::string> GetInputNames() {
return _predictor->GetInputNames();
}
......@@ -92,7 +92,7 @@ class PaddleEngineBase {
};
// Paddle Inference Engine
class PaddleInferenceEngine : public PaddleEngineBase {
class PaddleInferenceEngine : public EngineCore {
public:
int create(const configure::EngineDesc& engine_conf) {
std::string model_path = engine_conf.model_dir();
......
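The hunks above rename the base class from PaddleEngineBase to EngineCore, while PaddleInferenceEngine keeps its create(engine_conf) entry point. A rough sketch of the resulting hierarchy follows; the bodies are trivial placeholders, and the std::string parameter stands in for the configure::EngineDesc used in the real code.

#include <memory>
#include <string>
#include <vector>

class Predictor;  // placeholder for the underlying inference handle

// Shape of the renamed base class: it owns a predictor and forwards queries.
class EngineCore {
 public:
  virtual ~EngineCore() {}
  // The real method forwards to _predictor->GetInputNames().
  virtual std::vector<std::string> GetInputNames() { return {}; }

 protected:
  std::shared_ptr<Predictor> _predictor;
};

// The concrete engine only adds construction from an engine config.
class PaddleInferenceEngine : public EngineCore {
 public:
  // The real method builds _predictor from engine_conf.model_dir() etc.
  int create(const std::string& model_dir) { return model_dir.empty() ? -1 : 0; }
};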
......@@ -93,7 +93,7 @@ def serve_args():
def start_standard_model(serving_port): # pylint: disable=doc-string-missing
args = parse_args()
args = serve_args()
thread_num = args.thread
model = args.model
port = serving_port
......@@ -109,7 +109,7 @@ def start_standard_model(serving_port): # pylint: disable=doc-string-missing
if model == "":
print("You must specify your serving model")
exit(-1)
for single_model_config in args.model:
if os.path.isdir(single_model_config):
pass
......@@ -131,11 +131,10 @@ def start_standard_model(serving_port): # pylint: disable=doc-string-missing
infer_op_name = "general_detection"
general_infer_op = op_maker.create(infer_op_name)
op_seq_maker.add_op(general_infer_op)
general_response_op = op_maker.create('general_response')
op_seq_maker.add_op(general_response_op)
server = None
if use_multilang:
server = serving.MultiLangServer()
......@@ -199,7 +198,7 @@ def start_gpu_card_model(index, gpuid, port, args): # pylint: disable=doc-strin
infer_op_name = "general_infer"
general_infer_op = op_maker.create(infer_op_name)
op_seq_maker.add_op(general_infer_op)
general_response_op = op_maker.create('general_response')
op_seq_maker.add_op(general_response_op)
......@@ -297,7 +296,8 @@ class MainService(BaseHTTPRequestHandler):
key = base64.b64decode(post_data["key"].encode())
for single_model_config in args.model:
if os.path.isfile(single_model_config):
raise ValueError("The input of --model should be a dir not file.")
raise ValueError(
"The input of --model should be a dir not file.")
with open(single_model_config + "/key", "wb") as f:
f.write(key)
return True
......@@ -309,7 +309,8 @@ class MainService(BaseHTTPRequestHandler):
key = base64.b64decode(post_data["key"].encode())
for single_model_config in args.model:
if os.path.isfile(single_model_config):
raise ValueError("The input of --model should be a dir not file.")
raise ValueError(
"The input of --model should be a dir not file.")
with open(single_model_config + "/key", "rb") as f:
cur_key = f.read()
if key != cur_key:
......@@ -394,7 +395,8 @@ if __name__ == "__main__":
device=args.device,
use_lite=args.use_lite,
use_xpu=args.use_xpu,
ir_optim=args.ir_optim)
ir_optim=args.ir_optim,
thread_num=args.thread)
web_service.run_rpc_service()
app_instance = Flask(__name__)
......
......@@ -27,6 +27,7 @@ import os
from paddle_serving_server import pipeline
from paddle_serving_server.pipeline import Op
def port_is_available(port):
with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
sock.settimeout(2)
......@@ -36,6 +37,7 @@ def port_is_available(port):
else:
return False
class WebService(object):
def __init__(self, name="default_service"):
self.name = name
......@@ -63,7 +65,9 @@ class WebService(object):
def run_service(self):
self._server.run_server()
def load_model_config(self, server_config_dir_paths, client_config_path=None):
def load_model_config(self,
server_config_dir_paths,
client_config_path=None):
if isinstance(server_config_dir_paths, str):
server_config_dir_paths = [server_config_dir_paths]
elif isinstance(server_config_dir_paths, list):
......@@ -73,14 +77,16 @@ class WebService(object):
if os.path.isdir(single_model_config):
pass
elif os.path.isfile(single_model_config):
raise ValueError("The input of --model should be a dir not file.")
raise ValueError(
"The input of --model should be a dir not file.")
self.server_config_dir_paths = server_config_dir_paths
from .proto import general_model_config_pb2 as m_config
import google.protobuf.text_format
file_path_list = []
for single_model_config in self.server_config_dir_paths:
file_path_list.append( "{}/serving_server_conf.prototxt".format(single_model_config) )
file_path_list.append("{}/serving_server_conf.prototxt".format(
single_model_config))
model_conf = m_config.GeneralModelConfig()
f = open(file_path_list[0], 'r')
model_conf = google.protobuf.text_format.Merge(
......@@ -130,7 +136,7 @@ class WebService(object):
infer_op_name = "general_infer"
general_infer_op = op_maker.create(infer_op_name)
op_seq_maker.add_op(general_infer_op)
general_response_op = op_maker.create('general_response')
op_seq_maker.add_op(general_response_op)
......@@ -146,7 +152,8 @@ class WebService(object):
if use_xpu:
server.set_xpu()
server.load_model_config(self.server_config_dir_paths)#brpc Server support server_config_dir_paths
server.load_model_config(self.server_config_dir_paths
) # brpc Server supports server_config_dir_paths
if gpuid >= 0:
server.set_gpuid(gpuid)
server.prepare_server(workdir=workdir, port=port, device=device)
......@@ -163,10 +170,12 @@ class WebService(object):
use_xpu=False,
ir_optim=False,
gpuid=0,
thread_num=2,
mem_optim=True):
print("This API will be deprecated later. Please do not use it")
self.workdir = workdir
self.port = port
self.thread_num = thread_num
self.device = device
self.gpuid = gpuid
self.port_list = []
......@@ -184,7 +193,7 @@ class WebService(object):
self.workdir,
self.port_list[0],
-1,
thread_num=2,
thread_num=self.thread_num,
mem_optim=mem_optim,
use_lite=use_lite,
use_xpu=use_xpu,
......@@ -196,7 +205,7 @@ class WebService(object):
"{}_{}".format(self.workdir, i),
self.port_list[i],
gpuid,
thread_num=2,
thread_num=self.thread_num,
mem_optim=mem_optim,
use_lite=use_lite,
use_xpu=use_xpu,
......@@ -297,9 +306,13 @@ class WebService(object):
# default self.gpus = [0].
if len(self.gpus) == 0:
self.gpus.append(0)
self.client.load_model_config(self.server_config_dir_paths[0], use_gpu=True, gpu_id=self.gpus[0])
self.client.load_model_config(
self.server_config_dir_paths[0],
use_gpu=True,
gpu_id=self.gpus[0])
else:
self.client.load_model_config(self.server_config_dir_paths[0], use_gpu=False)
self.client.load_model_config(
self.server_config_dir_paths[0], use_gpu=False)
def run_web_service(self):
print("This API will be deprecated later. Please do not use it")
......