Unverified commit 30788724 authored by Jiawei Wang, committed by GitHub

Merge pull request #1294 from HexToString/develop-p

C++ Serving: fix exceptions caused by several minor issues
......@@ -382,20 +382,24 @@ int VersionedInferEngine::task_infer_impl(const void* in,
return -1;
}
int InferManager::proc_initialize(const char* path, const char* file) {
int InferManager::proc_initialize(const char* path,
const char* file,
std::shared_ptr<int> engine_index_ptr) {
ModelToolkitConf model_toolkit_conf;
if (configure::read_proto_conf(path, file, &model_toolkit_conf) != 0) {
LOG(ERROR) << "failed load infer config, path: " << path << "/" << file;
return -1;
}
uint32_t engine_num = model_toolkit_conf.engines_size();
im::bsf::TaskExecutorVector<TaskT>::instance().resize(engine_num);
im::bsf::TaskExecutorVector<TaskT>::instance().resize(*engine_index_ptr+engine_num);
for (uint32_t ei = 0; ei < engine_num; ++ei) {
LOG(INFO) << "model_toolkit_conf.engines(" << ei
<< ").name: " << model_toolkit_conf.engines(ei).name();
std::string engine_name = model_toolkit_conf.engines(ei).name();
VersionedInferEngine* engine = new (std::nothrow) VersionedInferEngine();
engine->set_model_index(ei);
int temp_engine_index_ptr = *engine_index_ptr;
engine->set_model_index(temp_engine_index_ptr);
*engine_index_ptr = temp_engine_index_ptr + 1;
if (!engine) {
LOG(ERROR) << "Failed generate versioned engine: " << engine_name;
return -1;
......
......@@ -18,6 +18,7 @@
#include <sys/types.h>
#include <unistd.h>
#include <functional>
#include <memory>
#include <numeric>
#include <string>
#include <utility>
......@@ -337,12 +338,19 @@ class CloneDBReloadableInferEngine
md->cores[next_idx] = new (std::nothrow) EngineCore;
// params.dump();
// Normally gpu_ids_num > 0 holds;
// when the CPU is used, gpu_ids = [-1].
// If gpu_ids_num = 0, no gpuid was given at all.
// In that case we set gpu_ids_num = 1 and gpu_id = -1,
// so that at least 1 predictor can be created.
size_t gpu_ids_num = conf.gpu_ids_size();
im::bsf::AutoMutex lock(DBReloadableInferEngine<EngineCore>::_mutex);
int gpu_id = -1;
if (gpu_ids_num > 0) {
gpu_id = conf.gpu_ids(DBReloadableInferEngine<EngineCore>::gpu_index %
gpu_ids_num);
} else {
gpu_ids_num = 1;
}
// gpu_index is reset to 0 when load() or proc_initialize() is called.
// gpu_index < gpu_ids_num means there are predictors still not created
......@@ -365,14 +373,11 @@ class CloneDBReloadableInferEngine
_cloneTemplate[DBReloadableInferEngine<EngineCore>::gpu_index - 1] = md;
}
} else {
// when gpu_id = -1, means we use cpu, but the index should be 0.
// _cloneTemplate[-1] will occur error.
// actually, when gpu_id = -1, there is only 1 predictor in
// _cloneTemplate.
// so the index should always be 0 when gpu_id = -1.
if (gpu_id == -1) gpu_id = 0;
int template_index = DBReloadableInferEngine<EngineCore>::gpu_index %
_cloneTemplate.size();
if (!md->cores[next_idx] ||
md->cores[next_idx]->clone(_cloneTemplate[gpu_id]->get()) != 0) {
md->cores[next_idx]->clone(_cloneTemplate[template_index]->get()) !=
0) {
LOG(ERROR) << "Failed clone model from core";
return -1;
}
......@@ -591,7 +596,9 @@ class InferManager {
return ins;
}
int proc_initialize(const char* path, const char* file);
int proc_initialize(const char* path,
const char* file,
std::shared_ptr<int> engine_index_ptr);
int thrd_initialize();
......
......@@ -135,12 +135,14 @@ int Resource::initialize(const std::string& path, const std::string& file) {
if (FLAGS_enable_model_toolkit) {
size_t model_toolkit_num = resource_conf.model_toolkit_path_size();
std::shared_ptr<int> engine_index_ptr(new int(0));
for (size_t mi = 0; mi < model_toolkit_num; ++mi) {
std::string model_toolkit_path = resource_conf.model_toolkit_path(mi);
std::string model_toolkit_file = resource_conf.model_toolkit_file(mi);
if (InferManager::instance().proc_initialize(
model_toolkit_path.c_str(), model_toolkit_file.c_str()) != 0) {
if (InferManager::instance().proc_initialize(model_toolkit_path.c_str(),
model_toolkit_file.c_str(),
engine_index_ptr) != 0) {
LOG(ERROR) << "failed proc initialize modeltoolkit, config: "
<< model_toolkit_path << "/" << model_toolkit_file;
return -1;
......
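As a language-agnostic sketch of what the shared engine_index_ptr achieves (a hypothetical, simplified Python model, not the real C++ API): each model_toolkit config contributes several engines, and the shared counter keeps their model indices globally unique while the task-executor vector grows cumulatively instead of being resized to only the last config's engine count.

# Hypothetical sketch of the cumulative indexing done via engine_index_ptr.
# Names and config contents are illustrative, not the real Serving API.
def proc_initialize_sketch(engine_names, engine_index, executor_slots):
    """Assign a globally unique model index to every engine in one config."""
    executor_slots.extend([None] * len(engine_names))  # like resize(*engine_index_ptr + engine_num)
    for name in engine_names:
        model_index = engine_index[0]   # like *engine_index_ptr
        engine_index[0] += 1            # advance the shared counter
        print("engine {} -> model_index {}".format(name, model_index))
    return 0

engine_index = [0]      # plays the role of std::shared_ptr<int>(new int(0))
executor_slots = []     # plays the role of TaskExecutorVector
# Two model_toolkit configs: indices come out as 0,1 and then 2,3,4
# instead of restarting at 0 for the second config.
proc_initialize_sketch(["det", "rec"], engine_index, executor_slots)
proc_initialize_sketch(["cls", "seg", "ocr"], engine_index, executor_slots)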
......@@ -16,6 +16,7 @@
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "core/cube/cube-api/include/cube_api.h"
#include "core/predictor/common/inner_common.h"
......
......@@ -96,7 +96,6 @@ int ServerManager::start_and_wait() {
LOG(ERROR) << "Failed to start Paddle Inference Server";
return -1;
}
LOG(WARNING) << "Finsh start C++ PaddleServing.";
_server.RunUntilAskedToQuit();
ServerManager::stop_reloader();
......
......@@ -41,6 +41,8 @@ from multiprocessing import Pool, Process
from concurrent import futures
# The whole file is about to be discarded.
# We will use default config-file to start C++Server.
class Server(object):
def __init__(self):
"""
......@@ -172,8 +174,7 @@ class Server(object):
if isinstance(gpuid, int):
self.gpuid = str(gpuid)
elif isinstance(gpuid, list):
gpu_list = [str(x) for x in gpuid]
self.gpuid = ",".join(gpu_list)
self.gpuid = [str(x) for x in gpuid]
else:
self.gpuid = gpuid
......@@ -200,8 +201,14 @@ class Server(object):
self.model_toolkit_conf = []
self.device = device
# Generally, self.gpuid is a str or a list of str,
# such as "0", ["0"], ["0,1"] or ["0,1", "1,2"].
if isinstance(self.gpuid, str):
self.gpuid = [self.gpuid]
# When len(self.gpuid) == 0, no gpuid was specified.
# In that case, if self.device == "gpu" or self.use_trt is set,
# we assume you forgot to set gpuid, so gpuid defaults to ['0'].
if len(self.gpuid) == 0:
if self.device == "gpu" or self.use_trt:
self.gpuid.append("0")
......@@ -240,8 +247,6 @@ class Server(object):
engine.use_lite = self.use_lite
engine.use_xpu = self.use_xpu
engine.use_gpu = False
if self.device == "gpu" or self.use_trt:
engine.use_gpu = True
if len(self.gpuid) == 0:
raise ValueError("CPU: self.gpuid = -1, GPU: must set it ")
......@@ -249,6 +254,18 @@ class Server(object):
for ids in op_gpu_list:
engine.gpu_ids.extend([int(ids)])
if self.device == "gpu" or self.use_trt:
engine.use_gpu = True
# This handles mixed use of GPU and CPU.
# For example, model-1 uses the GPU with device="gpu",
# but gpuid[1] = "-1", which means model-2 runs on CPU,
# so the GPU-related config for that engine should be False.
if len(op_gpu_list) == 1:
if int(op_gpu_list[0]) == -1:
engine.use_gpu = False
engine.gpu_multi_stream = False
engine.use_trt = False
if os.path.exists('{}/__params__'.format(model_config_path)):
engine.combined_model = True
else:
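A hypothetical sketch of the mixed GPU/CPU rule explained in the comments above: even when device="gpu", a per-model gpu list of ["-1"] forces that engine back to CPU (function and attribute names are illustrative only):

# Hypothetical sketch: decide per-engine GPU flags from one model's gpu id list.
def engine_gpu_flags(op_gpu_list, device="gpu", use_trt=False):
    use_gpu = device == "gpu" or use_trt
    # A single id of -1 for this model means "run this model on CPU",
    # even if the overall device is "gpu".
    if len(op_gpu_list) == 1 and int(op_gpu_list[0]) == -1:
        use_gpu = False
        use_trt = False
    return {"use_gpu": use_gpu,
            "use_trt": use_trt,
            "gpu_ids": [int(i) for i in op_gpu_list]}

print(engine_gpu_flags("0,1".split(",")))  # this model runs on GPUs 0 and 1
print(engine_gpu_flags("-1".split(",")))   # this model is forced to CPU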
......@@ -540,71 +557,38 @@ class Server(object):
else:
print("Use local bin : {}".format(self.bin_path))
#self.check_cuda()
# Todo: merge CPU and GPU code, remove device to model_toolkit
if self.device == "cpu" or self.device == "arm":
command = "{} " \
"-enable_model_toolkit " \
"-inferservice_path {} " \
"-inferservice_file {} " \
"-max_concurrency {} " \
"-num_threads {} " \
"-port {} " \
"-precision {} " \
"-use_calib {} " \
"-reload_interval_s {} " \
"-resource_path {} " \
"-resource_file {} " \
"-workflow_path {} " \
"-workflow_file {} " \
"-bthread_concurrency {} " \
"-max_body_size {} ".format(
self.bin_path,
self.workdir,
self.infer_service_fn,
self.max_concurrency,
self.num_threads,
self.port,
self.precision,
self.use_calib,
self.reload_interval_s,
self.workdir,
self.resource_fn,
self.workdir,
self.workflow_fn,
self.num_threads,
self.max_body_size)
else:
command = "{} " \
"-enable_model_toolkit " \
"-inferservice_path {} " \
"-inferservice_file {} " \
"-max_concurrency {} " \
"-num_threads {} " \
"-port {} " \
"-precision {} " \
"-use_calib {} " \
"-reload_interval_s {} " \
"-resource_path {} " \
"-resource_file {} " \
"-workflow_path {} " \
"-workflow_file {} " \
"-bthread_concurrency {} " \
"-max_body_size {} ".format(
self.bin_path,
self.workdir,
self.infer_service_fn,
self.max_concurrency,
self.num_threads,
self.port,
self.precision,
self.use_calib,
self.reload_interval_s,
self.workdir,
self.resource_fn,
self.workdir,
self.workflow_fn,
self.num_threads,
self.max_body_size)
command = "{} " \
"-enable_model_toolkit " \
"-inferservice_path {} " \
"-inferservice_file {} " \
"-max_concurrency {} " \
"-num_threads {} " \
"-port {} " \
"-precision {} " \
"-use_calib {} " \
"-reload_interval_s {} " \
"-resource_path {} " \
"-resource_file {} " \
"-workflow_path {} " \
"-workflow_file {} " \
"-bthread_concurrency {} " \
"-max_body_size {} ".format(
self.bin_path,
self.workdir,
self.infer_service_fn,
self.max_concurrency,
self.num_threads,
self.port,
self.precision,
self.use_calib,
self.reload_interval_s,
self.workdir,
self.resource_fn,
self.workdir,
self.workflow_fn,
self.num_threads,
self.max_body_size)
print("Going to Run Comand")
print(command)
......
......@@ -108,8 +108,7 @@ class WebService(object):
if isinstance(gpus, int):
self.gpus = str(gpus)
elif isinstance(gpus, list):
gpu_list = [str(x) for x in gpus]
self.gpus = ",".join(gpu_list)
self.gpus = [str(x) for x in gpus]
else:
self.gpus = gpus
......@@ -261,8 +260,7 @@ class WebService(object):
if isinstance(gpuid, int):
self.gpus = str(gpuid)
elif isinstance(gpuid, list):
gpu_list = [str(x) for x in gpuid]
self.gpus = ",".join(gpu_list)
self.gpus = [str(x) for x in gpuid]
else:
self.gpus = gpuid
......@@ -363,7 +361,8 @@ class WebService(object):
# default self.gpus = [0].
if len(self.gpus) == 0:
self.gpus.append(0)
# Right now, the local Predictor only supports 1 card.
# No matter how many gpu ids are in gpus, only the first one is used.
gpu_id = (self.gpus[0].split(","))[0]
self.client.load_model_config(
self.server_config_dir_paths[0], use_gpu=True, gpu_id=gpu_id)
......
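For instance (a hypothetical illustration of the comment above), only the first card of the first entry in self.gpus is handed to the local Predictor:

# Hypothetical illustration: the local Predictor only gets the first gpu id.
gpus = ["0,1", "2"]             # normalized self.gpus
gpu_id = gpus[0].split(",")[0]  # -> "0"; the remaining ids are ignored for now
print(gpu_id)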