Unverified commit 30788724 authored by Jiawei Wang, committed by GitHub

Merge pull request #1294 from HexToString/develop-p

C++ Serving: fix exceptions caused by several minor issues
......@@ -382,20 +382,24 @@ int VersionedInferEngine::task_infer_impl(const void* in,
return -1;
}
int InferManager::proc_initialize(const char* path, const char* file) {
int InferManager::proc_initialize(const char* path,
const char* file,
std::shared_ptr<int> engine_index_ptr) {
ModelToolkitConf model_toolkit_conf;
if (configure::read_proto_conf(path, file, &model_toolkit_conf) != 0) {
LOG(ERROR) << "failed load infer config, path: " << path << "/" << file;
return -1;
}
uint32_t engine_num = model_toolkit_conf.engines_size();
im::bsf::TaskExecutorVector<TaskT>::instance().resize(engine_num);
im::bsf::TaskExecutorVector<TaskT>::instance().resize(*engine_index_ptr+engine_num);
for (uint32_t ei = 0; ei < engine_num; ++ei) {
LOG(INFO) << "model_toolkit_conf.engines(" << ei
<< ").name: " << model_toolkit_conf.engines(ei).name();
std::string engine_name = model_toolkit_conf.engines(ei).name();
VersionedInferEngine* engine = new (std::nothrow) VersionedInferEngine();
engine->set_model_index(ei);
int temp_engine_index_ptr = *engine_index_ptr;
engine->set_model_index(temp_engine_index_ptr);
*engine_index_ptr = temp_engine_index_ptr + 1;
if (!engine) {
LOG(ERROR) << "Failed generate versioned engine: " << engine_name;
return -1;
......
......@@ -18,6 +18,7 @@
#include <sys/types.h>
#include <unistd.h>
#include <functional>
#include <memory>
#include <numeric>
#include <string>
#include <utility>
......@@ -337,12 +338,19 @@ class CloneDBReloadableInferEngine
md->cores[next_idx] = new (std::nothrow) EngineCore;
// params.dump();
// Normally gpu_ids_num > 0 holds;
// when the CPU is used, gpu_ids = [-1].
// If gpu_ids_num = 0, no gpuid was given at all.
// In that case we set gpu_ids_num = 1 and gpu_id = -1,
// so that at least 1 predictor can be created.
size_t gpu_ids_num = conf.gpu_ids_size();
im::bsf::AutoMutex lock(DBReloadableInferEngine<EngineCore>::_mutex);
int gpu_id = -1;
if (gpu_ids_num > 0) {
gpu_id = conf.gpu_ids(DBReloadableInferEngine<EngineCore>::gpu_index %
gpu_ids_num);
} else {
gpu_ids_num = 1;
}
// gpu_index is reset to 0 when load() or proc_initialize() is called.
// gpu_index < gpu_ids_num means there are predictors still not created
......@@ -365,14 +373,11 @@ class CloneDBReloadableInferEngine
_cloneTemplate[DBReloadableInferEngine<EngineCore>::gpu_index - 1] = md;
}
} else {
// when gpu_id = -1, means we use cpu, but the index should be 0.
// _cloneTemplate[-1] will occur error.
// actually, when gpu_id = -1, there is only 1 predictor in
// _cloneTemplate.
// so the index should always be 0 when gpu_id = -1.
if (gpu_id == -1) gpu_id = 0;
int template_index = DBReloadableInferEngine<EngineCore>::gpu_index %
_cloneTemplate.size();
if (!md->cores[next_idx] ||
md->cores[next_idx]->clone(_cloneTemplate[gpu_id]->get()) != 0) {
md->cores[next_idx]->clone(_cloneTemplate[template_index]->get()) !=
0) {
LOG(ERROR) << "Failed clone model from core";
return -1;
}
......@@ -591,7 +596,9 @@ class InferManager {
return ins;
}
int proc_initialize(const char* path, const char* file);
int proc_initialize(const char* path,
const char* file,
std::shared_ptr<int> engine_index_ptr);
int thrd_initialize();
......
......@@ -135,12 +135,14 @@ int Resource::initialize(const std::string& path, const std::string& file) {
if (FLAGS_enable_model_toolkit) {
size_t model_toolkit_num = resource_conf.model_toolkit_path_size();
std::shared_ptr<int> engine_index_ptr(new int(0));
for (size_t mi = 0; mi < model_toolkit_num; ++mi) {
std::string model_toolkit_path = resource_conf.model_toolkit_path(mi);
std::string model_toolkit_file = resource_conf.model_toolkit_file(mi);
if (InferManager::instance().proc_initialize(
model_toolkit_path.c_str(), model_toolkit_file.c_str()) != 0) {
if (InferManager::instance().proc_initialize(model_toolkit_path.c_str(),
model_toolkit_file.c_str(),
engine_index_ptr) != 0) {
LOG(ERROR) << "failed proc initialize modeltoolkit, config: "
<< model_toolkit_path << "/" << model_toolkit_file;
return -1;
......
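As a language-agnostic sketch of what the shared engine_index_ptr achieves (a hypothetical, simplified Python model, not the real C++ API): each model_toolkit config contributes several engines, and the shared counter keeps their model indices globally unique while the task-executor vector grows cumulatively instead of being resized to only the last config's engine count.

# Hypothetical sketch of the cumulative indexing done via engine_index_ptr.
# Names and config contents are illustrative, not the real Serving API.
def proc_initialize_sketch(engine_names, engine_index, executor_slots):
    """Assign a globally unique model index to every engine in one config."""
    executor_slots.extend([None] * len(engine_names))  # like resize(*engine_index_ptr + engine_num)
    for name in engine_names:
        model_index = engine_index[0]   # like *engine_index_ptr
        engine_index[0] += 1            # advance the shared counter
        print("engine {} -> model_index {}".format(name, model_index))
    return 0

engine_index = [0]      # plays the role of std::shared_ptr<int>(new int(0))
executor_slots = []     # plays the role of TaskExecutorVector
# Two model_toolkit configs: indices come out as 0,1 and then 2,3,4
# instead of restarting at 0 for the second config.
proc_initialize_sketch(["det", "rec"], engine_index, executor_slots)
proc_initialize_sketch(["cls", "seg", "ocr"], engine_index, executor_slots)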
......@@ -16,6 +16,7 @@
#include <map>
#include <memory>
#include <string>
#include <utility>
#include <vector>
#include "core/cube/cube-api/include/cube_api.h"
#include "core/predictor/common/inner_common.h"
......
......@@ -96,7 +96,6 @@ int ServerManager::start_and_wait() {
LOG(ERROR) << "Failed to start Paddle Inference Server";
return -1;
}
LOG(WARNING) << "Finsh start C++ PaddleServing.";
_server.RunUntilAskedToQuit();
ServerManager::stop_reloader();
......
......@@ -41,6 +41,8 @@ from multiprocessing import Pool, Process
from concurrent import futures
# The whole file is about to be discarded.
# We will use default config-file to start C++Server.
class Server(object):
def __init__(self):
"""
......@@ -172,8 +174,7 @@ class Server(object):
if isinstance(gpuid, int):
self.gpuid = str(gpuid)
elif isinstance(gpuid, list):
gpu_list = [str(x) for x in gpuid]
self.gpuid = ",".join(gpu_list)
self.gpuid = [str(x) for x in gpuid]
else:
self.gpuid = gpuid
......@@ -200,8 +201,14 @@ class Server(object):
self.model_toolkit_conf = []
self.device = device
# Generally, self.gpuid is a str or a list of str,
# such as "0", ["0"], ["0,1"] or ["0,1", "1,2"].
if isinstance(self.gpuid, str):
self.gpuid = [self.gpuid]
# When len(self.gpuid) == 0, no gpuid was specified.
# In that case, if self.device == "gpu" or self.use_trt is set,
# we assume you forgot to set gpuid, so gpuid defaults to ['0'].
if len(self.gpuid) == 0:
if self.device == "gpu" or self.use_trt:
self.gpuid.append("0")
......@@ -240,8 +247,6 @@ class Server(object):
engine.use_lite = self.use_lite
engine.use_xpu = self.use_xpu
engine.use_gpu = False
if self.device == "gpu" or self.use_trt:
engine.use_gpu = True
if len(self.gpuid) == 0:
raise ValueError("CPU: self.gpuid = -1, GPU: must set it ")
......@@ -249,6 +254,18 @@ class Server(object):
for ids in op_gpu_list:
engine.gpu_ids.extend([int(ids)])
if self.device == "gpu" or self.use_trt:
engine.use_gpu = True
# This handles mixed use of GPU and CPU.
# For example, model-1 uses the GPU with device="gpu",
# but gpuid[1] = "-1", which means model-2 runs on CPU,
# so the GPU-related config for that engine should be False.
if len(op_gpu_list) == 1:
if int(op_gpu_list[0]) == -1:
engine.use_gpu = False
engine.gpu_multi_stream = False
engine.use_trt = False
if os.path.exists('{}/__params__'.format(model_config_path)):
engine.combined_model = True
else:
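A hypothetical sketch of the mixed GPU/CPU rule explained in the comments above: even when device="gpu", a per-model gpu list of ["-1"] forces that engine back to CPU (function and attribute names are illustrative only):

# Hypothetical sketch: decide per-engine GPU flags from one model's gpu id list.
def engine_gpu_flags(op_gpu_list, device="gpu", use_trt=False):
    use_gpu = device == "gpu" or use_trt
    # A single id of -1 for this model means "run this model on CPU",
    # even if the overall device is "gpu".
    if len(op_gpu_list) == 1 and int(op_gpu_list[0]) == -1:
        use_gpu = False
        use_trt = False
    return {"use_gpu": use_gpu,
            "use_trt": use_trt,
            "gpu_ids": [int(i) for i in op_gpu_list]}

print(engine_gpu_flags("0,1".split(",")))  # this model runs on GPUs 0 and 1
print(engine_gpu_flags("-1".split(",")))   # this model is forced to CPU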
......@@ -540,71 +557,38 @@ class Server(object):
else:
print("Use local bin : {}".format(self.bin_path))
#self.check_cuda()
# Todo: merge CPU and GPU code, remove device to model_toolkit
if self.device == "cpu" or self.device == "arm":
command = "{} " \
"-enable_model_toolkit " \
"-inferservice_path {} " \
"-inferservice_file {} " \
"-max_concurrency {} " \
"-num_threads {} " \
"-port {} " \
"-precision {} " \
"-use_calib {} " \
"-reload_interval_s {} " \
"-resource_path {} " \
"-resource_file {} " \
"-workflow_path {} " \
"-workflow_file {} " \
"-bthread_concurrency {} " \
"-max_body_size {} ".format(
self.bin_path,
self.workdir,
self.infer_service_fn,
self.max_concurrency,
self.num_threads,
self.port,
self.precision,
self.use_calib,
self.reload_interval_s,
self.workdir,
self.resource_fn,
self.workdir,
self.workflow_fn,
self.num_threads,
self.max_body_size)
else:
command = "{} " \
"-enable_model_toolkit " \
"-inferservice_path {} " \
"-inferservice_file {} " \
"-max_concurrency {} " \
"-num_threads {} " \
"-port {} " \
"-precision {} " \
"-use_calib {} " \
"-reload_interval_s {} " \
"-resource_path {} " \
"-resource_file {} " \
"-workflow_path {} " \
"-workflow_file {} " \
"-bthread_concurrency {} " \
"-max_body_size {} ".format(
self.bin_path,
self.workdir,
self.infer_service_fn,
self.max_concurrency,
self.num_threads,
self.port,
self.precision,
self.use_calib,
self.reload_interval_s,
self.workdir,
self.resource_fn,
self.workdir,
self.workflow_fn,
self.num_threads,
self.max_body_size)
command = "{} " \
"-enable_model_toolkit " \
"-inferservice_path {} " \
"-inferservice_file {} " \
"-max_concurrency {} " \
"-num_threads {} " \
"-port {} " \
"-precision {} " \
"-use_calib {} " \
"-reload_interval_s {} " \
"-resource_path {} " \
"-resource_file {} " \
"-workflow_path {} " \
"-workflow_file {} " \
"-bthread_concurrency {} " \
"-max_body_size {} ".format(
self.bin_path,
self.workdir,
self.infer_service_fn,
self.max_concurrency,
self.num_threads,
self.port,
self.precision,
self.use_calib,
self.reload_interval_s,
self.workdir,
self.resource_fn,
self.workdir,
self.workflow_fn,
self.num_threads,
self.max_body_size)
print("Going to Run Comand")
print(command)
......
......@@ -108,8 +108,7 @@ class WebService(object):
if isinstance(gpus, int):
self.gpus = str(gpus)
elif isinstance(gpus, list):
gpu_list = [str(x) for x in gpus]
self.gpus = ",".join(gpu_list)
self.gpus = [str(x) for x in gpus]
else:
self.gpus = gpus
......@@ -261,8 +260,7 @@ class WebService(object):
if isinstance(gpuid, int):
self.gpus = str(gpuid)
elif isinstance(gpuid, list):
gpu_list = [str(x) for x in gpuid]
self.gpus = ",".join(gpu_list)
self.gpus = [str(x) for x in gpuid]
else:
self.gpus = gpuid
......@@ -363,7 +361,8 @@ class WebService(object):
# default self.gpus = [0].
if len(self.gpus) == 0:
self.gpus.append(0)
# Right now, the local Predictor only supports 1 card.
# No matter how many gpu ids are in gpus, only the first one is used.
gpu_id = (self.gpus[0].split(","))[0]
self.client.load_model_config(
self.server_config_dir_paths[0], use_gpu=True, gpu_id=gpu_id)
......
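For instance (a hypothetical illustration of the comment above), only the first card of the first entry in self.gpus is handed to the local Predictor:

# Hypothetical illustration: the local Predictor only gets the first gpu id.
gpus = ["0,1", "2"]             # normalized self.gpus
gpu_id = gpus[0].split(",")[0]  # -> "0"; the remaining ids are ignored for now
print(gpu_id)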