diff --git a/core/predictor/framework/infer.cpp b/core/predictor/framework/infer.cpp
old mode 100644
new mode 100755
index fd80ed639bc51075dcf79bec8f33d724503fe617..fa77c50506c02c4350569349e4e6679dde173ffc
--- a/core/predictor/framework/infer.cpp
+++ b/core/predictor/framework/infer.cpp
@@ -382,20 +382,24 @@ int VersionedInferEngine::task_infer_impl(const void* in,
   return -1;
 }
 
-int InferManager::proc_initialize(const char* path, const char* file) {
+int InferManager::proc_initialize(const char* path,
+                                  const char* file,
+                                  std::shared_ptr<int> engine_index_ptr) {
   ModelToolkitConf model_toolkit_conf;
   if (configure::read_proto_conf(path, file, &model_toolkit_conf) != 0) {
     LOG(ERROR) << "failed load infer config, path: " << path << "/" << file;
     return -1;
   }
   uint32_t engine_num = model_toolkit_conf.engines_size();
-  im::bsf::TaskExecutorVector<TaskT>::instance().resize(engine_num);
+  im::bsf::TaskExecutorVector<TaskT>::instance().resize(*engine_index_ptr + engine_num);
   for (uint32_t ei = 0; ei < engine_num; ++ei) {
     LOG(INFO) << "model_toolkit_conf.engines(" << ei
               << ").name: " << model_toolkit_conf.engines(ei).name();
     std::string engine_name = model_toolkit_conf.engines(ei).name();
     VersionedInferEngine* engine = new (std::nothrow) VersionedInferEngine();
-    engine->set_model_index(ei);
+    int temp_engine_index_ptr = *engine_index_ptr;
+    engine->set_model_index(temp_engine_index_ptr);
+    *engine_index_ptr = temp_engine_index_ptr + 1;
     if (!engine) {
       LOG(ERROR) << "Failed generate versioned engine: " << engine_name;
       return -1;
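
Note on the infer.cpp hunk above: Resource::initialize() calls InferManager::proc_initialize() once per model_toolkit config, and the shared engine_index_ptr keeps the engine index growing across those calls instead of restarting at 0 for every config, so TaskExecutorVector ends up sized for the global engine total. Below is a minimal Python sketch of that counter behaviour; ToolkitConf, init_toolkit, executor_slots and model_index are illustrative stand-ins, not names from the codebase.

```python
# Sketch of the cross-config engine indexing introduced in proc_initialize().
# A one-element list plays the role of the shared_ptr<int> counter.

class ToolkitConf:
    def __init__(self, engine_names):
        self.engine_names = engine_names


executor_slots = []   # stands in for TaskExecutorVector
model_index = {}      # engine name -> global model index


def init_toolkit(conf, engine_index):
    # Grow the executor table by engine_num so its final size is the global
    # total, mirroring resize(*engine_index_ptr + engine_num) in the C++ hunk.
    executor_slots.extend([None] * len(conf.engine_names))
    for name in conf.engine_names:
        model_index[name] = engine_index[0]   # set_model_index(*ptr)
        engine_index[0] += 1                  # *ptr = *ptr + 1


shared_index = [0]
for conf in (ToolkitConf(["det"]), ToolkitConf(["rec", "cls"])):
    init_toolkit(conf, shared_index)

assert model_index == {"det": 0, "rec": 1, "cls": 2}
assert len(executor_slots) == 3
```

Without the shared counter, the second config would start numbering at 0 again and overwrite the first config's executor slots.
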
diff --git a/core/predictor/framework/infer.h b/core/predictor/framework/infer.h
old mode 100644
new mode 100755
index 3cdef9dc92a302885da46c64e690855d0472c7ea..93be13c684874b8b5a6686f3aeddd2942037d84c
--- a/core/predictor/framework/infer.h
+++ b/core/predictor/framework/infer.h
@@ -18,6 +18,7 @@
 #include
 #include
 #include
+#include <memory>
 #include
 #include
 #include
@@ -337,12 +338,19 @@ class CloneDBReloadableInferEngine
     md->cores[next_idx] = new (std::nothrow) EngineCore;
 
     // params.dump();
+    // gpu_ids_num > 0 should always hold here:
+    // when running on CPU, gpu_ids = [-1].
+    // gpu_ids_num = 0 means no gpu id was given in the config,
+    // so set gpu_ids_num = 1 and keep gpu_id = -1,
+    // which guarantees that at least 1 predictor is created.
     size_t gpu_ids_num = conf.gpu_ids_size();
     im::bsf::AutoMutex lock(DBReloadableInferEngine<EngineCore>::_mutex);
     int gpu_id = -1;
     if (gpu_ids_num > 0) {
       gpu_id = conf.gpu_ids(DBReloadableInferEngine<EngineCore>::gpu_index %
                             gpu_ids_num);
+    } else {
+      gpu_ids_num = 1;
     }
     // gpu_index will be set to be 0, when load() or proc_initial() is called.
     // gpu_index < gpu_ids_num, means there are predictors still not create
@@ -365,14 +373,11 @@ class CloneDBReloadableInferEngine
         _cloneTemplate[DBReloadableInferEngine<EngineCore>::gpu_index - 1] = md;
       }
     } else {
-      // when gpu_id = -1, means we use cpu, but the index should be 0.
-      // _cloneTemplate[-1] will occur error.
-      // actually, when gpu_id = -1, there is only 1 predictor in
-      // _cloneTemplate.
-      // so the index should always be 0 when gpu_id = -1.
-      if (gpu_id == -1) gpu_id = 0;
+      int template_index = DBReloadableInferEngine<EngineCore>::gpu_index %
+                           _cloneTemplate.size();
       if (!md->cores[next_idx] ||
-          md->cores[next_idx]->clone(_cloneTemplate[gpu_id]->get()) != 0) {
+          md->cores[next_idx]->clone(_cloneTemplate[template_index]->get()) !=
+              0) {
         LOG(ERROR) << "Failed clone model from core";
         return -1;
       }
@@ -591,7 +596,9 @@ class InferManager {
     return ins;
   }
 
-  int proc_initialize(const char* path, const char* file);
+  int proc_initialize(const char* path,
+                      const char* file,
+                      std::shared_ptr<int> engine_index_ptr);
 
   int thrd_initialize();
 
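
Note on the infer.h hunks above: the clone branch now selects its template with gpu_index % _cloneTemplate.size() rather than indexing by gpu_id, so the CPU case (gpu_id = -1, a single template) no longer needs the special-case reset to 0, and multi-GPU clones are spread round-robin over the available templates. A rough sketch of that selection rule, with illustrative names only:

```python
# Round-robin template selection, mirroring the clone branch above.
# clone_templates stands in for _cloneTemplate, gpu_index for the counter.

def pick_template(clone_templates, gpu_index):
    # Valid for CPU (one template built with gpu_id = -1)
    # and for any number of GPU templates.
    return clone_templates[gpu_index % len(clone_templates)]


gpu_templates = ["core_on_gpu_0", "core_on_gpu_1"]
assert pick_template(gpu_templates, 2) == "core_on_gpu_0"
assert pick_template(gpu_templates, 3) == "core_on_gpu_1"

cpu_templates = ["core_on_cpu"]   # the gpu_ids = [-1] case
assert pick_template(cpu_templates, 5) == "core_on_cpu"
```
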
diff --git a/core/predictor/framework/resource.cpp b/core/predictor/framework/resource.cpp
old mode 100644
new mode 100755
index 37c8092a7c206ae91ac783b15f3aadce780f0132..1da9783888fa379b653eaa46311c10f3d6c6ec66
--- a/core/predictor/framework/resource.cpp
+++ b/core/predictor/framework/resource.cpp
@@ -135,12 +135,14 @@ int Resource::initialize(const std::string& path, const std::string& file) {
   if (FLAGS_enable_model_toolkit) {
     size_t model_toolkit_num = resource_conf.model_toolkit_path_size();
+    std::shared_ptr<int> engine_index_ptr(new int(0));
 
     for (size_t mi = 0; mi < model_toolkit_num; ++mi) {
       std::string model_toolkit_path = resource_conf.model_toolkit_path(mi);
       std::string model_toolkit_file = resource_conf.model_toolkit_file(mi);
-      if (InferManager::instance().proc_initialize(
-              model_toolkit_path.c_str(), model_toolkit_file.c_str()) != 0) {
+      if (InferManager::instance().proc_initialize(model_toolkit_path.c_str(),
+                                                   model_toolkit_file.c_str(),
+                                                   engine_index_ptr) != 0) {
         LOG(ERROR) << "failed proc initialize modeltoolkit, config: "
                    << model_toolkit_path << "/" << model_toolkit_file;
         return -1;
diff --git a/core/predictor/framework/resource.h b/core/predictor/framework/resource.h
old mode 100644
new mode 100755
index e144120e5a67bc2a43433cb3857331e9d1a465cf..d8a114dab581b71182c1a510db16aa0d2e818b0a
--- a/core/predictor/framework/resource.h
+++ b/core/predictor/framework/resource.h
@@ -16,6 +16,7 @@
 #include
 #include
 #include
+#include <memory>
 #include
 #include "core/cube/cube-api/include/cube_api.h"
 #include "core/predictor/common/inner_common.h"
diff --git a/core/predictor/framework/server.cpp b/core/predictor/framework/server.cpp
index 996ab9120a21b4719d1203a5de430fc71d89cb52..8ced6f1e9936059ada169633e21690d13bc48ae3 100755
--- a/core/predictor/framework/server.cpp
+++ b/core/predictor/framework/server.cpp
@@ -96,7 +96,6 @@ int ServerManager::start_and_wait() {
     LOG(ERROR) << "Failed to start Paddle Inference Server";
     return -1;
   }
-  LOG(WARNING) << "Finsh start C++ PaddleServing.";
   _server.RunUntilAskedToQuit();
 
   ServerManager::stop_reloader();
diff --git a/python/paddle_serving_server/server.py b/python/paddle_serving_server/server.py
index 1559536b493a0b928cf5f06a07576a0a3c1ac024..2cc839d00e31199d743ae219998843560e10daaa 100755
--- a/python/paddle_serving_server/server.py
+++ b/python/paddle_serving_server/server.py
@@ -41,6 +41,8 @@ from multiprocessing import Pool, Process
 from concurrent import futures
 
+# This whole file is about to be deprecated;
+# we will use a default config file to start the C++ server.
 
 class Server(object):
     def __init__(self):
         """
@@ -172,8 +174,7 @@ class Server(object):
         if isinstance(gpuid, int):
             self.gpuid = str(gpuid)
         elif isinstance(gpuid, list):
-            gpu_list = [str(x) for x in gpuid]
-            self.gpuid = ",".join(gpu_list)
+            self.gpuid = [str(x) for x in gpuid]
         else:
             self.gpuid = gpuid
 
@@ -200,8 +201,14 @@ class Server(object):
         self.model_toolkit_conf = []
         self.device = device
 
+        # Generally, self.gpuid is a str or a list of str,
+        # e.g. "0", ["0"], ["0,1"] or ["0,1", "1,2"].
         if isinstance(self.gpuid, str):
             self.gpuid = [self.gpuid]
+
+        # len(self.gpuid) == 0 means no gpuid was specified.
+        # In that case, if self.device == "gpu" or self.use_trt is set,
+        # we assume you forgot to set gpuid and default it to ["0"].
         if len(self.gpuid) == 0:
             if self.device == "gpu" or self.use_trt:
                 self.gpuid.append("0")
@@ -240,8 +247,6 @@ class Server(object):
             engine.use_lite = self.use_lite
             engine.use_xpu = self.use_xpu
             engine.use_gpu = False
-            if self.device == "gpu" or self.use_trt:
-                engine.use_gpu = True
 
             if len(self.gpuid) == 0:
                 raise ValueError("CPU: self.gpuid = -1, GPU: must set it ")
@@ -249,6 +254,18 @@
             for ids in op_gpu_list:
                 engine.gpu_ids.extend([int(ids)])
 
+            if self.device == "gpu" or self.use_trt:
+                engine.use_gpu = True
+                # This is for mixed use of GPU and CPU:
+                # model-1 may set device="gpu" and run on a GPU,
+                # while gpuid[1] = "-1" means model-2 runs on CPU,
+                # so the GPU-related config must be turned off for that op.
+                if len(op_gpu_list) == 1:
+                    if int(op_gpu_list[0]) == -1:
+                        engine.use_gpu = False
+                        engine.gpu_multi_stream = False
+                        engine.use_trt = False
+
             if os.path.exists('{}/__params__'.format(model_config_path)):
                 engine.combined_model = True
             else:
@@ -540,71 +557,38 @@ class Server(object):
         else:
             print("Use local bin : {}".format(self.bin_path))
         #self.check_cuda()
-        # Todo: merge CPU and GPU code, remove device to model_toolkit
-        if self.device == "cpu" or self.device == "arm":
-            command = "{} " \
-                      "-enable_model_toolkit " \
-                      "-inferservice_path {} " \
-                      "-inferservice_file {} " \
-                      "-max_concurrency {} " \
-                      "-num_threads {} " \
-                      "-port {} " \
-                      "-precision {} " \
-                      "-use_calib {} " \
-                      "-reload_interval_s {} " \
-                      "-resource_path {} " \
-                      "-resource_file {} " \
-                      "-workflow_path {} " \
-                      "-workflow_file {} " \
-                      "-bthread_concurrency {} " \
-                      "-max_body_size {} ".format(
-                          self.bin_path,
-                          self.workdir,
-                          self.infer_service_fn,
-                          self.max_concurrency,
-                          self.num_threads,
-                          self.port,
-                          self.precision,
-                          self.use_calib,
-                          self.reload_interval_s,
-                          self.workdir,
-                          self.resource_fn,
-                          self.workdir,
-                          self.workflow_fn,
-                          self.num_threads,
-                          self.max_body_size)
-        else:
-            command = "{} " \
-                      "-enable_model_toolkit " \
-                      "-inferservice_path {} " \
-                      "-inferservice_file {} " \
-                      "-max_concurrency {} " \
-                      "-num_threads {} " \
-                      "-port {} " \
-                      "-precision {} " \
-                      "-use_calib {} " \
-                      "-reload_interval_s {} " \
-                      "-resource_path {} " \
-                      "-resource_file {} " \
-                      "-workflow_path {} " \
-                      "-workflow_file {} " \
-                      "-bthread_concurrency {} " \
-                      "-max_body_size {} ".format(
-                          self.bin_path,
-                          self.workdir,
-                          self.infer_service_fn,
-                          self.max_concurrency,
-                          self.num_threads,
-                          self.port,
-                          self.precision,
-                          self.use_calib,
-                          self.reload_interval_s,
-                          self.workdir,
-                          self.resource_fn,
-                          self.workdir,
-                          self.workflow_fn,
-                          self.num_threads,
-                          self.max_body_size)
+        command = "{} " \
+                  "-enable_model_toolkit " \
+                  "-inferservice_path {} " \
+                  "-inferservice_file {} " \
+                  "-max_concurrency {} " \
+                  "-num_threads {} " \
+                  "-port {} " \
+                  "-precision {} " \
+                  "-use_calib {} " \
+                  "-reload_interval_s {} " \
+                  "-resource_path {} " \
+                  "-resource_file {} " \
+                  "-workflow_path {} " \
+                  "-workflow_file {} " \
+                  "-bthread_concurrency {} " \
+                  "-max_body_size {} ".format(
+                      self.bin_path,
+                      self.workdir,
+                      self.infer_service_fn,
+                      self.max_concurrency,
+                      self.num_threads,
+                      self.port,
+                      self.precision,
+                      self.use_calib,
+                      self.reload_interval_s,
+                      self.workdir,
+                      self.resource_fn,
+                      self.workdir,
+                      self.workflow_fn,
+                      self.num_threads,
+                      self.max_body_size)
+        print("Going to Run Command")
         print(command)
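
Note on the server.py hunks above: self.gpuid now stays a list of per-op strings (for example ["0,1", "-1"]) instead of being joined into a single comma-separated string, and an op whose entry is "-1" is forced back to CPU even when device="gpu" or TensorRT is requested. The sketch below condenses that per-op decision; the engine is reduced to a plain dict rather than the real EngineDesc proto, and build_engine_conf is an illustrative helper, not an API of the package.

```python
# Sketch of the per-op gpu_ids / use_gpu decision made in server.py above.
# Each op takes its entry from gpuid by index (mod len); "-1" means CPU.

def build_engine_conf(op_index, gpuid, device="gpu", use_trt=False):
    if isinstance(gpuid, str):                    # "0" -> ["0"]
        gpuid = [gpuid]
    if len(gpuid) == 0 and (device == "gpu" or use_trt):
        gpuid = ["0"]                             # assume gpuid was forgotten

    op_gpu_list = gpuid[op_index % len(gpuid)].split(",")
    engine = {"gpu_ids": [int(i) for i in op_gpu_list], "use_gpu": False}

    if device == "gpu" or use_trt:
        engine["use_gpu"] = True
        # mixed GPU/CPU: a lone "-1" entry turns this op back to CPU
        if op_gpu_list == ["-1"]:
            engine["use_gpu"] = False
    return engine


assert build_engine_conf(0, ["0,1", "-1"]) == {"gpu_ids": [0, 1], "use_gpu": True}
assert build_engine_conf(1, ["0,1", "-1"]) == {"gpu_ids": [-1], "use_gpu": False}
```
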
" \ + "-resource_path {} " \ + "-resource_file {} " \ + "-workflow_path {} " \ + "-workflow_file {} " \ + "-bthread_concurrency {} " \ + "-max_body_size {} ".format( + self.bin_path, + self.workdir, + self.infer_service_fn, + self.max_concurrency, + self.num_threads, + self.port, + self.precision, + self.use_calib, + self.reload_interval_s, + self.workdir, + self.resource_fn, + self.workdir, + self.workflow_fn, + self.num_threads, + self.max_body_size) + print("Going to Run Comand") print(command) diff --git a/python/paddle_serving_server/web_service.py b/python/paddle_serving_server/web_service.py index f514376c6e9f0b113dd63c486be42b2088c80b6d..de7cec92e7430593ae53a570b07c15126b3c1d6e 100755 --- a/python/paddle_serving_server/web_service.py +++ b/python/paddle_serving_server/web_service.py @@ -108,8 +108,7 @@ class WebService(object): if isinstance(gpus, int): self.gpus = str(gpus) elif isinstance(gpus, list): - gpu_list = [str(x) for x in gpus] - self.gpus = ",".join(gpu_list) + self.gpus = [str(x) for x in gpus] else: self.gpus = gpus @@ -261,8 +260,7 @@ class WebService(object): if isinstance(gpuid, int): self.gpus = str(gpuid) elif isinstance(gpuid, list): - gpu_list = [str(x) for x in gpuid] - self.gpus = ",".join(gpu_list) + self.gpus = [str(x) for x in gpuid] else: self.gpus = gpuid @@ -363,7 +361,8 @@ class WebService(object): # default self.gpus = [0]. if len(self.gpus) == 0: self.gpus.append(0) - + # right now, local Predictor only support 1 card. + # no matter how many gpu_id is in gpus, we only use the first one. gpu_id = (self.gpus[0].split(","))[0] self.client.load_model_config( self.server_config_dir_paths[0], use_gpu=True, gpu_id=gpu_id)