Commit 07eadb82 authored by HexToString

fix bug and names

Parent 9256114c
......@@ -21,6 +21,7 @@
#include <utility>
#include <vector>
#include <numeric>
#include <functional>
#include "core/predictor/common/inner_common.h"
#include "core/predictor/framework/bsf.h"
#include "core/predictor/framework/factory.h"
......@@ -68,7 +69,9 @@ class InferEngine {
virtual int thrd_initialize() { return thrd_initialize_impl(); }
virtual int thrd_clear() { return thrd_clear_impl(); }
virtual int thrd_finalize() { return thrd_finalize_impl(); }
virtual int infer(const void* in, void* out, uint32_t batch_size = -1) { return infer_impl(in, out, batch_size); }
virtual int infer(const void* in, void* out, uint32_t batch_size = -1) {
return infer_impl(in, out, batch_size);
}
virtual int reload() = 0;
......@@ -208,7 +211,6 @@ class ReloadableInferEngine : public InferEngine {
}
uint64_t version() const { return _version; }
uint32_t thread_num() const { return _infer_thread_num; }
private:
......@@ -335,7 +337,7 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
md->cores[next_idx] = new (std::nothrow) EngineCore;
//params.dump();
// params.dump();
if (!md->cores[next_idx] || md->cores[next_idx]->create(conf) != 0) {
LOG(ERROR) << "Failed create model, path: " << conf.model_dir();
return -1;
......@@ -491,71 +493,86 @@ class CloneDBReloadableInferEngine
_pd;  // Process-level EngineCore; multiple thread-level EngineCores share this object's model data
};
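The translated comment above describes the clone pattern used by CloneDBReloadableInferEngine: one process-level EngineCore loads the model data once, and every thread-level EngineCore shares it. A minimal, framework-independent sketch of that ownership layout is given below; SharedModel and ThreadCore are hypothetical names, not Serving classes.

// Sketch only: illustrates the process-level / thread-level split described
// in the comment above. SharedModel and ThreadCore are hypothetical names.
#include <memory>
#include <utility>
#include <vector>

struct SharedModel {           // stands in for the process-level EngineCore
  std::vector<char> weights;   // model data loaded once per process
};

class ThreadCore {             // stands in for a thread-level EngineCore
 public:
  explicit ThreadCore(std::shared_ptr<const SharedModel> model)
      : model_(std::move(model)) {}
  // Per-thread scratch state would live here; the weights stay shared.

 private:
  std::shared_ptr<const SharedModel> model_;
};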
template <typename PaddleInferenceCore>
template <typename EngineCore>
#ifdef WITH_TRT
class FluidInferEngine : public DBReloadableInferEngine<PaddleInferenceCore> {
class FluidInferEngine : public DBReloadableInferEngine<EngineCore> {
#else
class FluidInferEngine : public CloneDBReloadableInferEngine<PaddleInferenceCore> {
class FluidInferEngine : public CloneDBReloadableInferEngine<EngineCore> {
#endif
public: // NOLINT
FluidInferEngine() {}
~FluidInferEngine() {}
typedef std::vector<paddle::PaddleTensor> TensorVector;
int infer_impl(const void* in, void* out, uint32_t batch_size = -1) {
//First of all, get the real core acording to the template parameter 'PaddleInferenceCore'.
PaddleInferenceCore* core =DBReloadableInferEngine<PaddleInferenceCore>::get_core();
// First of all, get the real core according to the
// template parameter <EngineCore>.
EngineCore* core = DBReloadableInferEngine<EngineCore>::get_core();
if (!core || !core->get()) {
LOG(ERROR) << "Failed get fluid core in infer_impl()";
return -1;
}
//We use the for loop to process the input data.
//Inside each for loop, use the in[i]->name as inputName and call 'core->GetInputHandle(inputName)' to get the pointer of InputData.
//Set the lod and shape information of InputData first. then copy data from cpu to the core.
const TensorVector* tensorVector_in_pointer = reinterpret_cast<const TensorVector*>(in);
// We use a for loop to process the input data.
// In each iteration, use in[i]->name as inputName and call
// 'core->GetInputHandle(inputName)' to get a pointer to the InputData.
// Set the lod and shape information of the InputData first,
// then copy the data from CPU to the core.
const TensorVector* tensorVector_in_pointer =
reinterpret_cast<const TensorVector*>(in);
for (int i=0; i < tensorVector_in_pointer->size(); ++i) {
auto lod_tensor_in = core->GetInputHandle((*tensorVector_in_pointer)[i].name);
auto lod_tensor_in =
core->GetInputHandle((*tensorVector_in_pointer)[i].name);
lod_tensor_in->SetLoD((*tensorVector_in_pointer)[i].lod);
lod_tensor_in->Reshape((*tensorVector_in_pointer)[i].shape);
void* origin_data = (*tensorVector_in_pointer)[i].data.data();
//Because the core needs to determine the size of memory space according to the data type passed in.
//The pointer type of data must be one of float *,int64_t*,int32_t* instead void*.
// The core determines the size of the memory space according to
// the data type passed in, so the pointer type of the data must be
// one of float*, int64_t*, or int32_t* instead of void*.
if ((*tensorVector_in_pointer)[i].dtype == paddle::PaddleDType::FLOAT32) {
float* data = static_cast<float*>(origin_data);
lod_tensor_in->CopyFromCpu(data);
}else if ((*tensorVector_in_pointer)[i].dtype == paddle::PaddleDType::INT64) {
} else if ((*tensorVector_in_pointer)[i].dtype ==
paddle::PaddleDType::INT64) {
int64_t* data = static_cast<int64_t*>(origin_data);
lod_tensor_in->CopyFromCpu(data);
}else if ((*tensorVector_in_pointer)[i].dtype == paddle::PaddleDType::INT32) {
} else if ((*tensorVector_in_pointer)[i].dtype ==
paddle::PaddleDType::INT32) {
int32_t* data = static_cast<int32_t*>(origin_data);
lod_tensor_in->CopyFromCpu(data);
}
}
//After the input data is passed in, call 'core->Run()' perform the prediction process.
// After the input data is passed in, call 'core->Run()'
// to perform the prediction.
if (!core->Run()) {
LOG(ERROR) << "Failed run fluid family core";
return -1;
}
//In order to get the results, first, call the 'core->GetOutputNames()' to get the name of output(which is a dict like {OutputName:pointer of OutputValue}).
//Then, use for-loop to get OutputValue by calling 'core->GetOutputHandle'.
// To get the results, first call 'core->GetOutputNames()' to get the
// output names (a dict like {OutputName: pointer to OutputValue}).
// Then use a for loop with 'core->GetOutputHandle' to get each OutputValue.
std::vector<std::string> outnames = core->GetOutputNames();
std::vector<int> output_shape;
int out_num =0;
int dataType =0;
int out_num = 0;
int dataType = 0;
void* databuf_data = NULL;
char* databuf_char = NULL;
size_t databuf_size = 0;
TensorVector* tensorVector_out_pointer = reinterpret_cast<TensorVector*>(out);
TensorVector* tensorVector_out_pointer =
reinterpret_cast<TensorVector*>(out);
if (!tensorVector_out_pointer) {
LOG(ERROR) << "tensorVector_out_pointer is nullptr,error";
return -1;
}
//Get the type and shape information of OutputData first. then copy data to cpu from the core.
//The pointer type of data_out must be one of float *,int64_t*,int32_t* instead void*.
// Get the type and shape information of the OutputData first,
// then copy the data from the core to the CPU.
// The pointer type of data_out must be one of
// float*, int64_t*, or int32_t* instead of void*.
for (int i=0; i < outnames.size(); ++i) {
auto lod_tensor_out = core->GetOutputHandle(outnames[i]);
output_shape = lod_tensor_out->shape();
out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1, std::multiplies<int>());
out_num = std::accumulate(
output_shape.begin(), output_shape.end(), 1, std::multiplies<int>());
dataType = lod_tensor_out->type();
if (dataType == paddle::PaddleDType::FLOAT32) {
databuf_size = out_num*sizeof(float);
......@@ -567,7 +584,7 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<PaddleInferenceCore
float* data_out = reinterpret_cast<float*>(databuf_data);
lod_tensor_out->CopyToCpu(data_out);
databuf_char = reinterpret_cast<char*>(data_out);
}else if (dataType == paddle::PaddleDType::INT64) {
} else if (dataType == paddle::PaddleDType::INT64) {
databuf_size = out_num*sizeof(int64_t);
databuf_data = MempoolWrapper::instance().malloc(databuf_size);
if (!databuf_data) {
......@@ -577,7 +594,7 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<PaddleInferenceCore
int64_t* data_out = reinterpret_cast<int64_t*>(databuf_data);
lod_tensor_out->CopyToCpu(data_out);
databuf_char = reinterpret_cast<char*>(data_out);
}else if (dataType == paddle::PaddleDType::INT32) {
} else if (dataType == paddle::PaddleDType::INT32) {
databuf_size = out_num*sizeof(int32_t);
databuf_data = MempoolWrapper::instance().malloc(databuf_size);
if (!databuf_data) {
......@@ -588,9 +605,11 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<PaddleInferenceCore
lod_tensor_out->CopyToCpu(data_out);
databuf_char = reinterpret_cast<char*>(data_out);
}
//Because task scheduling requires OPs to use 'Channel'(which is a data structure) to transfer data between OPs.
//We need to copy the processed data to the 'Channel' for the next OP.
//In this function, it means we should copy the 'databuf_char' to the pointer 'void* out'.(which is also called ‘tensorVector_out_pointer’)
// Task scheduling requires OPs to use a 'Channel' (a data structure)
// to transfer data between OPs, so we need to copy the processed data
// to the 'Channel' for the next OP.
// In this function, that means copying 'databuf_char' to
// 'void* out' (also known as 'tensorVector_out_pointer').
paddle::PaddleTensor tensor_out;
tensor_out.name = outnames[i];
tensor_out.dtype = paddle::PaddleDType(dataType);
......@@ -611,8 +630,6 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<PaddleInferenceCore
int task_infer_impl(const BatchTensor& in, BatchTensor& out) { // NOLINT
return infer_impl(&in, &out);
}
};
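Every dtype branch in infer_impl() above repeats the same steps: compute the element count from the output shape, allocate element_count * sizeof(T) bytes, and copy the tensor into that buffer. Below is a self-contained sketch of that size calculation and dtype dispatch, using plain std::malloc in place of MempoolWrapper; the DType enum is only a stand-in for paddle::PaddleDType.

#include <cstddef>
#include <cstdint>
#include <cstdlib>
#include <functional>
#include <numeric>
#include <vector>

// Hypothetical stand-in for paddle::PaddleDType, used only for this sketch.
enum class DType { FLOAT32, INT64, INT32 };

inline std::size_t element_size(DType t) {
  switch (t) {
    case DType::FLOAT32: return sizeof(float);
    case DType::INT64:   return sizeof(int64_t);
    case DType::INT32:   return sizeof(int32_t);
  }
  return 0;
}

// out_num in infer_impl() is the product of all output dims.
inline int element_count(const std::vector<int>& shape) {
  return std::accumulate(
      shape.begin(), shape.end(), 1, std::multiplies<int>());
}

// databuf_size = out_num * sizeof(T); infer_impl() draws this buffer from
// MempoolWrapper, which is simplified to malloc here.
inline void* alloc_output_buffer(const std::vector<int>& shape, DType t) {
  return std::malloc(element_count(shape) * element_size(t));
}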
typedef FactoryPool<InferEngine> StaticInferFactory;
......@@ -797,7 +814,9 @@ class VersionedInferEngine : public InferEngine {
int thrd_finalize_impl() { return -1; }
int thrd_clear_impl() { return -1; }
int proc_finalize_impl() { return -1; }
int infer_impl(const void* in, void* out, uint32_t batch_size = -1) { return -1; }
int infer_impl(const void* in, void* out, uint32_t batch_size = -1) {
return -1;
}
int task_infer_impl(const BatchTensor& in, BatchTensor& out) { // NOLINT
return -1;
} // NOLINT
......@@ -927,7 +946,7 @@ class InferManager {
}
// Versioned inference interface
int infer(const char* model_name,
int infer(const char* model_name,
const void* in,
void* out,
uint32_t batch_size,
......
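FluidInferEngine in the hunk above derives from DBReloadableInferEngine<EngineCore> when WITH_TRT is defined and from CloneDBReloadableInferEngine<EngineCore> otherwise. The same conditional-base idiom in isolation looks roughly like this; the class bodies are elided and the FluidBase alias is hypothetical.

template <typename EngineCore>
class DBReloadableInferEngine {
  // roughly: each thread builds its own EngineCore from the model files
};

template <typename EngineCore>
class CloneDBReloadableInferEngine
    : public DBReloadableInferEngine<EngineCore> {
  // roughly: thread-level cores share the process-level core's model data
};

#ifdef WITH_TRT
template <typename EngineCore>
using FluidBase = DBReloadableInferEngine<EngineCore>;       // TensorRT builds
#else
template <typename EngineCore>
using FluidBase = CloneDBReloadableInferEngine<EngineCore>;  // all other builds
#endif

template <typename EngineCore>
class FluidInferEngine : public FluidBase<EngineCore> {};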
......@@ -42,9 +42,9 @@ static const int max_batch = 32;
static const int min_subgraph_size = 3;
// Engine Base
class PaddleEngineBase {
class EngineCore {
public:
virtual ~PaddleEngineBase() {}
virtual ~EngineCore() {}
virtual std::vector<std::string> GetInputNames() {
return _predictor->GetInputNames();
}
......@@ -92,7 +92,7 @@ class PaddleEngineBase {
};
// Paddle Inference Engine
class PaddleInferenceEngine : public PaddleEngineBase {
class PaddleInferenceEngine : public EngineCore {
public:
int create(const configure::EngineDesc& engine_conf) {
std::string model_path = engine_conf.model_dir();
......
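The hunks above rename the base class from PaddleEngineBase to EngineCore, while PaddleInferenceEngine keeps its create(engine_conf) entry point. A rough sketch of the resulting hierarchy follows; the bodies are trivial placeholders, and the std::string parameter stands in for the configure::EngineDesc used in the real code.

#include <memory>
#include <string>
#include <vector>

class Predictor;  // placeholder for the underlying inference handle

// Shape of the renamed base class: it owns a predictor and forwards queries.
class EngineCore {
 public:
  virtual ~EngineCore() {}
  // The real method forwards to _predictor->GetInputNames().
  virtual std::vector<std::string> GetInputNames() { return {}; }

 protected:
  std::shared_ptr<Predictor> _predictor;
};

// The concrete engine only adds construction from an engine config.
class PaddleInferenceEngine : public EngineCore {
 public:
  // The real method builds _predictor from engine_conf.model_dir() etc.
  int create(const std::string& model_dir) { return model_dir.empty() ? -1 : 0; }
};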
......@@ -93,7 +93,7 @@ def serve_args():
def start_standard_model(serving_port): # pylint: disable=doc-string-missing
args = parse_args()
args = serve_args()
thread_num = args.thread
model = args.model
port = serving_port
......@@ -109,7 +109,7 @@ def start_standard_model(serving_port): # pylint: disable=doc-string-missing
if model == "":
print("You must specify your serving model")
exit(-1)
for single_model_config in args.model:
if os.path.isdir(single_model_config):
pass
......@@ -131,11 +131,10 @@ def start_standard_model(serving_port): # pylint: disable=doc-string-missing
infer_op_name = "general_detection"
general_infer_op = op_maker.create(infer_op_name)
op_seq_maker.add_op(general_infer_op)
general_response_op = op_maker.create('general_response')
op_seq_maker.add_op(general_response_op)
server = None
if use_multilang:
server = serving.MultiLangServer()
......@@ -199,7 +198,7 @@ def start_gpu_card_model(index, gpuid, port, args): # pylint: disable=doc-strin
infer_op_name = "general_infer"
general_infer_op = op_maker.create(infer_op_name)
op_seq_maker.add_op(general_infer_op)
general_response_op = op_maker.create('general_response')
op_seq_maker.add_op(general_response_op)
......@@ -297,7 +296,8 @@ class MainService(BaseHTTPRequestHandler):
key = base64.b64decode(post_data["key"].encode())
for single_model_config in args.model:
if os.path.isfile(single_model_config):
raise ValueError("The input of --model should be a dir not file.")
raise ValueError(
"The input of --model should be a dir not file.")
with open(single_model_config + "/key", "wb") as f:
f.write(key)
return True
......@@ -309,7 +309,8 @@ class MainService(BaseHTTPRequestHandler):
key = base64.b64decode(post_data["key"].encode())
for single_model_config in args.model:
if os.path.isfile(single_model_config):
raise ValueError("The input of --model should be a dir not file.")
raise ValueError(
"The input of --model should be a dir not file.")
with open(single_model_config + "/key", "rb") as f:
cur_key = f.read()
if key != cur_key:
......@@ -394,7 +395,8 @@ if __name__ == "__main__":
device=args.device,
use_lite=args.use_lite,
use_xpu=args.use_xpu,
ir_optim=args.ir_optim)
ir_optim=args.ir_optim,
thread_num=args.thread)
web_service.run_rpc_service()
app_instance = Flask(__name__)
......
......@@ -27,6 +27,7 @@ import os
from paddle_serving_server import pipeline
from paddle_serving_server.pipeline import Op
def port_is_available(port):
with closing(socket.socket(socket.AF_INET, socket.SOCK_STREAM)) as sock:
sock.settimeout(2)
......@@ -36,6 +37,7 @@ def port_is_available(port):
else:
return False
class WebService(object):
def __init__(self, name="default_service"):
self.name = name
......@@ -63,7 +65,9 @@ class WebService(object):
def run_service(self):
self._server.run_server()
def load_model_config(self, server_config_dir_paths, client_config_path=None):
def load_model_config(self,
server_config_dir_paths,
client_config_path=None):
if isinstance(server_config_dir_paths, str):
server_config_dir_paths = [server_config_dir_paths]
elif isinstance(server_config_dir_paths, list):
......@@ -73,14 +77,16 @@ class WebService(object):
if os.path.isdir(single_model_config):
pass
elif os.path.isfile(single_model_config):
raise ValueError("The input of --model should be a dir not file.")
raise ValueError(
"The input of --model should be a dir not file.")
self.server_config_dir_paths = server_config_dir_paths
from .proto import general_model_config_pb2 as m_config
import google.protobuf.text_format
file_path_list = []
for single_model_config in self.server_config_dir_paths:
file_path_list.append( "{}/serving_server_conf.prototxt".format(single_model_config) )
file_path_list.append("{}/serving_server_conf.prototxt".format(
single_model_config))
model_conf = m_config.GeneralModelConfig()
f = open(file_path_list[0], 'r')
model_conf = google.protobuf.text_format.Merge(
......@@ -130,7 +136,7 @@ class WebService(object):
infer_op_name = "general_infer"
general_infer_op = op_maker.create(infer_op_name)
op_seq_maker.add_op(general_infer_op)
general_response_op = op_maker.create('general_response')
op_seq_maker.add_op(general_response_op)
......@@ -146,7 +152,8 @@ class WebService(object):
if use_xpu:
server.set_xpu()
server.load_model_config(self.server_config_dir_paths)#brpc Server support server_config_dir_paths
server.load_model_config(self.server_config_dir_paths
) # brpc Server supports server_config_dir_paths
if gpuid >= 0:
server.set_gpuid(gpuid)
server.prepare_server(workdir=workdir, port=port, device=device)
......@@ -163,10 +170,12 @@ class WebService(object):
use_xpu=False,
ir_optim=False,
gpuid=0,
thread_num=2,
mem_optim=True):
print("This API will be deprecated later. Please do not use it")
self.workdir = workdir
self.port = port
self.thread_num = thread_num
self.device = device
self.gpuid = gpuid
self.port_list = []
......@@ -184,7 +193,7 @@ class WebService(object):
self.workdir,
self.port_list[0],
-1,
thread_num=2,
thread_num=self.thread_num,
mem_optim=mem_optim,
use_lite=use_lite,
use_xpu=use_xpu,
......@@ -196,7 +205,7 @@ class WebService(object):
"{}_{}".format(self.workdir, i),
self.port_list[i],
gpuid,
thread_num=2,
thread_num=self.thread_num,
mem_optim=mem_optim,
use_lite=use_lite,
use_xpu=use_xpu,
......@@ -297,9 +306,13 @@ class WebService(object):
# default self.gpus = [0].
if len(self.gpus) == 0:
self.gpus.append(0)
self.client.load_model_config(self.server_config_dir_paths[0], use_gpu=True, gpu_id=self.gpus[0])
self.client.load_model_config(
self.server_config_dir_paths[0],
use_gpu=True,
gpu_id=self.gpus[0])
else:
self.client.load_model_config(self.server_config_dir_paths[0], use_gpu=False)
self.client.load_model_config(
self.server_config_dir_paths[0], use_gpu=False)
def run_web_service(self):
print("This API will be deprecated later. Please do not use it")
......