Unverified · Commit 1cab575a authored by Jiawei Wang, committed by GitHub

Merge pull request #1136 from HexToString/merge_branch

fix code style and bug and name confused
@@ -68,7 +68,9 @@ class InferEngine {
  virtual int thrd_initialize() { return thrd_initialize_impl(); }
  virtual int thrd_clear() { return thrd_clear_impl(); }
  virtual int thrd_finalize() { return thrd_finalize_impl(); }
-  virtual int infer(const void* in, void* out, uint32_t batch_size = -1) { return infer_impl(in, out, batch_size); }
+  virtual int infer(const void* in, void* out, uint32_t batch_size = -1) {
+    return infer_impl(in, out, batch_size);
+  }
  virtual int reload() = 0;
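
A side note on the signature this hunk keeps: batch_size is unsigned, so the default -1 wraps to UINT32_MAX and presumably serves as the "batch size not specified" sentinel. A one-line illustration of the conversion (general C++ behavior, not code from this commit):

    #include <cstdint>
    static_assert(static_cast<uint32_t>(-1) == UINT32_MAX,
                  "a default batch_size of -1 reaches infer_impl as UINT32_MAX");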
@@ -208,7 +210,6 @@ class ReloadableInferEngine : public InferEngine {
  }
  uint64_t version() const { return _version; }
  uint32_t thread_num() const { return _infer_thread_num; }

 private:
@@ -335,7 +336,7 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
    md->cores[next_idx] = new (std::nothrow) EngineCore;
-    //params.dump();
+    // params.dump();
    if (!md->cores[next_idx] || md->cores[next_idx]->create(conf) != 0) {
      LOG(ERROR) << "Failed create model, path: " << conf.model_dir();
      return -1;
@@ -491,71 +492,86 @@ class CloneDBReloadableInferEngine
      _pd;  // Process-level EngineCore; the thread-level EngineCores share this object's model data
};

-template <typename PaddleInferenceCore>
+template <typename EngineCore>
#ifdef WITH_TRT
-class FluidInferEngine : public DBReloadableInferEngine<PaddleInferenceCore> {
+class FluidInferEngine : public DBReloadableInferEngine<EngineCore> {
#else
-class FluidInferEngine : public CloneDBReloadableInferEngine<PaddleInferenceCore> {
+class FluidInferEngine : public CloneDBReloadableInferEngine<EngineCore> {
#endif
 public:  // NOLINT
  FluidInferEngine() {}
  ~FluidInferEngine() {}
  typedef std::vector<paddle::PaddleTensor> TensorVector;
  int infer_impl(const void* in, void* out, uint32_t batch_size = -1) {
-    //First of all, get the real core acording to the template parameter 'PaddleInferenceCore'.
-    PaddleInferenceCore* core =DBReloadableInferEngine<PaddleInferenceCore>::get_core();
+    // First of all, get the real core according to the
+    // template parameter <EngineCore>.
+    EngineCore* core = DBReloadableInferEngine<EngineCore>::get_core();
    if (!core || !core->get()) {
      LOG(ERROR) << "Failed get fluid core in infer_impl()";
      return -1;
    }
-    //We use the for loop to process the input data.
-    //Inside each for loop, use the in[i]->name as inputName and call 'core->GetInputHandle(inputName)' to get the pointer of InputData.
-    //Set the lod and shape information of InputData first. then copy data from cpu to the core.
-    const TensorVector* tensorVector_in_pointer = reinterpret_cast<const TensorVector*>(in);
+    // We use the for loop to process the input data.
+    // Inside each for loop, use the in[i]->name as inputName and call
+    // 'core->GetInputHandle(inputName)' to get the pointer of InputData.
+    // Set the lod and shape information of InputData first,
+    // then copy data from cpu to the core.
+    const TensorVector* tensorVector_in_pointer =
+        reinterpret_cast<const TensorVector*>(in);
    for (int i=0; i < tensorVector_in_pointer->size(); ++i) {
-      auto lod_tensor_in = core->GetInputHandle((*tensorVector_in_pointer)[i].name);
+      auto lod_tensor_in =
+          core->GetInputHandle((*tensorVector_in_pointer)[i].name);
      lod_tensor_in->SetLoD((*tensorVector_in_pointer)[i].lod);
      lod_tensor_in->Reshape((*tensorVector_in_pointer)[i].shape);
      void* origin_data = (*tensorVector_in_pointer)[i].data.data();
-      //Because the core needs to determine the size of memory space according to the data type passed in.
-      //The pointer type of data must be one of float *,int64_t*,int32_t* instead void*.
+      // Because the core needs to determine the size of memory space
+      // according to the data type passed in,
+      // the pointer type of data must be one of
+      // float*, int64_t*, int32_t* instead of void*.
      if ((*tensorVector_in_pointer)[i].dtype == paddle::PaddleDType::FLOAT32) {
        float* data = static_cast<float*>(origin_data);
        lod_tensor_in->CopyFromCpu(data);
-      }else if ((*tensorVector_in_pointer)[i].dtype == paddle::PaddleDType::INT64) {
+      } else if ((*tensorVector_in_pointer)[i].dtype ==
+                 paddle::PaddleDType::INT64) {
        int64_t* data = static_cast<int64_t*>(origin_data);
        lod_tensor_in->CopyFromCpu(data);
-      }else if ((*tensorVector_in_pointer)[i].dtype == paddle::PaddleDType::INT32) {
+      } else if ((*tensorVector_in_pointer)[i].dtype ==
+                 paddle::PaddleDType::INT32) {
        int32_t* data = static_cast<int32_t*>(origin_data);
        lod_tensor_in->CopyFromCpu(data);
      }
    }
-    //After the input data is passed in, call 'core->Run()' perform the prediction process.
+    // After the input data is passed in,
+    // call 'core->Run()' to perform the prediction process.
    if (!core->Run()) {
      LOG(ERROR) << "Failed run fluid family core";
      return -1;
    }
-    //In order to get the results, first, call the 'core->GetOutputNames()' to get the name of output(which is a dict like {OutputName:pointer of OutputValue}).
-    //Then, use for-loop to get OutputValue by calling 'core->GetOutputHandle'.
+    // In order to get the results,
+    // first call 'core->GetOutputNames()' to get the output names
+    // (which is a dict like {OutputName: pointer of OutputValue}).
+    // Then use a for loop to get each OutputValue by calling 'core->GetOutputHandle'.
    std::vector<std::string> outnames = core->GetOutputNames();
    std::vector<int> output_shape;
-    int out_num =0;
-    int dataType =0;
+    int out_num = 0;
+    int dataType = 0;
    void* databuf_data = NULL;
    char* databuf_char = NULL;
    size_t databuf_size = 0;
-    TensorVector* tensorVector_out_pointer = reinterpret_cast<TensorVector*>(out);
+    TensorVector* tensorVector_out_pointer =
+        reinterpret_cast<TensorVector*>(out);
    if (!tensorVector_out_pointer) {
      LOG(ERROR) << "tensorVector_out_pointer is nullptr,error";
      return -1;
    }
-    //Get the type and shape information of OutputData first. then copy data to cpu from the core.
-    //The pointer type of data_out must be one of float *,int64_t*,int32_t* instead void*.
+    // Get the type and shape information of OutputData first,
+    // then copy data to cpu from the core.
+    // The pointer type of data_out must be one of
+    // float*, int64_t*, int32_t* instead of void*.
    for (int i=0; i < outnames.size(); ++i) {
      auto lod_tensor_out = core->GetOutputHandle(outnames[i]);
      output_shape = lod_tensor_out->shape();
-      out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1, std::multiplies<int>());
+      out_num = std::accumulate(
+          output_shape.begin(), output_shape.end(), 1, std::multiplies<int>());
      dataType = lod_tensor_out->type();
      if (dataType == paddle::PaddleDType::FLOAT32) {
        databuf_size = out_num*sizeof(float);
@@ -567,7 +583,7 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<PaddleInferenceCore
        float* data_out = reinterpret_cast<float*>(databuf_data);
        lod_tensor_out->CopyToCpu(data_out);
        databuf_char = reinterpret_cast<char*>(data_out);
-      }else if (dataType == paddle::PaddleDType::INT64) {
+      } else if (dataType == paddle::PaddleDType::INT64) {
        databuf_size = out_num*sizeof(int64_t);
        databuf_data = MempoolWrapper::instance().malloc(databuf_size);
        if (!databuf_data) {
@@ -577,7 +593,7 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<PaddleInferenceCore
        int64_t* data_out = reinterpret_cast<int64_t*>(databuf_data);
        lod_tensor_out->CopyToCpu(data_out);
        databuf_char = reinterpret_cast<char*>(data_out);
-      }else if (dataType == paddle::PaddleDType::INT32) {
+      } else if (dataType == paddle::PaddleDType::INT32) {
        databuf_size = out_num*sizeof(int32_t);
        databuf_data = MempoolWrapper::instance().malloc(databuf_size);
        if (!databuf_data) {
@@ -588,9 +604,11 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<PaddleInferenceCore
        lod_tensor_out->CopyToCpu(data_out);
        databuf_char = reinterpret_cast<char*>(data_out);
      }
-      //Because task scheduling requires OPs to use 'Channel'(which is a data structure) to transfer data between OPs.
-      //We need to copy the processed data to the 'Channel' for the next OP.
-      //In this function, it means we should copy the 'databuf_char' to the pointer 'void* out'.(which is also called 'tensorVector_out_pointer')
+      // Because task scheduling requires OPs to use 'Channel'
+      // (which is a data structure) to transfer data between OPs,
+      // we need to copy the processed data to the 'Channel' for the next OP.
+      // In this function, that means copying 'databuf_char' to
+      // 'void* out' (which is also called 'tensorVector_out_pointer').
      paddle::PaddleTensor tensor_out;
      tensor_out.name = outnames[i];
      tensor_out.dtype = paddle::PaddleDType(dataType);
@@ -611,8 +629,6 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<PaddleInferenceCore
  int task_infer_impl(const BatchTensor& in, BatchTensor& out) {  // NOLINT
    return infer_impl(&in, &out);
  }
};

typedef FactoryPool<InferEngine> StaticInferFactory;
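
For orientation, a minimal caller-side sketch of the TensorVector contract that infer_impl expects, following the comments in the hunks above. This is an illustration, not code from this PR: the engine instance, the input name "x", and the 1x13 shape are assumptions.

    #include <vector>
    #include "paddle_inference_api.h"  // paddle::PaddleTensor, paddle::PaddleBuf

    int run_once(FluidInferEngine<PaddleInferenceEngine>& engine) {
      std::vector<paddle::PaddleTensor> in(1), out;
      std::vector<float> x(13, 0.5f);              // hypothetical 1x13 input
      in[0].name = "x";                            // must match a model input name
      in[0].dtype = paddle::PaddleDType::FLOAT32;
      in[0].shape = {1, 13};
      in[0].data = paddle::PaddleBuf(x.data(), x.size() * sizeof(float));
      // On success, out holds one PaddleTensor per output name, with its data
      // buffer allocated from MempoolWrapper as shown above.
      return engine.infer_impl(&in, &out);
    }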
@@ -797,7 +813,9 @@ class VersionedInferEngine : public InferEngine {
  int thrd_finalize_impl() { return -1; }
  int thrd_clear_impl() { return -1; }
  int proc_finalize_impl() { return -1; }
-  int infer_impl(const void* in, void* out, uint32_t batch_size = -1) { return -1; }
+  int infer_impl(const void* in, void* out, uint32_t batch_size = -1) {
+    return -1;
+  }
  int task_infer_impl(const BatchTensor& in, BatchTensor& out) {  // NOLINT
    return -1;
  }  // NOLINT
@@ -927,7 +945,7 @@ class InferManager {
  }

  // Versioned inference interface
  int infer(const char* model_name,
            const void* in,
            void* out,
            uint32_t batch_size,
......
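
For context, a short sketch of how an operator typically reaches this interface, modeled on the GeneralInferOp call pattern in the same codebase (assuming a four-argument overload alongside the versioned one; the blob variables and the engine name are placeholders):

    // in, out and batch_size come from the surrounding OP; the engine name is
    // a hypothetical example.
    const TensorVector* in = &input_blob->tensor_vector;
    TensorVector* out = &output_blob->tensor_vector;
    if (InferManager::instance().infer("general_infer_0", in, out, batch_size)) {
      LOG(ERROR) << "Failed do infer in fluid model: general_infer_0";
      return -1;
    }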
@@ -57,9 +57,9 @@ PrecisionType GetPrecision(const std::string& precision_data) {
}

// Engine Base
-class PaddleEngineBase {
+class EngineCore {
 public:
-  virtual ~PaddleEngineBase() {}
+  virtual ~EngineCore() {}
  virtual std::vector<std::string> GetInputNames() {
    return _predictor->GetInputNames();
  }
@@ -107,7 +107,7 @@ class PaddleEngineBase {
};

// Paddle Inference Engine
-class PaddleInferenceEngine : public PaddleEngineBase {
+class PaddleInferenceEngine : public EngineCore {
 public:
  int create(const configure::EngineDesc& engine_conf) {
    std::string model_path = engine_conf.model_dir();
......
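
The infer_impl path in infer.h leans on a small EngineCore surface. As a reading aid, here is an abbreviated reconstruction of that surface from the calls made there; treat the exact signatures and the _predictor member type as assumptions rather than the file's full declaration:

    class EngineCore {  // formerly PaddleEngineBase
     public:
      virtual ~EngineCore() {}
      virtual int create(const configure::EngineDesc& conf) = 0;  // assumed pure virtual
      std::vector<std::string> GetInputNames() { return _predictor->GetInputNames(); }
      std::vector<std::string> GetOutputNames() { return _predictor->GetOutputNames(); }
      std::unique_ptr<paddle_infer::Tensor> GetInputHandle(const std::string& name) {
        return _predictor->GetInputHandle(name);
      }
      std::unique_ptr<paddle_infer::Tensor> GetOutputHandle(const std::string& name) {
        return _predictor->GetOutputHandle(name);
      }
      bool Run() { return _predictor->Run(); }
      paddle_infer::Predictor* get() { return _predictor.get(); }  // null-checked in infer_impl
     protected:
      std::shared_ptr<paddle_infer::Predictor> _predictor;  // assumed member type
    };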
@@ -25,19 +25,24 @@ args = benchmark_args()

def single_func(idx, resource):
+    train_reader = paddle.batch(
+        paddle.reader.shuffle(
+            paddle.dataset.uci_housing.train(), buf_size=500),
+        batch_size=1)
+    total_number = sum(1 for _ in train_reader())
    if args.request == "rpc":
        client = Client()
        client.load_client_config(args.model)
        client.connect([args.endpoint])
-        train_reader = paddle.batch(
-            paddle.reader.shuffle(
-                paddle.dataset.uci_housing.train(), buf_size=500),
-            batch_size=1)
        start = time.time()
        for data in train_reader():
+            #new_data = np.zeros((1, 13)).astype("float32")
+            #new_data[0] = data[0][0]
+            #fetch_map = client.predict(feed={"x": new_data}, fetch=["price"], batch=True)
            fetch_map = client.predict(feed={"x": data[0][0]}, fetch=["price"])
        end = time.time()
-        return [[end - start]]
+        return [[end - start], [total_number]]
    elif args.request == "http":
        train_reader = paddle.batch(
            paddle.reader.shuffle(
@@ -49,7 +54,7 @@ def single_func(idx, resource):
                'http://{}/uci/prediction'.format(args.endpoint),
                data={"x": data[0]})
        end = time.time()
-        return [[end - start]]
+        return [[end - start], [total_number]]

multi_thread_runner = MultiThreadRunner()
......
@@ -155,7 +155,7 @@ class Client(object):
        file_path_list = []
        for single_model_config in model_config_path_list:
            if os.path.isdir(single_model_config):
-                file_path_list.append("{}/serving_server_conf.prototxt".format(
+                file_path_list.append("{}/serving_client_conf.prototxt".format(
                    single_model_config))
            elif os.path.isfile(single_model_config):
                file_path_list.append(single_model_config)
@@ -574,7 +574,7 @@ class MultiLangClient(object):
        file_path_list = []
        for single_model_config in model_config_path_list:
            if os.path.isdir(single_model_config):
-                file_path_list.append("{}/serving_server_conf.prototxt".format(
+                file_path_list.append("{}/serving_client_conf.prototxt".format(
                    single_model_config))
            elif os.path.isfile(single_model_config):
                file_path_list.append(single_model_config)
......
@@ -103,7 +103,7 @@ def serve_args():

def start_standard_model(serving_port):  # pylint: disable=doc-string-missing
-    args = parse_args()
+    args = serve_args()
    thread_num = args.thread
    model = args.model
    port = serving_port
@@ -410,6 +410,7 @@ if __name__ == "__main__":
            use_lite=args.use_lite,
            use_xpu=args.use_xpu,
            ir_optim=args.ir_optim,
+            thread_num=args.thread,
            precision=args.precision,
            use_calib=args.use_calib)
        web_service.run_rpc_service()
......
@@ -176,10 +176,12 @@ class WebService(object):
                       use_xpu=False,
                       ir_optim=False,
                       gpuid=0,
+                       thread_num=2,
                       mem_optim=True):
        print("This API will be deprecated later. Please do not use it")
        self.workdir = workdir
        self.port = port
+        self.thread_num = thread_num
        self.device = device
        self.gpuid = gpuid
        self.port_list = []
@@ -197,7 +199,7 @@ class WebService(object):
                self.workdir,
                self.port_list[0],
                -1,
-                thread_num=2,
+                thread_num=self.thread_num,
                mem_optim=mem_optim,
                use_lite=use_lite,
                use_xpu=use_xpu,
@@ -211,7 +213,7 @@ class WebService(object):
                "{}_{}".format(self.workdir, i),
                self.port_list[i],
                gpuid,
-                thread_num=2,
+                thread_num=self.thread_num,
                mem_optim=mem_optim,
                use_lite=use_lite,
                use_xpu=use_xpu,
......