Unverified · Commit eeafa32c authored by TeslaZhao, committed by GitHub

Merge pull request #38 from PaddlePaddle/develop

Sync codes
......@@ -68,7 +68,9 @@ class InferEngine {
virtual int thrd_initialize() { return thrd_initialize_impl(); }
virtual int thrd_clear() { return thrd_clear_impl(); }
virtual int thrd_finalize() { return thrd_finalize_impl(); }
virtual int infer(const void* in, void* out, uint32_t batch_size = -1) { return infer_impl(in, out, batch_size); }
virtual int infer(const void* in, void* out, uint32_t batch_size = -1) {
return infer_impl(in, out, batch_size);
}
virtual int reload() = 0;
......@@ -208,7 +210,6 @@ class ReloadableInferEngine : public InferEngine {
}
uint64_t version() const { return _version; }
uint32_t thread_num() const { return _infer_thread_num; }
private:
......@@ -335,7 +336,7 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
md->cores[next_idx] = new (std::nothrow) EngineCore;
//params.dump();
// params.dump();
if (!md->cores[next_idx] || md->cores[next_idx]->create(conf) != 0) {
LOG(ERROR) << "Failed create model, path: " << conf.model_dir();
return -1;
......@@ -491,71 +492,86 @@ class CloneDBReloadableInferEngine
_pd; // Process-level EngineCore; multiple thread-level EngineCores share this object's model data
};
template <typename PaddleInferenceCore>
template <typename EngineCore>
#ifdef WITH_TRT
class FluidInferEngine : public DBReloadableInferEngine<PaddleInferenceCore> {
class FluidInferEngine : public DBReloadableInferEngine<EngineCore> {
#else
class FluidInferEngine : public CloneDBReloadableInferEngine<PaddleInferenceCore> {
class FluidInferEngine : public CloneDBReloadableInferEngine<EngineCore> {
#endif
public: // NOLINT
FluidInferEngine() {}
~FluidInferEngine() {}
typedef std::vector<paddle::PaddleTensor> TensorVector;
int infer_impl(const void* in, void* out, uint32_t batch_size = -1) {
//First of all, get the real core acording to the template parameter 'PaddleInferenceCore'.
PaddleInferenceCore* core =DBReloadableInferEngine<PaddleInferenceCore>::get_core();
// First of all, get the real core according to the
// template parameter <EngineCore>.
EngineCore* core = DBReloadableInferEngine<EngineCore>::get_core();
if (!core || !core->get()) {
LOG(ERROR) << "Failed get fluid core in infer_impl()";
return -1;
}
//We use the for loop to process the input data.
//Inside each for loop, use the in[i]->name as inputName and call 'core->GetInputHandle(inputName)' to get the pointer of InputData.
//Set the lod and shape information of InputData first. then copy data from cpu to the core.
const TensorVector* tensorVector_in_pointer = reinterpret_cast<const TensorVector*>(in);
// We use the for loop to process the input data.
// Inside each for loop, use the in[i]->name as inputName and call
// 'core->GetInputHandle(inputName)' to get the pointer of InputData.
// Set the lod and shape information of InputData first.
// Then copy data from cpu to the core.
const TensorVector* tensorVector_in_pointer =
reinterpret_cast<const TensorVector*>(in);
for (int i=0; i < tensorVector_in_pointer->size(); ++i) {
auto lod_tensor_in = core->GetInputHandle((*tensorVector_in_pointer)[i].name);
auto lod_tensor_in =
core->GetInputHandle((*tensorVector_in_pointer)[i].name);
lod_tensor_in->SetLoD((*tensorVector_in_pointer)[i].lod);
lod_tensor_in->Reshape((*tensorVector_in_pointer)[i].shape);
void* origin_data = (*tensorVector_in_pointer)[i].data.data();
//Because the core needs to determine the size of memory space according to the data type passed in.
//The pointer type of data must be one of float *,int64_t*,int32_t* instead void*.
// Because the core determines the size of the memory space from the
// data type passed in, the pointer type of data must be one of
// float*, int64_t*, int32_t* instead of void*.
if ((*tensorVector_in_pointer)[i].dtype == paddle::PaddleDType::FLOAT32) {
float* data = static_cast<float*>(origin_data);
lod_tensor_in->CopyFromCpu(data);
}else if ((*tensorVector_in_pointer)[i].dtype == paddle::PaddleDType::INT64) {
} else if ((*tensorVector_in_pointer)[i].dtype ==
paddle::PaddleDType::INT64) {
int64_t* data = static_cast<int64_t*>(origin_data);
lod_tensor_in->CopyFromCpu(data);
}else if ((*tensorVector_in_pointer)[i].dtype == paddle::PaddleDType::INT32) {
} else if ((*tensorVector_in_pointer)[i].dtype ==
paddle::PaddleDType::INT32) {
int32_t* data = static_cast<int32_t*>(origin_data);
lod_tensor_in->CopyFromCpu(data);
}
}
//After the input data is passed in, call 'core->Run()' perform the prediction process.
// After the input data is passed in,
// call 'core->Run()' to perform the prediction.
if (!core->Run()) {
LOG(ERROR) << "Failed run fluid family core";
return -1;
}
//In order to get the results, first, call the 'core->GetOutputNames()' to get the name of output(which is a dict like {OutputName:pointer of OutputValue}).
//Then, use for-loop to get OutputValue by calling 'core->GetOutputHandle'.
// In order to get the results, first call 'core->GetOutputNames()'
// to get the output names (a dict like {OutputName: pointer of OutputValue}).
// Then use a for-loop to get each OutputValue by calling
// 'core->GetOutputHandle'.
std::vector<std::string> outnames = core->GetOutputNames();
std::vector<int> output_shape;
int out_num =0;
int dataType =0;
int out_num = 0;
int dataType = 0;
void* databuf_data = NULL;
char* databuf_char = NULL;
size_t databuf_size = 0;
TensorVector* tensorVector_out_pointer = reinterpret_cast<TensorVector*>(out);
TensorVector* tensorVector_out_pointer =
reinterpret_cast<TensorVector*>(out);
if (!tensorVector_out_pointer) {
LOG(ERROR) << "tensorVector_out_pointer is nullptr,error";
return -1;
}
//Get the type and shape information of OutputData first. then copy data to cpu from the core.
//The pointer type of data_out must be one of float *,int64_t*,int32_t* instead void*.
// Get the type and shape information of OutputData first,
// then copy data to the cpu from the core.
// The pointer type of data_out must be one of
// float*, int64_t*, int32_t* instead of void*.
for (int i=0; i < outnames.size(); ++i) {
auto lod_tensor_out = core->GetOutputHandle(outnames[i]);
output_shape = lod_tensor_out->shape();
out_num = std::accumulate(output_shape.begin(), output_shape.end(), 1, std::multiplies<int>());
out_num = std::accumulate(
output_shape.begin(), output_shape.end(), 1, std::multiplies<int>());
dataType = lod_tensor_out->type();
if (dataType == paddle::PaddleDType::FLOAT32) {
databuf_size = out_num*sizeof(float);
......@@ -567,7 +583,7 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<PaddleInferenceCore
float* data_out = reinterpret_cast<float*>(databuf_data);
lod_tensor_out->CopyToCpu(data_out);
databuf_char = reinterpret_cast<char*>(data_out);
}else if (dataType == paddle::PaddleDType::INT64) {
} else if (dataType == paddle::PaddleDType::INT64) {
databuf_size = out_num*sizeof(int64_t);
databuf_data = MempoolWrapper::instance().malloc(databuf_size);
if (!databuf_data) {
......@@ -577,7 +593,7 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<PaddleInferenceCore
int64_t* data_out = reinterpret_cast<int64_t*>(databuf_data);
lod_tensor_out->CopyToCpu(data_out);
databuf_char = reinterpret_cast<char*>(data_out);
}else if (dataType == paddle::PaddleDType::INT32) {
} else if (dataType == paddle::PaddleDType::INT32) {
databuf_size = out_num*sizeof(int32_t);
databuf_data = MempoolWrapper::instance().malloc(databuf_size);
if (!databuf_data) {
......@@ -588,9 +604,11 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<PaddleInferenceCore
lod_tensor_out->CopyToCpu(data_out);
databuf_char = reinterpret_cast<char*>(data_out);
}
//Because task scheduling requires OPs to use 'Channel'(which is a data structure) to transfer data between OPs.
//We need to copy the processed data to the 'Channel' for the next OP.
//In this function, it means we should copy the 'databuf_char' to the pointer 'void* out'.(which is also called ‘tensorVector_out_pointer’)
// Because task scheduling requires OPs to use a 'Channel' (a data
// structure) to transfer data between OPs, we need to copy the processed
// data into the 'Channel' for the next OP. In this function, that means
// copying 'databuf_char' into 'void* out' (also known as
// 'tensorVector_out_pointer').
paddle::PaddleTensor tensor_out;
tensor_out.name = outnames[i];
tensor_out.dtype = paddle::PaddleDType(dataType);
......@@ -611,8 +629,6 @@ class FluidInferEngine : public CloneDBReloadableInferEngine<PaddleInferenceCore
int task_infer_impl(const BatchTensor& in, BatchTensor& out) { // NOLINT
return infer_impl(&in, &out);
}
};
typedef FactoryPool<InferEngine> StaticInferFactory;
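
A minimal usage sketch of the interface above, assuming a single FLOAT32 input named "x" with shape {1, 13} (both placeholders, not taken from this commit); it only shows how a caller packages a TensorVector and hands it to infer(), which forwards to infer_impl().

// Hedged sketch: the tensor name, shape, and data values are assumptions.
#include <vector>
#include "paddle_inference_api.h"  // paddle::PaddleTensor, paddle::PaddleBuf

int run_example(FluidInferEngine<PaddleInferenceEngine>* engine) {
  std::vector<float> x(13, 0.5f);                   // placeholder input data
  paddle::PaddleTensor tensor_in;
  tensor_in.name = "x";                             // assumed feed name
  tensor_in.dtype = paddle::PaddleDType::FLOAT32;   // takes the float* branch
  tensor_in.shape = {1, 13};                        // assumed shape
  tensor_in.data =
      paddle::PaddleBuf(x.data(), x.size() * sizeof(float));

  std::vector<paddle::PaddleTensor> in_vec = {tensor_in};
  std::vector<paddle::PaddleTensor> out_vec;
  // infer() forwards to infer_impl(), which copies each input into the
  // EngineCore, runs the predictor, and fills out_vec from mempool buffers.
  return engine->infer(&in_vec, &out_vec, 1);
}
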
......@@ -797,7 +813,9 @@ class VersionedInferEngine : public InferEngine {
int thrd_finalize_impl() { return -1; }
int thrd_clear_impl() { return -1; }
int proc_finalize_impl() { return -1; }
int infer_impl(const void* in, void* out, uint32_t batch_size = -1) { return -1; }
int infer_impl(const void* in, void* out, uint32_t batch_size = -1) {
return -1;
}
int task_infer_impl(const BatchTensor& in, BatchTensor& out) { // NOLINT
return -1;
} // NOLINT
......@@ -927,7 +945,7 @@ class InferManager {
}
// Versioned inference interface
int infer(const char* model_name,
int infer(const char* model_name,
const void* in,
void* out,
uint32_t batch_size,
......
......@@ -57,9 +57,9 @@ PrecisionType GetPrecision(const std::string& precision_data) {
}
// Engine Base
class PaddleEngineBase {
class EngineCore {
public:
virtual ~PaddleEngineBase() {}
virtual ~EngineCore() {}
virtual std::vector<std::string> GetInputNames() {
return _predictor->GetInputNames();
}
......@@ -107,7 +107,7 @@ class PaddleEngineBase {
};
// Paddle Inference Engine
class PaddleInferenceEngine : public PaddleEngineBase {
class PaddleInferenceEngine : public EngineCore {
public:
int create(const configure::EngineDesc& engine_conf) {
std::string model_path = engine_conf.model_dir();
......
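
The body of create() is truncated above; as a rough sketch under assumptions (the exact configuration flags used by this repo are not shown here), an EngineCore implementation such as PaddleInferenceEngine essentially builds a paddle_infer::Config from the EngineDesc and creates the predictor that GetInputNames()/GetInputHandle() forward to.

// Rough sketch with assumed configuration; not this repo's exact create() body.
#include <memory>
#include <string>
#include "paddle_inference_api.h"  // paddle_infer::Config, CreatePredictor

std::shared_ptr<paddle_infer::Predictor> build_predictor(
    const std::string& model_dir) {
  paddle_infer::Config config;
  config.SetModel(model_dir);   // assumed model layout
  config.SwitchIrOptim(true);   // assumed: enable IR graph optimization
  // The resulting predictor is what EngineCore stores in _predictor and what
  // FluidInferEngine::infer_impl() drives via GetInputHandle()/Run()/etc.
  return paddle_infer::CreatePredictor(config);
}
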
......@@ -25,19 +25,24 @@ args = benchmark_args()
def single_func(idx, resource):
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.uci_housing.train(), buf_size=500),
batch_size=1)
total_number = sum(1 for _ in train_reader())
if args.request == "rpc":
client = Client()
client.load_client_config(args.model)
client.connect([args.endpoint])
train_reader = paddle.batch(
paddle.reader.shuffle(
paddle.dataset.uci_housing.train(), buf_size=500),
batch_size=1)
start = time.time()
for data in train_reader():
#new_data = np.zeros((1, 13)).astype("float32")
#new_data[0] = data[0][0]
#fetch_map = client.predict(feed={"x": new_data}, fetch=["price"], batch=True)
fetch_map = client.predict(feed={"x": data[0][0]}, fetch=["price"])
end = time.time()
return [[end - start]]
return [[end - start], [total_number]]
elif args.request == "http":
train_reader = paddle.batch(
paddle.reader.shuffle(
......@@ -49,7 +54,7 @@ def single_func(idx, resource):
'http://{}/uci/prediction'.format(args.endpoint),
data={"x": data[0]})
end = time.time()
return [[end - start]]
return [[end - start], [total_number]]
multi_thread_runner = MultiThreadRunner()
......
......@@ -155,7 +155,7 @@ class Client(object):
file_path_list = []
for single_model_config in model_config_path_list:
if os.path.isdir(single_model_config):
file_path_list.append("{}/serving_server_conf.prototxt".format(
file_path_list.append("{}/serving_client_conf.prototxt".format(
single_model_config))
elif os.path.isfile(single_model_config):
file_path_list.append(single_model_config)
......@@ -574,7 +574,7 @@ class MultiLangClient(object):
file_path_list = []
for single_model_config in model_config_path_list:
if os.path.isdir(single_model_config):
file_path_list.append("{}/serving_server_conf.prototxt".format(
file_path_list.append("{}/serving_client_conf.prototxt".format(
single_model_config))
elif os.path.isfile(single_model_config):
file_path_list.append(single_model_config)
......
......@@ -103,7 +103,7 @@ def serve_args():
def start_standard_model(serving_port): # pylint: disable=doc-string-missing
args = parse_args()
args = serve_args()
thread_num = args.thread
model = args.model
port = serving_port
......@@ -410,6 +410,7 @@ if __name__ == "__main__":
use_lite=args.use_lite,
use_xpu=args.use_xpu,
ir_optim=args.ir_optim,
thread_num=args.thread,
precision=args.precision,
use_calib=args.use_calib)
web_service.run_rpc_service()
......
......@@ -176,10 +176,12 @@ class WebService(object):
use_xpu=False,
ir_optim=False,
gpuid=0,
thread_num=2,
mem_optim=True):
print("This API will be deprecated later. Please do not use it")
self.workdir = workdir
self.port = port
self.thread_num = thread_num
self.device = device
self.gpuid = gpuid
self.port_list = []
......@@ -197,7 +199,7 @@ class WebService(object):
self.workdir,
self.port_list[0],
-1,
thread_num=2,
thread_num=self.thread_num,
mem_optim=mem_optim,
use_lite=use_lite,
use_xpu=use_xpu,
......@@ -211,7 +213,7 @@ class WebService(object):
"{}_{}".format(self.workdir, i),
self.port_list[i],
gpuid,
thread_num=2,
thread_num=self.thread_num,
mem_optim=mem_optim,
use_lite=use_lite,
use_xpu=use_xpu,
......