Commit f30447d6 authored by H HexToString

update doc and comment

Parent 8c834fba
@@ -133,6 +133,8 @@ struct Task {
int element_num = 1;
if ((*inVectorT_ptr)[feedvar_index].shape.size() == 1) {
// because shape[0] is batch_size,
// a shape of [10] is equivalent to [10, 1]; if shape[1] does not exist,
// we should return 1.
return 1;
}
// start from shape[1], because shape[0] is batch_size.
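The comment above describes how the per-sample element count is derived from a feed var's shape. A minimal, self-contained sketch of that rule (not the actual bsf.h code; the function name and shape type are placeholders):

```cpp
// Sketch: per-sample element count, skipping shape[0], which is batch_size.
#include <cstdio>
#include <vector>

int element_num_per_sample(const std::vector<int>& shape) {
  if (shape.size() == 1) {
    // shape is just [batch_size], i.e. equivalent to [batch_size, 1]
    return 1;
  }
  int element_num = 1;
  for (size_t i = 1; i < shape.size(); ++i) {  // start from shape[1]
    element_num *= shape[i];
  }
  return element_num;
}

int main() {
  std::printf("%d\n", element_num_per_sample({10}));              // prints 1
  std::printf("%d\n", element_num_per_sample({10, 3, 224, 224}));  // prints 150528
  return 0;
}
```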
@@ -516,6 +518,13 @@ class BatchTasks {
};
// BSF task handle
// TaskHandler is the handle of a Task.
// `read_fd` is used to receive the signal in the brpc thread.
// `write_fd` is used to write the signal in the bsf thread.
// when a TaskMeta is done, the bsf thread writes to `write_fd`.
// the brpc thread keeps reading `read_fd` in a while loop,
// so the brpc thread receives the signal when a TaskMeta is done.
// in short, `read_fd` and `write_fd` are used to communicate between
// different threads.
template <typename TaskT>
struct TaskHandler {
int read_fd;
@@ -538,9 +547,11 @@ struct TaskHandler {
}
};
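To illustrate the `read_fd`/`write_fd` mechanism described in the comment above, here is a minimal sketch (assuming a POSIX `pipe()`, not the real TaskHandler code): a worker thread writes one byte when its work is done, and the requesting thread blocks on `read()` until that signal arrives.

```cpp
#include <unistd.h>
#include <cstdio>
#include <thread>

int main() {
  int fds[2];
  if (pipe(fds) != 0) return 1;      // fds[0] acts as read_fd, fds[1] as write_fd

  std::thread bsf_like_worker([&] {
    // ... process the TaskMeta here ...
    char done = 1;
    write(fds[1], &done, 1);         // signal completion on write_fd
  });

  char signal = 0;
  read(fds[0], &signal, 1);          // brpc-like thread blocks until done
  std::printf("task finished, signal=%d\n", signal);

  bsf_like_worker.join();
  close(fds[0]);
  close(fds[1]);
  return 0;
}
```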
// TaskExecutor is a Thread pool.
template <typename TaskT>
class TaskExecutor;
// ThreadContext is used to start a bsf thread.
template <typename TaskT>
struct ThreadContext {
TaskExecutor<TaskT>* executor;
@@ -561,6 +572,15 @@ struct ThreadContext {
}
};
// TaskExecutor is a thread pool.
// each TaskExecutor corresponds to one Model.
// TaskT is actually a Request preprocessed by ReaderOp.
// a TaskT is divided into TaskMetas, which are put into _task_queue
// in the brpc thread by schedule(); a TaskHandler is returned
// to the brpc thread.
// the start() function creates `thread_num` bsf threads.
// every bsf thread checks _task_queue and takes TaskMetas from it.
// when all TaskMetas of a Task are done, the TaskHandler is notified.
template <typename TaskT>
class TaskExecutor {
public:
@@ -595,12 +615,6 @@ class TaskExecutor {
TaskExecutor();
}
}
/*
static TaskExecutor<TaskT>* instance() {
static TaskExecutor<TaskT> singleton;
return &singleton;
}
*/
void set_batch_size(size_t batch_size) { _batch_size = batch_size; }
@@ -661,6 +675,9 @@ class TaskExecutor {
boost::function<void(const void*, void*)> _fn;
};
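The comment block above describes the TaskExecutor pattern: schedule() splits a batched task into slices ("TaskMeta") pushed to a shared queue, and start() launches worker ("bsf") threads that drain it. Below is a simplified, self-contained sketch of that pattern only; the class and member names are illustrative, not the real TaskExecutor API.

```cpp
#include <algorithm>
#include <condition_variable>
#include <cstdio>
#include <mutex>
#include <queue>
#include <thread>
#include <vector>

struct Meta { int begin; int end; };      // one slice of a batched task

class MiniExecutor {
 public:
  void start(int thread_num) {            // like start(): create bsf threads
    for (int i = 0; i < thread_num; ++i) {
      _threads.emplace_back([this] { work(); });
    }
  }
  void schedule(int batch_size, int chunk) {  // like schedule(): enqueue slices
    std::lock_guard<std::mutex> lk(_mu);
    for (int b = 0; b < batch_size; b += chunk) {
      _queue.push({b, std::min(b + chunk, batch_size)});
    }
    _cv.notify_all();
  }
  void stop() {
    { std::lock_guard<std::mutex> lk(_mu); _stop = true; }
    _cv.notify_all();
    for (auto& t : _threads) t.join();
  }

 private:
  void work() {
    for (;;) {
      std::unique_lock<std::mutex> lk(_mu);
      _cv.wait(lk, [this] { return _stop || !_queue.empty(); });
      if (_queue.empty()) return;         // exit only once the queue is drained
      Meta m = _queue.front();
      _queue.pop();
      lk.unlock();
      std::printf("run slice [%d, %d)\n", m.begin, m.end);  // "infer" happens here
    }
  }
  std::mutex _mu;
  std::condition_variable _cv;
  std::queue<Meta> _queue;
  std::vector<std::thread> _threads;
  bool _stop = false;
};

int main() {
  MiniExecutor exec;
  exec.start(2);          // two worker threads
  exec.schedule(10, 4);   // a batch of 10 becomes slices [0,4) [4,8) [8,10)
  exec.stop();
  return 0;
}
```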
// TaskExecutorVector is a singleton class.
// each Model corresponds to one TaskExecutor,
// so we need several TaskExecutors when there is more than one Model.
template <typename TaskT>
class TaskExecutorVector {
public:
@@ -689,6 +706,11 @@ class TaskExecutorVector {
std::vector<TaskExecutor<TaskT>> _vector_executor;
};
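A short sketch of the singleton-vector idea described above (one executor slot per model, reached through a function-local static); the names below are placeholders, not the real TaskExecutorVector interface.

```cpp
#include <vector>

template <typename ExecutorT>
class MiniExecutorVector {
 public:
  static MiniExecutorVector& instance() {
    static MiniExecutorVector singleton;   // one instance per process
    return singleton;
  }
  void resize(int model_num) { _executors.resize(model_num); }
  ExecutorT& operator[](int model_index) { return _executors[model_index]; }

 private:
  MiniExecutorVector() = default;          // construct only via instance()
  std::vector<ExecutorT> _executors;       // 1 executor per model
};

struct FakeExecutor { int model_id = 0; };

int main() {
  auto& vec = MiniExecutorVector<FakeExecutor>::instance();
  vec.resize(2);        // serving 2 models -> 2 executors
  vec[1].model_id = 1;
  return 0;
}
```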
// TaskManager is actually a wrapper of a Request in bsf.
// TaskManager's schedule() converts the Request into a TaskT and divides
// the TaskT into several TaskMetas, which are put into the TaskExecutor's
// task_queue.
// wait() loops until it receives the signal that the whole Task is done.
template <typename InItemT, typename OutItemT>
class TaskManager {
public:
......
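From the caller's side, the schedule()/wait() pattern described in the TaskManager comment looks roughly like the sketch below (not the real TaskManager; a `std::future` stands in for the fd-based signal, and the method names are illustrative).

```cpp
#include <cstdio>
#include <future>

class MiniTaskManager {
 public:
  // schedule(): hand the preprocessed request over to the executor side
  void schedule(int request_batch) {
    _done = std::async(std::launch::async, [request_batch] {
      // ... the executor would split the batch and run inference here ...
      return request_batch;
    });
  }
  // wait(): block until the whole task is finished
  void wait() { std::printf("finished batch of %d\n", _done.get()); }

 private:
  std::future<int> _done;
};

int main() {
  MiniTaskManager mgr;
  mgr.schedule(8);   // called in the brpc thread
  mgr.wait();        // returns once every piece of the task is done
  return 0;
}
```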
@@ -260,17 +260,27 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
}
int thrd_clear_impl() {
// for bsf-Task-threads
// actually, there are 2 kinds of multi-threading here:
// 1. brpc threads  2. bsf Task threads
// each request is handled in a single brpc thread.
// IF the bsf Task thread is NOT used:
// every single brpc thread corresponds to all the DBReloadableInferEngines.
// each request runs all models in its single brpc thread.
// every single brpc thread will create or clone N predictors,
// where N is the number of Models.
// so if there are 2 models and --thread 10,
// each brpc thread will create a predictor of Model-1 and of Model-2;
// there are 10 predictors of Model-1 and 10 predictors of Model-2 in total,
// because there are 10 brpc threads.
// IF the bsf Task thread IS used:
// there will be a ThreadPool called bsf TaskExecutor.
// TaskExecutorVector is the vector of TaskExecutors;
// the number of TaskExecutors equals the number of Models,
// i.e. 1 TaskExecutor corresponds to 1 Model.
// 1 TaskExecutor has N bsf threads, and 1 bsf thread corresponds to
// 1 predictor of the Model that the TaskExecutor corresponds to.
// the brpc thread only puts the data into the task_queue (which is in
// the TaskExecutor).
// EngineCore->infer() runs in the bsf Task thread.
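A small, self-contained sketch of the "no bsf thread" mode described above, just to make the predictor arithmetic concrete (FakePredictor and the counts are illustrative, not Paddle Serving code): every brpc thread holds its own predictor for every model, so predictors = brpc_threads * models.

```cpp
#include <cstdio>
#include <vector>

struct FakePredictor {          // stands in for one EngineCore
  int model_id;
};

int main() {
  const int brpc_threads = 10;  // e.g. started with --thread 10
  const int models = 2;         // e.g. Model-1 and Model-2

  // each brpc thread creates/clones one predictor per model
  std::vector<FakePredictor> predictors;
  for (int t = 0; t < brpc_threads; ++t) {
    for (int m = 0; m < models; ++m) {
      predictors.push_back(FakePredictor{m});
    }
  }
  // prints 20: 10 predictors of Model-1 plus 10 of Model-2
  std::printf("total predictors = %zu\n", predictors.size());
  return 0;
}
```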
@@ -335,8 +345,8 @@ class CloneDBReloadableInferEngine
gpu_ids_num);
}
// gpu_index will be set to 0 when load() or proc_initial() is called.
// gpu_index < gpu_ids_num means some GPU cards still have no predictor
// created on them, so we need to create the predictor.
// gpu_index >= gpu_ids_num means each GPU card has already created one,
// so we need to clone the predictor.
@@ -356,6 +366,10 @@ class CloneDBReloadableInferEngine
}
} else {
// gpu_id = -1 means we use CPU, but the index should be 0,
// because _cloneTemplate[-1] would be an error.
// actually, when gpu_id = -1 there is only 1 predictor in _cloneTemplate,
// so the index should always be 0 in that case.
if (gpu_id == -1) gpu_id = 0;
if (!md->cores[next_idx] ||
md->cores[next_idx]->clone(_cloneTemplate[gpu_id]->get()) != 0) {
......
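The create-vs-clone decision explained in the comments above can be sketched as follows (a hedged illustration only, not the actual CloneDBReloadableInferEngine code; the loop, slot names, and counts are placeholders).

```cpp
#include <cstdio>

int main() {
  const int gpu_ids_num = 2;   // e.g. --gpu_ids 0,1
  int gpu_index = 0;           // reset to 0 by load()/proc_initial()

  for (int thread = 0; thread < 4; ++thread) {
    if (gpu_index < gpu_ids_num) {
      // this GPU card has no predictor yet: create one from the model files
      std::printf("thread %d: create predictor on gpu slot %d\n",
                  thread, gpu_index);
    } else {
      // every card already has a template predictor: clone it (cheaper)
      std::printf("thread %d: clone predictor from gpu slot %d\n",
                  thread, gpu_index % gpu_ids_num);
    }
    ++gpu_index;
  }
  return 0;
}
```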
@@ -13,7 +13,8 @@ tar xf faster_rcnn_hrnetv2p_w18_1x.tar
python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
```
This model supports TensorRT. If you want faster inference, please use `--use_trt`, but you need to do some extra work.
Please refer to https://github.com/PaddlePaddle/Paddle-Inference-Demo/blob/master/c%2B%2B/paddle-trt/trt_dynamic_shape_test.cc#L40
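The "extra work" is setting the minimum, maximum, and optimal shapes for the TRT dynamic-shape subgraph. A minimal sketch with the Paddle Inference C++ API, following the linked trt_dynamic_shape_test.cc; the model paths, the input name "image", and the shapes below are placeholders you must replace with your own model's feed vars and shapes.

```cpp
#include <map>
#include <string>
#include <vector>
// include path depends on your Paddle Inference install layout
#include "paddle_inference_api.h"

int main() {
  paddle_infer::Config config;
  config.SetModel("serving_server/model.pdmodel",      // placeholder paths
                  "serving_server/model.pdiparams");
  config.EnableUseGpu(2000 /* MB */, 0 /* gpu id */);
  config.EnableTensorRtEngine(1 << 30, 1, 3,
                              paddle_infer::PrecisionType::kFloat32,
                              false, false);

  // min / max / opt shapes for every dynamic-shape input of the subgraph
  std::map<std::string, std::vector<int>> min_shape = {{"image", {1, 3, 224, 224}}};
  std::map<std::string, std::vector<int>> max_shape = {{"image", {1, 3, 1333, 1333}}};
  std::map<std::string, std::vector<int>> opt_shape = {{"image", {1, 3, 640, 640}}};
  config.SetTRTDynamicShapeInfo(min_shape, max_shape, opt_shape);

  auto predictor = paddle_infer::CreatePredictor(config);
  (void)predictor;
  return 0;
}
```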
### Prediction
......
@@ -13,7 +13,8 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
tar xf faster_rcnn_hrnetv2p_w18_1x.tar
python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
```
This model supports TensorRT. If you want faster inference, you can enable the `--use_trt` option, but you then also need to set the minimum, maximum, and optimal shapes for the TRT dynamic-shape subgraph.
Please refer to https://github.com/PaddlePaddle/Paddle-Inference-Demo/blob/master/c%2B%2B/paddle-trt/trt_dynamic_shape_test.cc#L40
### Perform prediction
```
......
@@ -13,7 +13,8 @@ tar xf faster_rcnn_r50_fpn_1x_coco.tar
python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
```
This model supports TensorRT. If you want faster inference, please use `--use_trt`, but you need to do some extra work.
Please refer to https://github.com/PaddlePaddle/Paddle-Inference-Demo/blob/master/c%2B%2B/paddle-trt/trt_dynamic_shape_test.cc#L40
### Perform prediction
......
@@ -13,7 +13,8 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
tar xf faster_rcnn_r50_fpn_1x_coco.tar
python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
```
This model supports TensorRT. If you want faster inference, you can enable the `--use_trt` option, but you then also need to set the minimum, maximum, and optimal shapes for the TRT dynamic-shape subgraph.
Please refer to https://github.com/PaddlePaddle/Paddle-Inference-Demo/blob/master/c%2B%2B/paddle-trt/trt_dynamic_shape_test.cc#L40
### Perform prediction
```
......