Commit f30447d6 authored by: H HexToString

update doc and comment

Parent 8c834fba
......@@ -133,6 +133,8 @@ struct Task {
int element_num = 1;
if ((*inVectorT_ptr)[feedvar_index].shape.size() == 1) {
// because shape[0] is the batch_size.
// [10, 1] is equivalent to [10], so if shape[1] does not exist,
// we should return 1.
return 1;
}
// start from shape[1], because shape[0] is the batch_size.
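
As a quick illustration of the shape arithmetic described in the comment above, here is a minimal, self-contained sketch (the function name and the plain `std::vector<int>` shape are placeholders, not the actual Task members):

```cpp
#include <cstddef>
#include <vector>

// Count the elements of one sample: skip shape[0] (the batch_size) and
// multiply the remaining dimensions. A 1-D shape such as [10] has no
// per-sample dimensions, so the count falls back to 1.
int per_sample_element_num(const std::vector<int>& shape) {
  if (shape.size() <= 1) {
    return 1;
  }
  int element_num = 1;
  for (std::size_t i = 1; i < shape.size(); ++i) {
    element_num *= shape[i];
  }
  return element_num;
}
```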
......@@ -516,6 +518,13 @@ class BatchTasks {
};
// BSF task handle
// TaskHandler is the handle of a Task.
// `read_fd` is used to receive the signal in the brpc Thread.
// `write_fd` is used to write the signal in the bsf Thread.
// when a TaskMeta is done, the bsf Thread writes to `write_fd`.
// the brpc Thread keeps reading `read_fd` in a while loop,
// so it receives the signal as soon as the TaskMeta is done.
// in short, `read_fd` and `write_fd` are used to communicate between
// the two kinds of Threads.
template <typename TaskT>
struct TaskHandler {
int read_fd;
......@@ -538,9 +547,11 @@ struct TaskHandler {
}
};
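
The `read_fd` / `write_fd` pair described in the TaskHandler comment is the classic pipe-notification pattern between threads. A self-contained sketch of that pattern, using plain POSIX `pipe()` and `std::thread` rather than the real brpc/bsf types:

```cpp
#include <unistd.h>

#include <cstdio>
#include <thread>

int main() {
  int fds[2];  // fds[0] = read_fd, fds[1] = write_fd
  if (pipe(fds) != 0) {
    return 1;
  }

  // "bsf" worker thread: write one byte to write_fd once the work is done.
  std::thread worker([write_fd = fds[1]]() {
    char done = 1;
    (void)write(write_fd, &done, 1);
  });

  // "brpc" thread: block on read_fd until the completion byte arrives.
  char signal = 0;
  (void)read(fds[0], &signal, 1);
  std::printf("task finished, signal = %d\n", signal);

  worker.join();
  close(fds[0]);
  close(fds[1]);
  return 0;
}
```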
// TaskExecutor is a Thread pool.
template <typename TaskT>
class TaskExecutor;
// ThreadContext is used to start a bsf Thread.
template <typename TaskT>
struct ThreadContext {
TaskExecutor<TaskT>* executor;
......@@ -561,6 +572,15 @@ struct ThreadContext {
}
};
// TaskExecutor is a Thread pool.
// Each TaskExecutor corresponds to one Model.
// TaskT is actually a Request preprocessed by the ReaderOp.
// A TaskT is divided into TaskMetas, which are put into
// _task_queue by schedule() in the brpc Thread.
// A TaskHandler is returned to the brpc Thread.
// The start() function creates `thread_num` bsf Threads.
// Every bsf Thread checks _task_queue and takes TaskMetas from it.
// When all TaskMetas of a Task are done, the TaskHandler is notified.
template <typename TaskT>
class TaskExecutor {
public:
......@@ -595,12 +615,6 @@ class TaskExecutor {
TaskExecutor();
}
}
/*
static TaskExecutor<TaskT>* instance() {
static TaskExecutor<TaskT> singleton;
return &singleton;
}
*/
void set_batch_size(size_t batch_size) { _batch_size = batch_size; }
......@@ -661,6 +675,9 @@ class TaskExecutor {
boost::function<void(const void*, void*)> _fn;
};
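
A hedged sketch of the splitting step described in the TaskExecutor comment: a request of `batch_size` samples is cut into TaskMeta-like slices before being queued (the struct and field names below are illustrative, not the real TaskMeta layout):

```cpp
#include <algorithm>
#include <cstddef>
#include <vector>

// Illustrative slice: [begin, end) indexes into the request's batch dimension.
struct TaskMetaSketch {
  std::size_t begin;
  std::size_t end;
};

// Divide a task of `batch_size` samples into slices of at most `max_batch`.
std::vector<TaskMetaSketch> divide_task(std::size_t batch_size,
                                        std::size_t max_batch) {
  std::vector<TaskMetaSketch> metas;
  for (std::size_t begin = 0; begin < batch_size; begin += max_batch) {
    metas.push_back({begin, std::min(begin + max_batch, batch_size)});
  }
  return metas;
}
// e.g. divide_task(10, 4) -> {[0, 4), [4, 8), [8, 10)}
```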
// TaskExecutorVector is a Singleton class.
// Each Model corresponds to one TaskExecutor,
// so we need several TaskExecutors when there is more than one Model.
template <typename TaskT>
class TaskExecutorVector {
public:
......@@ -689,6 +706,11 @@ class TaskExecutorVector {
std::vector<TaskExecutor<TaskT>> _vector_executor;
};
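
A minimal Meyers-singleton sketch of the idea behind TaskExecutorVector: one executor per Model, all held by a single process-wide vector. The real class handles initialization and resizing differently; the names here are illustrative only:

```cpp
#include <cstddef>
#include <vector>

struct ExecutorSketch { /* stands in for one per-model thread pool */ };

class ExecutorVectorSketch {
 public:
  static ExecutorVectorSketch& instance() {
    static ExecutorVectorSketch singleton;  // constructed once, process-wide
    return singleton;
  }
  void resize(std::size_t model_num) { _executors.resize(model_num); }
  ExecutorSketch& operator[](std::size_t model_index) {
    return _executors[model_index];
  }

 private:
  ExecutorVectorSketch() = default;  // no public construction
  std::vector<ExecutorSketch> _executors;
};
```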
// TaskManager is actually a wrapper of a Request in bsf.
// TaskManager's schedule() converts the Request into a TaskT
// and divides the TaskT into several TaskMetas, which are put into the
// TaskExecutor's task_queue.
// wait() loops until it receives the signal that the whole Task is done.
template <typename InItemT, typename OutItemT>
class TaskManager {
public:
......
......@@ -89,7 +89,7 @@ class InferEngine {
void* out,
uint32_t batch_size = -1) = 0;
virtual int task_infer_impl(const void* in, void* out) = 0; // NOLINT
protected:
uint32_t _model_index;
// end: framework inner call
......@@ -260,17 +260,27 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
}
int thrd_clear_impl() {
// for bsf-Task-threads
// actually, there are 2 kinds of multi-threading here:
// 1. brpc threads  2. bsf Task threads
// each request is handled in one single brpc thread.
// IF the bsf Task threads are NOT used:
// every single brpc thread corresponds to all the DBReloadableInferEngines.
// each request runs all models in its own brpc thread.
// every single brpc thread creates or clones N predictors,
// where N = the number of Models.
// so if there are 2 models and --thread 10,
// each brpc thread creates a predictor of Model-1 and one of Model-2;
// there are 10 predictors of Model-1 and 10 predictors of Model-2 in total,
// because there are 10 brpc threads.
// IF the bsf Task threads ARE used:
// there is a ThreadPool called the bsf TaskExecutor.
// in a TaskExecutor, 1 bsf thread corresponds to 1 EngineCore.
// TaskExecutorVector is the vector of TaskExecutors;
// the number of TaskExecutors equals the number of Models,
// i.e. 1 TaskExecutor corresponds to 1 Model.
// 1 TaskExecutor has N bsf threads, and 1 bsf thread corresponds to
// 1 predictor of the Model that the TaskExecutor belongs to.
// the brpc thread only puts the data into the task_queue (which lives in
// the TaskExecutor); EngineCore->infer() runs in the bsf Task threads.
......@@ -335,8 +345,8 @@ class CloneDBReloadableInferEngine
gpu_ids_num);
}
// gpu_index will be set to 0 when load() or proc_initial() is called.
// gpu_index < gpu_ids_num means there are predictors still not created
// on some GPU card, so we need to create the predictor.
// gpu_index >= gpu_ids_num means each GPU card has already created one,
// so we need to clone the predictor.
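
A hedged sketch of the create-or-clone decision the comment describes (the function and the printf placeholders are illustrative, not the real engine calls):

```cpp
#include <cstdio>

// gpu_index is reset to 0 on load()/proc_initial(); gpu_ids_num is the
// number of GPU cards configured for this engine. The two printf calls
// stand in for the real create / clone operations.
void create_or_clone_sketch(int gpu_index, int gpu_ids_num) {
  if (gpu_index < gpu_ids_num) {
    // Some card still has no predictor: create a fresh one on it.
    std::printf("create predictor on card %d\n", gpu_index);
  } else {
    // Every card already owns one: clone from that card's template predictor.
    std::printf("clone predictor on card %d\n", gpu_index % gpu_ids_num);
  }
}
```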
......@@ -356,6 +366,10 @@ class CloneDBReloadableInferEngine
}
} else {
// gpu_id = -1 means we use the CPU, but the index should still be 0:
// _cloneTemplate[-1] would be an error.
// actually, when gpu_id = -1, there is only 1 predictor in
// _cloneTemplate, so the index should always be 0 in that case.
if (gpu_id == -1) gpu_id = 0;
if (!md->cores[next_idx] ||
md->cores[next_idx]->clone(_cloneTemplate[gpu_id]->get()) != 0) {
......
......@@ -13,7 +13,8 @@ tar xf faster_rcnn_hrnetv2p_w18_1x.tar
python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
```
This model supports TensorRT. If you want faster inference, please use `--use_trt`, but you also need to set the min/max/optimal shapes of the variable-length inputs for the TensorRT subgraph.
Please refer to https://github.com/PaddlePaddle/Paddle-Inference-Demo/blob/master/c%2B%2B/paddle-trt/trt_dynamic_shape_test.cc#L40
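
The linked demo configures the minimum/maximum/optimal shapes of the variable-length inputs for the TensorRT subgraph through the Paddle Inference `Config`. A hedged sketch of that configuration (the input name `image` and the shape values are placeholders; where to hook this into your Serving deployment depends on your setup):

```cpp
#include <map>
#include <string>
#include <vector>

#include "paddle_inference_api.h"

void enable_trt_dynamic_shape(paddle_infer::Config* config) {
  // Turn on the TensorRT subgraph engine first.
  config->EnableTensorRtEngine(1 << 30 /* workspace_size */,
                               1 /* max_batch_size */,
                               3 /* min_subgraph_size */,
                               paddle_infer::PrecisionType::kFloat32,
                               false /* use_static */,
                               false /* use_calib_mode */);
  // Placeholder feed name and shapes -- replace them with your model's
  // real variable-length inputs.
  std::map<std::string, std::vector<int>> min_shape{{"image", {1, 3, 320, 320}}};
  std::map<std::string, std::vector<int>> max_shape{{"image", {1, 3, 1333, 1333}}};
  std::map<std::string, std::vector<int>> opt_shape{{"image", {1, 3, 800, 800}}};
  config->SetTRTDynamicShapeInfo(min_shape, max_shape, opt_shape);
}
```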
### Prediction
......
......@@ -13,7 +13,8 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
tar xf faster_rcnn_hrnetv2p_w18_1x.tar
python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
```
This model supports TensorRT. If you want faster inference, you can enable the `--use_trt` option, but you also need to set the min/max/optimal shapes of the variable-length inputs for the TensorRT subgraph.
Please refer to https://github.com/PaddlePaddle/Paddle-Inference-Demo/blob/master/c%2B%2B/paddle-trt/trt_dynamic_shape_test.cc#L40
### Perform prediction
```
......
......@@ -13,7 +13,8 @@ tar xf faster_rcnn_r50_fpn_1x_coco.tar
python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
```
This model supports TensorRT. If you want faster inference, please use `--use_trt`, but you also need to set the min/max/optimal shapes of the variable-length inputs for the TensorRT subgraph.
Please refer to https://github.com/PaddlePaddle/Paddle-Inference-Demo/blob/master/c%2B%2B/paddle-trt/trt_dynamic_shape_test.cc#L40
### Perform prediction
......
......@@ -13,7 +13,8 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
tar xf faster_rcnn_r50_fpn_1x_coco.tar
python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
```
This model supports TensorRT. If you want faster inference, you can enable the `--use_trt` option, but you also need to set the min/max/optimal shapes of the variable-length inputs for the TensorRT subgraph.
Please refer to https://github.com/PaddlePaddle/Paddle-Inference-Demo/blob/master/c%2B%2B/paddle-trt/trt_dynamic_shape_test.cc#L40
### Perform prediction
```
......