PaddlePaddle / Serving
Commit f30447d6
Authored on Jul 02, 2021 by HexToString
update doc and comment
Parent: 8c834fba
Showing 6 changed files with 59 additions and 19 deletions (+59 −19)
core/predictor/framework/bsf.h  (+28 −6)
core/predictor/framework/infer.h  (+23 −9)
python/examples/detection/faster_rcnn_hrnetv2p_w18_1x/README.md  (+2 −1)
python/examples/detection/faster_rcnn_hrnetv2p_w18_1x/README_CN.md  (+2 −1)
python/examples/detection/faster_rcnn_r50_fpn_1x_coco/README.md  (+2 −1)
python/examples/detection/faster_rcnn_r50_fpn_1x_coco/README_CN.md  (+2 −1)
core/predictor/framework/bsf.h
@@ -133,6 +133,8 @@ struct Task {
    int element_num = 1;
    if ((*inVectorT_ptr)[feedvar_index].shape.size() == 1) {
      // because shape[0] is batch_size:
      // a shape like [10, 1] is equivalent to [10], so when shape[1]
      // doesn't exist we should return 1.
      return 1;
    }
    // start from shape[1], because shape[0] = batch_size.
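The per-sample element count that this comment describes can be sketched as a stand-alone function. This is only an illustration of the rule (skip shape[0], multiply the rest); `per_sample_element_num` is a hypothetical name, not the Serving function.

```cpp
// Illustrative sketch only: compute the number of elements per sample,
// skipping shape[0] because shape[0] is the batch size.
#include <cstdint>
#include <vector>

int64_t per_sample_element_num(const std::vector<int64_t>& shape) {
  if (shape.size() <= 1) {
    // shape == [batch_size]: one element per sample.
    return 1;
  }
  int64_t element_num = 1;
  for (size_t i = 1; i < shape.size(); ++i) {  // start from shape[1]
    element_num *= shape[i];
  }
  return element_num;
}
```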
@@ -516,6 +518,13 @@ class BatchTasks {
};

// BSF task handle
// TaskHandler is the handle of a Task.
// `read_fd` is used to receive the signal in the brpc thread.
// `write_fd` is used to write the signal in the bsf thread.
// When a TaskMeta is done, the bsf thread writes to `write_fd`.
// The brpc thread keeps reading `read_fd` in a while loop, so it
// receives the signal when the TaskMeta is done.
// Thus `read_fd` and `write_fd` are used to communicate between the two threads.
template <typename TaskT>
struct TaskHandler {
  int read_fd;
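A minimal sketch of the fd-based signaling the comment describes, using a POSIX pipe: the worker ("bsf") thread writes one byte when work finishes and the waiting ("brpc") thread blocks on the read end. This is an illustration of the mechanism, not the Serving implementation.

```cpp
// Illustrative sketch: signal completion from a worker thread to a waiting
// thread through a pipe's read/write file descriptors.
#include <unistd.h>
#include <thread>

int main() {
  int fds[2];
  if (pipe(fds) != 0) return 1;  // fds[0] = read_fd, fds[1] = write_fd
  int read_fd = fds[0];
  int write_fd = fds[1];

  std::thread worker([write_fd]() {
    char done = 1;                    // pretend the TaskMeta finished here
    (void)write(write_fd, &done, 1);  // "bsf" thread writes the signal
  });

  char signal = 0;
  while (read(read_fd, &signal, 1) != 1) {  // "brpc" thread blocks until signaled
  }

  worker.join();
  close(read_fd);
  close(write_fd);
  return 0;
}
```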
@@ -538,9 +547,11 @@ struct TaskHandler {
  }
};

// TaskExecutor is a thread pool.
template <typename TaskT>
class TaskExecutor;

// ThreadContext is used to start a bsf thread.
template <typename TaskT>
struct ThreadContext {
  TaskExecutor<TaskT>* executor;
@@ -561,6 +572,15 @@ struct ThreadContext {
  }
};

// TaskExecutor is a thread pool.
// Each TaskExecutor corresponds to one Model.
// TaskT is actually a Request preprocessed by ReaderOp.
// A TaskT is divided into TaskMetas, which schedule() puts into
// _task_queue in the brpc thread; a TaskHandler is returned to the
// brpc thread.
// The start() function creates `thread_num` bsf threads; every bsf
// thread checks _task_queue and takes TaskMetas from it.
// When all TaskMetas of a Task are done, the TaskHandler is notified.
template <typename TaskT>
class TaskExecutor {
 public:
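A minimal sketch of the enqueue-then-consume pattern the comment describes: a producer thread pushes work items into a shared queue and a fixed number of worker threads drain it. `MiniExecutor` and its members are illustrative names, not the Serving TaskExecutor.

```cpp
// Illustrative sketch: a tiny task queue drained by worker threads,
// mirroring "brpc thread enqueues, bsf threads consume".
#include <condition_variable>
#include <deque>
#include <functional>
#include <mutex>
#include <thread>
#include <vector>

class MiniExecutor {
 public:
  explicit MiniExecutor(size_t thread_num) {
    for (size_t i = 0; i < thread_num; ++i) {
      _threads.emplace_back([this]() { work_loop(); });
    }
  }
  ~MiniExecutor() {
    {
      std::lock_guard<std::mutex> lk(_mu);
      _stop = true;
    }
    _cv.notify_all();
    for (auto& t : _threads) t.join();
  }
  // Called from the producer ("brpc") thread.
  void schedule(std::function<void()> task_meta) {
    {
      std::lock_guard<std::mutex> lk(_mu);
      _task_queue.push_back(std::move(task_meta));
    }
    _cv.notify_one();
  }

 private:
  // Each worker ("bsf") thread takes work items from the queue.
  void work_loop() {
    for (;;) {
      std::function<void()> task;
      {
        std::unique_lock<std::mutex> lk(_mu);
        _cv.wait(lk, [this]() { return _stop || !_task_queue.empty(); });
        if (_stop && _task_queue.empty()) return;
        task = std::move(_task_queue.front());
        _task_queue.pop_front();
      }
      task();  // run the piece of work (e.g. an inference call)
    }
  }

  std::mutex _mu;
  std::condition_variable _cv;
  std::deque<std::function<void()>> _task_queue;
  std::vector<std::thread> _threads;
  bool _stop = false;
};
```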
@@ -595,12 +615,6 @@ class TaskExecutor {
      TaskExecutor();
    }
  }

  /*
  static TaskExecutor<TaskT>* instance() {
    static TaskExecutor<TaskT> singleton;
    return &singleton;
  }
  */

  void set_batch_size(size_t batch_size) { _batch_size = batch_size; }
@@ -661,6 +675,9 @@ class TaskExecutor {
  boost::function<void(const void*, void*)> _fn;
};

// TaskExecutorVector is a singleton class.
// Each Model corresponds to one TaskExecutor,
// so several TaskExecutors are needed when there is more than one Model.
template <typename TaskT>
class TaskExecutorVector {
 public:
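A minimal sketch of the singleton-vector idea (one executor per model, looked up by model index). `ExecutorVector` and `FakeExecutor` are illustrative stand-ins, not the Serving classes.

```cpp
// Illustrative sketch: a singleton that owns one executor per model,
// indexed by the model's position.
#include <cstddef>
#include <vector>

struct FakeExecutor {  // stand-in for TaskExecutor<TaskT>
  void run() {}
};

class ExecutorVector {
 public:
  static ExecutorVector& instance() {
    static ExecutorVector singleton;  // constructed once, shared by all threads
    return singleton;
  }
  void resize(size_t model_num) { _executors.resize(model_num); }
  FakeExecutor& operator[](size_t model_index) { return _executors[model_index]; }

 private:
  ExecutorVector() = default;
  std::vector<FakeExecutor> _executors;
};

// Usage: ExecutorVector::instance().resize(2); ExecutorVector::instance()[0].run();
```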
@@ -689,6 +706,11 @@ class TaskExecutorVector {
  std::vector<TaskExecutor<TaskT>> _vector_executor;
};

// TaskManager is actually a wrapper of the Request in bsf.
// TaskManager's schedule() turns the Request into a TaskT and divides
// the TaskT into several TaskMetas, which are put into the TaskExecutor's
// task_queue.
// wait() is a while loop that receives the signal when the whole Task is done.
template <typename InItemT, typename OutItemT>
class TaskManager {
 public:
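A hedged sketch of the schedule-then-wait calling pattern this comment describes. The internals are stubbed and the request/response types are placeholders; only the shape of the flow (submit, then block until the whole Task is done) follows the comment.

```cpp
// Illustrative usage sketch of a schedule-then-wait wrapper.
struct FakeRequest {};
struct FakeResponse {};

template <typename In, typename Out>
class MiniTaskManager {
 public:
  // Split the request into pieces and hand them to the executor (not shown).
  bool schedule(const In& in, Out& out) { /* enqueue TaskMetas */ return true; }
  // Block until every TaskMeta of this Task has been processed.
  void wait() { /* read the completion signal, e.g. from read_fd */ }
};

void handle_one_request(const FakeRequest& req, FakeResponse* res) {
  MiniTaskManager<FakeRequest, FakeResponse> manager;
  if (manager.schedule(req, *res)) {
    manager.wait();  // returns once the whole Task is done
  }
}
```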
core/predictor/framework/infer.h
(file mode changed: 100755 → 100644)
@@ -89,7 +89,7 @@ class InferEngine {
                         void* out,
                         uint32_t batch_size = -1) = 0;
  virtual int task_infer_impl(const void* in, void* out) = 0;  // NOLINT

 protected:
  uint32_t _model_index;
  // end: framework inner call
@@ -260,17 +260,27 @@ class DBReloadableInferEngine : public ReloadableInferEngine {
  }

  int thrd_clear_impl() {
    // for bsf-Task-threads
    // actually, there are 2 kinds of multi-threading here:
    // 1. brpc threads  2. bsf Task threads
    // each request is handled in a single brpc thread.
    //
    // IF the bsf Task threads are NOT used:
    // every single brpc thread corresponds to all the DBReloadableInferEngines,
    // and each request runs all models in its single brpc thread.
    // every single brpc thread will create or clone N predictors,
    // where N = the number of Models.
    // So if there are 2 models and --thread 10,
    // each brpc thread creates a predictor of Model-1 and of Model-2;
    // in total there are 10 predictors of Model-1 and 10 predictors of Model-2,
    // because there are 10 brpc threads.
    //
    // IF the bsf Task threads ARE used:
    // there is a thread pool called the bsf TaskExecutor.
    // In a TaskExecutor, 1 bsf thread corresponds to 1 EngineCore.
    // TaskExecutorVector is the vector of TaskExecutors;
    // the number of TaskExecutors equals the number of Models,
    // 1 TaskExecutor corresponds to 1 Model, and
    // 1 TaskExecutor has N bsf threads, each corresponding to 1 predictor of
    // the Model that the TaskExecutor serves.
    // The brpc thread only puts the data into the task_queue (which lives in
    // the TaskExecutor); EngineCore->infer() runs in the bsf Task thread.
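A small worked example of the counts described above; the constants are illustrative (the bsf thread count per executor is hypothetical, not taken from the commit).

```cpp
// Illustrative arithmetic for the two threading modes described above.
#include <cstdio>

int main() {
  const int brpc_threads = 10;             // --thread 10
  const int model_num = 2;                 // Model-1 and Model-2
  const int bsf_threads_per_executor = 4;  // hypothetical TaskExecutor size

  // Without bsf Task threads: each brpc thread creates/clones one predictor
  // per model.
  std::printf("non-bsf: %d predictors per model, %d in total\n",
              brpc_threads, brpc_threads * model_num);

  // With bsf Task threads: one TaskExecutor per model, one predictor per bsf
  // thread inside it; brpc threads only enqueue into the task_queue.
  std::printf("bsf: %d executors, %d predictors in total\n",
              model_num, model_num * bsf_threads_per_executor);
  return 0;
}
```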
@@ -335,8 +345,8 @@ class CloneDBReloadableInferEngine
                         gpu_ids_num);
  }

  // gpu_index will be set to 0 when load() or proc_initial() is called.
  // gpu_index < gpu_ids_num means some GPU cards still have no predictor
  // created on them, so we need to create the predictor.
  // gpu_index >= gpu_ids_num means each GPU card has already created one,
  // so we need to clone the predictor.
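A minimal sketch of the create-or-clone decision the comment describes. `FakeCore`, `create_or_clone`, and `clone_templates` are illustrative names standing in for the Serving members, not the actual code.

```cpp
// Illustrative sketch of the create-vs-clone decision on GPU cards.
#include <cstddef>

struct FakeCore {
  int create(int gpu_id) { return 0; }           // build a fresh predictor
  int clone(const FakeCore& tmpl) { return 0; }  // share weights with `tmpl`
};

// gpu_index counts how many cards already have a "template" predictor;
// clone_templates holds one template per GPU card (size gpu_ids_num).
int create_or_clone(FakeCore* core, FakeCore* clone_templates,
                    size_t& gpu_index, size_t gpu_ids_num, int gpu_id) {
  if (gpu_index < gpu_ids_num) {
    // some card has no predictor yet: create one and remember it as a template
    if (core->create(gpu_id) != 0) return -1;
    clone_templates[gpu_index] = *core;
    ++gpu_index;
  } else {
    // every card already has one: clone from that card's template
    if (core->clone(clone_templates[gpu_id]) != 0) return -1;
  }
  return 0;
}
```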
@@ -356,6 +366,10 @@ class CloneDBReloadableInferEngine
      }
    } else {
      // when gpu_id = -1 we use the CPU, but the index should be 0:
      // _cloneTemplate[-1] would be an error.
      // actually, when gpu_id = -1 there is only 1 predictor in _cloneTemplate,
      // so the index should always be 0 when gpu_id = -1.
      if (gpu_id == -1) gpu_id = 0;
      if (!md->cores[next_idx] ||
          md->cores[next_idx]->clone(_cloneTemplate[gpu_id]->get()) != 0) {
python/examples/detection/faster_rcnn_hrnetv2p_w18_1x/README.md
@@ -13,7 +13,8 @@ tar xf faster_rcnn_hrnetv2p_w18_1x.tar
python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
```
This model supports TensorRT. If you want faster inference, please use `--use_trt`, but you need to do some extra work.
Please refer to https://github.com/PaddlePaddle/Paddle-Inference-Demo/blob/master/c%2B%2B/paddle-trt/trt_dynamic_shape_test.cc#L40
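The "extra work" is registering TensorRT dynamic-shape ranges for the subgraph, as done in the linked Paddle-Inference-Demo file. A hedged sketch of that configuration step follows; the include path, tensor name, and shape values are placeholders, and if your Paddle Inference version exposes a different config API, follow the linked demo instead.

```cpp
// Illustrative sketch, following the linked trt_dynamic_shape_test.cc:
// register min/max/opt shapes for the TRT subgraph's inputs.
#include <map>
#include <string>
#include <vector>
#include "paddle_inference_api.h"  // path may differ per Paddle install

void prepare_trt_dynamic_shape(paddle::AnalysisConfig* config) {
  config->EnableUseGpu(2000 /* MB */, 0 /* GPU id */);
  config->EnableTensorRtEngine(1 << 30 /* workspace */, 1 /* max batch */,
                               5 /* min subgraph size */,
                               paddle::AnalysisConfig::Precision::kFloat32,
                               false /* use_static */, false /* use_calib */);
  // Placeholder tensor name and shapes; use the deployed model's real inputs.
  std::map<std::string, std::vector<int>> min_shape = {{"image", {1, 3, 100, 100}}};
  std::map<std::string, std::vector<int>> max_shape = {{"image", {1, 3, 1333, 1333}}};
  std::map<std::string, std::vector<int>> opt_shape = {{"image", {1, 3, 800, 1333}}};
  config->SetTRTDynamicShapeInfo(min_shape, max_shape, opt_shape);
}
```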
### Prediction
python/examples/detection/faster_rcnn_hrnetv2p_w18_1x/README_CN.md
@@ -13,7 +13,8 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
tar xf faster_rcnn_hrnetv2p_w18_1x.tar
python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
```
This model supports TensorRT. For faster inference you can enable the `--use_trt` option, but you then also need to set the min/max/opt shapes for the TRT dynamic-shape subgraph.
Please refer to https://github.com/PaddlePaddle/Paddle-Inference-Demo/blob/master/c%2B%2B/paddle-trt/trt_dynamic_shape_test.cc#L40

### Perform prediction
```
python/examples/detection/faster_rcnn_r50_fpn_1x_coco/README.md
@@ -13,7 +13,8 @@ tar xf faster_rcnn_r50_fpn_1x_coco.tar
python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
```
This model supports TensorRT. If you want faster inference, please use `--use_trt`, but you need to do some extra work.
Please refer to https://github.com/PaddlePaddle/Paddle-Inference-Demo/blob/master/c%2B%2B/paddle-trt/trt_dynamic_shape_test.cc#L40

### Perform prediction
python/examples/detection/faster_rcnn_r50_fpn_1x_coco/README_CN.md
@@ -13,7 +13,8 @@ wget --no-check-certificate https://paddle-serving.bj.bcebos.com/pddet_demo/2.0/
tar xf faster_rcnn_r50_fpn_1x_coco.tar
python -m paddle_serving_server.serve --model serving_server --port 9494 --gpu_ids 0
```
This model supports TensorRT. For faster inference you can enable the `--use_trt` option, but you then also need to set the min/max/opt shapes for the TRT dynamic-shape subgraph.
Please refer to https://github.com/PaddlePaddle/Paddle-Inference-Demo/blob/master/c%2B%2B/paddle-trt/trt_dynamic_shape_test.cc#L40

### Perform prediction
```