未验证 提交 f93ceea8 编写于 作者: 文幕地方's avatar 文幕地方 提交者: GitHub

add PP-OCRv3-IE, PP-OCRv3-SA, PP-OCRv3-TTS (#5670)

* add face system

* add nlp

* add tts

* update model_zoo

* fix trt error

* update shape_info_filename

* rm   shape_info_filename  in config

* rm   shape_info_filename  in config

* rm   shape_info_filename  in config

* reset modelcenter commit

* rm mkldnn bf16
上级 aa762a43
ENV:
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
ENV:
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
ENV:
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
ENV:
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
ENV:
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
ENV:
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
......@@ -2,7 +2,6 @@ image_shape: &image_shape 320
ENV:
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
......@@ -2,7 +2,6 @@ image_shape: &image_shape 608
ENV:
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
......@@ -2,7 +2,6 @@ image_shape: &image_shape 640
ENV:
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
......@@ -2,7 +2,6 @@ image_shape: &image_shape 640
ENV:
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
......@@ -2,7 +2,6 @@ image_shape: &image_shape 640
ENV:
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
ENV:
min_subgraph_size: 3
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
save_img: True
save_res: True
return_res: True
MODEL:
- DetectionOp:
name: det
param_path: paddlecv://models/face_detection/inference.pdiparams
model_path: paddlecv://models/face_detection/inference.pdmodel
batch_size: 1
PreProcess:
- NormalizeImage:
is_scale: false
mean: [123, 117, 104]
std: [127.502231, 127.502231, 127.502231]
- Permute:
PostProcess:
- ParserDetResults:
label_list:
- face
threshold: 0.5
Inputs:
- input.image
- BboxCropOp:
name: bbox_crop
Inputs:
- input.image
- det.dt_bboxes
- ClassificationOp:
name: cls
param_path: paddlecv://models/face_attribute/inference.pdiparams
model_path: paddlecv://models/face_attribute/inference.pdmodel
batch_size: 8
PreProcess:
- ResizeImage:
size: [ 224, 224 ]
- NormalizeImage:
scale: 0.00392157
mean: [ 0.485, 0.456, 0.406 ]
std: [ 0.229, 0.224, 0.225 ]
order: ''
channel_num: 3
- ToCHWImage:
- ExpandDim:
axis: 0
PostProcess:
- FaceAttribute:
Inputs:
- bbox_crop.crop_image
- DetOutput:
name: vis
Inputs:
- input.fn
- input.image
- det.dt_bboxes
- det.dt_scores
- det.dt_cls_names
- cls.class_ids
- cls.scores
- cls.label_names
image_shape: &image_shape 640
ENV:
min_subgraph_size: 3
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
save_img: True
save_res: False
return_res: True
MODEL:
- DetectionOp:
name: det
param_path: paddlecv://models/mot_ppyoloe_l_36e_pphuman/model.pdiparams
model_path: paddlecv://models/mot_ppyoloe_l_36e_pphuman/model.pdmodel
batch_size: 1
image_shape: [3, *image_shape, *image_shape]
PreProcess:
- Resize:
interp: 2
keep_ratio: false
target_size: [*image_shape, *image_shape]
- Permute:
PostProcess:
- ParserDetResults:
label_list:
- pedestrian
threshold: 0.1
Inputs:
- input.image
- TrackerOP:
name: tracker
type: OCSORTTracker
tracker_configs:
det_thresh: 0.4
max_age: 30
min_hits: 3
iou_threshold: 0.3
delta_t: 3
inertia: 0.2
vertical_ratio: 0
min_box_area: 0
use_byte: False
PostProcess:
- ParserTrackerResults:
label_list:
- pedestrian
Inputs:
- det.dt_bboxes
- det.dt_scores
- det.dt_class_ids
- BboxCropOp:
name: bbox_crop
Inputs:
- input.image
- tracker.tk_bboxes
- ClassificationOp:
name: cls
param_path: paddlecv://models/person_attribute/inference.pdiparams
model_path: paddlecv://models/person_attribute/inference.pdmodel
batch_size: 8
PreProcess:
- ResizeImage:
size: [256, 192]
- NormalizeImage:
scale: 0.00392157
mean: [ 0.485, 0.456, 0.406 ]
std: [ 0.229, 0.224, 0.225 ]
order: ''
channel_num: 3
- ToCHWImage:
- ExpandDim:
axis: 0
PostProcess:
- PersonAttribute:
Inputs:
- bbox_crop.crop_image
- TrackerOutput:
name: vis
Inputs:
- input.fn
- input.image
- tracker.tk_bboxes
- tracker.tk_scores
- tracker.tk_ids
- tracker.tk_cls_ids
- tracker.tk_cls_names
- cls.class_ids
- cls.scores
- cls.label_names
......@@ -2,7 +2,6 @@ image_shape: &image_shape 640
ENV:
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
ENV:
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
ENV:
min_subgraph_size: 3
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
save_img: True
save_res: True
return_res: True
MODEL:
- OcrDbDetOp:
name: det
param_path: paddlecv://models/ch_PP-OCRv3_det_infer/inference.pdiparams
model_path: paddlecv://models/ch_PP-OCRv3_det_infer/inference.pdmodel
batch_size: 1
PreProcess:
- RGB2BGR:
- DetResizeForTest:
limit_side_len: 960
limit_type: "max"
- NormalizeImage:
std: [0.229, 0.224, 0.225]
mean: [0.485, 0.456, 0.406]
scale: '1./255.'
order: 'hwc'
- ToCHWImage:
- ExpandDim:
axis: 0
- KeepKeys:
keep_keys: ['image', 'shape']
PostProcess:
- DBPostProcess:
thresh: 0.3
box_thresh: 0.6
max_candidates: 1000
unclip_ratio: 1.5
use_dilation: False
score_mode: "fast"
box_type: "quad"
Inputs:
- input.image
- PolyCropOp:
name: crop
Inputs:
- input.image
- det.dt_polys
- OcrCrnnRecOp:
name: rec
param_path: paddlecv://models/ch_PP-OCRv3_rec_infer/inference.pdiparams
model_path: paddlecv://models/ch_PP-OCRv3_rec_infer/inference.pdmodel
batch_size: 6
PreProcess:
- RGB2BGR:
- ReisizeNormImg:
rec_image_shape: [3, 48, 320]
PostProcess:
- CTCLabelDecode:
character_dict_path: "paddlecv://dict/ocr/ch_dict.txt"
use_space_char: true
Inputs:
- crop.crop_image
- InformationExtractionOp:
name: nlp
schema: ['时间', '始发站', '终点站']
batch_size: 1
PostProcess:
- InformationExtractionDecode:
Inputs:
- rec.rec_text
- OCROutput:
name: vis
font_path: paddlecv://fonts/simfang.ttf
Inputs:
- input.fn
- input.image
- det.dt_polys
- rec.rec_text
- rec.rec_score
- nlp.text
- nlp.type
ENV:
min_subgraph_size: 3
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
save_img: True
save_res: True
return_res: True
MODEL:
- OcrDbDetOp:
name: det
param_path: paddlecv://models/ch_PP-OCRv3_det_infer/inference.pdiparams
model_path: paddlecv://models/ch_PP-OCRv3_det_infer/inference.pdmodel
batch_size: 1
PreProcess:
- RGB2BGR:
- DetResizeForTest:
limit_side_len: 960
limit_type: "max"
- NormalizeImage:
std: [0.229, 0.224, 0.225]
mean: [0.485, 0.456, 0.406]
scale: '1./255.'
order: 'hwc'
- ToCHWImage:
- ExpandDim:
axis: 0
- KeepKeys:
keep_keys: ['image', 'shape']
PostProcess:
- DBPostProcess:
thresh: 0.3
box_thresh: 0.6
max_candidates: 1000
unclip_ratio: 1.5
use_dilation: False
score_mode: "fast"
box_type: "quad"
Inputs:
- input.image
- PolyCropOp:
name: crop
Inputs:
- input.image
- det.dt_polys
- OcrCrnnRecOp:
name: rec
param_path: paddlecv://models/ch_PP-OCRv3_rec_infer/inference.pdiparams
model_path: paddlecv://models/ch_PP-OCRv3_rec_infer/inference.pdmodel
batch_size: 6
PreProcess:
- RGB2BGR:
- ReisizeNormImg:
rec_image_shape: [3, 48, 320]
PostProcess:
- CTCLabelDecode:
character_dict_path: "paddlecv://dict/ocr/ch_dict.txt"
use_space_char: true
Inputs:
- crop.crop_image
- SentimentAnalysisOp:
name: nlp
batch_size: 1
PostProcess:
- SentimentAnalysisDecode:
Inputs:
- rec.rec_text
- OCROutput:
name: vis
font_path: paddlecv://fonts/simfang.ttf
Inputs:
- input.fn
- input.image
- det.dt_polys
- rec.rec_text
- rec.rec_score
- nlp.label
ENV:
min_subgraph_size: 3
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
save_img: True
save_res: True
return_res: True
MODEL:
- OcrDbDetOp:
name: det
param_path: paddlecv://models/ch_PP-OCRv3_det_infer/inference.pdiparams
model_path: paddlecv://models/ch_PP-OCRv3_det_infer/inference.pdmodel
batch_size: 1
PreProcess:
- RGB2BGR:
- DetResizeForTest:
limit_side_len: 960
limit_type: "max"
- NormalizeImage:
std: [0.229, 0.224, 0.225]
mean: [0.485, 0.456, 0.406]
scale: '1./255.'
order: 'hwc'
- ToCHWImage:
- ExpandDim:
axis: 0
- KeepKeys:
keep_keys: ['image', 'shape']
PostProcess:
- DBPostProcess:
thresh: 0.3
box_thresh: 0.6
max_candidates: 1000
unclip_ratio: 1.5
use_dilation: False
score_mode: "fast"
box_type: "quad"
Inputs:
- input.image
- PolyCropOp:
name: crop
Inputs:
- input.image
- det.dt_polys
- OcrCrnnRecOp:
name: rec
param_path: paddlecv://models/ch_PP-OCRv3_rec_infer/inference.pdiparams
model_path: paddlecv://models/ch_PP-OCRv3_rec_infer/inference.pdmodel
batch_size: 6
PreProcess:
- RGB2BGR:
- ReisizeNormImg:
rec_image_shape: [3, 48, 320]
PostProcess:
- CTCLabelDecode:
character_dict_path: "paddlecv://dict/ocr/ch_dict.txt"
use_space_char: true
Inputs:
- crop.crop_image
- TTSOp:
name: tts
Inputs:
- input.fn
- rec.rec_text
- OCROutput:
name: vis
font_path: paddlecv://fonts/simfang.ttf
Inputs:
- input.fn
- input.image
- det.dt_polys
- rec.rec_text
- rec.rec_score
- tts.fn
ENV:
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
ENV:
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
ENV:
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
ENV:
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
ENV:
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
ENV:
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
ENV:
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
ENV:
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
......@@ -5,7 +5,6 @@ kpt_image_shape: &kpt_image_shape [*kpt_image_width, *kpt_image_height]
ENV:
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
image_shape: &image_shape 640
ENV:
min_subgraph_size: 3
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
save_img: True
save_res: True
return_res: True
MODEL:
- DetectionOp:
name: det
param_path: paddlecv://models/mot_ppyoloe_l_36e_ppvehicle/model.pdiparams
model_path: paddlecv://models/mot_ppyoloe_l_36e_ppvehicle/model.pdmodel
batch_size: 1
image_shape: [3, *image_shape, *image_shape]
PreProcess:
- Resize:
interp: 2
keep_ratio: false
target_size: [*image_shape, *image_shape]
- Permute:
PostProcess:
- ParserDetResults:
label_list:
- vehicle
threshold: 0.1
Inputs:
- input.image
- TrackerOP:
name: tracker
type: OCSORTTracker
tracker_configs:
det_thresh: 0.4
max_age: 30
min_hits: 3
iou_threshold: 0.3
delta_t: 3
inertia: 0.2
vertical_ratio: 0
min_box_area: 0
use_byte: False
PostProcess:
- ParserTrackerResults:
label_list:
- vehicle
Inputs:
- det.dt_bboxes
- det.dt_scores
- det.dt_class_ids
- BboxCropOp:
name: bbox_crop
Inputs:
- input.image
- tracker.tk_bboxes
- ClassificationOp:
name: cls
param_path: paddlecv://models/vehicle_attribute/inference.pdiparams
model_path: paddlecv://models/vehicle_attribute/inference.pdmodel
batch_size: 8
PreProcess:
- ResizeImage:
size: [256, 192]
- NormalizeImage:
scale: 0.00392157
mean: [ 0.485, 0.456, 0.406 ]
std: [ 0.229, 0.224, 0.225 ]
order: ''
channel_num: 3
- ToCHWImage:
- ExpandDim:
axis: 0
PostProcess:
- VehicleAttribute:
Inputs:
- bbox_crop.crop_image
- TrackerOutput:
name: vis
Inputs:
- input.fn
- input.image
- tracker.tk_bboxes
- tracker.tk_scores
- tracker.tk_ids
- tracker.tk_cls_ids
- tracker.tk_cls_names
- cls.class_ids
- cls.scores
- cls.label_names
......@@ -2,7 +2,6 @@ image_shape: &image_shape 640
ENV:
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
......@@ -2,7 +2,6 @@ ENV:
run_mode: paddle
device: GPU
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
......@@ -2,7 +2,6 @@ ENV:
run_mode: paddle
device: GPU
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
......@@ -2,7 +2,6 @@ ENV:
run_mode: paddle
device: GPU
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
......@@ -2,7 +2,6 @@ ENV:
run_mode: paddle
device: GPU
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
......@@ -2,7 +2,6 @@ ENV:
run_mode: paddle
device: GPU
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
......@@ -2,7 +2,6 @@ ENV:
run_mode: paddle
device: GPU
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
......@@ -2,7 +2,6 @@ ENV:
run_mode: paddle
device: GPU
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
......@@ -2,7 +2,6 @@ ENV:
run_mode: paddle
device: GPU
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
......@@ -6,7 +6,6 @@ ENV:
run_mode: paddle
device: GPU
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
......@@ -2,7 +2,6 @@ ENV:
run_mode: paddle
device: GPU
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
......@@ -2,7 +2,6 @@ ENV:
run_mode: paddle
device: GPU
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
......@@ -2,7 +2,6 @@ ENV:
run_mode: paddle
device: GPU
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
......@@ -2,7 +2,6 @@ ENV:
run_mode: paddle
device: GPU
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
......@@ -2,7 +2,6 @@ ENV:
run_mode: paddle
device: GPU
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
......@@ -2,7 +2,6 @@ ENV:
run_mode: paddle
device: GPU
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
......@@ -2,7 +2,6 @@ ENV:
run_mode: paddle
device: GPU
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
......@@ -2,7 +2,6 @@ ENV:
run_mode: paddle
device: GPU
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
......@@ -2,7 +2,6 @@ ENV:
run_mode: paddle
device: GPU
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
......@@ -2,7 +2,6 @@ ENV:
run_mode: paddle
device: GPU
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
......@@ -2,7 +2,6 @@ ENV:
run_mode: paddle
device: GPU
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
......@@ -2,7 +2,6 @@ ENV:
run_mode: paddle
device: GPU
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
......@@ -2,7 +2,6 @@ ENV:
run_mode: paddle
device: GPU
min_subgraph_size: 3
shape_info_filename: ./
trt_calib_mode: False
cpu_threads: 1
trt_use_static: False
......
......@@ -7,7 +7,6 @@
```
ENV:
min_subgraph_size: 3 # TensorRT最小子图大小
shape_info_filename: ./ # TensorRT shape收集文件路径
trt_calib_mode: False # 如果设置TensorRT离线量化校准,需要设置为True
cpu_threads: 1 # CPU部署时线程数
trt_use_static: False # TensorRT部署是否加载预生成的engine文件
......
......@@ -44,7 +44,7 @@ class PaddleCV(object):
def __init__(self,
task_name=None,
config_path=None,
output_dir=None,
output_dir='output',
run_mode='paddle',
device='CPU'):
......
single_op/PP-YOLOv2
single_op/PP-PicoDet
single_op/PP-LiteSeg
single_op/PP-YOLOE+
single_op/PP-MattingV1
single_op/PP-YOLO
single_op/PP-LCNetV2
single_op/PP-HGNet
single_op/PP-LCNet
single_op/PP-HumanSegV2
single_op/PP-YOLOE
system/PP-Structure-layout-table
system/PP-Structure-re
system/PP-Structure
system/PP-OCRv2
system/PP-Vehicle-Attr
system/PP-Vehicle
system/PP-OCRv3-TTS
system/PP-ShiTuV2
system/PP-Structure-table
system/PP-Human
system/PP-TinyPose
system/PP-ShiTu
system/PP-Structure-layout-table
system/PP-OCRv3-SA
system/PP-OCRv2
system/PP-OCRv3
system/Face-Detection-Attr
system/PP-ShiTu
system/PP-Structure
system/PP-Structure-ser
system/PP-TinyPose
system/PP-Structure-re
system/PP-Human
system/PP-OCRv3-IE
system/PP-Human-Attr
single_op/PP-HumanSegV2
single_op/PP-LCNet
single_op/PP-YOLOv2
single_op/PP-LCNetV2
single_op/PP-YOLOE
single_op/PP-YOLO
single_op/PP-YOLOE+
single_op/PP-PicoDet
single_op/PP-HGNet
single_op/PP-MattingV1
single_op/PP-LiteSeg
......@@ -45,12 +45,18 @@ TASK_DICT = {
# system
'PP-OCRv2': 'paddlecv://configs/system/PP-OCRv2.yml',
'PP-OCRv3': 'paddlecv://configs/system/PP-OCRv3.yml',
'PP-OCRv3-IE': 'paddlecv://configs/system/PP-OCRv3-IE.yml',
'PP-OCRv3-SA': 'paddlecv://configs/system/PP-OCRv3-SA.yml',
'PP-OCRv3-TTS': 'paddlecv://configs/system/PP-OCRv3-TTS.yml',
'PP-Structure': 'paddlecv://configs/system/PP-Structure.yml',
'PP-ShiTuV2': 'paddlecv://configs/system/PP-ShiTuV2.yml',
'PP-ShiTu': 'paddlecv://configs/system/PP-ShiTu.yml',
'PP-Human': 'paddlecv://configs/system/PP-Human.yml',
'PP-Human-Attr': 'paddlecv://configs/system/PP-Human-Attr.yml',
'PP-Vehicle': 'paddlecv://configs/system/PP-Vehicle.yml',
'PP-Vehicle-Attr': 'paddlecv://configs/system/PP-Vehicle-Attr.yml',
'PP-TinyPose': 'paddlecv://configs/system/PP-TinyPose.yml',
'Face-Detection-Attr': 'paddlecv://configs/system/Face-Detection-Attr.yml',
}
......
......@@ -17,6 +17,8 @@ from . import detection
from . import keypoint
from . import ocr
from . import segmentation
from . import nlp
from . import speech
from .classification import *
from .feature_extraction import *
......@@ -24,7 +26,11 @@ from .detection import *
from .keypoint import *
from .segmentation import *
from .ocr import *
from .nlp import *
from .speech import *
__all__ = classification.__all__ + detection.__all__ + keypoint.__all__
__all__ += segmentation.__all__
__all__ += ocr.__all__
__all__ += nlp.__all__
__all__ += speech.__all__
......@@ -41,7 +41,7 @@ class ModelBaseOp(BaseOp):
self.name = model_cfg["name"]
self.frame = -1
self.predictor = PaddlePredictor(param_path, model_path, env_cfg,
delete_pass)
delete_pass, self.name)
self.input_names = self.predictor.get_input_names()
keys = self.get_output_keys()
......
......@@ -100,10 +100,16 @@ class ClassificationOp(ModelBaseOp):
sub_start_idx = curr_offsef_id
sub_end_idx = curr_offsef_id + sub_index_list[idx]
output = outputs[sub_start_idx:sub_end_idx]
if len(output) > 0:
output = {k: [o[k] for o in output] for k in output[0]}
if is_list is not True:
output = {k: output[k][0] for k in output}
else:
output = {
self.output_keys[0]: [],
self.output_keys[1]: [],
self.output_keys[2]: []
}
pipe_outputs.append(output)
curr_offsef_id = sub_end_idx
return pipe_outputs
......@@ -67,3 +67,224 @@ class Topk(object):
result[output_keys[2]] = label_name_list
y.append(result)
return y
class VehicleAttribute(object):
    """Turn vehicle-attribute classifier scores into color/type labels.

    Each prediction vector is read as the color scores (one per entry of
    ``color_list``) followed by the type scores (one per entry of
    ``type_list``).

    NOTE(review): an identical ``VehicleAttribute`` definition appears a
    second time further down in this diff; if both live in the same module
    the later one is the definition that takes effect.

    Args:
        color_threshold: minimum score for the best color to be reported.
        type_threshold: minimum score for the best type to be reported.
    """

    def __init__(self, color_threshold=0.5, type_threshold=0.5):
        self.color_threshold = color_threshold
        self.type_threshold = type_threshold
        self.color_list = [
            "yellow", "orange", "green", "gray", "red", "blue", "white",
            "golden", "brown", "black"
        ]
        self.type_list = [
            "sedan", "suv", "van", "hatchback", "mpv", "pickup", "bus",
            "truck", "estate"
        ]

    def __call__(self, x, output_keys):
        """Post-process a batch of predictor outputs.

        Args:
            x: iterable of score vectors; each must support ``.tolist()``
                (presumably 1-D numpy arrays -- confirm against the caller).
            output_keys: three keys used, in order, for the per-attribute
                0/1 flags, the scores above threshold and the label string.

        Returns:
            A list with one dict per input vector.
        """
        num_colors = len(self.color_list)
        # Per-position thresholds: colors first, then types.
        threshold_list = np.array([self.color_threshold] * num_colors +
                                  [self.type_threshold] * len(self.type_list))

        batch_res = []
        for res in x:
            res = res.tolist()
            color_idx = np.argmax(res[:num_colors])
            type_idx = np.argmax(res[num_colors:])
            color_score = res[color_idx]
            # type_idx is relative to the type slice, so shift it back.
            type_score = res[type_idx + num_colors]

            if color_score >= self.color_threshold:
                color_info = f"Color: ({self.color_list[color_idx]}, prob: {color_score})"
            else:
                color_info = "Color unknown"

            if type_score >= self.type_threshold:
                type_info = f"Type: ({self.type_list[type_idx]}, prob: {type_score})"
            else:
                type_info = "Type unknown"

            # NOTE(review): the labels above use ``>=`` while the flags and
            # scores below use a strict ``>``; kept as in the original.
            scores_arr = np.array(res)
            above = scores_arr > threshold_list
            batch_res.append({
                output_keys[0]: above.astype(np.int8).tolist(),
                output_keys[1]: scores_arr[above].tolist(),
                output_keys[2]: f"{color_info}, {type_info}"
            })
        return batch_res
class PersonAttribute(object):
    """Turn pedestrian-attribute classifier scores into readable labels.

    Args:
        threshold: default decision threshold for binary attributes.
        glasses_threshold: threshold applied to the glasses score (index 1).
        hold_threshold: threshold applied to the "holding objects in front"
            score (index 18).
    """

    def __init__(self,
                 threshold=0.5,
                 glasses_threshold=0.3,
                 hold_threshold=0.6):
        self.threshold = threshold
        self.glasses_threshold = glasses_threshold
        self.hold_threshold = hold_threshold

    def __call__(self, x, output_keys):
        """Post-process a batch of predictor outputs.

        Args:
            x: iterable of score vectors; each must support ``.tolist()``
                (presumably 1-D numpy arrays -- confirm against the caller).
                Indices up to 23 and beyond are read below.
            output_keys: three keys used, in order, for the per-attribute
                0/1 flags, the scores above threshold and the label list.

        Returns:
            A list with one dict per input vector.
        """
        age_list = ['AgeLess18', 'Age18-60', 'AgeOver60']
        direct_list = ['Front', 'Side', 'Back']
        bag_list = ['HandBag', 'ShoulderBag', 'Backpack']
        upper_list = ['UpperStride', 'UpperLogo', 'UpperPlaid', 'UpperSplice']
        lower_list = [
            'LowerStripe', 'LowerPattern', 'LongCoat', 'Trousers', 'Shorts',
            'Skirt&Dress'
        ]

        batch_res = []
        for res in x:
            res = res.tolist()
            label_res = []

            # gender
            label_res.append('Female' if res[22] > self.threshold else 'Male')
            # age
            label_res.append(age_list[np.argmax(res[19:22])])
            # direction
            label_res.append(direct_list[np.argmax(res[23:])])
            # glasses
            has_glasses = res[1] > self.glasses_threshold
            label_res.append('Glasses: ' + ('True' if has_glasses else 'False'))
            # hat
            has_hat = res[0] > self.threshold
            label_res.append('Hat: ' + ('True' if has_hat else 'False'))
            # hold obj
            holds_obj = res[18] > self.hold_threshold
            label_res.append('HoldObjectsInFront: ' +
                             ('True' if holds_obj else 'False'))

            # bag: report the best-scoring bag type only if it is confident
            bag_idx = np.argmax(res[15:18])
            bag_score = res[15 + bag_idx]
            label_res.append(bag_list[bag_idx]
                             if bag_score > self.threshold else 'No bag')

            # upper: sleeve length is decided by comparing the two sleeve scores
            upper_res = res[4:8]
            upper_label = 'Upper:'
            sleeve = 'LongSleeve' if res[3] > res[2] else 'ShortSleeve'
            upper_label += ' {}'.format(sleeve)
            for i, r in enumerate(upper_res):
                if r > self.threshold:
                    upper_label += ' {}'.format(upper_list[i])
            label_res.append(upper_label)

            # lower: fall back to the best-scoring entry when none is confident
            lower_res = res[8:14]
            lower_label = 'Lower: '
            has_lower = False
            for i, score in enumerate(lower_res):
                if score > self.threshold:
                    lower_label += ' {}'.format(lower_list[i])
                    has_lower = True
            if not has_lower:
                lower_label += ' {}'.format(lower_list[np.argmax(lower_res)])
            label_res.append(lower_label)

            # shoe
            label_res.append('Boots' if res[14] > self.threshold else
                             'No boots')

            # Use the configured default threshold (not a hard-coded 0.5) so
            # the flags/scores agree with the labels built above.
            threshold_list = [self.threshold] * len(res)
            threshold_list[1] = self.glasses_threshold
            threshold_list[18] = self.hold_threshold

            scores_arr = np.array(res)
            above = scores_arr > np.array(threshold_list)
            batch_res.append({
                output_keys[0]: above.astype(np.int8).tolist(),
                output_keys[1]: scores_arr[above].tolist(),
                output_keys[2]: label_res,
            })
        return batch_res
class VehicleAttribute(object):
    """Turn vehicle-attribute classifier scores into color/type labels.

    Each prediction vector is read as the color scores (one per entry of
    ``color_list``) followed by the type scores (one per entry of
    ``type_list``).

    NOTE(review): this duplicates an identical ``VehicleAttribute``
    definition that appears earlier in this diff; if both live in the same
    module one of them should be removed.

    Args:
        color_threshold: minimum score for the best color to be reported.
        type_threshold: minimum score for the best type to be reported.
    """

    def __init__(self, color_threshold=0.5, type_threshold=0.5):
        self.color_threshold = color_threshold
        self.type_threshold = type_threshold
        self.color_list = [
            "yellow", "orange", "green", "gray", "red", "blue", "white",
            "golden", "brown", "black"
        ]
        self.type_list = [
            "sedan", "suv", "van", "hatchback", "mpv", "pickup", "bus",
            "truck", "estate"
        ]

    def __call__(self, x, output_keys):
        """Post-process a batch of predictor outputs.

        Args:
            x: iterable of score vectors; each must support ``.tolist()``
                (presumably 1-D numpy arrays -- confirm against the caller).
            output_keys: three keys used, in order, for the per-attribute
                0/1 flags, the scores above threshold and the label string.

        Returns:
            A list with one dict per input vector.
        """
        num_colors = len(self.color_list)
        # Per-position thresholds: colors first, then types.
        threshold_list = np.array([self.color_threshold] * num_colors +
                                  [self.type_threshold] * len(self.type_list))

        batch_res = []
        for res in x:
            res = res.tolist()
            color_idx = np.argmax(res[:num_colors])
            type_idx = np.argmax(res[num_colors:])
            color_score = res[color_idx]
            # type_idx is relative to the type slice, so shift it back.
            type_score = res[type_idx + num_colors]

            if color_score >= self.color_threshold:
                color_info = f"Color: ({self.color_list[color_idx]}, prob: {color_score})"
            else:
                color_info = "Color unknown"

            if type_score >= self.type_threshold:
                type_info = f"Type: ({self.type_list[type_idx]}, prob: {type_score})"
            else:
                type_info = "Type unknown"

            # NOTE(review): the labels above use ``>=`` while the flags and
            # scores below use a strict ``>``; kept as in the original.
            scores_arr = np.array(res)
            above = scores_arr > threshold_list
            batch_res.append({
                output_keys[0]: above.astype(np.int8).tolist(),
                output_keys[1]: scores_arr[above].tolist(),
                output_keys[2]: f"{color_info}, {type_info}"
            })
        return batch_res
class FaceAttribute(object):
    """Select the face-attribute labels whose score exceeds a threshold.

    Args:
        threshold: a label is reported when its score is strictly greater
            than this value.
    """

    def __init__(self, threshold=0.5):
        self.threshold = threshold
        # Kept as a numpy array so it can be indexed with a boolean mask.
        self.label_list = np.array([
            '短胡子', '弯眉毛', '有吸引力', '眼袋', '秃顶', '刘海', '厚嘴唇', '大鼻子', '黑色头发',
            '金色头发', '模糊', '棕色头发', '浓眉毛', '胖的', '双下巴', '眼镜', '山羊胡子', '灰白头发',
            '浓妆', '高颧骨', '男性', '嘴巴微张', '胡子,髭', '小眼睛', '没有胡子', '鸭蛋脸', '皮肤苍白',
            '尖鼻子', '发际线后移', '连鬓胡子', '红润双颊', '微笑', '直发', '卷发', '戴耳环', '戴帽子',
            '涂唇膏', '戴项链', '戴领带', '年轻'
        ])

    def _decode_one(self, scores, output_keys):
        """Build the result dict for a single score vector."""
        mask = scores > self.threshold
        return {
            output_keys[0]: mask.astype(np.int8).tolist(),
            output_keys[1]: scores[mask].tolist(),
            output_keys[2]: self.label_list[mask].tolist()
        }

    def __call__(self, x, output_keys):
        """Post-process a batch of predictor outputs.

        Args:
            x: iterable of score vectors that support ``>`` comparison and
                boolean-mask indexing (presumably 1-D numpy arrays with one
                score per label -- confirm against the caller).
            output_keys: three keys used, in order, for the 0/1 flags, the
                selected scores and the selected label names.

        Returns:
            A list with one dict per input vector.
        """
        return [self._decode_one(scores, output_keys) for scores in x]
......@@ -44,7 +44,9 @@ class DetectionOp(ModelBaseOp):
[1., 1.], dtype=np.float32),
'im_shape': np.array(
image.shape[:2], dtype=np.float32),
'input_shape': self.model_cfg["image_shape"],
'input_shape': self.model_cfg["image_shape"]
if 'image_shape' in self.model_cfg else np.array(
image.shape[:2], dtype=np.float32),
}
for ops in self.preprocessor:
image, im_info = ops(image, im_info)
......
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .inference import InformationExtractionOp, SentimentAnalysisOp
__all__ = ['InformationExtractionOp', 'SentimentAnalysisOp']
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import importlib
from functools import reduce
import os
import numpy as np
import math
import paddle
from paddlenlp import Taskflow
from ..base import ModelBaseOp
from ppcv.ops.base import create_operators, BaseOp
from ppcv.core.workspace import register
from .postprocess import *
@register
class SentimentAnalysisOp(BaseOp):
    """Pipeline op that runs a PaddleNLP ``Taskflow`` over text inputs.

    The op reads one input key per sample, feeds the texts to the task in
    batches of ``batch_size``, post-processes the task output and regroups
    the results per sample.
    """

    def __init__(self, model_cfg, env_cfg):
        """Read the op configuration and build the task.

        Args:
            model_cfg: op configuration; ``name`` and ``PostProcess`` are
                required, ``batch_size`` (default 1) and ``task`` are
                optional.
            env_cfg: environment configuration; its ``batch_size`` entry is
                overwritten with this op's batch size.
        """
        super(SentimentAnalysisOp, self).__init__(model_cfg, env_cfg)
        mod = importlib.import_module(__name__)
        env_cfg["batch_size"] = model_cfg.get("batch_size", 1)
        self.batch_size = env_cfg["batch_size"]
        self.name = model_cfg["name"]
        self.frame = -1
        keys = self.get_output_keys()
        self.output_keys = [self.name + '.' + key for key in keys]
        self.postprocessor = create_operators(model_cfg["PostProcess"], mod)
        # Subclasses override _init_task; it is invoked exactly once here.
        self._init_task(model_cfg)

    def _init_task(self, model_cfg):
        """Create the Taskflow instance stored in ``self.nlp``."""
        task = model_cfg.get('task', 'sentiment_analysis')
        self.nlp = Taskflow(task)

    @classmethod
    def get_output_keys(cls):
        """Return the un-prefixed output key names of this op."""
        return ["label"]

    def postprocess(self, inputs, result):
        """Run the post-processors in order.

        Only the last post-processor receives ``self.output_keys``.
        ``inputs`` is accepted for interface compatibility and is not used.
        """
        outputs = result
        last_idx = len(self.postprocessor) - 1
        for idx, ops in enumerate(self.postprocessor):
            if idx == last_idx:
                outputs = ops(outputs, self.output_keys)
            else:
                outputs = ops(outputs)
        return outputs

    @classmethod
    def type(cls):
        """Return the op category string."""
        return 'MODEL'

    def infer(self, image_list):
        """Run the task over ``image_list`` (a flat list of texts) in batches.

        Returns:
            A flat list with the post-processed result of every batch
            concatenated in input order.
        """
        inputs = []
        batch_loop_cnt = math.ceil(float(len(image_list)) / self.batch_size)
        results = []
        for i in range(batch_loop_cnt):
            start_index = i * self.batch_size
            end_index = min((i + 1) * self.batch_size, len(image_list))
            batch_image_list = image_list[start_index:end_index]
            # model inference
            result = self.nlp(batch_image_list)
            # postprocess
            result = self.postprocess(inputs, result)
            results.extend(result)
        return results

    def __call__(self, inputs):
        """Run the op on a batch of pipeline samples.

        step1: parse inputs
        step2: run
        step3: merge results

        Args:
            inputs: a list of dicts; the value under ``self.input_keys[0]``
                is either one text or a list/tuple of texts.

        Returns:
            A list with one dict per sample, keyed by ``self.output_keys``.
            Values are lists when the sample input was a list/tuple, single
            items otherwise. A sample without any text yields empty lists.
        """
        if not inputs:
            return []

        # step1: parse inputs
        key = self.input_keys[0]
        is_list = isinstance(inputs[0][key], (list, tuple))
        if is_list:
            texts_per_sample = [sample[key] for sample in inputs]
        else:
            texts_per_sample = [[sample[key]] for sample in inputs]
        sub_index_list = [len(texts) for texts in texts_per_sample]
        # Flatten into a NEW list: extending the first sample's list in place
        # would corrupt the upstream op's output (and fails for tuples).
        flat_texts = [text for texts in texts_per_sample for text in texts]

        # step2: run
        outputs = self.infer(flat_texts)

        # step3: merge
        curr_offset_id = 0
        pipe_outputs = []
        for sub_len in sub_index_list:
            sub_end_idx = curr_offset_id + sub_len
            output = outputs[curr_offset_id:sub_end_idx]
            if output:
                output = {k: [o[k] for o in output] for k in output[0]}
                if not is_list:
                    output = {k: output[k][0] for k in output}
            else:
                # No text for this sample (e.g. nothing was recognised).
                output = {k: [] for k in self.output_keys}
            pipe_outputs.append(output)
            curr_offset_id = sub_end_idx
        return pipe_outputs
@register
class InformationExtractionOp(SentimentAnalysisOp):
    """Pipeline op that runs a PaddleNLP information-extraction Taskflow.

    Differs from the parent op only in the task it builds and in its
    output keys.
    """

    def __init__(self, model_cfg, env_cfg):
        # The parent constructor already calls self._init_task(model_cfg),
        # which dispatches to the override below. Calling it a second time
        # here would build (and load) the Taskflow twice.
        super(InformationExtractionOp, self).__init__(model_cfg, env_cfg)

    def _init_task(self, model_cfg):
        """Create the Taskflow instance stored in ``self.nlp``.

        ``task`` defaults to ``'information_extraction'`` and ``schema`` to
        ``['时间', '地点', '人物']`` when absent from ``model_cfg``.
        """
        task = model_cfg.get('task', 'information_extraction')
        schema = model_cfg.get('schema', ['时间', '地点', '人物'])
        self.nlp = Taskflow(task, schema=schema)

    @classmethod
    def get_output_keys(cls):
        """Return the un-prefixed output key names of this op."""
        return ["text", "type"]
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
class InformationExtractionDecode(object):
    """Split information-extraction predictions into texts and their types."""

    def __init__(self):
        pass

    def __call__(self, preds, output_keys):
        """Decode a batch of predictions.

        Args:
            preds: iterable of mappings from an entity type to a list of
                items, each item providing a ``'text'`` entry.
            output_keys: two keys; the first receives the list of text
                lists, the second the list of entity types (same order).

        Returns:
            A list with one dict per prediction.
        """
        decoded = []
        for pred in preds:
            entity_types = list(pred.keys())
            texts = [[item['text'] for item in items]
                     for items in pred.values()]
            decoded.append({
                output_keys[0]: texts,
                output_keys[1]: entity_types
            })
        return decoded
class SentimentAnalysisDecode(object):
    """Keep only the ``'label'`` entry of each sentiment prediction."""

    def __init__(self):
        pass

    def __call__(self, preds, output_keys):
        """Decode a batch of predictions.

        Args:
            preds: iterable of mappings that each provide a ``'label'`` entry.
            output_keys: sequence whose first element is the key under which
                the label is stored.

        Returns:
            A list with one single-entry dict per prediction.
        """
        return [{output_keys[0]: pred['label']} for pred in preds]
......@@ -238,8 +238,32 @@ class DBPostProcess(object):
"box_type can only be one of ['quad', 'poly']")
boxes_batch.append({
output_keys[0]: boxes,
output_keys[0]: sorted_boxes(boxes),
output_keys[1]: scores,
})
return boxes_batch[0]
def sorted_boxes(dt_boxes):
    """
    Sort text boxes in reading order: top to bottom, then left to right.

    Boxes are first ordered by the (y, x) coordinates of their first point.
    Adjacent boxes whose first-point y values differ by less than 10 are then
    treated as lying on the same text line and are re-ordered by x.

    args:
        dt_boxes(array-like): detected text boxes; each box is a sequence of
            [x, y] points and only its first point is inspected
            (quad boxes give an overall shape of [N, 4, 2])
    return:
        np.ndarray holding the same boxes in sorted order
    """
    _boxes = sorted(dt_boxes, key=lambda box: (box[0][1], box[0][0]))

    for i in range(len(_boxes) - 1):
        # Bubble box i + 1 towards the front while it sits on the same line
        # as, and to the left of, its predecessor. The scan has to reach
        # j == 0; stopping at 1 would leave the first two boxes uncompared.
        for j in range(i, -1, -1):
            on_same_line = abs(_boxes[j + 1][0][1] - _boxes[j][0][1]) < 10
            if on_same_line and _boxes[j + 1][0][0] < _boxes[j][0][0]:
                _boxes[j], _boxes[j + 1] = _boxes[j + 1], _boxes[j]
            else:
                break
    return np.array(_boxes)
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from .inference import TTSOp
__all__ = ['TTSOp']
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import importlib
import pathlib
import os
from paddlespeech.cli.tts import TTSExecutor
from ppcv.ops.base import create_operators, BaseOp
from ppcv.core.workspace import register
@register
class TTSOp(BaseOp):
    """Pipeline op that synthesises speech for recognised text.

    For every input sample the texts are concatenated, passed to a
    PaddleSpeech ``TTSExecutor`` and the audio is saved as a ``.wav`` file
    named after the sample's source file.
    """

    def __init__(self, model_cfg, env_cfg):
        """Set up the op.

        Args:
            model_cfg: mapping of model settings; ``name`` is required and
                ``batch_size`` is optional (default 1).
            env_cfg: mapping of environment settings; its ``batch_size``
                entry is overwritten with the model's batch size.
        """
        super(TTSOp, self).__init__(model_cfg, env_cfg)
        env_cfg["batch_size"] = model_cfg.get("batch_size", 1)
        self.batch_size = env_cfg["batch_size"]
        self.name = model_cfg["name"]
        self.frame = -1
        keys = self.get_output_keys()
        self.output_keys = [self.name + '.' + key for key in keys]
        self.tts = TTSExecutor()
        self.output_dir = self.env_cfg.get('output_dir', 'output')

    @classmethod
    def get_output_keys(cls):
        """Return the un-prefixed names of the values this op produces."""
        return ["fn"]

    @classmethod
    def type(cls):
        """Return the op category tag."""
        return 'MODEL'

    def infer(self, inputs):
        """Synthesise one wav file per input sample.

        Args:
            inputs: list of dicts; ``self.input_keys[0]`` selects the source
                file path and ``self.input_keys[1]`` the texts to speak.

        Returns:
            A list of ``{self.output_keys[0]: wav_path}`` dicts, one per
            input sample.
        """
        # The wav files are written into output_dir, so make sure it exists
        # before the first synthesis call.
        if self.output_dir:
            os.makedirs(self.output_dir, exist_ok=True)

        results = []
        for data in inputs:
            img_path = data[self.input_keys[0]]
            txts = data[self.input_keys[1]]
            save_path = os.path.join(self.output_dir,
                                     pathlib.Path(img_path).stem + '.wav')
            # model inference
            self.tts(text=''.join(txts),
                     output=save_path,
                     am='fastspeech2_mix',
                     voc='hifigan_csmsc',
                     lang='mix',
                     spk_id=174)
            results.append({self.output_keys[0]: save_path})
        return results

    def __call__(self, inputs):
        """
        Run text-to-speech on a batch.

        input: a list of dict
        return: the list produced by ``infer``
        """
        outputs = self.infer(inputs)
        return outputs
......@@ -96,7 +96,8 @@ class DetOutput(OutputBaseOp):
def __call__(self, inputs):
total_res = []
for res in inputs:
fn, image, dt_bboxes, dt_scores, dt_cls_names = res.values()
fn, image, dt_bboxes, dt_scores, dt_cls_names = list(res.values(
))[:5]
image = draw_det(image, dt_bboxes, dt_scores, dt_cls_names)
res.pop('input.image')
if self.frame_id != -1:
......
......@@ -83,27 +83,27 @@ class OCROutput(OutputBaseOp):
def __init__(self, model_cfg, env_cfg):
super(OCROutput, self).__init__(model_cfg, env_cfg)
font_path = model_cfg.get('font_path', None)
if font_path is not None:
self.font_path = get_font_path(font_path)
else:
self.font_path = None
def __call__(self, inputs):
total_res = []
for input in inputs:
fn, image, dt_polys = list(input.values())[:3]
rec_text = input.get('rec.rec_text', None)
rec_score = input.get('rec.rec_score', None)
res = dict(
filename=fn,
dt_polys=dt_polys.tolist(),
rec_text=rec_text,
rec_score=rec_score)
input.pop('input.image')
input['det.dt_polys'] = input['det.dt_polys'].tolist()
res = input
if self.frame_id != -1:
res.update({'frame_id': frame_id})
logger.info(res)
if self.save_img:
image = image[:, :, ::-1]
if rec_text is not None:
if 'rec.rec_text' in input:
image = self.draw_ocr_box_txt(
Image.fromarray(image), dt_polys, rec_text, rec_score)
Image.fromarray(image), input['det.dt_polys'],
input['rec.rec_text'], input['rec.rec_score'])
else:
image = draw_boxes(image, dt_polys.reshape([-1, 8]))
file_name = os.path.split(fn)[-1]
......@@ -132,7 +132,7 @@ class OCROutput(OutputBaseOp):
img_left = image.copy()
img_right = np.ones((h, w, 3), dtype=np.uint8) * 255
random.seed(0)
boxes = np.array(boxes)
draw_left = ImageDraw.Draw(img_left)
if txts is None or len(txts) != len(boxes):
txts = [None] * len(boxes)
......
......@@ -72,8 +72,8 @@ class TrackerOutput(OutputBaseOp):
total_res = []
vis_images = []
for res in inputs:
fn, image, tk_bboxes, tk_scores, tk_ids, tk_cls_ids, tk_cls_names = res.values(
)
fn, image, tk_bboxes, tk_scores, tk_ids, tk_cls_ids, tk_cls_names = list(
res.values())[:7]
tk_names = [
'{} {}'.format(tk_cls_name, tk_id)
for tk_id, tk_cls_name in zip(tk_ids, tk_cls_names)
......
......@@ -21,16 +21,26 @@ from paddle.inference import create_predictor
class PaddlePredictor(object):
def __init__(self, param_path, model_path, config, delete_pass=[]):
def __init__(self,
param_path,
model_path,
config,
delete_pass=[],
name='model'):
super().__init__()
run_mode = config.get("run_mode", "paddle") # used trt or mkldnn
shape_info_filename = os.path.join(
config.get("output_dir", "output"),
'{}_{}_shape_info.txt'.format(name, run_mode))
self.predictor, self.inference_config, self.input_names, self.input_tensors, self.output_tensors = self.create_paddle_predictor(
param_path,
model_path,
batch_size=config['batch_size'],
run_mode=config.get("run_mode", "paddle"), # used trt or mkldnn
run_mode=run_mode,
device=config.get("device", "CPU"),
min_subgraph_size=config["min_subgraph_size"],
shape_info_filename=config["shape_info_filename"],
shape_info_filename=shape_info_filename,
trt_calib_mode=config["trt_calib_mode"],
cpu_threads=config["cpu_threads"],
trt_use_static=config["trt_use_static"],
......@@ -53,9 +63,12 @@ class PaddlePredictor(object):
f"inference model: {model_path} or param: {param_path} does not exist, please check again..."
)
assert run_mode in [
"paddle", "trt_fp32", "trt_fp16", "trt_int8", "mkldnn",
"mkldnn_bf16"
], "The run_mode must be 'paddle', 'trt_fp32', 'trt_fp16', 'trt_int8', 'mkldnn', 'mkldnn_bf16', but received run_mode: {}".format(
"paddle",
"trt_fp32",
"trt_fp16",
"trt_int8",
"mkldnn",
], "The run_mode must be 'paddle', 'trt_fp32', 'trt_fp16', 'trt_int8', 'mkldnn', but received run_mode: {}".format(
run_mode)
config = Config(model_path, param_path)
if device == 'GPU':
......@@ -66,8 +79,6 @@ class PaddlePredictor(object):
try:
config.enable_mkldnn()
config.set_cpu_math_library_num_threads(cpu_threads)
if 'bf16' in run_mode:
config.enable_mkldnn_bfloat16()
except Exception as e:
print(
"The current environment does not support `mkldnn`, so disable mkldnn."
......@@ -85,7 +96,7 @@ class PaddlePredictor(object):
max_batch_size=batch_size,
min_subgraph_size=min_subgraph_size,
precision_mode=precision_map[run_mode],
trt_use_static=trt_use_static,
use_static=trt_use_static,
use_calib_mode=trt_calib_mode)
if shape_info_filename is not None:
......
......@@ -4,3 +4,5 @@ opencv-contrib-python
PyYAML>=5.1
Pillow
faiss-cpu==1.7.1.post2
paddlenlp
paddlespeech
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册