Commit 2a4f46a2 authored by Q qingqing01

Fix conflicts

[submodule "PaddleNLP/language_representations_kit/ERNIE"]
path = PaddleNLP/language_representations_kit/ERNIE
url = https://github.com/PaddlePaddle/ERNIE
[submodule "PaddleRL"]
path = PaddleRL
url = https://github.com/PaddlePaddle/PARL
......
@@ -61,14 +61,9 @@ The goal of PaddleDetection is to provide industry and academia with a rich set of easy-to-use object
## Getting Started
In the inference stage, run the following command to obtain visualized results, which are saved under the `output` directory.
```bash
export PYTHONPATH=`pwd`:$PYTHONPATH
python tools/infer.py -c configs/mask_rcnn_r50_1x.yml \
-o weights=https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r50_1x.tar \
--infer_img=demo/000000570688.jpg
```
## Quick Start
PaddleDetection provides a quick-start demo so that users can get up to speed quickly; see [QUICK_STARTED_cn.md](docs/QUICK_STARTED_cn.md) for the example.
For more details on training and evaluation, please refer to [GETTING_STARTED_cn.md](docs/GETTING_STARTED_cn.md).
......
architecture: BlazeFace
max_iters: 320000
train_feed: SSDTrainFeed
eval_feed: SSDEvalFeed
test_feed: SSDTestFeed
pretrain_weights:
use_gpu: true
snapshot_iter: 10000
log_smooth_window: 20
log_iter: 20
metric: WIDERFACE
save_dir: output
weights: output/blazeface/model_final/
# 1(label_class) + 1(background)
num_classes: 2
BlazeFace:
backbone: BlazeNet
output_decoder:
keep_top_k: 750
nms_threshold: 0.3
nms_top_k: 5000
score_threshold: 0.01
min_sizes: [[16.,24.], [32., 48., 64., 80., 96., 128.]]
use_density_prior_box: false
BlazeNet:
with_extra_blocks: true
lite_edition: false
LearningRate:
base_lr: 0.001
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [240000, 300000]
OptimizerBuilder:
optimizer:
momentum: 0.0
type: RMSPropOptimizer
regularizer:
factor: 0.0005
type: L2
SSDTrainFeed:
batch_size: 8
use_process: True
dataset:
dataset_dir: dataset/wider_face
annotation: wider_face_split/wider_face_train_bbx_gt.txt
image_dir: WIDER_train/images
image_shape: [3, 640, 640]
sample_transforms:
- !DecodeImage
to_rgb: true
with_mixup: false
- !NormalizeBox {}
- !RandomDistort
brightness_lower: 0.875
brightness_upper: 1.125
is_order: true
- !ExpandImage
max_ratio: 4
prob: 0.5
- !CropImageWithDataAchorSampling
anchor_sampler:
- [1, 10, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.2, 0.0]
batch_sampler:
- [1, 50, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0]
- [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0]
- [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0]
- [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0]
- [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0]
target_size: 640
- !RandomInterpImage
target_size: 640
- !RandomFlipImage
is_normalized: true
- !Permute {}
- !NormalizeImage
is_scale: false
mean: [104, 117, 123]
std: [127.502231, 127.502231, 127.502231]
SSDEvalFeed:
batch_size: 1
use_process: false
fields: ['image', 'im_id', 'gt_box']
dataset:
dataset_dir: dataset/wider_face
    annotation: wider_face_split/wider_face_val_bbx_gt.txt
image_dir: WIDER_val/images
drop_last: false
image_shape: [3, 640, 640]
sample_transforms:
- !DecodeImage
to_rgb: true
with_mixup: false
- !NormalizeBox {}
- !ResizeImage
interp: 1
target_size: 640
use_cv2: false
- !Permute {}
- !NormalizeImage
is_scale: false
mean: [104, 117, 123]
std: [127.502231, 127.502231, 127.502231]
SSDTestFeed:
batch_size: 1
use_process: false
dataset:
use_default_label: true
drop_last: false
image_shape: [3, 640, 640]
sample_transforms:
- !DecodeImage
to_rgb: true
with_mixup: false
- !ResizeImage
interp: 1
target_size: 640
use_cv2: false
- !Permute {}
- !NormalizeImage
is_scale: false
mean: [104, 117, 123]
std: [127.502231, 127.502231, 127.502231]
architecture: BlazeFace
max_iters: 320000
train_feed: SSDTrainFeed
eval_feed: SSDEvalFeed
test_feed: SSDTestFeed
pretrain_weights:
use_gpu: true
snapshot_iter: 10000
log_smooth_window: 20
log_iter: 20
metric: WIDERFACE
save_dir: output
weights: output/blazeface_nas/model_final/
# 1(label_class) + 1(background)
num_classes: 2
BlazeFace:
backbone: BlazeNet
output_decoder:
keep_top_k: 750
nms_threshold: 0.3
nms_top_k: 5000
score_threshold: 0.01
min_sizes: [[16.,24.], [32., 48., 64., 80., 96., 128.]]
use_density_prior_box: false
BlazeNet:
blaze_filters: [[12, 12], [12, 12, 2], [12, 12]]
double_blaze_filters: [[12, 16, 24, 2], [24, 12, 24], [24, 16, 72, 2], [72, 12, 72]]
with_extra_blocks: true
lite_edition: false
LearningRate:
base_lr: 0.001
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [240000, 300000]
OptimizerBuilder:
optimizer:
momentum: 0.0
type: RMSPropOptimizer
regularizer:
factor: 0.0005
type: L2
SSDTrainFeed:
batch_size: 8
use_process: True
dataset:
dataset_dir: dataset/wider_face
annotation: wider_face_split/wider_face_train_bbx_gt.txt
image_dir: WIDER_train/images
image_shape: [3, 640, 640]
sample_transforms:
- !DecodeImage
to_rgb: true
with_mixup: false
- !NormalizeBox {}
- !RandomDistort
brightness_lower: 0.875
brightness_upper: 1.125
is_order: true
- !ExpandImage
max_ratio: 4
prob: 0.5
- !CropImageWithDataAchorSampling
anchor_sampler:
- [1, 10, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.2, 0.0]
batch_sampler:
- [1, 50, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0]
- [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0]
- [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0]
- [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0]
- [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0]
target_size: 640
- !RandomInterpImage
target_size: 640
- !RandomFlipImage
is_normalized: true
- !Permute {}
- !NormalizeImage
is_scale: false
mean: [104, 117, 123]
std: [127.502231, 127.502231, 127.502231]
SSDEvalFeed:
batch_size: 1
use_process: false
fields: ['image', 'im_id', 'gt_box']
dataset:
dataset_dir: dataset/wider_face
annotation: wider_face_split/wider_face_val_bbx_gt.txt
image_dir: WIDER_val/images
drop_last: false
image_shape: [3, 640, 640]
sample_transforms:
- !DecodeImage
to_rgb: true
with_mixup: false
- !NormalizeBox {}
- !ResizeImage
interp: 1
target_size: 640
use_cv2: false
- !Permute {}
- !NormalizeImage
is_scale: false
mean: [104, 117, 123]
std: [127.502231, 127.502231, 127.502231]
SSDTestFeed:
batch_size: 1
use_process: false
dataset:
use_default_label: true
drop_last: false
image_shape: [3, 640, 640]
sample_transforms:
- !DecodeImage
to_rgb: true
with_mixup: false
- !ResizeImage
interp: 1
target_size: 640
use_cv2: false
- !Permute {}
- !NormalizeImage
is_scale: false
mean: [104, 117, 123]
std: [127.502231, 127.502231, 127.502231]
architecture: FaceBoxes
train_feed: SSDTrainFeed
eval_feed: SSDEvalFeed
test_feed: SSDTestFeed
pretrain_weights:
use_gpu: true
max_iters: 320000
snapshot_iter: 10000
log_smooth_window: 20
log_iter: 20
metric: WIDERFACE
save_dir: output
weights: output/faceboxes/model_final/
# 1(label_class) + 1(background)
num_classes: 2
FaceBoxes:
backbone: FaceBoxNet
densities: [[4, 2, 1], [1], [1]]
fixed_sizes: [[32., 64., 128.], [256.], [512.]]
output_decoder:
keep_top_k: 750
nms_threshold: 0.3
nms_top_k: 5000
score_threshold: 0.01
FaceBoxNet:
with_extra_blocks: true
lite_edition: false
LearningRate:
base_lr: 0.001
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [240000, 300000]
OptimizerBuilder:
optimizer:
momentum: 0.0
type: RMSPropOptimizer
regularizer:
factor: 0.0005
type: L2
SSDTrainFeed:
batch_size: 8
use_process: True
dataset:
dataset_dir: dataset/wider_face
annotation: wider_face_split/wider_face_train_bbx_gt.txt
image_dir: WIDER_train/images
image_shape: [3, 640, 640]
sample_transforms:
- !DecodeImage
to_rgb: true
with_mixup: false
- !NormalizeBox {}
- !RandomDistort
brightness_lower: 0.875
brightness_upper: 1.125
is_order: true
- !ExpandImage
max_ratio: 4
prob: 0.5
- !CropImageWithDataAchorSampling
anchor_sampler:
- [1, 10, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.2, 0.0]
batch_sampler:
- [1, 50, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0]
- [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0]
- [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0]
- [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0]
- [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0]
target_size: 640
- !RandomInterpImage
target_size: 640
- !RandomFlipImage
is_normalized: true
- !Permute {}
- !NormalizeImage
is_scale: false
mean: [104, 117, 123]
std: [127.502231, 127.502231, 127.502231]
SSDEvalFeed:
batch_size: 1
use_process: false
fields: ['image', 'im_id', 'gt_box']
dataset:
dataset_dir: dataset/wider_face
annotation: wider_face_split/wider_face_val_bbx_gt.txt
image_dir: WIDER_val/images
drop_last: false
image_shape: [3, 640, 640]
sample_transforms:
- !DecodeImage
to_rgb: true
with_mixup: false
- !NormalizeBox {}
- !ResizeImage
interp: 1
target_size: 640
use_cv2: false
- !Permute {}
- !NormalizeImage
is_scale: false
mean: [104, 117, 123]
std: [127.502231, 127.502231, 127.502231]
SSDTestFeed:
batch_size: 1
use_process: false
dataset:
use_default_label: true
drop_last: false
image_shape: [3, 640, 640]
sample_transforms:
- !DecodeImage
to_rgb: true
with_mixup: false
- !ResizeImage
interp: 1
target_size: 640
use_cv2: false
- !Permute {}
- !NormalizeImage
is_scale: false
mean: [104, 117, 123]
std: [127.502231, 127.502231, 127.502231]
architecture: FaceBoxes
train_feed: SSDTrainFeed
eval_feed: SSDEvalFeed
test_feed: SSDTestFeed
pretrain_weights:
use_gpu: true
max_iters: 320000
snapshot_iter: 10000
log_smooth_window: 20
log_iter: 20
metric: WIDERFACE
save_dir: output
weights: output/faceboxes_lite/model_final/
# 1(label_class) + 1(background)
num_classes: 2
FaceBoxes:
backbone: FaceBoxNet
densities: [[2, 1, 1], [1, 1]]
fixed_sizes: [[16., 32., 64.], [96., 128.]]
output_decoder:
keep_top_k: 750
nms_threshold: 0.3
nms_top_k: 5000
score_threshold: 0.01
FaceBoxNet:
with_extra_blocks: true
lite_edition: true
LearningRate:
base_lr: 0.001
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [240000, 300000]
OptimizerBuilder:
optimizer:
momentum: 0.0
type: RMSPropOptimizer
regularizer:
factor: 0.0005
type: L2
SSDTrainFeed:
batch_size: 8
use_process: True
dataset:
dataset_dir: dataset/wider_face
annotation: wider_face_split/wider_face_train_bbx_gt.txt
image_dir: WIDER_train/images
image_shape: [3, 640, 640]
sample_transforms:
- !DecodeImage
to_rgb: true
with_mixup: false
- !NormalizeBox {}
- !RandomDistort
brightness_lower: 0.875
brightness_upper: 1.125
is_order: true
- !ExpandImage
max_ratio: 4
prob: 0.5
- !CropImageWithDataAchorSampling
anchor_sampler:
- [1, 10, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.2, 0.0]
batch_sampler:
- [1, 50, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0]
- [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0]
- [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0]
- [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0]
- [1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0]
target_size: 640
- !RandomInterpImage
target_size: 640
- !RandomFlipImage
is_normalized: true
- !Permute {}
- !NormalizeImage
is_scale: false
mean: [104, 117, 123]
std: [127.502231, 127.502231, 127.502231]
SSDEvalFeed:
batch_size: 1
use_process: false
fields: ['image', 'im_id', 'gt_box']
dataset:
dataset_dir: dataset/wider_face
annotation: wider_face_split/wider_face_val_bbx_gt.txt
image_dir: WIDER_val/images
drop_last: false
image_shape: [3, 640, 640]
sample_transforms:
- !DecodeImage
to_rgb: true
with_mixup: false
- !NormalizeBox {}
- !ResizeImage
interp: 1
target_size: 640
use_cv2: false
- !Permute {}
- !NormalizeImage
is_scale: false
mean: [104, 117, 123]
std: [127.502231, 127.502231, 127.502231]
SSDTestFeed:
batch_size: 1
use_process: false
dataset:
use_default_label: true
drop_last: false
image_shape: [3, 640, 640]
sample_transforms:
- !DecodeImage
to_rgb: true
with_mixup: false
- !ResizeImage
interp: 1
target_size: 640
use_cv2: false
- !Permute {}
- !NormalizeImage
is_scale: false
mean: [104, 117, 123]
std: [127.502231, 127.502231, 127.502231]
architecture: YOLOv3
train_feed: YoloTrainFeed
eval_feed: YoloEvalFeed
test_feed: YoloTestFeed
use_gpu: true
max_iters: 20000
log_smooth_window: 20
save_dir: output
snapshot_iter: 200
metric: VOC
map_type: 11point
pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1.tar
weights: output/yolov3_mobilenet_v1_fruit/best_model
num_classes: 3
finetune_exclude_pretrained_params: ['yolo_output']
YOLOv3:
backbone: MobileNet
yolo_head: YOLOv3Head
MobileNet:
norm_type: sync_bn
norm_decay: 0.
conv_group_scale: 1
with_extra_blocks: false
YOLOv3Head:
anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
anchors: [[10, 13], [16, 30], [33, 23],
[30, 61], [62, 45], [59, 119],
[116, 90], [156, 198], [373, 326]]
norm_decay: 0.
ignore_thresh: 0.7
label_smooth: true
nms:
background_label: -1
keep_top_k: 100
nms_threshold: 0.45
nms_top_k: 1000
normalized: false
score_threshold: 0.01
LearningRate:
base_lr: 0.00001
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones:
- 15000
- 18000
- !LinearWarmup
start_factor: 0.
steps: 100
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0005
type: L2
YoloTrainFeed:
batch_size: 1
dataset:
dataset_dir: dataset/fruit/fruit-detection
annotation: ./ImageSets/Main/train.txt
image_dir: ./JPEGImages
use_default_label: false
num_workers: 16
bufsize: 128
use_process: true
mixup_epoch: -1
sample_transforms:
- !DecodeImage
to_rgb: true
with_mixup: false
- !NormalizeBox {}
- !ExpandImage
max_ratio: 4.0
mean: [123.675, 116.28, 103.53]
prob: 0.5
- !RandomInterpImage
max_size: 0
target_size: 608
- !RandomFlipImage
is_mask_flip: false
is_normalized: true
prob: 0.5
- !NormalizeImage
is_channel_first: false
is_scale: true
mean:
- 0.485
- 0.456
- 0.406
std:
- 0.229
- 0.224
- 0.225
- !Permute
channel_first: true
to_bgr: false
batch_transforms:
- !RandomShape
sizes: [608]
with_background: false
YoloEvalFeed:
batch_size: 1
image_shape: [3, 608, 608]
dataset:
dataset_dir: dataset/fruit/fruit-detection
annotation: ./ImageSets/Main/val.txt
image_dir: ./JPEGImages
use_default_label: false
YoloTestFeed:
batch_size: 1
image_shape: [3, 608, 608]
dataset:
dataset_dir: dataset/fruit/fruit-detection
annotation: ./ImageSets/Main/label_list.txt
use_default_label: false
DIR="$( cd "$(dirname "$0")" ; pwd -P )"
cd "$DIR"
# Download the data.
echo "Downloading..."
wget https://dataset.bj.bcebos.com/PaddleDetection_demo/fruit-detection.tar
# Extract the data.
echo "Extracting..."
tar xvf fruit-detection.tar
cd fruit-detection
tar xvf Annotations.tar
tar xvf ImageSets.tar
tar xvf JPEGImages.tar
rm -rf ./*.tar
@@ -37,9 +37,12 @@ python tools/train.py -c configs/faster_rcnn_r50_1x.yml -o use_gpu=false
- `--eval`: Whether to perform evaluation in training, default is `False`
- `--output_eval`: If evaluation is performed during training, this sets the evaluation output directory, default is the current directory.
- `-d` or `--dataset_dir`: Dataset path, same as `dataset_dir` of configs. Such as: `-d dataset/coco`
- `-o`: Set configuration options in config file. Such as: `-o max_iters=180000`
- `-c`: Select the config file; all config files are saved in `configs/`
- `-o`: Set configuration options in the config file. Such as: `-o max_iters=180000`. Options set via `-o` take precedence over the config file selected by `-c`
- `--use_tb`: Whether to record the data with [tb-paddle](https://github.com/linshuliang/tb-paddle) so that it can be displayed in TensorBoard, default is `False`
- `--tb_log_dir`: tb-paddle logging directory for scalar, default is `tb_log_dir/scalar`
- `--fp16`: Whether to enable mixed precision training (requires GPU), default is `False`
- `--loss_scale`: Loss scaling factor for mixed precision training, default is `8.0`
##### Examples
@@ -57,7 +60,7 @@ is time-consuming in training, we suggest decreasing evaluation times or eva
the best model with the highest mAP is saved at each `snapshot_iter`. `best_model` has the same path as `model_final`.
- configuration options and assign Dataset path
- Configure dataset path
```bash
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export PYTHONPATH=$PYTHONPATH:.
@@ -65,6 +68,17 @@ python -u tools/train.py -c configs/faster_rcnn_r50_1x.yml \
-d dataset/coco
```
- Fine-tune other task
When using a pre-trained model to fine-tune another task, the pre-trained parameters to exclude can be set by `finetune_exclude_pretrained_params` in the YAML config or by `-o finetune_exclude_pretrained_params` in the arguments.
```bash
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export PYTHONPATH=$PYTHONPATH:.
python -u tools/train.py -c configs/faster_rcnn_r50_1x.yml \
-o pretrain_weights=output/faster_rcnn_r50_1x/model_final/ \
        finetune_exclude_pretrained_params=['cls_score','bbox_pred']
```
##### NOTES
@@ -73,6 +87,7 @@ python -u tools/train.py -c configs/faster_rcnn_r50_1x.yml \
- Dataset will be downloaded automatically and cached in `~/.cache/paddle/dataset` if not be found locally.
- Pretrained model is downloaded automatically and cached in `~/.cache/paddle/weights`.
- Model checkpoints are saved in `output` by default (configurable).
- When fine-tuning, users can set `pretrain_weights` to one of the models published by PaddlePaddle. Parameters matching the fields in `finetune_exclude_pretrained_params` (wildcard matching is supported) are skipped during loading. For detailed information, please refer to [Transfer Learning](TRANSFER_LEARNING.md).
- To check out hyper parameters used, please refer to the [configs](../configs).
- RCNN models training on CPU is not supported on PaddlePaddle<=1.5.1 and will be fixed on later version.
@@ -80,7 +95,6 @@ python -u tools/train.py -c configs/faster_rcnn_r50_1x.yml \
## Evaluation
```bash
# run on GPU with:
export PYTHONPATH=$PYTHONPATH:.
@@ -97,7 +111,7 @@ python tools/eval.py -c configs/faster_rcnn_r50_1x.yml
#### Examples
- configuration options && assign Dataset path
- Evaluate by specified weights path and dataset path
```bash
# run on GPU with:
export PYTHONPATH=$PYTHONPATH:.
@@ -107,7 +121,7 @@ python -u tools/eval.py -c configs/faster_rcnn_r50_1x.yml \
-d dataset/coco
```
- Evaluation with json
- Evaluate with json
```bash
# run on GPU with:
export PYTHONPATH=$PYTHONPATH:.
@@ -171,11 +185,11 @@ python tools/infer.py -c configs/faster_rcnn_r50_1x.yml \
    --use_tb=True
```
The visualization files are saved in `output` by default, to specify a different path, simply add a `--output_dir=` flag.
`--draw_threshold` is an optional argument. Default is 0.5.
Different thresholds will produce different results depending on the calculation of [NMS](https://ieeexplore.ieee.org/document/1699659).
If users want to run inference with a customized model path, `-o weights` can be set to the specified path.
`--use_tb` is an optional argument; if `--use_tb` is `True`, tb-paddle will record the data in the logging directory,
so users can see the results in TensorBoard.
- Save inference model
@@ -196,18 +210,25 @@ Save the inference model by setting `--save_inference_model`, which can be loaded by Pad
**Q:** Why do I get `NaN` loss values during single GPU training? </br>
**A:** The default learning rate is tuned for multi-GPU training (8 GPUs); it must
be adapted for single-GPU training accordingly (e.g., divided by 8).
The calculation rules are as follows and they are equivalent: </br>
| GPU number | Learning rate | Max_iters | Milestones |
| :---------: | :------------: | :-------: | :--------------: |
| 2 | 0.0025 | 720000 | [480000, 640000] |
| 4 | 0.005 | 360000 | [240000, 320000] |
| 8 | 0.01 | 180000 | [120000, 160000] |
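
The table follows a linear scaling rule. With $N$ GPUs (a sketch derived from the rows above; the single-GPU numbers are an extrapolation, not from the original table):

$$\mathrm{lr}(N) = 0.01 \times \frac{N}{8}, \qquad \mathrm{max\_iters}(N) = 180000 \times \frac{8}{N}, \qquad \mathrm{milestones}(N) = [120000, 160000] \times \frac{8}{N}$$

For a single GPU this gives $\mathrm{lr} = 0.00125$ and $\mathrm{max\_iters} = 1440000$.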
**Q:** How to reduce GPU memory usage? </br>
**A:** Setting the environment variable `FLAGS_conv_workspace_size_limit` to a smaller
number can reduce the GPU memory footprint without affecting training speed.
Taking Mask-RCNN (R50) as an example, by setting `export FLAGS_conv_workspace_size_limit=512`,
the batch size can reach 4 per GPU (Tesla V100 16GB).
**Q:** How to change data preprocessing? </br>
**A:** Set `sample_transforms` in the configuration. Note that **the complete set of transforms** needs to be listed in the configuration,
for example, `DecodeImage`, `NormalizeImage` and `Permute` in RCNN models. For a detailed description, please refer
to [config_example](config_example).
@@ -38,9 +38,12 @@ python tools/train.py -c configs/faster_rcnn_r50_1x.yml -o use_gpu=false
- `--eval`: Whether to evaluate while training, default is `False`
- `--output_eval`: If evaluating while training, this sets the directory for the evaluation JSON output, default is the current directory.
- `-d` or `--dataset_dir`: Dataset path, same as `dataset_dir` in the configs. E.g.: `-d dataset/coco`
- `-o`: Set configuration options from the config file. E.g.: `-o max_iters=180000`
- `-c`: Select the config file; all config files are under `configs/`
- `-o`: Set configuration options from the config file. E.g.: `-o max_iters=180000`. Options set via `-o` take precedence over the config file selected via `-c`.
- `--use_tb`: Whether to record data with [tb-paddle](https://github.com/linshuliang/tb-paddle) for display in TensorBoard, default is `False`.
- `--tb_log_dir`: Directory where tb-paddle stores its records, default is `tb_log_dir/scalar`
- `--fp16`: Whether to use mixed-precision training (GPU required), default is `False`
- `--loss_scale`: Loss scaling factor for mixed-precision training, default is `8.0`
##### Examples
@@ -57,7 +60,7 @@ python -u tools/train.py -c configs/faster_rcnn_r50_1x.yml --eval
When evaluating while training, the model with the best mAP is saved to the
`best_model` folder at each snapshot\_iter; the path of `best_model` is the same as that of `model_final`.
- Set configuration options && specify the dataset path
- Specify the dataset path
```bash
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
@@ -66,6 +69,18 @@ python -u tools/train.py -c configs/faster_rcnn_r50_1x.yml \
-d dataset/coco
```
- Fine-tune other tasks
When fine-tuning other tasks with a pre-trained model, set `finetune_exclude_pretrained_params` in the YAML config file or add `-o finetune_exclude_pretrained_params` on the command line to load the pre-trained model selectively.
```bash
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
export PYTHONPATH=$PYTHONPATH:.
python -u tools/train.py -c configs/faster_rcnn_r50_1x.yml \
-o pretrain_weights=output/faster_rcnn_r50_1x/model_final/ \
        finetune_exclude_pretrained_params=['cls_score','bbox_pred']
```
##### Notes
- `CUDA_VISIBLE_DEVICES` can specify different GPUs, e.g.: `export CUDA_VISIBLE_DEVICES=0,1,2,3`. The GPU calculation rules can be found in the [FAQ](#faq)
@@ -73,6 +88,7 @@ python -u tools/train.py -c configs/faster_rcnn_r50_1x.yml \
- If the dataset is not found locally, it will be downloaded automatically and saved in `~/.cache/paddle/dataset`.
- Pre-trained models are downloaded automatically and saved in `~/.cache/paddle/weights`.
- Model checkpoints are saved in `output` by default (configurable).
- When fine-tuning, users can set `pretrain_weights` to a model published by PaddlePaddle; parameters matching the fields in `finetune_exclude_pretrained_params` (wildcard matching is supported) are not loaded. For details, please refer to [Transfer Learning](TRANSFER_LEARNING_cn.md)
- For more parameter configurations, please refer to the [config files](../configs)
- CPU training for RCNN models is not supported on PaddlePaddle 1.5.1 and earlier; this will be fixed in a later version.
@@ -96,7 +112,7 @@ python tools/eval.py -c configs/faster_rcnn_r50_1x.yml
#### Examples
- Set configuration options && specify the dataset path
- Specify the dataset path
```bash
# evaluate on GPU
export CUDA_VISIBLE_DEVICES=0
@@ -170,7 +186,7 @@ python tools/infer.py -c configs/faster_rcnn_r50_1x.yml \
```
Visualization files are saved in `output` by default; a different output path can be specified via `--output_dir=`.
`--draw_threshold` is an optional argument. Per the [NMS](https://ieeexplore.ieee.org/document/1699659) computation,
different thresholds produce different results. To run inference on a model at a customized path, set `-o weights` to that path.
`--use_tb` is an optional argument; when `True`, TensorBoard can be used to visualize the trends of parameters and the images.
@@ -191,12 +207,12 @@ python tools/infer.py -c configs/faster_rcnn_r50_1x.yml --infer_img=demo/0000005
## FAQ
**Q:** Why do I get `NaN` loss values with single-GPU training? </br>
**A:** The default learning rate is tuned for multi-GPU training (8 GPUs); for single-GPU training, the learning rate must be adjusted accordingly (e.g., divided by 8).
The calculation rules below are equivalent: </br>
| GPU count | Learning rate | Max_iters | Milestones |
| :---------: | :------------: | :-------: | :--------------: |
| 2 | 0.0025 | 720000 | [480000, 640000] |
| 4 | 0.005 | 360000 | [240000, 320000] |
| 8 | 0.01 | 180000 | [120000, 160000] |
@@ -206,3 +222,8 @@ python tools/infer.py -c configs/faster_rcnn_r50_1x.yml --infer_img=demo/0000005
**A:** GPU memory consumption can be reduced by setting the environment variable `FLAGS_conv_workspace_size_limit` to a smaller value without
affecting training speed. Taking Mask-RCNN (R50) as an example, setting `export FLAGS_conv_workspace_size_limit=512`
allows the batch size to reach 4 per GPU (Tesla V100 16GB).
**Q:** How do I modify the data preprocessing? </br>
**A:** Set `sample_transforms` in the config file. Note that the **complete preprocessing pipeline** needs to be included,
e.g. `DecodeImage`, `NormalizeImage` and `Permute` for RCNN models. For more details, please refer to the [config example](config_example)
English | [简体中文](QUICK_STARTED_cn.md)
# Quick Start
This tutorial fine-tunes a pretrained detection model on a tiny dataset so that users can quickly produce a model and learn PaddleDetection. A model with good performance can be trained in around 15 minutes.
## Data Preparation
The dataset comes from [Kaggle](https://www.kaggle.com/mbkinaci/fruit-images-for-object-detection) and contains 240 training images and 60 test images in three categories: apple, orange and banana. Download it [here](https://dataset.bj.bcebos.com/PaddleDetection_demo/fruit-detection.tar) and uncompress it after download; the data-preparation script is located at [download.sh](../dataset/fruit/download.sh). The command is as follows:
```bash
cd dataset/fruit
sh download.sh
```
- **Note: before getting started, run the following commands and specify the GPU**
```bash
export PYTHONPATH=$PYTHONPATH:.
export CUDA_VISIBLE_DEVICES=0
```
Training:
```bash
python -u tools/train.py -c configs/yolov3_mobilenet_v1_fruit.yml \
--use_tb=True \
--tb_log_dir=tb_fruit_dir/scalar \
        --eval
```
Training fine-tunes a `yolov3_mobilenet_v1` model pretrained on the COCO dataset. Meanwhile, loss and mAP can be observed through TensorBoard, launched as follows:
```bash
tensorboard --logdir tb_fruit_dir/scalar/ --host <host_IP> --port <port_num>
```
The TensorBoard output is shown below:
<div align="center">
<img src="../demo/tensorboard_fruit.jpg" />
</div>
Model can be downloaded [here](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1_fruit.tar)
Evaluation:
```bash
python -u tools/eval.py -c configs/yolov3_mobilenet_v1_fruit.yml
```
Inference:
```bash
python -u tools/infer.py -c configs/yolov3_mobilenet_v1_fruit.yml \
-o weights=https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1_fruit.tar \
--infer_img=demo/orange_71.jpg
```
Inference images are shown below:
<p align="center">
<img src="../demo/orange_71.jpg" height=400 width=400 hspace='10'/>
<img src="../demo/orange_71_detection.jpg" height=400 width=400 hspace='10'/>
</p>
For detailed information on training and evaluation, please refer to [GETTING_STARTED.md](GETTING_STARTED.md).
[English](QUICK_STARTED.md) | 简体中文
# 快速开始
To let users produce a model and master PaddleDetection in a very short time, this tutorial fine-tunes a pretrained detection model on a small dataset. A reasonably good model can be produced in about 15 minutes on a single P40 card.
## 数据准备
The dataset comes from [Kaggle](https://www.kaggle.com/mbkinaci/fruit-images-for-object-detection), with 240 training images and 60 test images in three categories: apple, orange and banana. [Download link](https://dataset.bj.bcebos.com/PaddleDetection_demo/fruit-detection.tar). Simply extract the archives after downloading; the data-preparation script is located at [download.sh](../dataset/fruit/download.sh). Download the data as follows:
```bash
cd dataset/fruit
sh download.sh
```
- **Note: before starting, run the following commands and specify the GPU**
```bash
export PYTHONPATH=$PYTHONPATH:.
export CUDA_VISIBLE_DEVICES=0
```
The training command is as follows:
```bash
python -u tools/train.py -c configs/yolov3_mobilenet_v1_fruit.yml \
--use_tb=True \
--tb_log_dir=tb_fruit_dir/scalar \
        --eval
```
Training fine-tunes a `yolov3_mobilenet_v1` model pretrained on the COCO dataset. During training, loss and accuracy can be observed in real time through TensorBoard, launched as follows:
```bash
tensorboard --logdir tb_fruit_dir/scalar/ --host <host_IP> --port <port_num>
```
The TensorBoard output is shown below:
<div align="center">
<img src="../demo/tensorboard_fruit.jpg" />
</div>
The trained model can be downloaded [here](https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1_fruit.tar)
The evaluation command is as follows:
```bash
python -u tools/eval.py -c configs/yolov3_mobilenet_v1_fruit.yml
```
The inference command is as follows:
```bash
python -u tools/infer.py -c configs/yolov3_mobilenet_v1_fruit.yml \
-o weights=https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1_fruit.tar \
--infer_img=demo/orange_71.jpg
```
The inference results are shown below:
<p align="center">
<img src="../demo/orange_71.jpg" height=400 width=400 hspace='10'/>
<img src="../demo/orange_71_detection.jpg" height=400 width=400 hspace='10'/>
</p>
For more details on training and evaluation, please refer to [GETTING_STARTED_cn.md](GETTING_STARTED_cn.md).
# Transfer Learning
Transfer learning aims at learning new knowledge from existing knowledge. For example, a model pretrained on ImageNet can be used to initialize detection models, or a model pretrained on the COCO dataset can be used to initialize detection models trained on the PascalVOC dataset.
In transfer learning, if a dataset with a different number of classes is used, loading the parameters related to the number of classes fails with a dimension mismatch; on the other hand, if a more complicated model is needed, the open-source model structure must be modified and its parameters loaded selectively. Thus, PaddleDetection needs to let users designate parameter fields and skip loading the parameters that match those fields.
## Transfer Learning in PaddleDetection
In transfer learning, the pretrained model needs to be loaded selectively. Set `finetune_exclude_pretrained_params` in the YAML configuration file or set `-o finetune_exclude_pretrained_params` on the command line.
```bash
export PYTHONPATH=$PYTHONPATH:.
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
python -u tools/train.py -c configs/faster_rcnn_r50_1x.yml \
-o pretrain_weights=https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_1x.tar \
finetune_exclude_pretrained_params=['cls_score','bbox_pred']
```
* Note:
1. The path in pretrain\_weights is the link to the open-source Faster RCNN model trained on the COCO dataset. For the full list of model links, please refer to [MODEL_ZOO](MODEL_ZOO.md)
2. The parameter fields are set in finetune\_exclude\_pretrained\_params. If the name of a parameter matches a field (wildcard matching), the parameter is skipped during loading.
If users want to fine-tune on their own dataset while keeping the model structure, only the parameters related to the number of classes need to be ignored. PaddleDetection lists the ignored parameter fields for each model type in the table below; a short sketch of the matching rule follows the table. </br>
| model type | ignored parameter fields |
| :----------------: | :---------------------------------------: |
| Faster RCNN | cls\_score, bbox\_pred |
| Cascade RCNN | cls\_score, bbox\_pred |
| Mask RCNN | cls\_score, bbox\_pred, mask\_fcn\_logits |
| Cascade-Mask RCNN | cls\_score, bbox\_pred, mask\_fcn\_logits |
| RetinaNet | retnet\_cls\_pred\_fpn |
| SSD | ^conv2d\_ |
| YOLOv3 | yolo\_output |
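
As a rough illustration of the wildcard matching described above (a minimal sketch, not PaddleDetection's actual loading code; the `should_skip` helper and the use of `re.search` are assumptions for illustration, chosen because fields such as `^conv2d\_` look like anchored patterns):

```python
import re

# fields as they might appear in finetune_exclude_pretrained_params
exclude_fields = ['cls_score', 'bbox_pred', '^conv2d_']

def should_skip(param_name, fields=exclude_fields):
    """Return True if a pretrained parameter should be skipped when loading."""
    # a field may match anywhere in the name, or be anchored with '^'
    return any(re.search(f, param_name) for f in fields)

print(should_skip('cls_score_w'))    # True  -> not loaded
print(should_skip('res2a_weights'))  # False -> loaded
```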
# Transfer Learning
Transfer learning leverages existing knowledge to learn new knowledge. For example, a classification model pretrained on ImageNet can be used to initialize detection models, or a detection model trained on the COCO dataset can be used to initialize detection models for the PascalVOC dataset.
In transfer learning, a different dataset is used whose number of classes differs from COCO/VOC, so the class-number-related weights in the released PaddlePaddle models (e.g. the fc layer of the classification head) have mismatched dimensions when loaded; moreover, if a more complex model structure is needed, the open-source model structure has to be adjusted and the corresponding weights loaded selectively. The detection library therefore needs to let users designate parameter fields so that matching weights are not loaded.
## Transfer Learning with PaddleDetection
In transfer learning, the pretrained model is loaded selectively, either by setting the `finetune_exclude_pretrained_params` field in the YAML configuration file or by passing `-o finetune_exclude_pretrained_params` in the launch arguments of train.py.
```bash
export PYTHONPATH=$PYTHONPATH:.
export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7
python -u tools/train.py -c configs/faster_rcnn_r50_1x.yml \
-o pretrain_weights=https://paddlemodels.bj.bcebos.com/object_detection/faster_rcnn_r50_1x.tar \
finetune_exclude_pretrained_params=['cls_score','bbox_pred']
```
* Notes:
1. The pretrain\_weights path is the link to the open-source Faster RCNN model trained on the COCO dataset; the full list of model links can be found in [MODEL_ZOO](MODEL_ZOO_cn.md)
2. Parameter fields are set in finetune\_exclude\_pretrained\_params. If a parameter's name matches one of the fields (wildcard matching), the parameter is ignored when the model is loaded.
If users fine-tune with their own data while keeping the model structure unchanged, only the parameters related to the number of classes need to be ignored. The ignored parameter fields for each model type are listed in the table below: </br>
| model type | ignored parameter fields |
| :----------------: | :---------------------------------------: |
| Faster RCNN | cls\_score, bbox\_pred |
| Cascade RCNN | cls\_score, bbox\_pred |
| Mask RCNN | cls\_score, bbox\_pred, mask\_fcn\_logits |
| Cascade-Mask RCNN | cls\_score, bbox\_pred, mask\_fcn\_logits |
| RetinaNet | retnet\_cls\_pred\_fpn |
| SSD | ^conv2d\_ |
| YOLOv3 | yolo\_output |
@@ -37,6 +37,10 @@ from __future__ import absolute_import
from .dataset import Dataset
from .reader import Reader
from .data_feed import create_reader
__all__ = ['Dataset', 'Reader', 'create_reader']
import traceback
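# NOTE (comment added for clarity): importing create_reader pulls in the full
# data_feed module; the call-stack check below presumably lets the standalone
# data-generation tool import this package without that heavier dependency.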
if traceback.extract_stack()[0][0] == 'ppdet/data/tools/generate_data_for_training.py':
__all__ = ['Dataset', 'Reader']
else:
from .data_feed import create_reader
__all__ = ['Dataset', 'Reader', 'create_reader']
@@ -30,8 +30,8 @@ from ppdet.data.transform.operators import (
Permute)
from ppdet.data.transform.arrange_sample import (
ArrangeRCNN, ArrangeTestRCNN, ArrangeSSD, ArrangeEvalSSD, ArrangeTestSSD,
ArrangeYOLO, ArrangeEvalYOLO, ArrangeTestYOLO)
ArrangeRCNN, ArrangeEvalRCNN, ArrangeTestRCNN, ArrangeSSD, ArrangeEvalSSD,
ArrangeTestSSD, ArrangeYOLO, ArrangeEvalYOLO, ArrangeTestYOLO)
__all__ = [
'PadBatch', 'MultiScale', 'RandomShape', 'DataSet', 'CocoDataSet',
@@ -476,7 +476,8 @@ class FasterRCNNEvalFeed(DataFeed):
def __init__(self,
dataset=CocoDataSet(COCO_VAL_ANNOTATION,
COCO_VAL_IMAGE_DIR).__dict__,
fields=['image', 'im_info', 'im_id', 'im_shape'],
fields=['image', 'im_info', 'im_id', 'im_shape', 'gt_box',
'gt_label', 'is_difficult'],
image_shape=[3, 800, 1333],
sample_transforms=[
DecodeImage(to_rgb=True),
@@ -494,7 +495,7 @@ class FasterRCNNEvalFeed(DataFeed):
drop_last=False,
num_workers=2,
use_padded_im_info=True):
sample_transforms.append(ArrangeTestRCNN())
sample_transforms.append(ArrangeEvalRCNN())
super(FasterRCNNEvalFeed, self).__init__(
dataset,
fields,
@@ -780,7 +781,7 @@ class SSDEvalFeed(DataFeed):
bufsize=10,
use_process=False,
memsize=None):
sample_transforms.append(ArrangeEvalSSD())
sample_transforms.append(ArrangeEvalSSD(fields))
super(SSDEvalFeed, self).__init__(
dataset,
fields,
......
@@ -120,6 +120,10 @@ def load(fname,
elif fname.endswith('.json'):
from . import coco_loader
records, cname2cid = coco_loader.load(fname, samples, with_background)
elif "wider_face" in fname:
from . import widerface_loader
records = widerface_loader.load(fname, samples)
return records
elif os.path.isfile(fname):
from . import voc_loader
if use_default_label is None or cname2cid is not None:
......
@@ -59,13 +59,13 @@ class RoiDbSource(Dataset):
"""
super(RoiDbSource, self).__init__()
self._epoch = -1
assert os.path.isfile(anno_file) or os.path.isdir(
anno_file), 'invalid file[%s] for RoiDbSource' % (anno_file)
assert os.path.isfile(anno_file) or os.path.isdir(anno_file), \
'anno_file {} is not a file or a directory'.format(anno_file)
self._fname = anno_file
self._image_dir = image_dir
if image_dir is not None:
assert os.path.isdir(image_dir), 'invalid image directory[%s]' % (
image_dir)
assert os.path.isdir(image_dir), \
'image_dir {} is not a directory'.format(image_dir)
self._roidb = None
self._pos = -1
self._drained = False
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import os
import numpy as np
import logging
logger = logging.getLogger(__name__)
def load(anno_path,
sample_num=-1,
cname2cid=None,
with_background=True):
"""
Load WiderFace records with 'anno_path'
Args:
        anno_path (str): path to the WIDER FACE annotation file
sample_num (int): number of samples to load, -1 means all
        with_background (bool): whether to load background as a class.
                                if True, the total class number will
                                be 2. Default: True.
Returns:
        (records, cname2cid)
'records' is list of dict whose structure is:
{
'im_file': im_fname, # image file name
'im_id': im_id, # image id
'gt_class': gt_class,
'gt_bbox': gt_bbox,
}
        'cname2cid' is a dict used to map category name to class id
"""
txt_file = anno_path
records = []
ct = 0
file_lists = _load_file_list(txt_file)
cname2cid = widerface_label(with_background)
for item in file_lists:
im_fname = item[0]
im_id = np.array([ct])
gt_bbox = np.zeros((len(item) - 2, 4), dtype=np.float32)
gt_class = np.ones((len(item) - 2, 1), dtype=np.int32)
for index_box in range(len(item)):
if index_box >= 2:
temp_info_box = item[index_box].split(' ')
xmin = float(temp_info_box[0])
ymin = float(temp_info_box[1])
w = float(temp_info_box[2])
h = float(temp_info_box[3])
# Filter out wrong labels
if w < 0 or h < 0:
continue
xmin = max(0, xmin)
ymin = max(0, ymin)
xmax = xmin + w
ymax = ymin + h
gt_bbox[index_box - 2] = [xmin, ymin, xmax, ymax]
widerface_rec = {
'im_file': im_fname,
'im_id': im_id,
'gt_bbox': gt_bbox,
'gt_class': gt_class,
}
if len(item) != 0:
records.append(widerface_rec)
ct += 1
if sample_num > 0 and ct >= sample_num:
break
    assert len(records) > 0, 'no WIDER FACE records found in %s' % (anno_path)
logger.info('{} samples in file {}'.format(ct, anno_path))
return records, cname2cid
def _load_file_list(input_txt):
with open(input_txt, 'r') as f_dir:
lines_input_txt = f_dir.readlines()
file_dict = {}
num_class = 0
for i in range(len(lines_input_txt)):
line_txt = lines_input_txt[i].strip('\n\t\r')
if '.jpg' in line_txt:
if i != 0:
num_class += 1
file_dict[num_class] = []
file_dict[num_class].append(line_txt)
if '.jpg' not in line_txt:
if len(line_txt) > 6:
split_str = line_txt.split(' ')
x1_min = float(split_str[0])
y1_min = float(split_str[1])
x2_max = float(split_str[2])
y2_max = float(split_str[3])
line_txt = str(x1_min) + ' ' + str(y1_min) + ' ' + str(
x2_max) + ' ' + str(y2_max)
file_dict[num_class].append(line_txt)
else:
file_dict[num_class].append(line_txt)
return list(file_dict.values())
def widerface_label(with_background=True):
labels_map = {
'face': 1
}
if not with_background:
labels_map = {k: v - 1 for k, v in labels_map.items()}
return labels_map
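

# Hypothetical usage sketch (comment only; the annotation path is taken from
# the YAML configs above and is an assumption, not part of this module):
#   records, cname2cid = load(
#       'dataset/wider_face/wider_face_split/wider_face_train_bbx_gt.txt',
#       sample_num=2)
#   records[0]['gt_bbox']  # float32 array of [xmin, ymin, xmax, ymax] rows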
@@ -90,6 +90,47 @@ class ArrangeRCNN(BaseOperator):
return outs
@register_op
class ArrangeEvalRCNN(BaseOperator):
"""
Transform dict to the tuple format needed for evaluation.
"""
def __init__(self):
super(ArrangeEvalRCNN, self).__init__()
def __call__(self, sample, context=None):
"""
Args:
sample: a dict which contains image
info and annotation info.
context: a dict which contains additional info.
Returns:
sample: a tuple containing the following items:
(image, im_info, im_id, im_shape, gt_bbox,
gt_class, difficult)
"""
im = sample['image']
keys = list(sample.keys())
if 'im_info' in keys:
im_info = sample['im_info']
else:
raise KeyError("The dataset doesn't have 'im_info' key.")
im_id = sample['im_id']
h = sample['h']
w = sample['w']
# For rcnn models in eval and infer stage, original image size
# is needed to clip the bounding boxes. And box clip op in
# bbox prediction needs im_info as input in format of [N, 3],
# so im_shape is appended by 1 to match dimension.
im_shape = np.array((h, w, 1), dtype=np.float32)
gt_bbox = sample['gt_bbox']
gt_class = sample['gt_class']
difficult = sample['difficult']
outs = (im, im_info, im_id, im_shape, gt_bbox, gt_class, difficult)
return outs
@register_op
class ArrangeTestRCNN(BaseOperator):
"""
@@ -159,8 +200,9 @@ class ArrangeEvalSSD(BaseOperator):
    Transform dict to tuple format needed for evaluation.
"""
def __init__(self):
def __init__(self, fields):
super(ArrangeEvalSSD, self).__init__()
self.fields = fields
def __call__(self, sample, context=None):
"""
@@ -171,17 +213,26 @@ class ArrangeEvalSSD(BaseOperator):
Returns:
            sample: a tuple containing the items specified by `self.fields`.
"""
im = sample['image']
outs = []
if len(sample['gt_bbox']) != len(sample['gt_class']):
raise ValueError("gt num mismatch: bbox and class.")
im_id = sample['im_id']
h = sample['h']
w = sample['w']
im_shape = np.array((h, w))
gt_bbox = sample['gt_bbox']
gt_class = sample['gt_class']
difficult = sample['difficult']
outs = (im, im_shape, im_id, gt_bbox, gt_class, difficult)
for field in self.fields:
if field == 'im_shape':
h = sample['h']
w = sample['w']
im_shape = np.array((h, w))
outs.append(im_shape)
elif field == 'is_difficult':
outs.append(sample['difficult'])
elif field == 'gt_box':
outs.append(sample['gt_bbox'])
elif field == 'gt_label':
outs.append(sample['gt_class'])
else:
outs.append(sample[field])
outs = tuple(outs)
return outs
......
@@ -18,6 +18,9 @@ from __future__ import division
from __future__ import print_function
import numpy as np
import random
import math
import cv2
def meet_emit_constraint(src_bbox, sample_bbox):
@@ -40,20 +43,35 @@ def clip_bbox(src_bbox):
def bbox_area(src_bbox):
width = src_bbox[2] - src_bbox[0]
height = src_bbox[3] - src_bbox[1]
return width * height
if src_bbox[2] < src_bbox[0] or src_bbox[3] < src_bbox[1]:
return 0.
else:
width = src_bbox[2] - src_bbox[0]
height = src_bbox[3] - src_bbox[1]
return width * height
def is_overlap(object_bbox, sample_bbox):
if object_bbox[0] >= sample_bbox[2] or \
object_bbox[2] <= sample_bbox[0] or \
object_bbox[1] >= sample_bbox[3] or \
object_bbox[3] <= sample_bbox[1]:
return False
else:
return True
def filter_and_process(sample_bbox, bboxes, labels, scores=None):
new_bboxes = []
new_labels = []
new_scores = []
for i in range(len(labels)):
for i in range(len(bboxes)):
new_bbox = [0, 0, 0, 0]
obj_bbox = [bboxes[i][0], bboxes[i][1], bboxes[i][2], bboxes[i][3]]
if not meet_emit_constraint(obj_bbox, sample_bbox):
continue
if not is_overlap(obj_bbox, sample_bbox):
continue
sample_width = sample_bbox[2] - sample_bbox[0]
sample_height = sample_bbox[3] - sample_bbox[1]
new_bbox[0] = (obj_bbox[0] - sample_bbox[0]) / sample_width
@@ -72,6 +90,26 @@ def filter_and_process(sample_bbox, bboxes, labels, scores=None):
return bboxes, labels, scores
def bbox_area_sampling(bboxes, labels, scores, target_size, min_size):
new_bboxes = []
new_labels = []
new_scores = []
for i, bbox in enumerate(bboxes):
w = float((bbox[2] - bbox[0]) * target_size)
h = float((bbox[3] - bbox[1]) * target_size)
if w * h < float(min_size * min_size):
continue
else:
new_bboxes.append(bbox)
new_labels.append(labels[i])
if scores is not None and scores.size != 0:
new_scores.append(scores[i])
bboxes = np.array(new_bboxes)
labels = np.array(new_labels)
scores = np.array(new_scores)
return bboxes, labels, scores
def generate_sample_bbox(sampler):
scale = np.random.uniform(sampler[2], sampler[3])
aspect_ratio = np.random.uniform(sampler[4], sampler[5])
@@ -89,6 +127,112 @@ def generate_sample_bbox(sampler):
return sampled_bbox
def generate_sample_bbox_square(sampler, image_width, image_height):
scale = np.random.uniform(sampler[2], sampler[3])
aspect_ratio = np.random.uniform(sampler[4], sampler[5])
aspect_ratio = max(aspect_ratio, (scale**2.0))
aspect_ratio = min(aspect_ratio, 1 / (scale**2.0))
bbox_width = scale * (aspect_ratio**0.5)
bbox_height = scale / (aspect_ratio**0.5)
if image_height < image_width:
bbox_width = bbox_height * image_height / image_width
else:
bbox_height = bbox_width * image_width / image_height
xmin_bound = 1 - bbox_width
ymin_bound = 1 - bbox_height
xmin = np.random.uniform(0, xmin_bound)
ymin = np.random.uniform(0, ymin_bound)
xmax = xmin + bbox_width
ymax = ymin + bbox_height
sampled_bbox = [xmin, ymin, xmax, ymax]
return sampled_bbox
def data_anchor_sampling(bbox_labels, image_width, image_height, scale_array,
resize_width):
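    # Comment added for clarity: data-anchor-sampling picks one random gt box,
    # locates its area between two anchor scales in `scale_array`, draws a
    # nearby target scale, and returns a normalized square crop sized so that,
    # after the crop is resized to `resize_width`, the box lands near that scale.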
num_gt = len(bbox_labels)
# np.random.randint range: [low, high)
rand_idx = np.random.randint(0, num_gt) if num_gt != 0 else 0
if num_gt != 0:
norm_xmin = bbox_labels[rand_idx][0]
norm_ymin = bbox_labels[rand_idx][1]
norm_xmax = bbox_labels[rand_idx][2]
norm_ymax = bbox_labels[rand_idx][3]
xmin = norm_xmin * image_width
ymin = norm_ymin * image_height
wid = image_width * (norm_xmax - norm_xmin)
hei = image_height * (norm_ymax - norm_ymin)
range_size = 0
area = wid * hei
for scale_ind in range(0, len(scale_array) - 1):
if area > scale_array[scale_ind] ** 2 and area < \
scale_array[scale_ind + 1] ** 2:
range_size = scale_ind + 1
break
if area > scale_array[len(scale_array) - 2]**2:
range_size = len(scale_array) - 2
scale_choose = 0.0
if range_size == 0:
rand_idx_size = 0
else:
# np.random.randint range: [low, high)
rng_rand_size = np.random.randint(0, range_size + 1)
rand_idx_size = rng_rand_size % (range_size + 1)
if rand_idx_size == range_size:
min_resize_val = scale_array[rand_idx_size] / 2.0
max_resize_val = min(2.0 * scale_array[rand_idx_size],
2 * math.sqrt(wid * hei))
scale_choose = random.uniform(min_resize_val, max_resize_val)
else:
min_resize_val = scale_array[rand_idx_size] / 2.0
max_resize_val = 2.0 * scale_array[rand_idx_size]
scale_choose = random.uniform(min_resize_val, max_resize_val)
sample_bbox_size = wid * resize_width / scale_choose
w_off_orig = 0.0
h_off_orig = 0.0
if sample_bbox_size < max(image_height, image_width):
if wid <= sample_bbox_size:
w_off_orig = np.random.uniform(xmin + wid - sample_bbox_size,
xmin)
else:
w_off_orig = np.random.uniform(xmin,
xmin + wid - sample_bbox_size)
if hei <= sample_bbox_size:
h_off_orig = np.random.uniform(ymin + hei - sample_bbox_size,
ymin)
else:
h_off_orig = np.random.uniform(ymin,
ymin + hei - sample_bbox_size)
else:
w_off_orig = np.random.uniform(image_width - sample_bbox_size, 0.0)
h_off_orig = np.random.uniform(image_height - sample_bbox_size, 0.0)
w_off_orig = math.floor(w_off_orig)
h_off_orig = math.floor(h_off_orig)
# Figure out top left coordinates.
w_off = float(w_off_orig / image_width)
h_off = float(h_off_orig / image_height)
sampled_bbox = [
w_off, h_off, w_off + float(sample_bbox_size / image_width),
h_off + float(sample_bbox_size / image_height)
]
return sampled_bbox
else:
return 0
def jaccard_overlap(sample_bbox, object_bbox):
if sample_bbox[0] >= object_bbox[2] or \
sample_bbox[2] <= object_bbox[0] or \
@@ -108,6 +252,29 @@ def jaccard_overlap(sample_bbox, object_bbox):
return overlap
def intersect_bbox(bbox1, bbox2):
if bbox2[0] > bbox1[2] or bbox2[2] < bbox1[0] or \
bbox2[1] > bbox1[3] or bbox2[3] < bbox1[1]:
intersection_box = [0.0, 0.0, 0.0, 0.0]
else:
intersection_box = [
max(bbox1[0], bbox2[0]), max(bbox1[1], bbox2[1]),
min(bbox1[2], bbox2[2]), min(bbox1[3], bbox2[3])
]
return intersection_box
def bbox_coverage(bbox1, bbox2):
inter_box = intersect_bbox(bbox1, bbox2)
intersect_size = bbox_area(inter_box)
if intersect_size > 0:
bbox1_size = bbox_area(bbox1)
return intersect_size / bbox1_size
else:
return 0.
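

# Worked example (comment added for clarity, not in the original source):
# bbox_coverage(bbox1, bbox2) is the fraction of bbox1's area covered by bbox2,
# e.g. bbox_coverage([0., 0., 1., 1.], [0., 0., 0.5, 1.]) == 0.5, since the
# intersection [0, 0, 0.5, 1] is half of bbox1.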
def satisfy_sample_constraint(sampler,
sample_bbox,
gt_bboxes,
@@ -136,3 +303,87 @@ def satisfy_sample_constraint(sampler,
return np.all(satisfied)
else:
return False
def satisfy_sample_constraint_coverage(sampler, sample_bbox, gt_bboxes):
if sampler[6] == 0 and sampler[7] == 0:
has_jaccard_overlap = False
else:
has_jaccard_overlap = True
if sampler[8] == 0 and sampler[9] == 0:
has_object_coverage = False
else:
has_object_coverage = True
if not has_jaccard_overlap and not has_object_coverage:
return True
found = False
for i in range(len(gt_bboxes)):
object_bbox = [
gt_bboxes[i][0], gt_bboxes[i][1], gt_bboxes[i][2], gt_bboxes[i][3]
]
if has_jaccard_overlap:
overlap = jaccard_overlap(sample_bbox, object_bbox)
if sampler[6] != 0 and \
overlap < sampler[6]:
continue
if sampler[7] != 0 and \
overlap > sampler[7]:
continue
found = True
if has_object_coverage:
object_coverage = bbox_coverage(object_bbox, sample_bbox)
if sampler[8] != 0 and \
object_coverage < sampler[8]:
continue
if sampler[9] != 0 and \
object_coverage > sampler[9]:
continue
found = True
if found:
return True
return found
def crop_image_sampling(img, sample_bbox, image_width, image_height,
target_size):
# no clipping here
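    # (comment added for clarity) the sampled bbox may extend beyond the image;
    # instead of clipping, the visible region is pasted into a zero-filled
    # canvas of the full crop size, which is then resized to target_size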
xmin = int(sample_bbox[0] * image_width)
xmax = int(sample_bbox[2] * image_width)
ymin = int(sample_bbox[1] * image_height)
ymax = int(sample_bbox[3] * image_height)
w_off = xmin
h_off = ymin
width = xmax - xmin
height = ymax - ymin
cross_xmin = max(0.0, float(w_off))
cross_ymin = max(0.0, float(h_off))
cross_xmax = min(float(w_off + width - 1.0), float(image_width))
cross_ymax = min(float(h_off + height - 1.0), float(image_height))
cross_width = cross_xmax - cross_xmin
cross_height = cross_ymax - cross_ymin
roi_xmin = 0 if w_off >= 0 else abs(w_off)
roi_ymin = 0 if h_off >= 0 else abs(h_off)
roi_width = cross_width
roi_height = cross_height
roi_y1 = int(roi_ymin)
roi_y2 = int(roi_ymin + roi_height)
roi_x1 = int(roi_xmin)
roi_x2 = int(roi_xmin + roi_width)
cross_y1 = int(cross_ymin)
cross_y2 = int(cross_ymin + cross_height)
cross_x1 = int(cross_xmin)
cross_x2 = int(cross_xmin + cross_width)
sample_img = np.zeros((height, width, 3))
sample_img[roi_y1: roi_y2, roi_x1: roi_x2] = \
img[cross_y1: cross_y2, cross_x1: cross_x2]
sample_img = cv2.resize(
sample_img, (target_size, target_size), interpolation=cv2.INTER_AREA)
return sample_img
@@ -31,7 +31,9 @@ from PIL import Image, ImageEnhance
from ppdet.core.workspace import serializable
from .op_helper import (satisfy_sample_constraint, filter_and_process,
generate_sample_bbox, clip_bbox)
generate_sample_bbox, clip_bbox, data_anchor_sampling,
satisfy_sample_constraint_coverage, crop_image_sampling,
generate_sample_bbox_square, bbox_area_sampling)
logger = logging.getLogger(__name__)
@@ -473,7 +475,7 @@ class ExpandImage(BaseOperator):
"""
Expand the image and modify bounding box.
Operators:
1. Scale the image weight and height.
1. Scale the image width and height.
2. Construct new images with new height and width.
3. Fill the new image with the mean.
        4. Put the original image into the new image.
@@ -526,8 +528,6 @@ class CropImage(BaseOperator):
batch_sampler (list): Multiple sets of different
parameters for cropping.
satisfy_all (bool): whether all boxes must satisfy.
            avoid_no_bbox (bool): whether to avoid the
                                  situation where the box does not appear.
e.g.[[1, 1, 1.0, 1.0, 1.0, 1.0, 0.0, 1.0],
[1, 50, 0.3, 1.0, 0.5, 2.0, 0.1, 1.0],
[1, 50, 0.3, 1.0, 0.5, 2.0, 0.3, 1.0],
@@ -538,6 +538,8 @@ class CropImage(BaseOperator):
[max sample, max trial, min scale, max scale,
min aspect ratio, max aspect ratio,
min overlap, max overlap]
            avoid_no_bbox (bool): whether to avoid the
                                  situation where the box does not appear.
"""
super(CropImage, self).__init__()
self.batch_sampler = batch_sampler
@@ -548,7 +550,7 @@ class CropImage(BaseOperator):
"""
Crop the image and modify bounding box.
Operators:
1. Scale the image weight and height.
1. Scale the image width and height.
            2. Crop the image according to a random sample.
3. Rescale the bounding box.
4. Determine if the new bbox is satisfied in the new image.
@@ -599,6 +601,151 @@ class CropImage(BaseOperator):
return sample
@register_op
class CropImageWithDataAchorSampling(BaseOperator):
def __init__(self,
batch_sampler,
anchor_sampler=None,
target_size=None,
das_anchor_scales=[16, 32, 64, 128],
sampling_prob=0.5,
min_size=8.,
avoid_no_bbox=True):
"""
Args:
anchor_sampler (list): anchor_sampling sets of different
parameters for cropping.
batch_sampler (list): Multiple sets of different
parameters for cropping.
e.g.[[1, 10, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 0.2, 0.0]]
[[1, 50, 1.0, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
[1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
[1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
[1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0],
[1, 50, 0.3, 1.0, 1.0, 1.0, 0.0, 0.0, 1.0, 0.0]]
[max sample, max trial, min scale, max scale,
min aspect ratio, max aspect ratio,
min overlap, max overlap, min coverage, max coverage]
            target_size (int): target image size.
das_anchor_scales (list[float]): a list of anchor scales in data
                anchor sampling.
min_size (float): minimum size of sampled bbox.
            avoid_no_bbox (bool): whether to avoid the
                                  situation where the box does not appear.
"""
super(CropImageWithDataAchorSampling, self).__init__()
self.anchor_sampler = anchor_sampler
self.batch_sampler = batch_sampler
self.target_size = target_size
self.sampling_prob = sampling_prob
self.min_size = min_size
self.avoid_no_bbox = avoid_no_bbox
self.das_anchor_scales = np.array(das_anchor_scales)
def __call__(self, sample, context):
"""
Crop the image and modify bounding box.
Operators:
1. Scale the image width and height.
            2. Crop the image according to a random sample.
3. Rescale the bounding box.
4. Determine if the new bbox is satisfied in the new image.
Returns:
sample: the image, bounding box are replaced.
"""
assert 'image' in sample, "image data not found"
im = sample['image']
gt_bbox = sample['gt_bbox']
gt_class = sample['gt_class']
image_width = sample['w']
image_height = sample['h']
gt_score = None
if 'gt_score' in sample:
gt_score = sample['gt_score']
sampled_bbox = []
gt_bbox = gt_bbox.tolist()
prob = np.random.uniform(0., 1.)
if prob > self.sampling_prob: # anchor sampling
assert self.anchor_sampler
for sampler in self.anchor_sampler:
found = 0
for i in range(sampler[1]):
if found >= sampler[0]:
break
sample_bbox = data_anchor_sampling(
gt_bbox, image_width, image_height,
self.das_anchor_scales, self.target_size)
if sample_bbox == 0:
break
if satisfy_sample_constraint_coverage(sampler, sample_bbox,
gt_bbox):
sampled_bbox.append(sample_bbox)
found = found + 1
im = np.array(im)
while sampled_bbox:
idx = int(np.random.uniform(0, len(sampled_bbox)))
sample_bbox = sampled_bbox.pop(idx)
crop_bbox, crop_class, crop_score = filter_and_process(
sample_bbox, gt_bbox, gt_class, gt_score)
crop_bbox, crop_class, crop_score = bbox_area_sampling(
crop_bbox, crop_class, crop_score, self.target_size,
self.min_size)
if self.avoid_no_bbox:
if len(crop_bbox) < 1:
continue
im = crop_image_sampling(im, sample_bbox, image_width,
image_height, self.target_size)
sample['image'] = im
sample['gt_bbox'] = crop_bbox
sample['gt_class'] = crop_class
sample['gt_score'] = crop_score
return sample
return sample
else:
for sampler in self.batch_sampler:
found = 0
for i in range(sampler[1]):
if found >= sampler[0]:
break
sample_bbox = generate_sample_bbox_square(
sampler, image_width, image_height)
if satisfy_sample_constraint_coverage(sampler, sample_bbox,
gt_bbox):
sampled_bbox.append(sample_bbox)
found = found + 1
im = np.array(im)
while sampled_bbox:
idx = int(np.random.uniform(0, len(sampled_bbox)))
sample_bbox = sampled_bbox.pop(idx)
sample_bbox = clip_bbox(sample_bbox)
crop_bbox, crop_class, crop_score = filter_and_process(
sample_bbox, gt_bbox, gt_class, gt_score)
# sampling bbox according the bbox area
crop_bbox, crop_class, crop_score = bbox_area_sampling(
crop_bbox, crop_class, crop_score, self.target_size,
self.min_size)
if self.avoid_no_bbox:
if len(crop_bbox) < 1:
continue
xmin = int(sample_bbox[0] * image_width)
xmax = int(sample_bbox[2] * image_width)
ymin = int(sample_bbox[1] * image_height)
ymax = int(sample_bbox[3] * image_height)
im = im[ymin:ymax, xmin:xmax]
sample['image'] = im
sample['gt_bbox'] = crop_bbox
sample['gt_class'] = crop_class
sample['gt_score'] = crop_score
return sample
return sample
@register_op
class NormalizeBox(BaseOperator):
"""Transform the bounding box's coornidates to [0,1]."""
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from .mixed_precision import *
from . import mixed_precision
__all__ = mixed_precision.__all__
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import print_function
import six
from paddle.fluid.framework import Parameter
from paddle.fluid import layers
from paddle.fluid import core
from paddle.fluid import unique_name
import paddle.fluid.layer_helper_base as lhb
import paddle.fluid.optimizer as optim
__all__ = ['mixed_precision_global_state', 'mixed_precision_context',
'StaticLossScale', 'DynamicLossScale']
_mixed_precision_global_state = None
def mixed_precision_global_state():
return _mixed_precision_global_state
class LossScale(object):
def __init__(self):
super(LossScale, self).__init__()
def get_loss_scale_var(self):
return self.scale
def increment(self):
raise NotImplementedError()
def decrement(self):
raise NotImplementedError()
class StaticLossScale(LossScale):
"""
Static (fixed) loss scale manager.
Args:
init_loss_scale (float): initial loss scale value.
Examples:
.. code-block:: python
from paddle import fluid
from ppdet.experimental import (mixed_precision_context,
StaticLossScale)
with mixed_precision_context(StaticLossScale(8.), True) as ctx:
# ...
# scale loss
loss_scale = ctx.get_loss_scale_var()
"""
def __init__(self, init_loss_scale=1.):
super(StaticLossScale, self).__init__()
self.scale = layers.create_global_var(
name=unique_name.generate("loss_scale"),
shape=[1],
value=init_loss_scale,
dtype='float32',
persistable=True)
class DynamicLossScale(LossScale):
"""
    Dynamic loss scale manager. It works as follows:
    if gradients are finite for `increment_every` consecutive steps, the loss
    scale is increased by `factor`; otherwise the loss scale is decreased by
    `factor` and the good-step counter is reset.
Args:
init_loss_scale (float): initial loss scale value.
increment_every (int): minimum 'good' steps before loss scale increase.
factor (float): increase/decrease loss scale by this much.
Examples:
.. code-block:: python
from paddle import fluid
from ppdet.experimental import (mixed_precision_context,
DynamicLossScale)
loss_scale = DynamicLossScale(8., 1000, 4.)
with mixed_precision_context(loss_scale, True) as ctx:
# ...
# scale loss
loss_scale = ctx.get_loss_scale_var()
"""
def __init__(self, init_loss_scale=2**15, increment_every=2000, factor=2.):
super(DynamicLossScale, self).__init__()
self.scale = layers.create_global_var(
name=unique_name.generate("loss_scale"),
shape=[1],
value=init_loss_scale,
dtype='float32',
persistable=True)
self.good_steps = layers.create_global_var(
name=unique_name.generate("good_steps"),
shape=[1],
value=0,
dtype='int32',
persistable=True)
self.increment_every = layers.fill_constant(
shape=[1], dtype='int32', value=increment_every)
self.factor = factor
def increment(self):
enough_steps = layers.less_than(self.increment_every,
self.good_steps + 1)
with layers.Switch() as switch:
with switch.case(enough_steps):
new_scale = self.scale * self.factor
scale_valid = layers.isfinite(new_scale)
with layers.Switch() as switch2:
with switch2.case(scale_valid):
layers.assign(new_scale, self.scale)
layers.assign(layers.zeros_like(self.good_steps),
self.good_steps)
with switch2.default():
layers.increment(self.good_steps)
with switch.default():
layers.increment(self.good_steps)
def decrement(self):
new_scale = self.scale / self.factor
one = layers.fill_constant(shape=[1], dtype='float32', value=1.0)
less_than_one = layers.less_than(new_scale, one)
with layers.Switch() as switch:
with switch.case(less_than_one):
layers.assign(one, self.scale)
with switch.default():
layers.assign(new_scale, self.scale)
layers.assign(layers.zeros_like(self.good_steps),
self.good_steps)
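For intuition, the increment/decrement policy above can be simulated in plain Python. A minimal sketch (illustrative only; the real logic runs as fluid ops inside the program and additionally verifies that the grown scale is still finite):

```python
# Plain-Python sketch of the dynamic loss-scale policy above
# (illustrative only, not part of ppdet).
class ToyDynamicScale(object):
    def __init__(self, init_scale=2**15, increment_every=2000, factor=2.):
        self.scale = float(init_scale)
        self.good_steps = 0
        self.increment_every = increment_every
        self.factor = factor

    def update(self, grads_are_finite):
        if not grads_are_finite:
            # overflow: shrink the scale (never below 1), reset the counter
            self.scale = max(self.scale / self.factor, 1.)
            self.good_steps = 0
        elif self.good_steps + 1 > self.increment_every:
            # enough consecutive good steps: grow the scale
            # (the fluid version also checks the new scale is finite)
            self.scale *= self.factor
            self.good_steps = 0
        else:
            self.good_steps += 1

scaler = ToyDynamicScale(init_scale=8., increment_every=3, factor=2.)
for finite in [True, True, True, True, False]:
    scaler.update(finite)
print(scaler.scale)  # 8 -> 16 after four good steps, halved back to 8
```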
class mixed_precision_context(object):
"""
Context manager for mixed precision training.
Args:
loss_scale (float, str or obj): loss scale settings, can be:
        1. a number: use a fixed loss scale.
2. 'dynamic': use a default `DynamicLossScale`.
3. `DynamicLossScale` or `StaticLossScale` instance.
enabled (bool): enable mixed precision training.
Examples:
.. code-block:: python
from paddle import fluid
from ppdet.experimental import mixed_precision_context
with mixed_precision_context('dynamic', True) as ctx:
# cast inputs to float16
inputs = fluid.layers.cast(inputs, "float16")
# build model here
logits = model(inputs)
# use float32 for softmax
logits = fluid.layers.cast(logits, "float32")
softmax = fluid.layers.softmax(logits)
loss = fluid.layers.cross_entropy(input=softmax, label=label)
avg_loss = fluid.layers.mean(loss)
# scale loss
loss_scale = ctx.get_loss_scale_var()
avg_loss *= loss_scale
optimizer = fluid.optimizer.Momentum(...)
optimizer.minimize(avg_loss)
"""
def __init__(self, loss_scale=1., enabled=True):
super(mixed_precision_context, self).__init__()
self.enabled = enabled
if not enabled:
return
monkey_patch()
if isinstance(loss_scale, six.integer_types + (float,)):
self.loss_scale = StaticLossScale(loss_scale)
elif loss_scale == 'dynamic':
self.loss_scale = DynamicLossScale()
else:
assert isinstance(loss_scale, LossScale), \
"Invalid loss scale argument"
self.loss_scale = loss_scale
@property
def dynamic_scaling(self):
return isinstance(self.loss_scale, DynamicLossScale)
    def __getattr__(self, attr):
        if attr in ['get_loss_scale_var', 'increment', 'decrement']:
            return getattr(self.loss_scale, attr)
        raise AttributeError(attr)
def __enter__(self):
if not self.enabled:
return
global _mixed_precision_global_state
_mixed_precision_global_state = self
return mixed_precision_global_state()
def __exit__(self, *args):
if not self.enabled:
return
global _mixed_precision_global_state
_mixed_precision_global_state = None
return mixed_precision_global_state()
def create_parameter(self,
attr,
shape,
dtype,
is_bias=False,
default_initializer=None):
mp_state = mixed_precision_global_state()
is_half = (isinstance(dtype, str) and dtype == 'float16') \
or (isinstance(dtype, core.VarDesc.VarType)
and dtype == core.VarDesc.VarType.FP16)
if is_half and mp_state is not None:
dtype = 'float32'
param = self._create_parameter(attr, shape, dtype,
is_bias, default_initializer)
if not is_half or mp_state is None:
return param
param16 = self.main_program.current_block().create_var(
name=param.name + '.fp16',
dtype='float16',
type=param.type,
persistable=False)
self.append_op(
type='cast',
inputs={'X': [param]},
outputs={'Out': [param16]},
attrs={'in_dtype': param.dtype,
'out_dtype': param16.dtype})
return param16
def scale_gradient(block, context):
state = mixed_precision_global_state()
if state is None:
return
scale = state.get_loss_scale_var()
op_desc = block.desc.op(block.desc.op_size() - 1)
op_role_attr_name = core.op_proto_and_checker_maker.kOpRoleAttrName()
bwd_role = core.op_proto_and_checker_maker.OpRole.Backward
for name in [n for n in op_desc.output_arg_names() if n in context]:
fwd_var = block._var_recursive(context[name])
if not isinstance(fwd_var, Parameter):
continue # TODO verify all use cases
clip_op_desc = block.desc.append_op()
clip_op_desc.set_type("elementwise_div")
clip_op_desc.set_input("X", [name])
clip_op_desc.set_input("Y", [scale.name])
clip_op_desc.set_output("Out", [name])
clip_op_desc._set_attr(op_role_attr_name, bwd_role)
def update_loss_scale(grads):
state = mixed_precision_global_state()
if state is None or not state.dynamic_scaling:
return
per_grad_check = layers.stack([layers.reduce_sum(g) for g in grads])
grad_valid = layers.isfinite(per_grad_check)
with layers.Switch() as switch:
with switch.case(grad_valid):
state.increment()
with switch.default():
state.decrement()
return grad_valid
def backward(self, loss, **kwargs):
state = mixed_precision_global_state()
    callbacks = kwargs.get('callbacks', None)
if callbacks is None:
from paddle.fluid.clip import error_clip_callback
callbacks = [error_clip_callback] # XXX what if gradient is zero?
if state is not None:
kwargs['callbacks'] = [scale_gradient] + callbacks
else:
kwargs['callbacks'] = callbacks
param_grads = self._backward(loss, **kwargs)
if state is not None:
grad_valid = update_loss_scale(v for k, v in param_grads)
if state.dynamic_scaling:
with layers.Switch() as switch:
with switch.case(grad_valid):
pass
with switch.default():
for _, g in param_grads:
layers.assign(layers.zeros_like(g), g)
return param_grads
mixed_precision_patched = False
# XXX this is a temporary measure, until thoroughly evaluated
def monkey_patch():
global mixed_precision_patched
if mixed_precision_patched:
return
create_parameter_orig = lhb.LayerHelperBase.create_parameter
lhb.LayerHelperBase.create_parameter = create_parameter
lhb.LayerHelperBase._create_parameter = create_parameter_orig
backward_orig = optim.Optimizer.backward
optim.Optimizer.backward = backward
optim.Optimizer._backward = backward_orig
mixed_precision_patched = True
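Putting the patched pieces together, a training script wraps model construction in the context manager. A rough sketch under stated assumptions (`build_model` is a hypothetical placeholder, not a ppdet API):

```python
# Rough usage sketch of the machinery above; build_model is hypothetical.
from paddle import fluid
from ppdet.experimental import mixed_precision_context

with mixed_precision_context('dynamic', enabled=True) as ctx:
    image = fluid.layers.data(name='image', shape=[3, 224, 224])
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
    # inside the context, the patched create_parameter() keeps FP32
    # master weights and hands FP16 casts to the layers
    logits = build_model(fluid.layers.cast(image, 'float16'))
    logits = fluid.layers.cast(logits, 'float32')  # FP32 for the loss
    loss = fluid.layers.mean(
        fluid.layers.softmax_with_cross_entropy(logits, label))
    loss = loss * ctx.get_loss_scale_var()         # scale the loss
    optimizer = fluid.optimizer.Momentum(learning_rate=0.01, momentum=0.9)
    # the patched backward() divides gradients by the scale and updates
    # the dynamic loss scale based on gradient finiteness
    optimizer.minimize(loss)
```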
......@@ -21,6 +21,8 @@ from . import cascade_mask_rcnn
from . import yolov3
from . import ssd
from . import retinanet
from . import blazeface
from . import faceboxes
from .faster_rcnn import *
from .mask_rcnn import *
......@@ -29,3 +31,5 @@ from .cascade_mask_rcnn import *
from .yolov3 import *
from .ssd import *
from .retinanet import *
from .blazeface import *
from .faceboxes import *
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from paddle import fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.regularizer import L2Decay
from ppdet.core.workspace import register
from ppdet.modeling.ops import SSDOutputDecoder
__all__ = ['BlazeFace']
@register
class BlazeFace(object):
"""
BlazeFace: Sub-millisecond Neural Face Detection on Mobile GPUs,
see https://arxiv.org/abs/1907.05047
Args:
backbone (object): backbone instance
output_decoder (object): `SSDOutputDecoder` instance
min_sizes (list|None): min sizes of generated prior boxes.
        max_sizes (list|None): max sizes of generated prior boxes. Default: None.
        steps (list): stride of each feature map relative to the input
            image, used when generating prior boxes.
num_classes (int): number of output classes
use_density_prior_box (bool): whether or not use density_prior_box
instead of prior_box
densities (list|None): the densities of generated density prior boxes,
this attribute should be a list or tuple of integers
"""
__category__ = 'architecture'
__inject__ = ['backbone', 'output_decoder']
__shared__ = ['num_classes']
def __init__(self,
backbone="BlazeNet",
output_decoder=SSDOutputDecoder().__dict__,
min_sizes=[[16., 24.], [32., 48., 64., 80., 96., 128.]],
max_sizes=None,
steps=[8., 16.],
num_classes=2,
use_density_prior_box=False,
densities=[[2, 2], [2, 1, 1, 1, 1, 1]]):
super(BlazeFace, self).__init__()
self.backbone = backbone
self.num_classes = num_classes
self.output_decoder = output_decoder
if isinstance(output_decoder, dict):
self.output_decoder = SSDOutputDecoder(**output_decoder)
self.min_sizes = min_sizes
self.max_sizes = max_sizes
self.steps = steps
self.use_density_prior_box = use_density_prior_box
self.densities = densities
def build(self, feed_vars, mode='train'):
im = feed_vars['image']
if mode == 'train':
gt_box = feed_vars['gt_box']
gt_label = feed_vars['gt_label']
body_feats = self.backbone(im)
locs, confs, box, box_var = self._multi_box_head(
inputs=body_feats,
image=im,
num_classes=self.num_classes,
use_density_prior_box=self.use_density_prior_box)
if mode == 'train':
loss = fluid.layers.ssd_loss(
locs,
confs,
gt_box,
gt_label,
box,
box_var,
overlap_threshold=0.35,
neg_overlap=0.35)
loss = fluid.layers.reduce_sum(loss)
loss.persistable = True
return {'loss': loss}
else:
pred = self.output_decoder(locs, confs, box, box_var)
return {'bbox': pred}
def _multi_box_head(self,
inputs,
image,
num_classes=2,
use_density_prior_box=False):
def permute_and_reshape(input, last_dim):
trans = fluid.layers.transpose(input, perm=[0, 2, 3, 1])
compile_shape = [0, -1, last_dim]
return fluid.layers.reshape(trans, shape=compile_shape)
def _is_list_or_tuple_(data):
return (isinstance(data, list) or isinstance(data, tuple))
locs, confs = [], []
boxes, vars = [], []
b_attr = ParamAttr(learning_rate=2., regularizer=L2Decay(0.))
for i, input in enumerate(inputs):
min_size = self.min_sizes[i]
if use_density_prior_box:
densities = self.densities[i]
box, var = fluid.layers.density_prior_box(
input,
image,
densities=densities,
fixed_sizes=min_size,
fixed_ratios=[1.],
clip=False,
offset=0.5)
else:
box, var = fluid.layers.prior_box(
input,
image,
min_sizes=min_size,
max_sizes=None,
steps=[self.steps[i]] * 2,
aspect_ratios=[1.],
clip=False,
flip=False,
offset=0.5)
num_boxes = box.shape[2]
box = fluid.layers.reshape(box, shape=[-1, 4])
var = fluid.layers.reshape(var, shape=[-1, 4])
num_loc_output = num_boxes * 4
num_conf_output = num_boxes * num_classes
# get loc
mbox_loc = fluid.layers.conv2d(
input, num_loc_output, 3, 1, 1, bias_attr=b_attr)
loc = permute_and_reshape(mbox_loc, 4)
# get conf
mbox_conf = fluid.layers.conv2d(
input, num_conf_output, 3, 1, 1, bias_attr=b_attr)
conf = permute_and_reshape(mbox_conf, 2)
locs.append(loc)
confs.append(conf)
boxes.append(box)
vars.append(var)
face_mbox_loc = fluid.layers.concat(locs, axis=1)
face_mbox_conf = fluid.layers.concat(confs, axis=1)
prior_boxes = fluid.layers.concat(boxes)
box_vars = fluid.layers.concat(vars)
return face_mbox_loc, face_mbox_conf, prior_boxes, box_vars
def train(self, feed_vars):
return self.build(feed_vars, 'train')
def eval(self, feed_vars):
return self.build(feed_vars, 'eval')
def test(self, feed_vars):
return self.build(feed_vars, 'test')
def is_bbox_normalized(self):
return True
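With the defaults above (`aspect_ratios=[1.]`, `flip=False`), `prior_box` emits one box per min size per feature-map cell, so the total prior count is easy to check by hand. A quick sketch, assuming the 640x640 input used for WIDER FACE:

```python
# Sanity check of the default BlazeFace prior-box count
# (assumes a 640x640 input; illustrative only).
input_size = 640
min_sizes = [[16., 24.], [32., 48., 64., 80., 96., 128.]]
steps = [8, 16]                      # feature maps of 80x80 and 40x40

total = 0
for sizes, step in zip(min_sizes, steps):
    fm = input_size // step          # feature-map side length
    total += fm * fm * len(sizes)    # one box per min size per cell
print(total)                         # 80*80*2 + 40*40*6 = 22400
```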
......@@ -16,8 +16,11 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import OrderedDict
import paddle.fluid as fluid
from ppdet.experimental import mixed_precision_global_state
from ppdet.core.workspace import register
__all__ = ['CascadeMaskRCNN']
......@@ -98,9 +101,19 @@ class CascadeMaskRCNN(object):
im_info = feed_vars['im_info']
mixed_precision_enabled = mixed_precision_global_state() is not None
# cast inputs to FP16
if mixed_precision_enabled:
im = fluid.layers.cast(im, 'float16')
# backbone
body_feats = self.backbone(im)
# cast features back to FP32
if mixed_precision_enabled:
body_feats = OrderedDict((k, fluid.layers.cast(v, 'float32'))
for k, v in body_feats.items())
# FPN
if self.fpn is not None:
body_feats, spatial_scale = self.fpn.get_output(body_feats)
......
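The same three-step pattern (cast the input to FP16, run the backbone, cast the feature dict back to FP32 for the heads) recurs in every architecture hunk below. A minimal standalone sketch of just that pattern, where `backbone` stands for any callable returning an OrderedDict of feature maps:

```python
# Minimal sketch of the FP16 cast pattern shared by the architectures.
from collections import OrderedDict
import paddle.fluid as fluid
from ppdet.experimental import mixed_precision_global_state

def run_backbone_mixed(im, backbone):
    fp16 = mixed_precision_global_state() is not None
    if fp16:
        im = fluid.layers.cast(im, 'float16')  # heavy conv work in FP16
    feats = backbone(im)
    if fp16:                                   # heads remain in FP32
        feats = OrderedDict((k, fluid.layers.cast(v, 'float32'))
                            for k, v in feats.items())
    return feats
```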
......@@ -16,8 +16,11 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import OrderedDict
import paddle.fluid as fluid
from ppdet.experimental import mixed_precision_global_state
from ppdet.core.workspace import register
__all__ = ['CascadeRCNN']
......@@ -87,9 +90,18 @@ class CascadeRCNN(object):
gt_box = feed_vars['gt_box']
is_crowd = feed_vars['is_crowd']
mixed_precision_enabled = mixed_precision_global_state() is not None
# cast inputs to FP16
if mixed_precision_enabled:
im = fluid.layers.cast(im, 'float16')
# backbone
body_feats = self.backbone(im)
# body_feat_names = list(body_feats.keys())
# cast features back to FP32
if mixed_precision_enabled:
body_feats = OrderedDict((k, fluid.layers.cast(v, 'float32'))
for k, v in body_feats.items())
# FPN
if self.fpn is not None:
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import numpy as np
from paddle import fluid
from paddle.fluid.param_attr import ParamAttr
from paddle.fluid.regularizer import L2Decay
from ppdet.core.workspace import register
from ppdet.modeling.ops import SSDOutputDecoder
__all__ = ['FaceBoxes']
@register
class FaceBoxes(object):
"""
    FaceBoxes: A CPU Real-time Face Detector with High Accuracy,
    see https://arxiv.org/abs/1708.05234
Args:
backbone (object): backbone instance
output_decoder (object): `SSDOutputDecoder` instance
densities (list|None): the densities of generated density prior boxes,
this attribute should be a list or tuple of integers.
fixed_sizes (list|None): the fixed sizes of generated density prior boxes,
            this attribute should be a list or tuple of the same length as `densities`.
num_classes (int): number of output classes
"""
__category__ = 'architecture'
__inject__ = ['backbone', 'output_decoder']
__shared__ = ['num_classes']
def __init__(self,
backbone="FaceBoxNet",
output_decoder=SSDOutputDecoder().__dict__,
densities=[[4, 2, 1], [1], [1]],
fixed_sizes=[[32., 64., 128.], [256.], [512.]],
num_classes=2):
super(FaceBoxes, self).__init__()
self.backbone = backbone
self.num_classes = num_classes
self.output_decoder = output_decoder
if isinstance(output_decoder, dict):
self.output_decoder = SSDOutputDecoder(**output_decoder)
self.densities = densities
self.fixed_sizes = fixed_sizes
def build(self, feed_vars, mode='train'):
im = feed_vars['image']
if mode == 'train':
gt_box = feed_vars['gt_box']
gt_label = feed_vars['gt_label']
body_feats = self.backbone(im)
locs, confs, box, box_var = self._multi_box_head(
inputs=body_feats, image=im, num_classes=self.num_classes)
if mode == 'train':
loss = fluid.layers.ssd_loss(
locs,
confs,
gt_box,
gt_label,
box,
box_var,
overlap_threshold=0.35,
neg_overlap=0.35)
loss = fluid.layers.reduce_sum(loss)
loss.persistable = True
return {'loss': loss}
else:
pred = self.output_decoder(locs, confs, box, box_var)
return {'bbox': pred}
def _multi_box_head(self, inputs, image, num_classes=2):
def permute_and_reshape(input, last_dim):
trans = fluid.layers.transpose(input, perm=[0, 2, 3, 1])
compile_shape = [0, -1, last_dim]
return fluid.layers.reshape(trans, shape=compile_shape)
def _is_list_or_tuple_(data):
return (isinstance(data, list) or isinstance(data, tuple))
locs, confs = [], []
boxes, vars = [], []
b_attr = ParamAttr(learning_rate=2., regularizer=L2Decay(0.))
for i, input in enumerate(inputs):
densities = self.densities[i]
fixed_sizes = self.fixed_sizes[i]
box, var = fluid.layers.density_prior_box(
input,
image,
densities=densities,
fixed_sizes=fixed_sizes,
fixed_ratios=[1.],
clip=False,
offset=0.5)
num_boxes = box.shape[2]
box = fluid.layers.reshape(box, shape=[-1, 4])
var = fluid.layers.reshape(var, shape=[-1, 4])
num_loc_output = num_boxes * 4
num_conf_output = num_boxes * num_classes
# get loc
mbox_loc = fluid.layers.conv2d(
input, num_loc_output, 3, 1, 1, bias_attr=b_attr)
loc = permute_and_reshape(mbox_loc, 4)
# get conf
mbox_conf = fluid.layers.conv2d(
input, num_conf_output, 3, 1, 1, bias_attr=b_attr)
conf = permute_and_reshape(mbox_conf, 2)
locs.append(loc)
confs.append(conf)
boxes.append(box)
vars.append(var)
face_mbox_loc = fluid.layers.concat(locs, axis=1)
face_mbox_conf = fluid.layers.concat(confs, axis=1)
prior_boxes = fluid.layers.concat(boxes)
box_vars = fluid.layers.concat(vars)
return face_mbox_loc, face_mbox_conf, prior_boxes, box_vars
def train(self, feed_vars):
return self.build(feed_vars, 'train')
def eval(self, feed_vars):
return self.build(feed_vars, 'eval')
def test(self, feed_vars):
return self.build(feed_vars, 'test')
def is_bbox_normalized(self):
return True
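`density_prior_box` tiles `density**2` anchors per fixed size at every cell (with `fixed_ratios=[1.]`), so the defaults above concentrate anchors on the shallowest feature map. A quick check:

```python
# Anchors per cell for the default FaceBoxes density prior boxes.
densities = [[4, 2, 1], [1], [1]]
per_cell = [sum(d * d for d in ds) for ds in densities]
print(per_cell)   # [21, 1, 1]: 21 anchors per cell on the first map
```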
......@@ -16,8 +16,11 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import OrderedDict
from paddle import fluid
from ppdet.experimental import mixed_precision_global_state
from ppdet.core.workspace import register
__all__ = ['FasterRCNN']
......@@ -67,9 +70,21 @@ class FasterRCNN(object):
is_crowd = feed_vars['is_crowd']
else:
im_shape = feed_vars['im_shape']
mixed_precision_enabled = mixed_precision_global_state() is not None
# cast inputs to FP16
if mixed_precision_enabled:
im = fluid.layers.cast(im, 'float16')
body_feats = self.backbone(im)
body_feat_names = list(body_feats.keys())
# cast features back to FP32
if mixed_precision_enabled:
body_feats = OrderedDict((k, fluid.layers.cast(v, 'float32'))
for k, v in body_feats.items())
if self.fpn is not None:
body_feats, spatial_scale = self.fpn.get_output(body_feats)
......@@ -94,7 +109,8 @@ class FasterRCNN(object):
bbox_outside_weights = outs[4]
else:
if self.rpn_only:
im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3])
im_scale = fluid.layers.slice(
im_info, [1], starts=[2], ends=[3])
im_scale = fluid.layers.sequence_expand(im_scale, rois)
rois = rois / im_scale
return {'proposal': rois}
......
......@@ -16,7 +16,11 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from paddle import fluid
from collections import OrderedDict
import paddle.fluid as fluid
from ppdet.experimental import mixed_precision_global_state
from ppdet.core.workspace import register
__all__ = ['MaskRCNN']
......@@ -79,8 +83,19 @@ class MaskRCNN(object):
"{} has no {} field".format(feed_vars, var)
im_info = feed_vars['im_info']
mixed_precision_enabled = mixed_precision_global_state() is not None
# cast inputs to FP16
if mixed_precision_enabled:
im = fluid.layers.cast(im, 'float16')
# backbone
body_feats = self.backbone(im)
# cast features back to FP32
if mixed_precision_enabled:
body_feats = OrderedDict((k, fluid.layers.cast(v, 'float32'))
for k, v in body_feats.items())
# FPN
if self.fpn is not None:
body_feats, spatial_scale = self.fpn.get_output(body_feats)
......@@ -133,7 +148,8 @@ class MaskRCNN(object):
else:
if self.rpn_only:
im_scale = fluid.layers.slice(im_info, [1], starts=[2], ends=[3])
im_scale = fluid.layers.slice(
im_info, [1], starts=[2], ends=[3])
im_scale = fluid.layers.sequence_expand(im_scale, rois)
rois = rois / im_scale
return {'proposal': rois}
......
......@@ -16,8 +16,11 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from collections import OrderedDict
import paddle.fluid as fluid
from ppdet.experimental import mixed_precision_global_state
from ppdet.core.workspace import register
__all__ = ['RetinaNet']
......@@ -50,9 +53,20 @@ class RetinaNet(object):
gt_box = feed_vars['gt_box']
gt_label = feed_vars['gt_label']
is_crowd = feed_vars['is_crowd']
mixed_precision_enabled = mixed_precision_global_state() is not None
# cast inputs to FP16
if mixed_precision_enabled:
im = fluid.layers.cast(im, 'float16')
# backbone
body_feats = self.backbone(im)
# cast features back to FP32
if mixed_precision_enabled:
body_feats = OrderedDict((k, fluid.layers.cast(v, 'float32'))
for k, v in body_feats.items())
# FPN
body_feats, spatial_scale = self.fpn.get_output(body_feats)
......
......@@ -16,8 +16,11 @@ from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from paddle import fluid
from collections import OrderedDict
import paddle.fluid as fluid
from ppdet.experimental import mixed_precision_global_state
from ppdet.core.workspace import register
from ppdet.modeling.ops import SSDOutputDecoder
......@@ -59,7 +62,22 @@ class SSD(object):
gt_box = feed_vars['gt_box']
gt_label = feed_vars['gt_label']
mixed_precision_enabled = mixed_precision_global_state() is not None
# cast inputs to FP16
if mixed_precision_enabled:
im = fluid.layers.cast(im, 'float16')
# backbone
body_feats = self.backbone(im)
if isinstance(body_feats, OrderedDict):
body_feat_names = list(body_feats.keys())
body_feats = [body_feats[name] for name in body_feat_names]
# cast features back to FP32
if mixed_precision_enabled:
body_feats = [fluid.layers.cast(v, 'float32') for v in body_feats]
locs, confs, box, box_var = self.multi_box_head(
inputs=body_feats, image=im, num_classes=self.num_classes)
......
......@@ -18,6 +18,9 @@ from __future__ import print_function
from collections import OrderedDict
from paddle import fluid
from ppdet.experimental import mixed_precision_global_state
from ppdet.core.workspace import register
__all__ = ['YOLOv3']
......@@ -43,12 +46,23 @@ class YOLOv3(object):
def build(self, feed_vars, mode='train'):
im = feed_vars['image']
mixed_precision_enabled = mixed_precision_global_state() is not None
# cast inputs to FP16
if mixed_precision_enabled:
im = fluid.layers.cast(im, 'float16')
body_feats = self.backbone(im)
if isinstance(body_feats, OrderedDict):
body_feat_names = list(body_feats.keys())
body_feats = [body_feats[name] for name in body_feat_names]
# cast features back to FP32
if mixed_precision_enabled:
body_feats = [fluid.layers.cast(v, 'float32') for v in body_feats]
if mode == 'train':
gt_box = feed_vars['gt_box']
gt_label = feed_vars['gt_label']
......
......@@ -21,6 +21,8 @@ from . import mobilenet
from . import senet
from . import fpn
from . import vgg
from . import blazenet
from . import faceboxnet
from .resnet import *
from .resnext import *
......@@ -29,3 +31,5 @@ from .mobilenet import *
from .senet import *
from .fpn import *
from .vgg import *
from .blazenet import *
from .faceboxnet import *
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from paddle import fluid
from paddle.fluid.param_attr import ParamAttr
from ppdet.core.workspace import register
__all__ = ['BlazeNet']
@register
class BlazeNet(object):
"""
    BlazeNet, the backbone of BlazeFace, see https://arxiv.org/abs/1907.05047
Args:
        blaze_filters (list): number of filters for each blaze block
        double_blaze_filters (list): number of filters for each double_blaze block
        with_extra_blocks (bool): whether or not extra blocks should be added
        lite_edition (bool): whether to build the BlazeFace-lite edition
"""
def __init__(
self,
blaze_filters=[[24, 24], [24, 24], [24, 48, 2], [48, 48], [48, 48]],
double_blaze_filters=[[48, 24, 96, 2], [96, 24, 96], [96, 24, 96],
[96, 24, 96, 2], [96, 24, 96], [96, 24, 96]],
with_extra_blocks=True,
lite_edition=False):
super(BlazeNet, self).__init__()
self.blaze_filters = blaze_filters
self.double_blaze_filters = double_blaze_filters
self.with_extra_blocks = with_extra_blocks
self.lite_edition = lite_edition
def __call__(self, input):
if not self.lite_edition:
conv1_num_filters = self.blaze_filters[0][0]
conv = self._conv_norm(
input=input,
num_filters=conv1_num_filters,
filter_size=3,
stride=2,
padding=1,
act='relu',
name="conv1")
for k, v in enumerate(self.blaze_filters):
                assert len(v) in [2, 3], \
                    "blaze_filters {} not in [2, 3]".format(v)
if len(v) == 2:
conv = self.BlazeBlock(
conv, v[0], v[1], name='blaze_{}'.format(k))
elif len(v) == 3:
conv = self.BlazeBlock(
conv,
v[0],
v[1],
stride=v[2],
name='blaze_{}'.format(k))
layers = []
for k, v in enumerate(self.double_blaze_filters):
                assert len(v) in [3, 4], \
                    "double_blaze_filters {} not in [3, 4]".format(v)
if len(v) == 3:
conv = self.BlazeBlock(
conv,
v[0],
v[1],
double_channels=v[2],
name='double_blaze_{}'.format(k))
elif len(v) == 4:
layers.append(conv)
conv = self.BlazeBlock(
conv,
v[0],
v[1],
double_channels=v[2],
stride=v[3],
name='double_blaze_{}'.format(k))
layers.append(conv)
if not self.with_extra_blocks:
return layers[-1]
return layers[-2], layers[-1]
else:
conv1 = self._conv_norm(
input=input,
num_filters=24,
filter_size=5,
stride=2,
padding=2,
act='relu',
name="conv1")
conv2 = self.Blaze_lite(conv1, 24, 24, 1, 'conv2')
conv3 = self.Blaze_lite(conv2, 24, 28, 1, 'conv3')
conv4 = self.Blaze_lite(conv3, 28, 32, 2, 'conv4')
conv5 = self.Blaze_lite(conv4, 32, 36, 1, 'conv5')
conv6 = self.Blaze_lite(conv5, 36, 42, 1, 'conv6')
conv7 = self.Blaze_lite(conv6, 42, 48, 2, 'conv7')
in_ch = 48
for i in range(5):
conv7 = self.Blaze_lite(conv7, in_ch, in_ch + 8, 1,
'conv{}'.format(8 + i))
in_ch += 8
assert in_ch == 88
conv13 = self.Blaze_lite(conv7, 88, 96, 2, 'conv13')
for i in range(4):
conv13 = self.Blaze_lite(conv13, 96, 96, 1,
'conv{}'.format(14 + i))
return conv7, conv13
def BlazeBlock(self,
input,
in_channels,
out_channels,
double_channels=None,
stride=1,
use_5x5kernel=True,
name=None):
assert stride in [1, 2]
        use_pool = stride != 1
use_double_block = double_channels is not None
act = 'relu' if use_double_block else None
if use_5x5kernel:
conv_dw = self._conv_norm(
input=input,
filter_size=5,
num_filters=in_channels,
stride=stride,
padding=2,
num_groups=in_channels,
use_cudnn=False,
name=name + "1_dw")
else:
conv_dw_1 = self._conv_norm(
input=input,
filter_size=3,
num_filters=in_channels,
stride=1,
padding=1,
num_groups=in_channels,
use_cudnn=False,
name=name + "1_dw_1")
conv_dw = self._conv_norm(
input=conv_dw_1,
filter_size=3,
num_filters=in_channels,
stride=stride,
padding=1,
num_groups=in_channels,
use_cudnn=False,
name=name + "1_dw_2")
conv_pw = self._conv_norm(
input=conv_dw,
filter_size=1,
num_filters=out_channels,
stride=1,
padding=0,
act=act,
name=name + "1_sep")
if use_double_block:
if use_5x5kernel:
conv_dw = self._conv_norm(
input=conv_pw,
filter_size=5,
num_filters=out_channels,
stride=1,
padding=2,
use_cudnn=False,
name=name + "2_dw")
else:
conv_dw_1 = self._conv_norm(
input=conv_pw,
filter_size=3,
num_filters=out_channels,
stride=1,
padding=1,
num_groups=out_channels,
use_cudnn=False,
name=name + "2_dw_1")
conv_dw = self._conv_norm(
input=conv_dw_1,
filter_size=3,
num_filters=out_channels,
stride=1,
padding=1,
num_groups=out_channels,
use_cudnn=False,
name=name + "2_dw_2")
conv_pw = self._conv_norm(
input=conv_dw,
filter_size=1,
num_filters=double_channels,
stride=1,
padding=0,
name=name + "2_sep")
# shortcut
if use_pool:
shortcut_channel = double_channels or out_channels
shortcut_pool = self._pooling_block(input, stride, stride)
channel_pad = self._conv_norm(
input=shortcut_pool,
filter_size=1,
num_filters=shortcut_channel,
stride=1,
padding=0,
name="shortcut" + name)
return fluid.layers.elementwise_add(
x=channel_pad, y=conv_pw, act='relu')
return fluid.layers.elementwise_add(x=input, y=conv_pw, act='relu')
def Blaze_lite(self, input, in_channels, out_channels, stride=1, name=None):
assert stride in [1, 2]
        use_pool = stride != 1
        use_pad = in_channels != out_channels
conv_dw = self._conv_norm(
input=input,
filter_size=3,
num_filters=in_channels,
stride=stride,
padding=1,
num_groups=in_channels,
name=name + "_dw")
conv_pw = self._conv_norm(
input=conv_dw,
filter_size=1,
num_filters=out_channels,
stride=1,
padding=0,
name=name + "_sep")
if use_pool:
shortcut_pool = self._pooling_block(input, stride, stride)
        if use_pad:
conv_pad = shortcut_pool if use_pool else input
channel_pad = self._conv_norm(
input=conv_pad,
filter_size=1,
num_filters=out_channels,
stride=1,
padding=0,
name="shortcut" + name)
return fluid.layers.elementwise_add(
x=channel_pad, y=conv_pw, act='relu')
return fluid.layers.elementwise_add(x=input, y=conv_pw, act='relu')
def _conv_norm(
self,
input,
filter_size,
num_filters,
stride,
padding,
num_groups=1,
act='relu', # None
use_cudnn=True,
name=None):
parameter_attr = ParamAttr(
learning_rate=0.1,
initializer=fluid.initializer.MSRA(),
name=name + "_weights")
conv = fluid.layers.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=padding,
groups=num_groups,
act=None,
use_cudnn=use_cudnn,
param_attr=parameter_attr,
bias_attr=False)
return fluid.layers.batch_norm(input=conv, act=act)
def _pooling_block(self,
conv,
pool_size,
pool_stride,
pool_padding=0,
ceil_mode=True):
pool = fluid.layers.pool2d(
input=conv,
pool_size=pool_size,
pool_type='max',
pool_stride=pool_stride,
pool_padding=pool_padding,
ceil_mode=ceil_mode)
return pool
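Tracing the strides in the default `blaze_filters`/`double_blaze_filters` shows how the two returned feature maps line up with the `steps=[8., 16.]` used by the BlazeFace head. A bookkeeping sketch:

```python
# Bookkeeping sketch of BlazeNet output strides (illustrative only).
blaze_filters = [[24, 24], [24, 24], [24, 48, 2], [48, 48], [48, 48]]
double_blaze_filters = [[48, 24, 96, 2], [96, 24, 96], [96, 24, 96],
                        [96, 24, 96, 2], [96, 24, 96], [96, 24, 96]]

stride = 2                          # conv1 has stride 2
for v in blaze_filters:
    if len(v) == 3:
        stride *= v[2]              # [24, 48, 2] halves the resolution

out_strides = []
for v in double_blaze_filters:
    if len(v) == 4:                 # before a strided block, the map
        out_strides.append(stride)  # computed so far is kept in `layers`
        stride *= v[3]
out_strides.append(stride)          # the final map is always kept

print(out_strides[-2:])             # [8, 16] -- matches BlazeFace steps
```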
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from paddle import fluid
from paddle.fluid.param_attr import ParamAttr
from ppdet.core.workspace import register
__all__ = ['FaceBoxNet']
@register
class FaceBoxNet(object):
"""
    FaceBoxes, see https://arxiv.org/abs/1708.05234
    Args:
        with_extra_blocks (bool): whether or not extra blocks should be added
        lite_edition (bool): whether to build the FaceBoxes-lite edition
"""
def __init__(self,
with_extra_blocks=True,
lite_edition=False):
super(FaceBoxNet, self).__init__()
self.with_extra_blocks = with_extra_blocks
self.lite_edition = lite_edition
def __call__(self, input):
if self.lite_edition:
return self._simplified_edition(input)
else:
return self._original_edition(input)
def _simplified_edition(self, input):
conv_1_1 = self._conv_norm_crelu(
input=input,
num_filters=8,
filter_size=3,
stride=2,
padding=1,
act='relu',
name="conv_1_1")
conv_1_2 = self._conv_norm_crelu(
input=conv_1_1,
num_filters=24,
filter_size=3,
stride=2,
padding=1,
act='relu',
name="conv_1_2")
pool1 = fluid.layers.pool2d(
input=conv_1_2,
pool_size=3,
pool_padding=1,
pool_type='avg',
name="pool_1")
conv_2_1 = self._conv_norm(
input=pool1,
num_filters=48,
filter_size=3,
stride=2,
padding=1,
act='relu',
name="conv_2_1")
conv_2_2 = self._conv_norm(
input=conv_2_1,
num_filters=64,
filter_size=1,
stride=1,
padding=0,
act='relu',
name="conv_2_2")
conv_inception = conv_2_2
for i in range(3):
conv_inception = self._inceptionA(conv_inception, i)
layers = []
layers.append(conv_inception)
conv_3_1 = self._conv_norm(
input=conv_inception,
num_filters=128,
filter_size=1,
stride=1,
padding=0,
act='relu',
name="conv_3_1")
conv_3_2 = self._conv_norm(
input=conv_3_1,
num_filters=256,
filter_size=3,
stride=2,
padding=1,
act='relu',
name="conv_3_2")
layers.append(conv_3_2)
if not self.with_extra_blocks:
return layers[-1]
return layers[-2], layers[-1]
def _original_edition(self, input):
conv_1 = self._conv_norm_crelu(
input=input,
num_filters=24,
filter_size=7,
stride=4,
padding=3,
act='relu',
name="conv_1")
pool_1 = fluid.layers.pool2d(
input=conv_1,
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max',
name="pool_1")
conv_2 = self._conv_norm_crelu(
input=pool_1,
num_filters=64,
filter_size=5,
stride=2,
padding=2,
act='relu',
name="conv_2")
pool_2 = fluid.layers.pool2d(
            input=conv_2,
pool_size=3,
pool_stride=2,
pool_padding=1,
pool_type='max',
name="pool_2")
conv_inception = pool_2
for i in range(3):
conv_inception = self._inceptionA(conv_inception, i)
layers = []
layers.append(conv_inception)
conv_3_1 = self._conv_norm(
input=conv_inception,
num_filters=128,
filter_size=1,
stride=1,
padding=0,
act='relu',
name="conv_3_1")
conv_3_2 = self._conv_norm(
input=conv_3_1,
num_filters=256,
filter_size=3,
stride=2,
padding=1,
act='relu',
name="conv_3_2")
layers.append(conv_3_2)
conv_4_1 = self._conv_norm(
input=conv_3_2,
num_filters=128,
filter_size=1,
stride=1,
padding=0,
act='relu',
name="conv_4_1")
conv_4_2 = self._conv_norm(
input=conv_4_1,
num_filters=256,
filter_size=3,
stride=2,
padding=1,
act='relu',
name="conv_4_2")
layers.append(conv_4_2)
if not self.with_extra_blocks:
return layers[-1]
return layers[-3], layers[-2], layers[-1]
def _conv_norm(
self,
input,
filter_size,
num_filters,
stride,
padding,
num_groups=1,
act='relu',
use_cudnn=True,
name=None):
parameter_attr = ParamAttr(
learning_rate=0.1,
initializer=fluid.initializer.MSRA(),
name=name + "_weights")
conv = fluid.layers.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=padding,
groups=num_groups,
act=None,
use_cudnn=use_cudnn,
param_attr=parameter_attr,
bias_attr=False)
return fluid.layers.batch_norm(input=conv, act=act)
def _conv_norm_crelu(
self,
input,
filter_size,
num_filters,
stride,
padding,
num_groups=1,
act='relu',
use_cudnn=True,
name=None):
parameter_attr = ParamAttr(
learning_rate=0.1,
initializer=fluid.initializer.MSRA(),
name=name + "_weights")
conv = fluid.layers.conv2d(
input=input,
num_filters=num_filters,
filter_size=filter_size,
stride=stride,
padding=padding,
groups=num_groups,
act=None,
use_cudnn=use_cudnn,
param_attr=parameter_attr,
bias_attr=False)
conv_a = fluid.layers.batch_norm(input=conv, act=act)
conv_b = fluid.layers.scale(conv_a, -1)
concat = fluid.layers.concat([conv_a, conv_b], axis=1)
return concat
def _pooling_block(self,
conv,
pool_size,
pool_stride,
pool_padding=0,
ceil_mode=True):
pool = fluid.layers.pool2d(
input=conv,
pool_size=pool_size,
pool_type='max',
pool_stride=pool_stride,
pool_padding=pool_padding,
ceil_mode=ceil_mode)
return pool
def _inceptionA(self, data, idx):
idx = str(idx)
pool1 = fluid.layers.pool2d(
input=data,
pool_size=3,
pool_padding=1,
pool_type='avg',
name='inceptionA_' + idx + '_pool1')
conv1 = self._conv_norm(
input=pool1,
filter_size=1,
num_filters=32,
stride=1,
padding=0,
act='relu',
name='inceptionA_' + idx + '_conv1')
conv2 = self._conv_norm(
input=data,
filter_size=1,
num_filters=32,
stride=1,
padding=0,
act='relu',
name='inceptionA_' + idx + '_conv2')
conv3 = self._conv_norm(
input=data,
filter_size=1,
num_filters=24,
stride=1,
padding=0,
act='relu',
name='inceptionA_' + idx + '_conv3_1')
conv3 = self._conv_norm(
input=conv3,
filter_size=3,
num_filters=32,
stride=1,
padding=1,
act='relu',
name='inceptionA_' + idx + '_conv3_2')
conv4 = self._conv_norm(
input=data,
filter_size=1,
num_filters=24,
stride=1,
padding=0,
act='relu',
name='inceptionA_' + idx + '_conv4_1')
conv4 = self._conv_norm(
input=conv4,
filter_size=3,
num_filters=32,
stride=1,
padding=1,
act='relu',
name='inceptionA_' + idx + '_conv4_2')
conv4 = self._conv_norm(
input=conv4,
filter_size=3,
num_filters=32,
stride=1,
padding=1,
act='relu',
name='inceptionA_' + idx + '_conv4_3')
concat = fluid.layers.concat(
[conv1, conv2, conv3, conv4], axis=1)
return concat
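`_conv_norm_crelu` above doubles the channel count by concatenating the normalized activation with its negation, so a conv with N filters yields 2N output channels. (The canonical CReLU concatenates relu(x) with relu(-x); here the negation is applied after the activation, matching the code.) A NumPy sketch of the channel-doubling step alone:

```python
# NumPy sketch of the channel doubling in _conv_norm_crelu
# (toy 1-D "channels" instead of NCHW tensors; illustrative only).
import numpy as np

a = np.maximum(np.array([1.5, -0.5, 0.0, 2.0]), 0.0)  # ReLU after BN
b = -a                               # fluid.layers.scale(conv_a, -1)
out = np.concatenate([a, b])         # concat along the channel axis
print(out.size)                      # channels doubled: 4 -> 8
```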
......@@ -27,6 +27,7 @@ from paddle.fluid.initializer import MSRA
from ppdet.modeling.ops import MultiClassNMS
from ppdet.modeling.ops import ConvNorm
from ppdet.core.workspace import register, serializable
from ppdet.experimental import mixed_precision_global_state
__all__ = ['BBoxHead', 'TwoFCHead', 'XConvNormHead']
......@@ -120,6 +121,12 @@ class TwoFCHead(object):
def __call__(self, roi_feat):
fan = roi_feat.shape[1] * roi_feat.shape[2] * roi_feat.shape[3]
mixed_precision_enabled = mixed_precision_global_state() is not None
if mixed_precision_enabled:
roi_feat = fluid.layers.cast(roi_feat, 'float16')
fc6 = fluid.layers.fc(input=roi_feat,
size=self.mlp_dim,
act='relu',
......@@ -141,6 +148,10 @@ class TwoFCHead(object):
name='fc7_b',
learning_rate=2.,
regularizer=L2Decay(0.)))
if mixed_precision_enabled:
head_feat = fluid.layers.cast(head_feat, 'float32')
return head_feat
......@@ -280,7 +291,7 @@ class BBoxHead(object):
number of input images, each element consists of im_height,
im_width, im_scale.
im_shape (Variable): Actual shape of original image with shape
            [B, 3]. B is the number of images, each element consists of
original_height, original_width, 1
Returns:
......
......@@ -17,10 +17,12 @@ from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
import errno
import os
import shutil
import time
import numpy as np
import re
import paddle.fluid as fluid
from .download import get_weights_path
......@@ -31,6 +33,7 @@ logger = logging.getLogger(__name__)
__all__ = [
'load_checkpoint',
'load_and_fusebn',
'load_params',
'save',
]
......@@ -44,28 +47,71 @@ def is_url(path):
return path.startswith('http://') or path.startswith('https://')
def load_pretrain(exe, prog, path):
def _get_weight_path(path):
env = os.environ
if 'PADDLE_TRAINERS_NUM' in env and 'PADDLE_TRAINER_ID' in env:
trainer_id = int(env['PADDLE_TRAINER_ID'])
num_trainers = int(env['PADDLE_TRAINERS_NUM'])
if num_trainers <= 1:
path = get_weights_path(path)
else:
from ppdet.utils.download import map_path, WEIGHTS_HOME
weight_path = map_path(path, WEIGHTS_HOME)
lock_path = weight_path + '.lock'
if not os.path.exists(weight_path):
try:
os.makedirs(os.path.dirname(weight_path))
except OSError as e:
if e.errno != errno.EEXIST:
raise
with open(lock_path, 'w'): # touch
os.utime(lock_path, None)
if trainer_id == 0:
get_weights_path(path)
os.remove(lock_path)
else:
while os.path.exists(lock_path):
time.sleep(1)
path = weight_path
else:
path = get_weights_path(path)
return path
def load_params(exe, prog, path, ignore_params=[]):
"""
Load model from the given path.
Args:
exe (fluid.Executor): The fluid.Executor object.
prog (fluid.Program): load weight to which Program object.
        path (string): URL string or local model path.
        ignore_params (list): regex patterns of parameter names to skip
            loading when finetuning.
"""
if is_url(path):
path = get_weights_path(path)
path = _get_weight_path(path)
if not os.path.exists(path):
        raise ValueError("Model pretrain path {} does not "
                         "exist.".format(path))
logger.info('Loading pretrained model from {}...'.format(path))
logger.info('Loading parameters from {}...'.format(path))
def _if_exist(var):
b = os.path.exists(os.path.join(path, var.name))
if b:
do_ignore = False
param_exist = os.path.exists(os.path.join(path, var.name))
if len(ignore_params) > 0:
# Parameter related to num_classes will be ignored in finetuning
do_ignore_list = [
bool(re.match(name, var.name)) for name in ignore_params
]
do_ignore = any(do_ignore_list)
if do_ignore and param_exist:
logger.info('In load_params, ignore {}'.format(var.name))
do_load = param_exist and not do_ignore
if do_load:
logger.debug('load weight {}'.format(var.name))
return b
return do_load
fluid.io.load_vars(exe, path, prog, predicate=_if_exist)
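For finetuning on a dataset with a different number of classes, the class-dependent parameters can be excluded by name pattern. A hypothetical call (the regexes and the weight path are illustrative; real parameter names depend on the model):

```python
# Hypothetical finetuning call; the patterns and the weight path are
# illustrative, and train_prog is assumed to be built elsewhere.
exe = fluid.Executor(fluid.CUDAPlace(0))
load_params(exe, train_prog, 'output/model_final',
            ignore_params=['cls_score.*', 'bbox_pred.*'])
```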
......@@ -79,7 +125,7 @@ def load_checkpoint(exe, prog, path):
        path (string): URL string or local model path.
"""
if is_url(path):
path = get_weights_path(path)
path = _get_weight_path(path)
if not os.path.exists(path):
raise ValueError("Model checkpoint path {} does not "
......@@ -130,14 +176,16 @@ def load_and_fusebn(exe, prog, path):
        path (string): the path of the saved model.
"""
logger.info('Load model and fuse batch norm from {}...'.format(path))
if is_url(path):
path = get_weights_path(path)
path = _get_weight_path(path)
if not os.path.exists(path):
        raise ValueError("Model path {} does not exist.".format(path))
def _if_exist(var):
b = os.path.exists(os.path.join(path, var.name))
if b:
logger.debug('load weight {}'.format(var.name))
return b
......@@ -159,6 +207,7 @@ def load_and_fusebn(exe, prog, path):
inner_prog = fluid.Program()
inner_start_prog = fluid.Program()
inner_block = inner_prog.global_block()
with fluid.program_guard(inner_prog, inner_start_prog):
for block in prog.blocks:
ops = list(block.ops)
......@@ -181,10 +230,20 @@ def load_and_fusebn(exe, prog, path):
break
bias = block.var(bias_name)
mean_vb = fluid.layers.create_parameter(
bias.shape, bias.dtype, mean_name)
variance_vb = fluid.layers.create_parameter(
bias.shape, bias.dtype, variance_name)
mean_vb = inner_block.create_var(
name=mean_name,
type=bias.type,
shape=bias.shape,
dtype=bias.dtype,
persistable=True)
variance_vb = inner_block.create_var(
name=variance_name,
type=bias.type,
shape=bias.shape,
dtype=bias.dtype,
persistable=True)
mean_variances.add(mean_vb)
mean_variances.add(variance_vb)
......
......@@ -15,6 +15,8 @@
from argparse import ArgumentParser, RawDescriptionHelpFormatter
import yaml
import re
from ppdet.core.workspace import get_registered_modules, dump_value
__all__ = ['ColorTTY', 'ArgsParser']
......@@ -77,3 +79,73 @@ class ArgsParser(ArgumentParser):
cur[key] = {}
cur = cur[key]
return config
def print_total_cfg(config):
modules = get_registered_modules()
color_tty = ColorTTY()
green = '___{}___'.format(color_tty.colors.index('green') + 31)
styled = {}
for key in config.keys():
if not config[key]: # empty schema
continue
if key not in modules and not hasattr(config[key], '__dict__'):
styled[key] = config[key]
continue
elif key in modules:
module = modules[key]
else:
type_name = type(config[key]).__name__
if type_name in modules:
module = modules[type_name].copy()
module.update({
k: v
for k, v in config[key].__dict__.items()
if k in module.schema
})
key += " ({})".format(type_name)
default = module.find_default_keys()
missing = module.find_missing_keys()
mismatch = module.find_mismatch_keys()
extra = module.find_extra_keys()
dep_missing = []
for dep in module.inject:
if isinstance(module[dep], str) and module[dep] != '<value>':
if module[dep] not in modules: # not a valid module
dep_missing.append(dep)
else:
dep_mod = modules[module[dep]]
# empty dict but mandatory
if not dep_mod and dep_mod.mandatory():
dep_missing.append(dep)
override = list(
set(module.keys()) - set(default) - set(extra) - set(dep_missing))
replacement = {}
for name in set(override + default + extra + mismatch + missing):
new_name = name
if name in missing:
value = "<missing>"
else:
value = module[name]
if name in extra:
value = dump_value(value) + " <extraneous>"
elif name in mismatch:
value = dump_value(value) + " <type mismatch>"
elif name in dep_missing:
value = dump_value(value) + " <module config missing>"
elif name in override and value != '<missing>':
mark = green
new_name = mark + name
replacement[new_name] = value
styled[key] = replacement
buffer = yaml.dump(styled, default_flow_style=False, default_style='')
    buffer = re.sub(r"<missing>", "\033[31m<missing>\033[0m", buffer)
    buffer = re.sub(r"<extraneous>", "\033[33m<extraneous>\033[0m", buffer)
    buffer = re.sub(r"<type mismatch>", "\033[31m<type mismatch>\033[0m",
                    buffer)
    buffer = re.sub(r"<module config missing>",
                    "\033[31m<module config missing>\033[0m", buffer)
    buffer = re.sub(r"___(\d+)___(.*?):", "\033[\\1m\\2\033[0m:", buffer)
print(buffer)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
import os
import paddle.fluid as fluid
def nccl2_prepare(trainer_id, startup_prog, main_prog):
config = fluid.DistributeTranspilerConfig()
config.mode = "nccl2"
t = fluid.DistributeTranspiler(config=config)
t.transpile(
trainer_id,
trainers=os.environ.get('PADDLE_TRAINER_ENDPOINTS'),
current_endpoint=os.environ.get('PADDLE_CURRENT_ENDPOINT'),
startup_program=startup_prog,
program=main_prog)
def prepare_for_multi_process(exe, build_strategy, startup_prog, main_prog):
trainer_id = int(os.environ.get('PADDLE_TRAINER_ID', 0))
num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
if num_trainers < 2:
return
build_strategy.num_trainers = num_trainers
build_strategy.trainer_id = trainer_id
nccl2_prepare(trainer_id, startup_prog, main_prog)
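`nccl2_prepare` reads the trainer topology from environment variables that the distributed launcher normally exports. For reference, a two-trainer setup would look roughly like this (illustrative values):

```python
# Hypothetical two-trainer NCCL2 environment; these variables are
# normally exported by the distributed launcher and are shown here
# only to illustrate what nccl2_prepare() reads.
import os
os.environ['PADDLE_TRAINERS_NUM'] = '2'
os.environ['PADDLE_TRAINER_ID'] = '0'    # '1' on the second worker
os.environ['PADDLE_TRAINER_ENDPOINTS'] = '127.0.0.1:9000,127.0.0.1:9001'
os.environ['PADDLE_CURRENT_ENDPOINT'] = '127.0.0.1:9000'
```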
......@@ -81,7 +81,7 @@ def get_dataset_path(path, annotation, image_dir):
if _dataset_exists(path, annotation, image_dir):
return path
logger.info("Dataset {} not exists, try searching {} or "
    logger.info("Dataset {} is not valid for the reason above, try searching {} or "
"downloading dataset...".format(
osp.realpath(path), DATASET_HOME))
......@@ -125,7 +125,19 @@ def get_dataset_path(path, annotation, image_dir):
return data_dir
# not match any dataset in DATASETS
raise ValueError("{} not exists or unknow dataset type".format(path))
    raise ValueError("Dataset {} is not valid and cannot parse dataset type "
                     "'{}' for automatically downloading, which only supports "
                     "'voc' and 'coco' currently".format(path, osp.split(path)[-1]))
def map_path(url, root_dir):
# parse path after download to decompress under root_dir
fname = url.split('/')[-1]
zip_formats = ['.zip', '.tar', '.gz']
fpath = fname
for zip_format in zip_formats:
fpath = fpath.replace(zip_format, '')
return osp.join(root_dir, fpath)
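Since `map_path` is pure string manipulation, its effect is easy to verify directly (the URL below is illustrative):

```python
# map_path strips archive suffixes from the file name and joins the
# result under root_dir (illustrative URL).
print(map_path('https://example.com/models/blazeface.tar.gz', '/weights'))
# -> /weights/blazeface  ('.tar' and '.gz' are both stripped)
```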
def get_path(url, root_dir, md5sum=None):
......@@ -140,12 +152,7 @@ def get_path(url, root_dir, md5sum=None):
md5sum (str): md5 sum of download package
"""
# parse path after download to decompress under root_dir
fname = url.split('/')[-1]
zip_formats = ['.zip', '.tar', '.gz']
fpath = fname
for zip_format in zip_formats:
fpath = fpath.replace(zip_format, '')
fullpath = osp.join(root_dir, fpath)
fullpath = map_path(url, root_dir)
# For same zip file, decompressed directory name different
# from zip file name, rename by following map
......@@ -171,20 +178,23 @@ def _dataset_exists(path, annotation, image_dir):
Check if user define dataset exists
"""
if not osp.exists(path):
logger.info("Config dataset_dir {} not exits".format(path))
        logger.info("Config dataset_dir {} does not exist, "
                    "dataset config is not valid".format(path))
return False
if annotation:
annotation_path = osp.join(path, annotation)
if not osp.isfile(annotation_path):
logger.info("Config annotation {} is not a "
"file".format(annotation_path))
"file, dataset config is not "
"valid".format(annotation_path))
return False
if image_dir:
image_path = osp.join(path, image_dir)
if not osp.isdir(image_path):
logger.info("Config image_dir {} is not a "
"directory".format(image_path))
"directory, dataset config is not "
"valid".format(image_path))
return False
return True
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import numpy as np
from ppdet.data.source.widerface_loader import widerface_label
from ppdet.utils.coco_eval import bbox2out
import logging
logger = logging.getLogger(__name__)
__all__ = [
'get_shrink', 'bbox_vote', 'save_widerface_bboxes', 'save_fddb_bboxes',
'to_chw_bgr', 'bbox2out', 'get_category_info'
]
def to_chw_bgr(image):
"""
    Transpose image from HWC to CHW and from RGB to BGR.
    Args:
        image (np.array): an image with HWC layout and RGB channel order.
"""
# HWC to CHW
if len(image.shape) == 3:
image = np.swapaxes(image, 1, 2)
image = np.swapaxes(image, 1, 0)
    # RGB to BGR
image = image[[2, 1, 0], :, :]
return image
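A quick check of the layout conversion:

```python
# Quick check: HWC/RGB input becomes CHW/BGR output.
import numpy as np

img = np.zeros((4, 5, 3), dtype=np.uint8)  # H=4, W=5, C=3 (RGB)
img[..., 0] = 255                          # a pure red image
out = to_chw_bgr(img)
print(out.shape)       # (3, 4, 5)
print(out[2].max())    # 255: red is now the last channel (BGR order)
```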
def bbox_vote(det):
order = det[:, 4].ravel().argsort()[::-1]
det = det[order, :]
if det.shape[0] == 0:
dets = np.array([[10, 10, 20, 20, 0.002]])
det = np.empty(shape=[0, 5])
while det.shape[0] > 0:
# IOU
area = (det[:, 2] - det[:, 0] + 1) * (det[:, 3] - det[:, 1] + 1)
xx1 = np.maximum(det[0, 0], det[:, 0])
yy1 = np.maximum(det[0, 1], det[:, 1])
xx2 = np.minimum(det[0, 2], det[:, 2])
yy2 = np.minimum(det[0, 3], det[:, 3])
w = np.maximum(0.0, xx2 - xx1 + 1)
h = np.maximum(0.0, yy2 - yy1 + 1)
inter = w * h
o = inter / (area[0] + area[:] - inter)
# nms
merge_index = np.where(o >= 0.3)[0]
det_accu = det[merge_index, :]
det = np.delete(det, merge_index, 0)
if merge_index.shape[0] <= 1:
if det.shape[0] == 0:
try:
dets = np.row_stack((dets, det_accu))
except:
dets = det_accu
continue
det_accu[:, 0:4] = det_accu[:, 0:4] * np.tile(det_accu[:, -1:], (1, 4))
max_score = np.max(det_accu[:, 4])
det_accu_sum = np.zeros((1, 5))
det_accu_sum[:, 0:4] = np.sum(det_accu[:, 0:4],
axis=0) / np.sum(det_accu[:, -1:])
det_accu_sum[:, 4] = max_score
try:
dets = np.row_stack((dets, det_accu_sum))
except:
dets = det_accu_sum
dets = dets[0:750, :]
    # only keep detections with score >= 0.01
keep_index = np.where(dets[:, 4] >= 0.01)[0]
dets = dets[keep_index, :]
return dets
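`bbox_vote` repeatedly takes the current top-scoring box, merges all boxes with IoU >= 0.3 into one score-weighted box, then keeps at most 750 results with score >= 0.01. A toy run:

```python
# Toy run of bbox_vote: the two overlapping boxes are merged into one
# score-weighted box; the distant box survives on its own.
import numpy as np

det = np.array([
    [10, 10, 50, 50, 0.9],
    [12, 12, 52, 52, 0.8],       # IoU with the first box is about 0.83
    [200, 200, 240, 240, 0.6],
], dtype=np.float32)
out = bbox_vote(det)
print(out.shape)                 # (2, 5): the overlapping pair was merged
```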
def get_shrink(height, width):
"""
Args:
height (int): image height.
width (int): image width.
"""
# avoid out of memory
max_shrink_v1 = (0x7fffffff / 577.0 / (height * width))**0.5
max_shrink_v2 = ((678 * 1024 * 2.0 * 2.0) / (height * width))**0.5
def get_round(x, loc):
str_x = str(x)
if '.' in str_x:
str_before, str_after = str_x.split('.')
len_after = len(str_after)
if len_after >= 3:
str_final = str_before + '.' + str_after[0:loc]
return float(str_final)
else:
return x
max_shrink = get_round(min(max_shrink_v1, max_shrink_v2), 2) - 0.3
if max_shrink >= 1.5 and max_shrink < 2:
max_shrink = max_shrink - 0.1
elif max_shrink >= 2 and max_shrink < 3:
max_shrink = max_shrink - 0.2
elif max_shrink >= 3 and max_shrink < 4:
max_shrink = max_shrink - 0.3
elif max_shrink >= 4 and max_shrink < 5:
max_shrink = max_shrink - 0.4
elif max_shrink >= 5:
max_shrink = max_shrink - 0.5
shrink = max_shrink if max_shrink < 1 else 1
return shrink, max_shrink
def save_widerface_bboxes(image_path, bboxes_scores, output_dir):
image_name = image_path.split('/')[-1]
image_class = image_path.split('/')[-2]
odir = os.path.join(output_dir, image_class)
if not os.path.exists(odir):
os.makedirs(odir)
ofname = os.path.join(odir, '%s.txt' % (image_name[:-4]))
f = open(ofname, 'w')
f.write('{:s}\n'.format(image_class + '/' + image_name))
f.write('{:d}\n'.format(bboxes_scores.shape[0]))
for box_score in bboxes_scores:
xmin, ymin, xmax, ymax, score = box_score
f.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\n'.format(xmin, ymin, (
xmax - xmin + 1), (ymax - ymin + 1), score))
f.close()
logger.info("The predicted result is saved as {}".format(ofname))
def save_fddb_bboxes(bboxes_scores,
output_dir,
output_fname='pred_fddb_res.txt'):
if not os.path.exists(output_dir):
os.makedirs(output_dir)
predict_file = os.path.join(output_dir, output_fname)
f = open(predict_file, 'w')
    for image_path, dets in bboxes_scores.items():
f.write('{:s}\n'.format(image_path))
f.write('{:d}\n'.format(dets.shape[0]))
for box_score in dets:
xmin, ymin, xmax, ymax, score = box_score
width, height = xmax - xmin, ymax - ymin
f.write('{:.1f} {:.1f} {:.1f} {:.1f} {:.3f}\n'
.format(xmin, ymin, width, height, score))
logger.info("The predicted result is saved as {}".format(predict_file))
return predict_file
def get_category_info(anno_file=None,
with_background=True,
use_default_label=False):
if use_default_label or anno_file is None \
or not os.path.exists(anno_file):
        logger.info("Annotation file {} not found, loading "
                    "wider-face categories by default.".format(anno_file))
return widerfaceall_category_info(with_background)
else:
logger.info("Load categories from {}".format(anno_file))
return get_category_info_from_anno(anno_file, with_background)
def get_category_info_from_anno(anno_file, with_background=True):
"""
Get class id to category id map and category id
to category name map from annotation file.
Args:
anno_file (str): annotation file path
with_background (bool, default True):
whether load background as class 0.
"""
cats = []
with open(anno_file) as f:
for line in f.readlines():
cats.append(line.strip())
if cats[0] != 'background' and with_background:
cats.insert(0, 'background')
if cats[0] == 'background' and not with_background:
cats = cats[1:]
clsid2catid = {i: i for i in range(len(cats))}
catid2name = {i: name for i, name in enumerate(cats)}
return clsid2catid, catid2name
def widerfaceall_category_info(with_background=True):
"""
Get class id to category id map and category id
    to category name map of the wider_face dataset
Args:
with_background (bool, default True):
whether load background as class 0.
"""
label_map = widerface_label(with_background)
label_map = sorted(label_map.items(), key=lambda x: x[1])
cats = [l[0] for l in label_map]
if with_background:
cats.insert(0, 'background')
clsid2catid = {i: i for i in range(len(cats))}
catid2name = {i: name for i, name in enumerate(cats)}
return clsid2catid, catid2name
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import time
import multiprocessing
import numpy as np
import datetime
from collections import deque
import sys
sys.path.append("../../")
from paddle.fluid.contrib.slim import Compressor
from paddle.fluid.framework import IrGraph
from paddle.fluid import core
from paddle.fluid.contrib.slim.quantization import QuantizationTransformPass
from paddle.fluid.contrib.slim.quantization import QuantizationFreezePass
from paddle.fluid.contrib.slim.quantization import ConvertToInt8Pass
from paddle.fluid.contrib.slim.quantization import TransformForMobilePass
def set_paddle_flags(**kwargs):
for key, value in kwargs.items():
if os.environ.get(key, None) is None:
os.environ[key] = str(value)
# NOTE(paddle-dev): All of these flags should be set before
# `import paddle`. Otherwise, it would not take any effect.
set_paddle_flags(
FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory
)
from paddle import fluid
from ppdet.core.workspace import load_config, merge_config, create
from ppdet.data.data_feed import create_reader
from ppdet.utils.eval_utils import parse_fetches, eval_results
from ppdet.utils.stats import TrainingStats
from ppdet.utils.cli import ArgsParser
from ppdet.utils.check import check_gpu
import ppdet.utils.checkpoint as checkpoint
from ppdet.modeling.model_input import create_feed
import logging
FORMAT = '%(asctime)s-%(levelname)s: %(message)s'
logging.basicConfig(level=logging.INFO, format=FORMAT)
logger = logging.getLogger(__name__)
def eval_run(exe, compile_program, reader, keys, values, cls, test_feed):
"""
Run evaluation program, return program outputs.
"""
iter_id = 0
results = []
images_num = 0
start_time = time.time()
has_bbox = 'bbox' in keys
for data in reader():
data = test_feed.feed(data)
feed_data = {'image': data['image'],
'im_size': data['im_size']}
outs = exe.run(compile_program,
feed=feed_data,
fetch_list=values[0],
return_numpy=False)
outs.append(data['gt_box'])
outs.append(data['gt_label'])
outs.append(data['is_difficult'])
res = {
k: (np.array(v), v.recursive_sequence_lengths())
for k, v in zip(keys, outs)
}
results.append(res)
if iter_id % 100 == 0:
logger.info('Test iter {}'.format(iter_id))
iter_id += 1
images_num += len(res['bbox'][1][0]) if has_bbox else 1
logger.info('Test finish iter {}'.format(iter_id))
end_time = time.time()
fps = images_num / (end_time - start_time)
if has_bbox:
logger.info('Total number of images: {}, inference time: {} fps.'.
format(images_num, fps))
else:
logger.info('Total iteration: {}, inference time: {} batch/s.'.format(
images_num, fps))
return results
def main():
cfg = load_config(FLAGS.config)
if 'architecture' in cfg:
main_arch = cfg.architecture
else:
raise ValueError("'architecture' not specified in config file.")
merge_config(FLAGS.opt)
if 'log_iter' not in cfg:
cfg.log_iter = 20
# check if set use_gpu=True in paddlepaddle cpu version
check_gpu(cfg.use_gpu)
if cfg.use_gpu:
devices_num = fluid.core.get_cuda_device_count()
else:
devices_num = int(
os.environ.get('CPU_NUM', multiprocessing.cpu_count()))
if 'eval_feed' not in cfg:
eval_feed = create(main_arch + 'EvalFeed')
else:
eval_feed = create(cfg.eval_feed)
place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
_, test_feed_vars = create_feed(eval_feed, iterable=True)
eval_reader = create_reader(eval_feed, args_path=FLAGS.dataset_dir)
#eval_pyreader.decorate_sample_list_generator(eval_reader, place)
test_data_feed = fluid.DataFeeder(test_feed_vars.values(), place)
assert os.path.exists(FLAGS.model_path)
infer_prog, feed_names, fetch_targets = fluid.io.load_inference_model(
dirname=FLAGS.model_path, executor=exe,
model_filename=FLAGS.model_name,
params_filename=FLAGS.params_name)
eval_keys = ['bbox', 'gt_box', 'gt_label', 'is_difficult']
eval_values = ['multiclass_nms_0.tmp_0', 'gt_box', 'gt_label', 'is_difficult']
eval_cls = []
eval_values[0] = fetch_targets[0]
results = eval_run(exe, infer_prog, eval_reader,
eval_keys, eval_values, eval_cls, test_data_feed)
resolution = None
if 'mask' in results[0]:
resolution = model.mask_head.resolution
eval_results(results, eval_feed, cfg.metric, cfg.num_classes,
resolution, False, FLAGS.output_eval)
if __name__ == '__main__':
parser = ArgsParser()
parser.add_argument(
"-m",
"--model_path",
default=None,
type=str,
help="path of checkpoint")
parser.add_argument(
"--output_eval",
default=None,
type=str,
help="Evaluation directory, default is current directory.")
parser.add_argument(
"-d",
"--dataset_dir",
default=None,
type=str,
help="Dataset path, same as DataFeed.dataset.dataset_dir")
parser.add_argument(
"--model_name",
default='model',
type=str,
help="model file name to load_inference_model")
parser.add_argument(
"--params_name",
default='params',
type=str,
help="params file name to load_inference_model")
FLAGS = parser.parse_args()
main()
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import os
import sys
import glob
import numpy as np
from PIL import Image
sys.path.append("../../")
def set_paddle_flags(**kwargs):
for key, value in kwargs.items():
if os.environ.get(key, None) is None:
os.environ[key] = str(value)
# NOTE(paddle-dev): All of these flags should be set before
# `import paddle`. Otherwise, it would not take any effect.
set_paddle_flags(
FLAGS_eager_delete_tensor_gb=0, # enable GC to save memory
)
from paddle import fluid
from ppdet.utils.cli import print_total_cfg
from ppdet.core.workspace import load_config, merge_config, create
from ppdet.modeling.model_input import create_feed
from ppdet.data.data_feed import create_reader
from ppdet.utils.eval_utils import parse_fetches
from ppdet.utils.cli import ArgsParser
from ppdet.utils.check import check_gpu
from ppdet.utils.visualizer import visualize_results
import ppdet.utils.checkpoint as checkpoint
import logging
FORMAT = '%(asctime)s-%(levelname)s: %(message)s'
logging.basicConfig(level=logging.INFO, format=FORMAT)
logger = logging.getLogger(__name__)
def get_save_image_name(output_dir, image_path):
"""
Get save image name from source image path.
"""
if not os.path.exists(output_dir):
os.makedirs(output_dir)
image_name = os.path.split(image_path)[-1]
name, ext = os.path.splitext(image_name)
return os.path.join(output_dir, "{}".format(name)) + ext
def get_test_images(infer_dir, infer_img):
"""
Get image path list in TEST mode
"""
assert infer_img is not None or infer_dir is not None, \
"--infer_img or --infer_dir should be set"
assert infer_img is None or os.path.isfile(infer_img), \
"{} is not a file".format(infer_img)
assert infer_dir is None or os.path.isdir(infer_dir), \
"{} is not a directory".format(infer_dir)
images = []
# infer_img has a higher priority
if infer_img and os.path.isfile(infer_img):
images.append(infer_img)
return images
infer_dir = os.path.abspath(infer_dir)
assert os.path.isdir(infer_dir), \
"infer_dir {} is not a directory".format(infer_dir)
exts = ['jpg', 'jpeg', 'png', 'bmp']
exts += [ext.upper() for ext in exts]
for ext in exts:
images.extend(glob.glob('{}/*.{}'.format(infer_dir, ext)))
assert len(images) > 0, "no image found in {}".format(infer_dir)
logger.info("Found {} inference images in total.".format(len(images)))
return images
def main():
cfg = load_config(FLAGS.config)
if 'architecture' in cfg:
main_arch = cfg.architecture
else:
raise ValueError("'architecture' not specified in config file.")
merge_config(FLAGS.opt)
# check if set use_gpu=True in paddlepaddle cpu version
check_gpu(cfg.use_gpu)
# print_total_cfg(cfg)
if 'test_feed' not in cfg:
test_feed = create(main_arch + 'TestFeed')
else:
test_feed = create(cfg.test_feed)
test_images = get_test_images(FLAGS.infer_dir, FLAGS.infer_img)
test_feed.dataset.add_images(test_images)
place = fluid.CUDAPlace(0) if cfg.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
infer_prog, feed_var_names, fetch_list = fluid.io.load_inference_model(
dirname=FLAGS.model_path, model_filename=FLAGS.model_name,
params_filename=FLAGS.params_name,
executor=exe)
reader = create_reader(test_feed)
feeder = fluid.DataFeeder(place=place, feed_list=feed_var_names,
program=infer_prog)
# parse infer fetches
assert cfg.metric in ['COCO', 'VOC'], \
"unknown metric type {}".format(cfg.metric)
extra_keys = []
if cfg['metric'] == 'COCO':
extra_keys = ['im_info', 'im_id', 'im_shape']
if cfg['metric'] == 'VOC':
extra_keys = ['im_id', 'im_shape']
keys, values, _ = parse_fetches({'bbox':fetch_list}, infer_prog, extra_keys)
# parse dataset category
if cfg.metric == 'COCO':
from ppdet.utils.coco_eval import bbox2out, mask2out, get_category_info
if cfg.metric == "VOC":
from ppdet.utils.voc_eval import bbox2out, get_category_info
anno_file = getattr(test_feed.dataset, 'annotation', None)
with_background = getattr(test_feed, 'with_background', True)
use_default_label = getattr(test_feed, 'use_default_label', False)
clsid2catid, catid2name = get_category_info(anno_file, with_background,
use_default_label)
# whether output bbox is normalized in model output layer
is_bbox_normalized = False
# use tb-paddle to log image
if FLAGS.use_tb:
from tb_paddle import SummaryWriter
tb_writer = SummaryWriter(FLAGS.tb_log_dir)
tb_image_step = 0
tb_image_frame = 0 # each frame can display ten pictures at most.
imid2path = reader.imid2path
keys = ['bbox']
for iter_id, data in enumerate(reader()):
feed_data = [[d[0], d[1]] for d in data]
outs = exe.run(infer_prog,
feed=feeder.feed(feed_data),
fetch_list=fetch_list,
return_numpy=False)
res = {
k: (np.array(v), v.recursive_sequence_lengths())
for k, v in zip(keys, outs)
}
res['im_id'] = [[d[2] for d in data]]
logger.info('Infer iter {}'.format(iter_id))
bbox_results = None
mask_results = None
if 'bbox' in res:
bbox_results = bbox2out([res], clsid2catid, is_bbox_normalized)
if 'mask' in res:
mask_results = mask2out([res], clsid2catid,
model.mask_head.resolution)
# visualize result
im_ids = res['im_id'][0]
for im_id in im_ids:
image_path = imid2path[int(im_id)]
image = Image.open(image_path).convert('RGB')
# use tb-paddle to log original image
if FLAGS.use_tb:
original_image_np = np.array(image)
tb_writer.add_image(
"original/frame_{}".format(tb_image_frame),
original_image_np,
tb_image_step,
dataformats='HWC')
image = visualize_results(image,
int(im_id), catid2name,
FLAGS.draw_threshold, bbox_results,
mask_results)
# use tb-paddle to log image with bbox
if FLAGS.use_tb:
infer_image_np = np.array(image)
tb_writer.add_image(
"bbox/frame_{}".format(tb_image_frame),
infer_image_np,
tb_image_step,
dataformats='HWC')
tb_image_step += 1
if tb_image_step % 10 == 0:
tb_image_step = 0
tb_image_frame += 1
save_name = get_save_image_name(FLAGS.output_dir, image_path)
logger.info("Detection bbox results save in {}".format(save_name))
image.save(save_name, quality=95)
if __name__ == '__main__':
parser = ArgsParser()
parser.add_argument(
"--infer_dir",
type=str,
default=None,
help="Directory for images to perform inference on.")
parser.add_argument(
"--infer_img",
type=str,
default=None,
help="Image path, has higher priority over --infer_dir")
parser.add_argument(
"--output_dir",
type=str,
default="output",
help="Directory for storing the output visualization files.")
parser.add_argument(
"--draw_threshold",
type=float,
default=0.5,
help="Threshold to reserve the result for visualization.")
parser.add_argument(
"--use_tb",
type=bool,
default=False,
help="whether to record the data to Tensorboard.")
parser.add_argument(
'--tb_log_dir',
type=str,
default="tb_log_dir/image",
help='Tensorboard logging directory for image.')
parser.add_argument(
'--model_path',
type=str,
default=None,
help="inference model path")
parser.add_argument(
'--model_name',
type=str,
default='__model__.infer',
help="model filename for inference model")
parser.add_argument(
'--params_name',
type=str,
default='__params__',
help="params filename for inference model")
FLAGS = parser.parse_args()
main()
> Please install PaddlePaddle 1.6 or a later version before running this example.
# Convolution Channel Pruning Example for Detection Models
## Overview
This example uses the [convolution channel pruning strategy](https://github.com/PaddlePaddle/models/blob/develop/PaddleSlim/docs/tutorial.md#2-%E5%8D%B7%E7%A7%AF%E6%A0%B8%E5%89%AA%E8%A3%81%E5%8E%9F%E7%90%86) provided by PaddleSlim to compress the models in the detection library.
Before reading this example, you may want to be familiar with:
- <a href="../../README_cn.md">the standard training workflow of the detection library</a>
- [the PaddleSlim usage documentation](https://github.com/PaddlePaddle/models/blob/develop/PaddleSlim/docs/usage.md)
## Configuration Files
For how to write the configuration files, refer to:
- [how to write a PaddleSlim configuration file](https://github.com/PaddlePaddle/models/blob/develop/PaddleSlim/docs/usage.md#122-%E9%85%8D%E7%BD%AE%E6%96%87%E4%BB%B6%E7%9A%84%E4%BD%BF%E7%94%A8)
- [how to configure the pruning strategy](https://github.com/PaddlePaddle/models/blob/develop/PaddleSlim/docs/usage.md#22-%E6%A8%A1%E5%9E%8B%E9%80%9A%E9%81%93%E5%89%AA%E8%A3%81)
The `pruned_params` option specifies the parameters to be pruned; it must be set according to the structure of the network at hand.
Take the MobileNetV1-YOLOv3 model as an example. Its convolutions fall into three kinds: ordinary convolutions in the backbone, `depthwise convolution`s in the backbone, and ordinary convolutions inside the `yolo block`s. PaddleSlim cannot yet prune a `depthwise convolution` directly, because changing its `channel` count would affect both the preceding and the following convolution layers, so here we only prune the ordinary convolutions in the backbone and in the `yolo block`s.
The model structure can be visualized as follows:
```
from paddle.fluid.framework import IrGraph
from paddle.fluid import core

# train_prog is the fluid.Program whose structure you want to inspect.
graph = IrGraph(core.Graph(train_prog.desc), for_test=True)
marked_nodes = set()
for op in graph.all_op_nodes():
    print(op.name())
    # highlight all convolution ops in the rendered graph
    if op.name().find('conv') > -1:
        marked_nodes.add(op)
# writes a Graphviz 'forward' file to the current directory
graph.draw('.', 'forward', marked_nodes)
```
The visualized structure of the MobileNetV1-YOLOv3 model in this example: <a href="./images/MobileNetV1-YoloV3.pdf">MobileNetV1-YoloV3.pdf</a>
The names and shapes of the target convolution layers' parameters can be inspected with:
```
for param in fluid.default_main_program().global_block().all_parameters():
    if 'weights' in param.name:
        print(param.name, param.shape)
```
From the visualization we can rule out the convolution layers whose outputs feed later concat layers, which leaves the following parameters to prune:
```
conv2_1_sep_weights
conv2_2_sep_weights
conv3_1_sep_weights
conv4_1_sep_weights
conv5_1_sep_weights
conv5_2_sep_weights
conv5_3_sep_weights
conv5_4_sep_weights
conv5_5_sep_weights
conv5_6_sep_weights
yolo_block.0.0.0.conv.weights
yolo_block.0.0.1.conv.weights
yolo_block.0.1.0.conv.weights
yolo_block.0.1.1.conv.weights
yolo_block.1.0.0.conv.weights
yolo_block.1.0.1.conv.weights
yolo_block.1.1.0.conv.weights
yolo_block.1.1.1.conv.weights
yolo_block.1.2.conv.weights
yolo_block.2.0.0.conv.weights
yolo_block.2.0.1.conv.weights
yolo_block.2.1.1.conv.weights
yolo_block.2.2.conv.weights
yolo_block.2.tip.conv.weights
```
In summary, we set `pruned_params` in the MobileNetV1 configuration file to the following regular expression:
```
(conv2_1_sep_weights)|(conv2_2_sep_weights)|(conv3_1_sep_weights)|(conv4_1_sep_weights)|(conv5_1_sep_weights)|(conv5_2_sep_weights)|(conv5_3_sep_weights)|(conv5_4_sep_weights)|(conv5_5_sep_weights)|(conv5_6_sep_weights)|(yolo_block.0.0.0.conv.weights)|(yolo_block.0.0.1.conv.weights)|(yolo_block.0.1.0.conv.weights)|(yolo_block.0.1.1.conv.weights)|(yolo_block.1.0.0.conv.weights)|(yolo_block.1.0.1.conv.weights)|(yolo_block.1.1.0.conv.weights)|(yolo_block.1.1.1.conv.weights)|(yolo_block.1.2.conv.weights)|(yolo_block.2.0.0.conv.weights)|(yolo_block.2.0.1.conv.weights)|(yolo_block.2.1.1.conv.weights)|(yolo_block.2.2.conv.weights)|(yolo_block.2.tip.conv.weights)
```
The same procedure works for other detection models: inspect the parameter naming pattern, then write a suitable regular expression that prunes the right parameters; the pattern can be verified as in the sketch below.
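Before committing the expression to the configuration file, it can be sanity-checked offline with Python's `re` module. The following is a minimal sketch: the pattern is an abbreviated stand-in for the full expression above, and the parameter names are hypothetical examples rather than an exhaustive list.
```
import re

# Abbreviated stand-in for the full pruned_params expression above.
PRUNED_PARAMS_RE = re.compile(
    r'(conv\d+_\d+_sep_weights)|(yolo_block\.\d+(\.\d+)*(\.tip)?\.conv\.weights)')

# Hypothetical names; in practice iterate over
# fluid.default_main_program().global_block().all_parameters().
param_names = [
    'conv2_1_sep_weights',            # backbone pointwise conv -> prune
    'conv2_1_dw_weights',             # depthwise conv -> must NOT match
    'yolo_block.0.0.0.conv.weights',  # yolo block conv -> prune
    'yolo_output.0.conv.weights',     # feeds the output/concat -> must NOT match
]
for name in param_names:
    print(name, 'pruned' if PRUNED_PARAMS_RE.match(name) else 'kept')
```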
## Training
The compression script compress.py is written following <a href="../../tools/train.py">PaddleDetection/tools/train.py</a>.
It defines a Compressor object, which executes the compression task.
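Reduced to its compression-specific core, the script has roughly the following shape. This is a sketch, not the verbatim compress.py: the programs, readers and feed/fetch lists are assumed to be built exactly as tools/train.py builds them, `FLAGS.slim_file` stands for the value of the `-s` option, and the in/out node names are taken from the YOLOv3 configs in this directory.
```
from paddle.fluid.contrib.slim import Compressor

com = Compressor(
    place,                                # fluid.CUDAPlace(0) or CPUPlace()
    fluid.global_scope(),
    train_prog,
    train_reader=train_reader,
    train_feed_list=train_feed_list,      # [(feed_var_name, var.name), ...]
    train_fetch_list=train_fetch_list,    # e.g. [('loss', loss.name)]
    eval_program=eval_prog,
    eval_reader=eval_reader,
    eval_feed_list=eval_feed_list,
    eval_fetch_list=eval_fetch_list,      # e.g. [('map', map_var.name)]
    save_eval_model=True,                 # False skips saving eval models
    prune_infer_model=[['image', 'im_size'], ['multiclass_nms_0.tmp_0']],
    train_optimizer=None)                 # optimizer is already in train_prog
com.config(FLAGS.slim_file)               # e.g. yolov3_mobilenet_v1_slim.yaml
com.run()
```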
### Running the example
Step 1: select the GPU card
```
export CUDA_VISIBLE_DEVICES=0
```
Step 2: start training
Train on 8 cards using the configuration file provided by PaddleDetection:
```
python compress.py \
-s yolov3_mobilenet_v1_slim.yaml \
-c ../../configs/yolov3_mobilenet_v1_voc.yml \
-o max_iters=258 \
-d "../../dataset/voc"
```
> The max_iters option is overridden on the command line because PaddleDetection training iterates in units of `batch`es and has no notion of an `epoch`, while PaddleSlim needs to know which `epoch` training has reached; `max_iters` must therefore be set to the number of `batch`es in one `epoch`.
To train with a different number of cards, adjust the following parameters in `yolov3_mobilenet_v1_voc.yml`:
- **max_iters:** the number of batches in one `epoch`; set it to `total_num / batch_size`, where `total_num` is the total number of training samples and `batch_size` is the total batch size across all cards.
- **YoloTrainFeed.batch_size:** the batch size on a single card, limited by GPU memory.
- **LearningRate.base_lr:** scale `base_lr` with the total multi-card `batch_size`; the two are positively correlated, so a simple proportional adjustment works.
- **LearningRate.schedulers.PiecewiseDecay.milestones:** adjust according to the change in batch size.
- **LearningRate.schedulers.PiecewiseDecay.LinearWarmup.steps:** adjust according to the change in batch size (the arithmetic is sketched right after this list).
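The arithmetic behind these adjustments can be written out directly. In the sketch below, `total_num` is inferred from the 8-card baseline above (max_iters=258 at a total batch size of 64); all numbers are illustrative assumptions, not measured values:
```
# Scaling max_iters and base_lr when the number of cards changes.
total_num = 258 * 64        # ~16.5k training samples, inferred from baseline
baseline_total_batch = 64   # 8 cards x per-card batch size 8 (assumption)
baseline_lr = 0.001

cards = 2
batch_per_card = 16         # YoloTrainFeed.batch_size
total_batch = cards * batch_per_card                        # 32
max_iters = total_num // total_batch                        # ~516 batches/epoch
base_lr = baseline_lr * total_batch / baseline_total_batch  # 0.0005

print(max_iters, base_lr)
```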
The following 4-card example overrides the parameters of `yolov3_mobilenet_v1_voc.yml` from the command line:
```
python compress.py \
-s yolov3_mobilenet_v1_slim.yaml \
-c ../../configs/yolov3_mobilenet_v1_voc.yml \
-o max_iters=258 \
-o YoloTrainFeed.batch_size=16 \
-d "../../dataset/voc"
```
The following is a 2-card example. Constrained by GPU memory, the per-card `batch_size` stays the same, so the total `batch_size` is halved; `base_lr` is halved accordingly, the number of batches per epoch doubles, and the learning-rate schedule has to be adjusted to match:
```
python compress.py \
-s yolov3_mobilenet_v1_slim.yaml \
-c ../../configs/yolov3_mobilenet_v1_voc.yml \
-o max_iters=516 \
    -o LearningRate.base_lr=0.0005 \
    -o YoloTrainFeed.batch_size=16 \
-o LearningRate.schedulers='[!PiecewiseDecay {gamma: 0.1, milestones: [110000, 124000]}, !LinearWarmup {start_factor: 0., steps: 2000}]' \
-d "../../dataset/voc"
```
Run `python compress.py --help` to list the configurable options.
Run `python ../../tools/configure.py ${option_name} help` to see how to override options of `yolov3_mobilenet_v1_voc.yml` from the command line.
### Checkpointing
If `checkpoint_path` is set in the configuration file, checkpoints are saved automatically while the compression task runs. When a task is interrupted,
restarting it automatically loads the latest checkpoint (by numeric order) from under `checkpoint_path`. If you do not want a restarted task to resume from a checkpoint,
change `checkpoint_path` in the configuration file or empty the directory it points to.
> Note: the configuration is not stored in checkpoints, so any changes made to the configuration file before a restart will take effect.
## Evaluation
If `checkpoint_path` is set in the configuration file, a compressed model for evaluation is saved every epoch
under `${checkpoint_path}/${epoch_id}/eval_model/`, as two files: `__model__` and `__params__`.
`__model__` stores the model structure and `__params__` stores the parameters.
If you do not need the evaluation models, set the `save_eval_model` option to False (default: True) when constructing the Compressor object.
## Inference
If `checkpoint_path` is set in the configuration file and the `prune_infer_model` option is passed when constructing the Compressor object, an `inference model` is
saved every epoch. It is obtained by removing the redundant operators from eval_program.
The model is saved under `${checkpoint_path}/${epoch_id}/eval_model/` as two files: `__model__.infer` and `__params__`.
`__model__.infer` stores the model structure and `__params__` stores the parameters.
For more about the `prune_infer_model` option, see: [Introduction to the Compressor](https://github.com/PaddlePaddle/models/blob/develop/PaddleSlim/docs/usage.md#121-%E5%A6%82%E4%BD%95%E6%94%B9%E5%86%99%E6%99%AE%E9%80%9A%E8%AE%AD%E7%BB%83%E8%84%9A%E6%9C%AC)
### Python inference
The script <a href="../infer.py">PaddleDetection/tools/infer.py</a> shows how to load the inference model and run prediction with the fluid Python API.
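As a minimal sketch of that loading-and-prediction step (assuming the `__model__.infer`/`__params__` files described above; `model_dir`, `image_tensor` and `im_size_tensor` are hypothetical placeholders for your own model path and preprocessed inputs):
```
import numpy as np
from paddle import fluid

place = fluid.CUDAPlace(0)  # or fluid.CPUPlace()
exe = fluid.Executor(place)

# model_dir points at ${checkpoint_path}/${epoch_id}/eval_model/
infer_prog, feed_names, fetch_targets = fluid.io.load_inference_model(
    dirname=model_dir,
    executor=exe,
    model_filename='__model__.infer',
    params_filename='__params__')

# 'image' and 'im_size' are the input nodes of the YOLOv3 models here.
outs = exe.run(infer_prog,
               feed={'image': image_tensor, 'im_size': im_size_tensor},
               fetch_list=fetch_targets,
               return_numpy=False)
# multiclass NMS output: one row per box, [label, score, x1, y1, x2, y2]
bboxes = np.array(outs[0])
```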
### PaddleLite
The inference model produced by this example can be loaded and used directly with PaddleLite.
For how to use PaddleLite, see: [the PaddleLite usage documentation](https://github.com/PaddlePaddle/Paddle-Lite/wiki#%E4%BD%BF%E7%94%A8)
## Results
### MobileNetV1-YOLO-V3
| FLOPs | mAP | model size | Paddle Fluid inference time (ms) | Paddle Lite inference time (ms) |
|---|---|---|---|---|
|baseline|- |- |- |-|
|-10%|- |- |- |-|
|-30%|- |- |- |-|
|-50%|- |- |- |-|
## FAQ
version: 1.0
pruners:
pruner_1:
class: 'StructurePruner'
pruning_axis:
'*': 0
criterions:
'*': 'l1_norm'
strategies:
uniform_pruning_strategy:
class: 'UniformPruneStrategy'
pruner: 'pruner_1'
start_epoch: 0
target_ratio: 0.5
pruned_params: '(conv2_1_sep_weights)|(conv2_2_sep_weights)|(conv3_1_sep_weights)|(conv4_1_sep_weights)|(conv5_1_sep_weights)|(conv5_2_sep_weights)|(conv5_3_sep_weights)|(conv5_4_sep_weights)|(conv5_5_sep_weights)|(conv5_6_sep_weights)|(yolo_block.0.0.0.conv.weights)|(yolo_block.0.0.1.conv.weights)|(yolo_block.0.1.0.conv.weights)|(yolo_block.0.1.1.conv.weights)|(yolo_block.1.0.0.conv.weights)|(yolo_block.1.0.1.conv.weights)|(yolo_block.1.1.0.conv.weights)|(yolo_block.1.1.1.conv.weights)|(yolo_block.1.2.conv.weights)|(yolo_block.2.0.0.conv.weights)|(yolo_block.2.0.1.conv.weights)|(yolo_block.2.1.1.conv.weights)|(yolo_block.2.2.conv.weights)|(yolo_block.2.tip.conv.weights)'
metric_name: 'acc_top1'
compressor:
epoch: 271
eval_epoch: 10
#init_model: './checkpoints/0' # Please enable this option for loading checkpoint.
checkpoint_path: './checkpoints/'
strategies:
- uniform_pruning_strategy
version: 1.0
strategies:
quantization_strategy:
class: 'QuantizationStrategy'
start_epoch: 0
end_epoch: 4
float_model_save_path: './output/yolov3/float'
mobile_model_save_path: './output/yolov3/mobile'
int8_model_save_path: './output/yolov3/int8'
weight_bits: 8
activation_bits: 8
weight_quantize_type: 'abs_max'
activation_quantize_type: 'moving_average_abs_max'
save_in_nodes: ['image', 'im_size']
save_out_nodes: ['multiclass_nms_0.tmp_0']
compressor:
epoch: 5
checkpoint_path: './checkpoints/yolov3/'
strategies:
- quantization_strategy
architecture: YOLOv3
train_feed: YoloTrainFeed
eval_feed: YoloEvalFeed
test_feed: YoloTestFeed
use_gpu: true
max_iters: 1000
log_smooth_window: 20
save_dir: output
snapshot_iter: 2000
metric: VOC
map_type: 11point
pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/yolov3_mobilenet_v1_voc.tar
weights: output/yolov3_mobilenet_v1_voc/model_final
num_classes: 20
YOLOv3:
backbone: MobileNet
yolo_head: YOLOv3Head
MobileNet:
norm_type: sync_bn
norm_decay: 0.
conv_group_scale: 1
with_extra_blocks: false
YOLOv3Head:
anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
anchors: [[10, 13], [16, 30], [33, 23],
[30, 61], [62, 45], [59, 119],
[116, 90], [156, 198], [373, 326]]
norm_decay: 0.
ignore_thresh: 0.7
label_smooth: false
nms:
background_label: -1
keep_top_k: 100
nms_threshold: 0.45
nms_top_k: 1000
normalized: false
score_threshold: 0.01
LearningRate:
base_lr: 0.0001
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones:
- 1000
- 2000
#- !LinearWarmup
#start_factor: 0.
#steps: 1000
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0005
type: L2
YoloTrainFeed:
batch_size: 8
dataset:
dataset_dir: ../../dataset/voc
annotation: VOCdevkit/VOC_all/ImageSets/Main/train.txt
image_dir: VOCdevkit/VOC_all/JPEGImages
use_default_label: true
num_workers: 8
bufsize: 128
use_process: true
mixup_epoch: 250
YoloEvalFeed:
batch_size: 8
image_shape: [3, 608, 608]
dataset:
dataset_dir: ../../dataset/voc
annotation: VOCdevkit/VOC_all/ImageSets/Main/val.txt
image_dir: VOCdevkit/VOC_all/JPEGImages
use_default_label: true
YoloTestFeed:
batch_size: 1
image_shape: [3, 608, 608]
dataset:
use_default_label: true
......@@ -33,15 +33,14 @@ set_paddle_flags(
import paddle.fluid as fluid
from tools.configure import print_total_cfg
from ppdet.utils.eval_utils import parse_fetches, eval_run, eval_results, json_eval_results
import ppdet.utils.checkpoint as checkpoint
from ppdet.utils.cli import ArgsParser
from ppdet.utils.check import check_gpu
from ppdet.modeling.model_input import create_feed
from ppdet.data.data_feed import create_reader
from ppdet.core.workspace import load_config, merge_config, create
import time
from ppdet.utils.cli import print_total_cfg
from ppdet.utils.cli import ArgsParser
import logging
FORMAT = '%(asctime)s-%(levelname)s: %(message)s'
......@@ -105,9 +104,8 @@ def main():
# load model
exe.run(startup_prog)
if 'weights' in cfg:
checkpoint.load_pretrain(exe, eval_prog, cfg.weights)
checkpoint.load_params(exe, eval_prog, cfg.weights)
instance_num = 5000.0
assert cfg.metric in ['COCO', 'VOC'], \
"unknown metric type {}".format(cfg.metric)
extra_keys = []
......@@ -115,7 +113,6 @@ def main():
extra_keys = ['im_info', 'im_id', 'im_shape']
if cfg.metric == 'VOC':
extra_keys = ['gt_box', 'gt_label', 'is_difficult']
instance_num = 4952.0
keys, values, cls = parse_fetches(fetches, eval_prog, extra_keys)
......@@ -125,11 +122,7 @@ def main():
callable(model.is_bbox_normalized):
is_bbox_normalized = model.is_bbox_normalized()
t1 = time.time()
results = eval_run(exe, compile_program, pyreader, keys, values, cls)
t2 = time.time()
speed = instance_num / (t2 - t1)
print("Inference time: {} fps".format(speed))
# evaluation
resolution = None
......
......@@ -21,9 +21,10 @@
Note:
1. StarGAN, AttGAN and STGAN must be trained on GPU, since the operations required by the gradient penalty currently only support GPU.
2. The GAN models have so far only been validated for single-machine, single-card training and prediction.
3. The CGAN and DCGAN models are trained on the MNIST dataset; StarGAN, AttGAN and STGAN use the CelebA dataset. The datasets supported by Pix2Pix and CycleGAN are listed in cycle_pix_dataset in download.py.
3. The CGAN and DCGAN models are trained on the MNIST dataset; StarGAN, AttGAN and STGAN use the CelebA dataset. The datasets supported by Pix2Pix and CycleGAN are listed in cycle_pix_dataset in download.py. The cityscapes dataset must be downloaded from the [official site](https://www.cityscapes-dataset.com); after downloading, process it with `scripts/prepare_cityscapes_dataset.py`, name the processed folder cityscapes and place it under the data directory.
4. PaddlePaddle 1.5.1 and earlier do not support the instance norm added to the discriminator of the AttGAN and STGAN models. To add instance norm to the discriminator, build the develop branch from source and install it.
5. Intermediate result images are saved in the ${output_dir}/test folder. For Pix2Pix, inputA and inputB are the input images in the two styles and fakeB is the generated image; for CycleGAN, inputA is the input image, fakeB is the image generated from inputA, and cycA is the reconstruction of inputA obtained by passing fakeB back through the generator; for StarGAN, AttGAN and STGAN, the first row shows the original images and each subsequent row shows one attribute transformation.
6. The test_list file used during infer has the same format as the train_list used during training: the first line is the number of samples, the second line lists the attributes, and in each subsequent line the first field is the image name, followed by -1/1 flags indicating whether the image has each attribute (1 = has it, -1 = does not); a hypothetical sample is shown below.
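As a concrete illustration of that format, a hypothetical two-sample test_list restricted to three CelebA attributes might look like:
```
2
Black_Hair Blond_Hair Brown_Hair
000001.jpg -1 1 -1
000002.jpg 1 -1 -1
```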
The directory structure of the image generation model library is as follows:
```
......@@ -139,6 +140,7 @@ StarGAN, AttGAN and STGAN require the [Celeba](http://mmlab.ie.cuhk.edu.hk/projects
python infer.py \
--model_net=$(StarGAN_or_AttGAN_or_STGAN) \
--init_model=$(path_to_init_model)\
--test_list=$(path_to_test_list)\
--dataset_dir=$(path_to_data)
The results of Pix2Pix and CycleGAN are shown in the figure:
......
......@@ -153,8 +153,8 @@ if __name__ == '__main__':
args = parser.parse_args()
cycle_pix_dataset = [
'apple2orange', 'summer2winter_yosemite', 'horse2zebra', 'monet2photo',
'cezanne2photo', 'ukiyoe2photo', 'vangogh2photo', 'maps', 'cityscapes',
'facades', 'iphone2dslr_flower', 'ae_photos', 'mini'
'cezanne2photo', 'ukiyoe2photo', 'vangogh2photo', 'maps', 'facades',
'iphone2dslr_flower', 'ae_photos', 'mini'
]
pwd = os.path.join(os.path.dirname(__file__), 'data')
......
export FLAGS_eager_delete_tensor_gb=0.0
export FLAGS_fast_eager_deletion_mode=1
export FLAGS_fraction_of_gpu_memory_to_use=0.01
CUDA_VISIBLE_DEVICES=0 python train.py \
    --model_net SPADE \
    --dataset cityscapes \
    --train_list train_list \
    --test_list val_list \
    --crop_type Random \
    --batch_size 1 \
    --epoch 200 \
    --load_height 612 \
    --load_width 1124 \
    --crop_height 512 \
    --crop_width 1024 \
    --label_nc 36