未验证 提交 41d194cc 编写于 作者: Q qingqing01 提交者: GitHub

Object detection framework based on PaddlePaddle. (#2496)

* Unified object detection framework based on PaddlePaddle.
* Include algo: Faster, Mask, FPN, Cascade, RetinaNet, Yolo v3, SSD.
上级 120b2bda
# Virtualenv
/.venv/
/venv/
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
# C extensions
*.so
# json file
*.json
# Distribution / packaging
/bin/
/build/
/develop-eggs/
/dist/
/eggs/
/lib/
/lib64/
/output/
/parts/
/sdist/
/var/
/*.egg-info/
/.installed.cfg
/*.egg
/.eggs
# AUTHORS and ChangeLog will be generated while packaging
/AUTHORS
/ChangeLog
# BCloud / BuildSubmitter
/build_submitter.*
/logger_client_log
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
.tox/
.coverage
.cache
.pytest_cache
nosetests.xml
coverage.xml
# Translations
*.mo
# Sphinx documentation
/docs/_build/
*.json
[style]
based_on_style = pep8
column_limit = 80
## PaddlePaddle Object Detection
PaddlePaddle Object Detection
===
Thanks for your attention. The object detection framework based on PaddlePaddle will be coming soon.
The document will be coming soon.
architecture: CascadeRCNN
train_feed: FasterRCNNTrainFeed
eval_feed: FasterRCNNEvalFeed
test_feed: FasterRCNNTestFeed
max_iters: 90000
snapshot_iter: 10000
use_gpu: true
log_smooth_window: 20
save_dir: output
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar
weights: output/cascade_rcnn_r50_fpn_1x/model_final
metric: COCO
CascadeRCNN:
backbone: ResNet
fpn: FPN
rpn_head: FPNRPNHead
roi_extractor: FPNRoIAlign
bbox_head: CascadeBBoxHead
bbox_assigner: CascadeBBoxAssigner
ResNet:
norm_type: affine_channel
depth: 50
feature_maps: [2, 3, 4, 5]
freeze_at: 2
variant: b
FPN:
min_level: 2
max_level: 6
num_chan: 256
spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
FPNRPNHead:
anchor_generator:
anchor_sizes: [32, 64, 128, 256, 512]
aspect_ratios: [0.5, 1.0, 2.0]
stride: [16.0, 16.0]
variance: [1.0, 1.0, 1.0, 1.0]
anchor_start_size: 32
min_level: 2
max_level: 6
num_chan: 256
rpn_target_assign:
rpn_batch_size_per_im: 256
rpn_fg_fraction: 0.5
rpn_positive_overlap: 0.7
rpn_negative_overlap: 0.3
rpn_straddle_thresh: 0.0
train_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 2000
post_nms_top_n: 2000
test_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 1000
post_nms_top_n: 1000
FPNRoIAlign:
canconical_level: 4
canonical_size: 224
min_level: 2
max_level: 5
box_resolution: 7
sampling_ratio: 2
CascadeBBoxAssigner:
batch_size_per_im: 512
bbox_reg_weights: [10, 20, 30]
bg_thresh_lo: [0.0, 0.0, 0.0]
bg_thresh_hi: [0.5, 0.6, 0.7]
fg_thresh: [0.5, 0.6, 0.7]
fg_fraction: 0.25
num_classes: 81
CascadeBBoxHead:
head: FC6FC7Head
nms:
keep_top_k: 100
nms_threshold: 0.5
score_threshold: 0.05
num_classes: 81
FC6FC7Head:
num_chan: 1024
LearningRate:
base_lr: 0.02
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [60000, 80000]
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 500
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2
FasterRCNNTrainFeed:
batch_size: 2
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
drop_last: false
num_workers: 2
shuffle: true
FasterRCNNEvalFeed:
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
FasterRCNNTestFeed:
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
annotation: annotations/instances_val2017.json
image_dir: val2017
test_file: val2017.txt
drop_last: false
num_workers: 2
shuffle: false
architecture: FasterRCNN
train_feed: FasterRCNNTrainFeed
eval_feed: FasterRCNNEvalFeed
test_feed: FasterRCNNTestFeed
use_gpu: True
max_iters: 180000
log_smooth_window: 20
save_dir: output
snapshot_iter: 10000
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.tar
metric: COCO
weights: output/faster_rcnn_r101_1x/model_final
FasterRCNN:
backbone: ResNet
rpn_head: RPNHead
roi_extractor: RoIAlign
bbox_head: BBoxHead
bbox_assigner: BBoxAssigner
ResNet:
norm_type: affine_channel
depth: 101
feature_maps: [2,3,4]
freeze_at: 2
ResNetC5:
norm_type: affine_channel
RPNHead:
anchor_generator:
anchor_sizes: [32, 64, 128, 256, 512]
aspect_ratios: [0.5, 1.0, 2.0]
stride: [16.0, 16.0]
variance: [1.0, 1.0, 1.0, 1.0]
rpn_target_assign:
rpn_batch_size_per_im: 256
rpn_fg_fraction: 0.5
rpn_negative_overlap: 0.3
rpn_positive_overlap: 0.7
rpn_straddle_thresh: 0.0
use_random: true
train_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 12000
post_nms_top_n: 2000
test_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 6000
post_nms_top_n: 1000
RoIAlign:
resolution: 14
sampling_ratio: 0
spatial_scale: 0.0625
BBoxAssigner:
batch_size_per_im: 512
bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_hi: 0.5
bg_thresh_lo: 0.0
fg_fraction: 0.25
fg_thresh: 0.5
num_classes: 81
BBoxHead:
head: ResNetC5
nms:
keep_top_k: 100
nms_threshold: 0.5
score_threshold: 0.05
num_classes: 81
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [120000, 160000]
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 500
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2
FasterRCNNTrainFeed:
# batch size per device
batch_size: 1
dataset:
dataset_dir: data/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
drop_last: false
num_workers: 2
FasterRCNNEvalFeed:
batch_size: 1
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
num_workers: 2
FasterRCNNTestFeed:
batch_size: 1
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
test_file: val2017.txt
samples: 5
architecture: FasterRCNN
train_feed: FasterRCNNTrainFeed
eval_feed: FasterRCNNEvalFeed
test_feed: FasterRCNNTestFeed
max_iters: 180000
snapshot_iter: 10000
use_gpu: True
log_smooth_window: 20
save_dir: output
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.tar
weights: output/faster_rcnn_r101_fpn_1x/model_final
metric: COCO
FasterRCNN:
backbone: ResNet
fpn: FPN
rpn_head: FPNRPNHead
roi_extractor: FPNRoIAlign
bbox_head: BBoxHead
bbox_assigner: BBoxAssigner
ResNet:
depth: 101
feature_maps: [2, 3, 4, 5]
freeze_at: 2
norm_type: affine_channel
FPN:
max_level: 6
min_level: 2
num_chan: 256
spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
FPNRPNHead:
anchor_generator:
anchor_sizes: [32, 64, 128, 256, 512]
aspect_ratios: [0.5, 1.0, 2.0]
stride: [16.0, 16.0]
variance: [1.0, 1.0, 1.0, 1.0]
anchor_start_size: 32
max_level: 6
min_level: 2
num_chan: 256
rpn_target_assign:
rpn_batch_size_per_im: 256
rpn_fg_fraction: 0.5
rpn_negative_overlap: 0.3
rpn_positive_overlap: 0.7
rpn_straddle_thresh: 0.0
train_proposal:
min_size: 0.0
nms_thresh: 0.7
post_nms_top_n: 2000
pre_nms_top_n: 2000
test_proposal:
min_size: 0.0
nms_thresh: 0.7
post_nms_top_n: 1000
pre_nms_top_n: 1000
FPNRoIAlign:
canconical_level: 4
canonical_size: 224
max_level: 5
min_level: 2
box_resolution: 7
sampling_ratio: 2
BBoxAssigner:
batch_size_per_im: 512
bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_hi: 0.5
bg_thresh_lo: 0.0
fg_fraction: 0.25
fg_thresh: 0.5
num_classes: 81
BBoxHead:
head: TwoFCHead
nms:
keep_top_k: 100
nms_threshold: 0.5
score_threshold: 0.05
num_classes: 81
TwoFCHead:
num_chan: 1024
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [120000, 160000]
values: null
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 500
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2
FasterRCNNTrainFeed:
# batch size per device
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
image_dir: train2017
annotation: annotations/instances_train2017.json
num_workers: 2
shuffle: True
FasterRCNNEvalFeed:
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
num_workers: 2
FasterRCNNTestFeed:
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
test_file: val2017.txt
num_workers: 2
shuffle: False
architecture: FasterRCNN
train_feed: FasterRCNNTrainFeed
eval_feed: FasterRCNNEvalFeed
test_feed: FasterRCNNTestFeed
max_iters: 360000
snapshot_iter: 10000
use_gpu: True
log_smooth_window: 20
save_dir: output
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.tar
weights: output/faster_rcnn_r101_fpn_2x/model_final
metric: COCO
FasterRCNN:
backbone: ResNet
fpn: FPN
rpn_head: FPNRPNHead
roi_extractor: FPNRoIAlign
bbox_head: BBoxHead
bbox_assigner: BBoxAssigner
ResNet:
depth: 101
feature_maps: [2, 3, 4, 5]
freeze_at: 2
norm_type: affine_channel
FPN:
max_level: 6
min_level: 2
num_chan: 256
spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
FPNRPNHead:
anchor_generator:
anchor_sizes: [32, 64, 128, 256, 512]
aspect_ratios: [0.5, 1.0, 2.0]
stride: [16.0, 16.0]
variance: [1.0, 1.0, 1.0, 1.0]
anchor_start_size: 32
max_level: 6
min_level: 2
num_chan: 256
rpn_target_assign:
rpn_batch_size_per_im: 256
rpn_fg_fraction: 0.5
rpn_negative_overlap: 0.3
rpn_positive_overlap: 0.7
rpn_straddle_thresh: 0.0
train_proposal:
min_size: 0.0
nms_thresh: 0.7
post_nms_top_n: 2000
pre_nms_top_n: 2000
test_proposal:
min_size: 0.0
nms_thresh: 0.7
post_nms_top_n: 1000
pre_nms_top_n: 1000
FPNRoIAlign:
canconical_level: 4
canonical_size: 224
max_level: 5
min_level: 2
box_resolution: 7
sampling_ratio: 2
BBoxAssigner:
batch_size_per_im: 512
bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_hi: 0.5
bg_thresh_lo: 0.0
fg_fraction: 0.25
fg_thresh: 0.5
num_classes: 81
BBoxHead:
head: TwoFCHead
nms:
keep_top_k: 100
nms_threshold: 0.5
score_threshold: 0.05
num_classes: 81
TwoFCHead:
num_chan: 1024
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [240000, 320000]
values: null
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 500
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2
FasterRCNNTrainFeed:
# batch size per device
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
image_dir: train2017
annotation: annotations/instances_train2017.json
num_workers: 2
shuffle: True
FasterRCNNEvalFeed:
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
num_workers: 2
FasterRCNNTestFeed:
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
test_file: val2017.txt
num_workers: 2
shuffle: False
architecture: FasterRCNN
train_feed: FasterRCNNTrainFeed
eval_feed: FasterRCNNEvalFeed
test_feed: FasterRCNNTestFeed
max_iters: 180000
snapshot_iter: 10000
use_gpu: True
log_smooth_window: 20
save_dir: output
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar
weights: output/faster_rcnn_r101_vd_fpn_1x/model_final
metric: COCO
FasterRCNN:
backbone: ResNet
fpn: FPN
rpn_head: FPNRPNHead
roi_extractor: FPNRoIAlign
bbox_head: BBoxHead
bbox_assigner: BBoxAssigner
ResNet:
depth: 101
feature_maps: [2, 3, 4, 5]
freeze_at: 2
norm_type: affine_channel
variant: d
FPN:
max_level: 6
min_level: 2
num_chan: 256
spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
FPNRPNHead:
anchor_generator:
anchor_sizes: [32, 64, 128, 256, 512]
aspect_ratios: [0.5, 1.0, 2.0]
stride: [16.0, 16.0]
variance: [1.0, 1.0, 1.0, 1.0]
anchor_start_size: 32
max_level: 6
min_level: 2
num_chan: 256
rpn_target_assign:
rpn_batch_size_per_im: 256
rpn_fg_fraction: 0.5
rpn_negative_overlap: 0.3
rpn_positive_overlap: 0.7
rpn_straddle_thresh: 0.0
train_proposal:
min_size: 0.0
nms_thresh: 0.7
post_nms_top_n: 2000
pre_nms_top_n: 2000
test_proposal:
min_size: 0.0
nms_thresh: 0.7
post_nms_top_n: 1000
pre_nms_top_n: 1000
FPNRoIAlign:
canconical_level: 4
canonical_size: 224
max_level: 5
min_level: 2
box_resolution: 7
sampling_ratio: 2
BBoxAssigner:
batch_size_per_im: 512
bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_hi: 0.5
bg_thresh_lo: 0.0
fg_fraction: 0.25
fg_thresh: 0.5
num_classes: 81
BBoxHead:
head: TwoFCHead
nms:
keep_top_k: 100
nms_threshold: 0.5
score_threshold: 0.05
num_classes: 81
TwoFCHead:
num_chan: 1024
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [120000, 160000]
values: null
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 1000
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2
FasterRCNNTrainFeed:
# batch size per device
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
image_dir: train2017
annotation: annotations/instances_train2017.json
num_workers: 2
shuffle: True
FasterRCNNEvalFeed:
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
num_workers: 2
FasterRCNNTestFeed:
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
test_file: val2017.txt
num_workers: 2
shuffle: False
architecture: FasterRCNN
train_feed: FasterRCNNTrainFeed
eval_feed: FasterRCNNEvalFeed
test_feed: FasterRCNNTestFeed
max_iters: 360000
snapshot_iter: 10000
use_gpu: True
log_smooth_window: 20
save_dir: output
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar
weights: output/faster_rcnn_r101_vd_fpn_2x/model_final
metric: COCO
FasterRCNN:
backbone: ResNet
fpn: FPN
rpn_head: FPNRPNHead
roi_extractor: FPNRoIAlign
bbox_head: BBoxHead
bbox_assigner: BBoxAssigner
ResNet:
depth: 101
feature_maps: [2, 3, 4, 5]
freeze_at: 2
norm_type: affine_channel
variant: d
FPN:
max_level: 6
min_level: 2
num_chan: 256
spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
FPNRPNHead:
anchor_generator:
anchor_sizes: [32, 64, 128, 256, 512]
aspect_ratios: [0.5, 1.0, 2.0]
stride: [16.0, 16.0]
variance: [1.0, 1.0, 1.0, 1.0]
anchor_start_size: 32
max_level: 6
min_level: 2
num_chan: 256
rpn_target_assign:
rpn_batch_size_per_im: 256
rpn_fg_fraction: 0.5
rpn_negative_overlap: 0.3
rpn_positive_overlap: 0.7
rpn_straddle_thresh: 0.0
train_proposal:
min_size: 0.0
nms_thresh: 0.7
post_nms_top_n: 2000
pre_nms_top_n: 2000
test_proposal:
min_size: 0.0
nms_thresh: 0.7
post_nms_top_n: 1000
pre_nms_top_n: 1000
FPNRoIAlign:
canconical_level: 4
canonical_size: 224
max_level: 5
min_level: 2
box_resolution: 7
sampling_ratio: 2
BBoxAssigner:
batch_size_per_im: 512
bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_hi: 0.5
bg_thresh_lo: 0.0
fg_fraction: 0.25
fg_thresh: 0.5
num_classes: 81
BBoxHead:
head: TwoFCHead
nms:
keep_top_k: 100
nms_threshold: 0.5
score_threshold: 0.05
num_classes: 81
TwoFCHead:
num_chan: 1024
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [240000, 320000]
values: null
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 1000
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2
FasterRCNNTrainFeed:
# batch size per device
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
image_dir: train2017
annotation: annotations/instances_train2017.json
num_workers: 2
shuffle: True
FasterRCNNEvalFeed:
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
num_workers: 2
FasterRCNNTestFeed:
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
test_file: val2017.txt
num_workers: 2
shuffle: False
architecture: FasterRCNN
train_feed: FasterRCNNTrainFeed
eval_feed: FasterRCNNEvalFeed
test_feed: FasterRCNNTestFeed
use_gpu: True
max_iters: 180000
log_smooth_window: 20
save_dir: output
snapshot_iter: 10000
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar
metric: COCO
weights: output/faster_rcnn_r50_1x/model_final
FasterRCNN:
backbone: ResNet
rpn_head: RPNHead
roi_extractor: RoIAlign
bbox_head: BBoxHead
bbox_assigner: BBoxAssigner
ResNet:
norm_type: affine_channel
depth: 50
feature_maps: [2,3,4]
freeze_at: 2
ResNetC5:
norm_type: affine_channel
RPNHead:
anchor_generator:
anchor_sizes: [32, 64, 128, 256, 512]
aspect_ratios: [0.5, 1.0, 2.0]
stride: [16.0, 16.0]
variance: [1.0, 1.0, 1.0, 1.0]
rpn_target_assign:
rpn_batch_size_per_im: 256
rpn_fg_fraction: 0.5
rpn_negative_overlap: 0.3
rpn_positive_overlap: 0.7
rpn_straddle_thresh: 0.0
use_random: true
train_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 12000
post_nms_top_n: 2000
test_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 6000
post_nms_top_n: 1000
RoIAlign:
resolution: 14
sampling_ratio: 0
spatial_scale: 0.0625
BBoxAssigner:
batch_size_per_im: 512
bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_hi: 0.5
bg_thresh_lo: 0.0
fg_fraction: 0.25
fg_thresh: 0.5
num_classes: 81
BBoxHead:
head: ResNetC5
nms:
keep_top_k: 100
nms_threshold: 0.5
score_threshold: 0.05
num_classes: 81
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [120000, 160000]
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 500
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2
FasterRCNNTrainFeed:
# batch size per device
batch_size: 1
dataset:
dataset_dir: data/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
drop_last: false
num_workers: 2
FasterRCNNEvalFeed:
batch_size: 1
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
num_workers: 2
FasterRCNNTestFeed:
batch_size: 1
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
test_file: val2017.txt
samples: 5
architecture: FasterRCNN
train_feed: FasterRCNNTrainFeed
eval_feed: FasterRCNNEvalFeed
test_feed: FasterRCNNTestFeed
use_gpu: True
max_iters: 360000
log_smooth_window: 20
save_dir: output
snapshot_iter: 10000
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar
metric: COCO
weights: output/faster_rcnn_r50_2x/model_final
FasterRCNN:
backbone: ResNet
rpn_head: RPNHead
roi_extractor: RoIAlign
bbox_head: BBoxHead
bbox_assigner: BBoxAssigner
ResNet:
norm_type: affine_channel
depth: 50
feature_maps: [2,3,4]
freeze_at: 2
ResNetC5:
norm_type: affine_channel
RPNHead:
anchor_generator:
anchor_sizes: [32, 64, 128, 256, 512]
aspect_ratios: [0.5, 1.0, 2.0]
stride: [16.0, 16.0]
variance: [1.0, 1.0, 1.0, 1.0]
rpn_target_assign:
rpn_batch_size_per_im: 256
rpn_fg_fraction: 0.5
rpn_negative_overlap: 0.3
rpn_positive_overlap: 0.7
rpn_straddle_thresh: 0.0
use_random: true
train_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 12000
post_nms_top_n: 2000
test_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 6000
post_nms_top_n: 1000
RoIAlign:
resolution: 14
sampling_ratio: 0
spatial_scale: 0.0625
BBoxAssigner:
batch_size_per_im: 512
bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_hi: 0.5
bg_thresh_lo: 0.0
fg_fraction: 0.25
fg_thresh: 0.5
num_classes: 81
BBoxHead:
head: ResNetC5
nms:
keep_top_k: 100
nms_threshold: 0.5
score_threshold: 0.05
num_classes: 81
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [240000, 320000]
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 500
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2
FasterRCNNTrainFeed:
# batch size per device
batch_size: 1
dataset:
dataset_dir: data/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
drop_last: false
num_workers: 2
FasterRCNNEvalFeed:
batch_size: 1
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
num_workers: 2
FasterRCNNTestFeed:
batch_size: 1
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
test_file: val2017.txt
samples: 5
architecture: FasterRCNN
train_feed: FasterRCNNTrainFeed
eval_feed: FasterRCNNEvalFeed
test_feed: FasterRCNNTestFeed
max_iters: 90000
use_gpu: True
snapshot_iter: 10000
log_smooth_window: 20
save_dir: output
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar
metric: COCO
weights: output/fpn/faster_rcnn_r50_fpn_1x/model_final
FasterRCNN:
backbone: ResNet
fpn: FPN
rpn_head: FPNRPNHead
roi_extractor: FPNRoIAlign
bbox_head: BBoxHead
bbox_assigner: BBoxAssigner
ResNet:
norm_type: affine_channel
norm_decay: true
depth: 50
feature_maps: [2, 3, 4, 5]
freeze_at: 2
FPN:
min_level: 2
max_level: 6
num_chan: 256
spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
FPNRPNHead:
anchor_generator:
anchor_sizes: [32, 64, 128, 256, 512]
aspect_ratios: [0.5, 1.0, 2.0]
stride: [16.0, 16.0]
variance: [1.0, 1.0, 1.0, 1.0]
anchor_start_size: 32
min_level: 2
max_level: 6
num_chan: 256
rpn_target_assign:
rpn_batch_size_per_im: 256
rpn_fg_fraction: 0.5
rpn_positive_overlap: 0.7
rpn_negative_overlap: 0.3
rpn_straddle_thresh: 0.0
train_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 2000
post_nms_top_n: 2000
test_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 1000
post_nms_top_n: 1000
FPNRoIAlign:
canconical_level: 4
canonical_size: 224
min_level: 2
max_level: 5
box_resolution: 7
sampling_ratio: 2
BBoxAssigner:
batch_size_per_im: 512
bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_lo: 0.0
bg_thresh_hi: 0.5
fg_fraction: 0.25
fg_thresh: 0.5
num_classes: 81
BBoxHead:
head: TwoFCHead
nms:
keep_top_k: 100
nms_threshold: 0.5
score_threshold: 0.05
num_classes: 81
TwoFCHead:
num_chan: 1024
LearningRate:
base_lr: 0.02
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [60000, 80000]
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 500
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2
FasterRCNNTrainFeed:
batch_size: 2
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
drop_last: false
num_workers: 2
shuffle: true
FasterRCNNEvalFeed:
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
FasterRCNNTestFeed:
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
annotation: annotations/instances_val2017.json
image_dir: val2017
test_file: val2017.txt
drop_last: false
num_workers: 2
shuffle: false
architecture: FasterRCNN
train_feed: FasterRCNNTrainFeed
eval_feed: FasterRCNNEvalFeed
test_feed: FasterRCNNTestFeed
max_iters: 180000
use_gpu: True
snapshot_iter: 10000
log_smooth_window: 20
save_dir: output
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar
metric: COCO
weights: output/faster_rcnn_r50_fpn_2x/model_final
FasterRCNN:
backbone: ResNet
fpn: FPN
rpn_head: FPNRPNHead
roi_extractor: FPNRoIAlign
bbox_head: BBoxHead
bbox_assigner: BBoxAssigner
ResNet:
norm_type: affine_channel
norm_decay: true
depth: 50
feature_maps: [2, 3, 4, 5]
freeze_at: 2
FPN:
min_level: 2
max_level: 6
num_chan: 256
spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
FPNRPNHead:
anchor_generator:
anchor_sizes: [32, 64, 128, 256, 512]
aspect_ratios: [0.5, 1.0, 2.0]
stride: [16.0, 16.0]
variance: [1.0, 1.0, 1.0, 1.0]
anchor_start_size: 32
min_level: 2
max_level: 6
num_chan: 256
rpn_target_assign:
rpn_batch_size_per_im: 256
rpn_fg_fraction: 0.5
rpn_positive_overlap: 0.7
rpn_negative_overlap: 0.3
rpn_straddle_thresh: 0.0
train_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 2000
post_nms_top_n: 2000
test_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 1000
post_nms_top_n: 1000
FPNRoIAlign:
canconical_level: 4
canonical_size: 224
min_level: 2
max_level: 5
box_resolution: 7
sampling_ratio: 2
BBoxAssigner:
batch_size_per_im: 512
bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_lo: 0.0
bg_thresh_hi: 0.5
fg_fraction: 0.25
fg_thresh: 0.5
num_classes: 81
BBoxHead:
head: TwoFCHead
nms:
keep_top_k: 100
nms_threshold: 0.5
score_threshold: 0.05
num_classes: 81
TwoFCHead:
num_chan: 1024
LearningRate:
base_lr: 0.02
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [120000, 160000]
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 500
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2
FasterRCNNTrainFeed:
batch_size: 2
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
drop_last: false
num_workers: 2
shuffle: true
FasterRCNNEvalFeed:
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
FasterRCNNTestFeed:
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
annotation: coco/annotations/instances_val2017.json
image_dir: coco/val2017
drop_last: false
num_workers: 2
shuffle: false
architecture: FasterRCNN
train_feed: FasterRCNNTrainFeed
eval_feed: FasterRCNNEvalFeed
test_feed: FasterRCNNTestFeed
use_gpu: True
max_iters: 180000
log_smooth_window: 20
save_dir: output
snapshot_iter: 10000
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar
metric: COCO
weights: output/faster_rcnn_r50_vd_1x/model_final
FasterRCNN:
backbone: ResNet
rpn_head: RPNHead
roi_extractor: RoIAlign
bbox_head: BBoxHead
bbox_assigner: BBoxAssigner
ResNet:
norm_type: affine_channel
depth: 50
feature_maps: [2,3,4]
freeze_at: 2
variant: d
ResNetC5:
norm_type: affine_channel
variant: d
RPNHead:
anchor_generator:
anchor_sizes: [32, 64, 128, 256, 512]
aspect_ratios: [0.5, 1.0, 2.0]
stride: [16.0, 16.0]
variance: [1.0, 1.0, 1.0, 1.0]
rpn_target_assign:
rpn_batch_size_per_im: 256
rpn_fg_fraction: 0.5
rpn_negative_overlap: 0.3
rpn_positive_overlap: 0.7
rpn_straddle_thresh: 0.0
use_random: true
train_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 12000
post_nms_top_n: 2000
test_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 6000
post_nms_top_n: 1000
RoIAlign:
resolution: 14
sampling_ratio: 0
spatial_scale: 0.0625
BBoxAssigner:
batch_size_per_im: 512
bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_hi: 0.5
bg_thresh_lo: 0.0
fg_fraction: 0.25
fg_thresh: 0.5
num_classes: 81
BBoxHead:
head: ResNetC5
nms:
keep_top_k: 100
nms_threshold: 0.5
score_threshold: 0.05
num_classes: 81
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [120000, 160000]
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 500
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2
FasterRCNNTrainFeed:
# batch size per device
batch_size: 1
dataset:
dataset_dir: data/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
drop_last: false
num_workers: 2
FasterRCNNEvalFeed:
batch_size: 1
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
num_workers: 2
FasterRCNNTestFeed:
batch_size: 1
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
test_file: val2017.txt
samples: 5
architecture: FasterRCNN
train_feed: FasterRCNNTrainFeed
eval_feed: FasterRCNNEvalFeed
test_feed: FasterRCNNTestFeed
max_iters: 180000
snapshot_iter: 10000
use_gpu: True
log_smooth_window: 20
save_dir: output
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar
weights: output/faster_rcnn_r50_vd_fpn_2x/model_final
metric: COCO
FasterRCNN:
backbone: ResNet
fpn: FPN
rpn_head: FPNRPNHead
roi_extractor: FPNRoIAlign
bbox_head: BBoxHead
bbox_assigner: BBoxAssigner
ResNet:
depth: 50
feature_maps: [2, 3, 4, 5]
freeze_at: 2
norm_type: affine_channel
variant: d
FPN:
max_level: 6
min_level: 2
num_chan: 256
spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
FPNRPNHead:
anchor_generator:
anchor_sizes: [32, 64, 128, 256, 512]
aspect_ratios: [0.5, 1.0, 2.0]
stride: [16.0, 16.0]
variance: [1.0, 1.0, 1.0, 1.0]
anchor_start_size: 32
max_level: 6
min_level: 2
num_chan: 256
rpn_target_assign:
rpn_batch_size_per_im: 256
rpn_fg_fraction: 0.5
rpn_negative_overlap: 0.3
rpn_positive_overlap: 0.7
rpn_straddle_thresh: 0.0
train_proposal:
min_size: 0.0
nms_thresh: 0.7
post_nms_top_n: 2000
pre_nms_top_n: 2000
test_proposal:
min_size: 0.0
nms_thresh: 0.7
post_nms_top_n: 1000
pre_nms_top_n: 1000
FPNRoIAlign:
canconical_level: 4
canonical_size: 224
max_level: 5
min_level: 2
box_resolution: 7
sampling_ratio: 2
BBoxAssigner:
batch_size_per_im: 512
bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_hi: 0.5
bg_thresh_lo: 0.0
fg_fraction: 0.25
fg_thresh: 0.5
num_classes: 81
BBoxHead:
head: TwoFCHead
nms:
keep_top_k: 100
nms_threshold: 0.5
score_threshold: 0.05
num_classes: 81
TwoFCHead:
num_chan: 1024
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [120000, 160000]
values: null
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 500
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2
FasterRCNNTrainFeed:
# batch size per device
batch_size: 2
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
image_dir: train2017
annotation: annotations/instances_train2017.json
num_workers: 2
shuffle: True
FasterRCNNEvalFeed:
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
num_workers: 2
FasterRCNNTestFeed:
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
test_file: val2017.txt
num_workers: 2
shuffle: False
architecture: FasterRCNN
train_feed: FasterRCNNTrainFeed
eval_feed: FasterRCNNEvalFeed
test_feed: FasterRCNNTestFeed
max_iters: 180000
snapshot_iter: 10000
use_gpu: True
log_smooth_window: 20
save_dir: output
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/SE154_vd_pretrained.tar
weights: output/faster_rcnn_se154_1x/model_final
metric: COCO
FasterRCNN:
backbone: SENet
fpn: null
rpn_head: RPNHead
roi_extractor: RoIAlign
bbox_head: BBoxHead
bbox_assigner: BBoxAssigner
SENet:
depth: 152
feature_maps: [2, 3, 4]
freeze_at: 2
group_width: 4
groups: 64
norm_type: affine_channel
variant: d
SENetC5:
depth: 152
feature_maps: 5
freeze_at: 2
group_width: 4
groups: 64
norm_type: affine_channel
variant: d
RPNHead:
anchor_generator:
anchor_sizes: [32, 64, 128, 256, 512]
aspect_ratios: [0.5, 1.0, 2.0]
stride: [16.0, 16.0]
variance: [1.0, 1.0, 1.0, 1.0]
rpn_target_assign:
rpn_batch_size_per_im: 256
rpn_fg_fraction: 0.5
rpn_negative_overlap: 0.3
rpn_positive_overlap: 0.7
rpn_straddle_thresh: 0.0
train_proposal:
min_size: 0.0
nms_thresh: 0.7
post_nms_top_n: 2000
pre_nms_top_n: 12000
test_proposal:
min_size: 0.0
nms_thresh: 0.7
post_nms_top_n: 1000
pre_nms_top_n: 6000
RoIAlign:
resolution: 7
sampling_ratio: 0
spatial_scale: 0.0625
BBoxAssigner:
batch_size_per_im: 512
bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_hi: 0.5
bg_thresh_lo: 0.0
fg_fraction: 0.25
fg_thresh: 0.5
num_classes: 81
BBoxHead:
head: SENetC5
nms:
keep_top_k: 100
nms_threshold: 0.5
score_threshold: 0.05
num_classes: 81
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [120000, 160000]
values: null
- !LinearWarmup
start_factor: 0.1
steps: 1000
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2
FasterRCNNTrainFeed:
# batch size per device
batch_size: 1
dataset:
dataset_dir: data/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
num_workers: 2
shuffle: True
FasterRCNNEvalFeed:
batch_size: 1
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
num_workers: 2
FasterRCNNTestFeed:
batch_size: 1
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
test_file: val2017.txt
num_workers: 2
shuffle: False
architecture: FasterRCNN
train_feed: FasterRCNNTrainFeed
eval_feed: FasterRCNNEvalFeed
test_feed: FasterRCNNTestFeed
max_iters: 180000
snapshot_iter: 10000
use_gpu: True
log_smooth_window: 20
save_dir: output
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/SE154_vd_pretrained.tar
weights: output/faster_rcnn_se154_fpn_1x/model_final
metric: COCO
FasterRCNN:
backbone: SENet
fpn: FPN
rpn_head: FPNRPNHead
roi_extractor: FPNRoIAlign
bbox_head: BBoxHead
bbox_assigner: BBoxAssigner
SENet:
depth: 152
feature_maps: [2, 3, 4, 5]
freeze_at: 2
group_width: 4
groups: 64
norm_type: affine_channel
variant: d
FPN:
max_level: 6
min_level: 2
num_chan: 256
spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
FPNRPNHead:
anchor_generator:
anchor_sizes: [32, 64, 128, 256, 512]
aspect_ratios: [0.5, 1.0, 2.0]
stride: [16.0, 16.0]
variance: [1.0, 1.0, 1.0, 1.0]
anchor_start_size: 32
max_level: 6
min_level: 2
num_chan: 256
rpn_target_assign:
rpn_batch_size_per_im: 256
rpn_fg_fraction: 0.5
rpn_negative_overlap: 0.3
rpn_positive_overlap: 0.7
rpn_straddle_thresh: 0.0
train_proposal:
min_size: 0.0
nms_thresh: 0.7
post_nms_top_n: 2000
pre_nms_top_n: 2000
test_proposal:
min_size: 0.0
nms_thresh: 0.7
post_nms_top_n: 1000
pre_nms_top_n: 1000
FPNRoIAlign:
canconical_level: 4
canonical_size: 224
max_level: 5
min_level: 2
box_resolution: 7
sampling_ratio: 2
BBoxAssigner:
batch_size_per_im: 512
bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_hi: 0.5
bg_thresh_lo: 0.0
fg_fraction: 0.25
fg_thresh: 0.5
num_classes: 81
BBoxHead:
head: TwoFCHead
nms:
keep_top_k: 100
nms_threshold: 0.5
score_threshold: 0.05
num_classes: 81
TwoFCHead:
num_chan: 1024
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [120000, 160000]
values: null
- !LinearWarmup
start_factor: 0.1
steps: 1000
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2
FasterRCNNTrainFeed:
# batch size per device
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
image_dir: train2017
annotation: annotations/instances_train2017.json
num_workers: 2
shuffle: True
FasterRCNNEvalFeed:
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
num_workers: 2
FasterRCNNTestFeed:
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
test_file: val2017.txt
num_workers: 2
shuffle: False
architecture: FasterRCNN
train_feed: FasterRCNNTrainFeed
eval_feed: FasterRCNNEvalFeed
test_feed: FasterRCNNTestFeed
max_iters: 260000
snapshot_iter: 10000
use_gpu: True
log_smooth_window: 20
save_dir: output
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/SE154_vd_pretrained.tar
weights: output/faster_rcnn_se154_fpn_s1x/model_final
metric: COCO
FasterRCNN:
backbone: SENet
fpn: FPN
rpn_head: FPNRPNHead
roi_extractor: FPNRoIAlign
bbox_head: BBoxHead
bbox_assigner: BBoxAssigner
SENet:
depth: 152
feature_maps: [2, 3, 4, 5]
freeze_at: 2
group_width: 4
groups: 64
norm_type: affine_channel
variant: d
FPN:
max_level: 6
min_level: 2
num_chan: 256
spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
FPNRPNHead:
anchor_generator:
anchor_sizes: [32, 64, 128, 256, 512]
aspect_ratios: [0.5, 1.0, 2.0]
stride: [16.0, 16.0]
variance: [1.0, 1.0, 1.0, 1.0]
anchor_start_size: 32
max_level: 6
min_level: 2
num_chan: 256
rpn_target_assign:
rpn_batch_size_per_im: 256
rpn_fg_fraction: 0.5
rpn_negative_overlap: 0.3
rpn_positive_overlap: 0.7
rpn_straddle_thresh: 0.0
train_proposal:
min_size: 0.0
nms_thresh: 0.7
post_nms_top_n: 2000
pre_nms_top_n: 2000
test_proposal:
min_size: 0.0
nms_thresh: 0.7
post_nms_top_n: 1000
pre_nms_top_n: 1000
FPNRoIAlign:
canconical_level: 4
canonical_size: 224
max_level: 5
min_level: 2
box_resolution: 7
sampling_ratio: 2
BBoxAssigner:
batch_size_per_im: 512
bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_hi: 0.5
bg_thresh_lo: 0.0
fg_fraction: 0.25
fg_thresh: 0.5
num_classes: 81
BBoxHead:
head: TwoFCHead
nms:
keep_top_k: 100
nms_threshold: 0.5
score_threshold: 0.05
num_classes: 81
TwoFCHead:
num_chan: 1024
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [200000, 240000]
values: null
- !LinearWarmup
start_factor: 0.1
steps: 1000
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2
FasterRCNNTrainFeed:
# batch size per device
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
image_dir: train2017
annotation: annotations/instances_train2017.json
num_workers: 2
shuffle: True
FasterRCNNEvalFeed:
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
num_workers: 2
FasterRCNNTestFeed:
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
test_file: val2017.txt
num_workers: 2
shuffle: False
architecture: FasterRCNN
train_feed: FasterRCNNTrainFeed
eval_feed: FasterRCNNEvalFeed
test_feed: FasterRCNNTestFeed
max_iters: 180000
snapshot_iter: 10000
use_gpu: True
log_smooth_window: 20
save_dir: output
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_64x4d_pretrained.tar
weights: output/faster_rcnn_x101_64x4d_fpn_1x/model_final
metric: COCO
FasterRCNN:
backbone: ResNeXt
fpn: FPN
rpn_head: FPNRPNHead
roi_extractor: FPNRoIAlign
bbox_head: BBoxHead
bbox_assigner: BBoxAssigner
ResNeXt:
depth: 101
feature_maps: [2, 3, 4, 5]
freeze_at: 2
group_width: 4
groups: 64
norm_type: affine_channel
FPN:
max_level: 6
min_level: 2
num_chan: 256
spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
FPNRPNHead:
anchor_generator:
anchor_sizes: [32, 64, 128, 256, 512]
aspect_ratios: [0.5, 1.0, 2.0]
stride: [16.0, 16.0]
variance: [1.0, 1.0, 1.0, 1.0]
anchor_start_size: 32
max_level: 6
min_level: 2
num_chan: 256
rpn_target_assign:
rpn_batch_size_per_im: 256
rpn_fg_fraction: 0.5
rpn_negative_overlap: 0.3
rpn_positive_overlap: 0.7
rpn_straddle_thresh: 0.0
train_proposal:
min_size: 0.0
nms_thresh: 0.7
post_nms_top_n: 2000
pre_nms_top_n: 2000
test_proposal:
min_size: 0.0
nms_thresh: 0.7
post_nms_top_n: 1000
pre_nms_top_n: 1000
FPNRoIAlign:
canconical_level: 4
canonical_size: 224
max_level: 5
min_level: 2
box_resolution: 7
sampling_ratio: 2
BBoxAssigner:
batch_size_per_im: 512
bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_hi: 0.5
bg_thresh_lo: 0.0
fg_fraction: 0.25
fg_thresh: 0.5
num_classes: 81
BBoxHead:
head: TwoFCHead
nms:
keep_top_k: 100
nms_threshold: 0.5
score_threshold: 0.05
num_classes: 81
TwoFCHead:
num_chan: 1024
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [120000, 160000]
values: null
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 500
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2
FasterRCNNTrainFeed:
# batch size per device
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
image_dir: train2017
annotation: annotations/instances_train2017.json
num_workers: 2
shuffle: True
FasterRCNNEvalFeed:
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
num_workers: 2
FasterRCNNTestFeed:
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
test_file: val2017.txt
num_workers: 2
shuffle: False
architecture: FasterRCNN
train_feed: FasterRCNNTrainFeed
eval_feed: FasterRCNNEvalFeed
test_feed: FasterRCNNTestFeed
max_iters: 360000
snapshot_iter: 10000
use_gpu: True
log_smooth_window: 20
save_dir: output
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_64x4d_pretrained.tar
weights: output/faster_rcnn_x101_64x4d_fpn_2x/model_final
metric: COCO
FasterRCNN:
backbone: ResNeXt
fpn: FPN
rpn_head: FPNRPNHead
roi_extractor: FPNRoIAlign
bbox_head: BBoxHead
bbox_assigner: BBoxAssigner
ResNeXt:
depth: 101
feature_maps: [2, 3, 4, 5]
freeze_at: 2
group_width: 4
groups: 64
norm_type: affine_channel
FPN:
max_level: 6
min_level: 2
num_chan: 256
spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
FPNRPNHead:
anchor_generator:
anchor_sizes: [32, 64, 128, 256, 512]
aspect_ratios: [0.5, 1.0, 2.0]
stride: [16.0, 16.0]
variance: [1.0, 1.0, 1.0, 1.0]
anchor_start_size: 32
max_level: 6
min_level: 2
num_chan: 256
rpn_target_assign:
rpn_batch_size_per_im: 256
rpn_fg_fraction: 0.5
rpn_negative_overlap: 0.3
rpn_positive_overlap: 0.7
rpn_straddle_thresh: 0.0
train_proposal:
min_size: 0.0
nms_thresh: 0.7
post_nms_top_n: 2000
pre_nms_top_n: 2000
test_proposal:
min_size: 0.0
nms_thresh: 0.7
post_nms_top_n: 1000
pre_nms_top_n: 1000
FPNRoIAlign:
canconical_level: 4
canonical_size: 224
max_level: 5
min_level: 2
box_resolution: 7
sampling_ratio: 2
BBoxAssigner:
batch_size_per_im: 512
bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_hi: 0.5
bg_thresh_lo: 0.0
fg_fraction: 0.25
fg_thresh: 0.5
num_classes: 81
BBoxHead:
head: TwoFCHead
nms:
keep_top_k: 100
nms_threshold: 0.5
score_threshold: 0.05
num_classes: 81
TwoFCHead:
num_chan: 1024
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [240000, 320000]
values: null
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 500
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2
FasterRCNNTrainFeed:
# batch size per device
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
image_dir: train2017
annotation: annotations/instances_train2017.json
num_workers: 2
shuffle: True
FasterRCNNEvalFeed:
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
num_workers: 2
FasterRCNNTestFeed:
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
test_file: val2017.txt
num_workers: 2
shuffle: False
architecture: MaskRCNN
train_feed: MaskRCNNTrainFeed
eval_feed: MaskRCNNEvalFeed
test_feed: MaskRCNNTestFeed
use_gpu: True
max_iters: 180000
snapshot_iter: 10000
log_smooth_window: 20
save_dir: output
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.tar
metric: COCO
weights: output/mask_rcnn_r101_fpn_1x/model_final/
MaskRCNN:
backbone: ResNet
fpn: FPN
rpn_head: FPNRPNHead
roi_extractor: FPNRoIAlign
bbox_head: BBoxHead
bbox_assigner: BBoxAssigner
ResNet:
depth: 101
feature_maps: [2, 3, 4, 5]
freeze_at: 2
norm_type: affine_channel
FPN:
max_level: 6
min_level: 2
num_chan: 256
spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
FPNRPNHead:
anchor_generator:
aspect_ratios: [0.5, 1.0, 2.0]
variance: [1.0, 1.0, 1.0, 1.0]
anchor_start_size: 32
max_level: 6
min_level: 2
num_chan: 256
rpn_target_assign:
rpn_batch_size_per_im: 256
rpn_fg_fraction: 0.5
rpn_negative_overlap: 0.3
rpn_positive_overlap: 0.7
rpn_straddle_thresh: 0.0
train_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 2000
post_nms_top_n: 2000
test_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 1000
post_nms_top_n: 1000
FPNRoIAlign:
canconical_level: 4
canonical_size: 224
max_level: 5
min_level: 2
sampling_ratio: 2
box_resolution: 7
mask_resolution: 14
MaskHead:
dilation: 1
num_chan_reduced: 256
num_classes: 81
num_convs: 4
resolution: 28
BBoxAssigner:
batch_size_per_im: 512
bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_hi: 0.5
bg_thresh_lo: 0.0
fg_fraction: 0.25
fg_thresh: 0.5
num_classes: 81
MaskAssigner:
resolution: 28
BBoxHead:
head: TwoFCHead
nms:
keep_top_k: 100
nms_threshold: 0.5
score_threshold: 0.05
num_classes: 81
TwoFCHead:
num_chan: 1024
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [120000, 160000]
values: null
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 500
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2
MaskRCNNTrainFeed:
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
num_workers: 2
use_padded_im_info: False
MaskRCNNEvalFeed:
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
num_workers: 2
use_padded_im_info: True
MaskRCNNTestFeed:
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
test_file: val2017.txt
samples: 5
num_workers: 2
use_padded_im_info: True
architecture: MaskRCNN
train_feed: MaskRCNNTrainFeed
eval_feed: MaskRCNNEvalFeed
test_feed: MaskRCNNTestFeed
use_gpu: True
max_iters: 360000
snapshot_iter: 10000
log_smooth_window: 20
save_dir: output
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.tar
metric: COCO
weights: output/mask_rcnn_r101_fpn_2x/model_final/
MaskRCNN:
backbone: ResNet
fpn: FPN
rpn_head: FPNRPNHead
roi_extractor: FPNRoIAlign
bbox_head: BBoxHead
bbox_assigner: BBoxAssigner
ResNet:
depth: 101
feature_maps: [2, 3, 4, 5]
freeze_at: 2
norm_type: affine_channel
FPN:
max_level: 6
min_level: 2
num_chan: 256
spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
FPNRPNHead:
anchor_generator:
aspect_ratios: [0.5, 1.0, 2.0]
variance: [1.0, 1.0, 1.0, 1.0]
anchor_start_size: 32
max_level: 6
min_level: 2
num_chan: 256
rpn_target_assign:
rpn_batch_size_per_im: 256
rpn_fg_fraction: 0.5
rpn_negative_overlap: 0.3
rpn_positive_overlap: 0.7
rpn_straddle_thresh: 0.0
train_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 2000
post_nms_top_n: 2000
test_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 1000
post_nms_top_n: 1000
FPNRoIAlign:
canconical_level: 4
canonical_size: 224
max_level: 5
min_level: 2
sampling_ratio: 2
box_resolution: 7
mask_resolution: 14
MaskHead:
dilation: 1
num_chan_reduced: 256
num_classes: 81
num_convs: 4
resolution: 28
BBoxAssigner:
batch_size_per_im: 512
bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_hi: 0.5
bg_thresh_lo: 0.0
fg_fraction: 0.25
fg_thresh: 0.5
num_classes: 81
MaskAssigner:
resolution: 28
BBoxHead:
head: TwoFCHead
nms:
keep_top_k: 100
nms_threshold: 0.5
score_threshold: 0.05
num_classes: 81
TwoFCHead:
num_chan: 1024
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [240000, 320000]
values: null
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 500
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2
MaskRCNNTrainFeed:
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
num_workers: 2
use_padded_im_info: False
MaskRCNNEvalFeed:
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
num_workers: 2
use_padded_im_info: True
MaskRCNNTestFeed:
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
test_file: val2017.txt
samples: 5
num_workers: 2
use_padded_im_info: True
architecture: MaskRCNN
train_feed: MaskRCNNTrainFeed
eval_feed: MaskRCNNEvalFeed
test_feed: MaskRCNNTestFeed
use_gpu: True
max_iters: 180000
snapshot_iter: 10000
log_smooth_window: 20
save_dir: output
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar
metric: COCO
weights: output/mask_rcnn_r50_1x/model_final
MaskRCNN:
backbone: ResNet
rpn_head: RPNHead
roi_extractor: RoIAlign
bbox_assigner: BBoxAssigner
bbox_head: BBoxHead
mask_assigner: MaskAssigner
mask_head: MaskHead
ResNet:
norm_type: affine_channel
norm_decay: true
depth: 50
feature_maps: [2, 3, 4]
freeze_at: 2
ResNetC5:
norm_type: affine_channel
RPNHead:
anchor_generator:
anchor_sizes: [32, 64, 128, 256, 512]
aspect_ratios: [0.5, 1.0, 2.0]
stride: [16.0, 16.0]
variance: [1.0, 1.0, 1.0, 1.0]
rpn_target_assign:
rpn_batch_size_per_im: 256
rpn_fg_fraction: 0.5
rpn_negative_overlap: 0.3
rpn_positive_overlap: 0.7
rpn_straddle_thresh: 0.0
train_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 12000
post_nms_top_n: 2000
test_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 6000
post_nms_top_n: 1000
RoIAlign:
resolution: 14
spatial_scale: 0.0625
sampling_ratio: 0
BBoxHead:
head: ResNetC5
nms:
keep_top_k: 100
nms_threshold: 0.5
normalized: false
score_threshold: 0.05
num_classes: 81
MaskHead:
dilation: 1
num_chan_reduced: 256
num_classes: 81
resolution: 14
BBoxAssigner:
batch_size_per_im: 512
bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_hi: 0.5
bg_thresh_lo: 0.0
fg_fraction: 0.25
fg_thresh: 0.5
num_classes: 81
MaskAssigner:
num_classes: 81
resolution: 14
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [120000, 160000]
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 500
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2
MaskRCNNTrainFeed:
batch_size: 1
dataset:
dataset_dir: data/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
num_workers: 2
shuffle: true
MaskRCNNEvalFeed:
batch_size: 1
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
shuffle: false
MaskRCNNTestFeed:
batch_size: 1
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
test_file: val2017.txt
samples: 5
architecture: MaskRCNN
train_feed: MaskRCNNTrainFeed
eval_feed: MaskRCNNEvalFeed
test_feed: MaskRCNNTestFeed
use_gpu: True
max_iters: 360000
snapshot_iter: 10000
log_smooth_window: 20
save_dir: output
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar
metric: COCO
weights: output/mask_rcnn_r50_2x/model_final/
MaskRCNN:
backbone: ResNet
rpn_head: RPNHead
roi_extractor: RoIAlign
bbox_assigner: BBoxAssigner
bbox_head: BBoxHead
mask_assigner: MaskAssigner
mask_head: MaskHead
ResNet:
norm_type: affine_channel
norm_decay: true
depth: 50
feature_maps: [2, 3, 4]
freeze_at: 2
ResNetC5:
norm_type: affine_channel
RPNHead:
anchor_generator:
anchor_sizes: [32, 64, 128, 256, 512]
aspect_ratios: [0.5, 1.0, 2.0]
stride: [16.0, 16.0]
variance: [1.0, 1.0, 1.0, 1.0]
rpn_target_assign:
rpn_batch_size_per_im: 256
rpn_fg_fraction: 0.5
rpn_negative_overlap: 0.3
rpn_positive_overlap: 0.7
rpn_straddle_thresh: 0.0
train_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 12000
post_nms_top_n: 2000
test_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 6000
post_nms_top_n: 1000
RoIAlign:
resolution: 14
spatial_scale: 0.0625
sampling_ratio: 0
BBoxHead:
head: ResNetC5
nms:
keep_top_k: 100
nms_threshold: 0.5
normalized: false
score_threshold: 0.05
num_classes: 81
MaskHead:
dilation: 1
num_chan_reduced: 256
num_classes: 81
resolution: 14
BBoxAssigner:
batch_size_per_im: 512
bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_hi: 0.5
bg_thresh_lo: 0.0
fg_fraction: 0.25
fg_thresh: 0.5
num_classes: 81
MaskAssigner:
num_classes: 81
resolution: 14
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [240000, 320000]
# start the warm-up from base_lr * start_factor
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 500
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2
MaskRCNNTrainFeed:
batch_size: 1
dataset:
dataset_dir: data/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
num_workers: 2
shuffle: true
MaskRCNNEvalFeed:
batch_size: 1
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
shuffle: false
MaskRCNNTestFeed:
batch_size: 1
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
test_file: val2017.txt
samples: 5
architecture: MaskRCNN
train_feed: MaskRCNNTrainFeed
eval_feed: MaskRCNNEvalFeed
test_feed: MaskRCNNTestFeed
use_gpu: True
max_iters: 180000
snapshot_iter: 10000
log_smooth_window: 20
save_dir: output
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar
metric: COCO
weights: output/mask_rcnn_r50_fpn_1x/model_final/
MaskRCNN:
backbone: ResNet
fpn: FPN
rpn_head: FPNRPNHead
roi_extractor: FPNRoIAlign
bbox_head: BBoxHead
bbox_assigner: BBoxAssigner
ResNet:
depth: 50
feature_maps: [2, 3, 4, 5]
freeze_at: 2
norm_type: affine_channel
FPN:
max_level: 6
min_level: 2
num_chan: 256
spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
FPNRPNHead:
anchor_generator:
aspect_ratios: [0.5, 1.0, 2.0]
variance: [1.0, 1.0, 1.0, 1.0]
anchor_start_size: 32
max_level: 6
min_level: 2
num_chan: 256
rpn_target_assign:
rpn_batch_size_per_im: 256
rpn_fg_fraction: 0.5
rpn_negative_overlap: 0.3
rpn_positive_overlap: 0.7
rpn_straddle_thresh: 0.0
train_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 2000
post_nms_top_n: 2000
test_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 1000
post_nms_top_n: 1000
FPNRoIAlign:
canconical_level: 4
canonical_size: 224
max_level: 5
min_level: 2
sampling_ratio: 2
box_resolution: 7
mask_resolution: 14
MaskHead:
dilation: 1
num_chan_reduced: 256
num_classes: 81
num_convs: 4
resolution: 28
BBoxAssigner:
batch_size_per_im: 512
bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_hi: 0.5
bg_thresh_lo: 0.0
fg_fraction: 0.25
fg_thresh: 0.5
num_classes: 81
MaskAssigner:
resolution: 28
BBoxHead:
head: TwoFCHead
nms:
keep_top_k: 100
nms_threshold: 0.5
score_threshold: 0.05
num_classes: 81
TwoFCHead:
num_chan: 1024
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [120000, 160000]
values: null
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 500
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2
MaskRCNNTrainFeed:
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
num_workers: 2
use_padded_im_info: False
MaskRCNNEvalFeed:
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
num_workers: 2
use_padded_im_info: True
MaskRCNNTestFeed:
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
test_file: val2017.txt
samples: 5
num_workers: 2
use_padded_im_info: True
architecture: MaskRCNN
train_feed: MaskRCNNTrainFeed
eval_feed: MaskRCNNEvalFeed
test_feed: MaskRCNNTestFeed
use_gpu: True
max_iters: 360000
snapshot_iter: 10000
log_smooth_window: 20
save_dir: output
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar
metric: COCO
weights: output/mask_rcnn_r50_fpn_2x/model_final/
MaskRCNN:
backbone: ResNet
fpn: FPN
rpn_head: FPNRPNHead
roi_extractor: FPNRoIAlign
bbox_head: BBoxHead
bbox_assigner: BBoxAssigner
ResNet:
depth: 50
feature_maps: [2, 3, 4, 5]
freeze_at: 2
norm_type: affine_channel
FPN:
max_level: 6
min_level: 2
num_chan: 256
spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
FPNRPNHead:
anchor_generator:
aspect_ratios: [0.5, 1.0, 2.0]
variance: [1.0, 1.0, 1.0, 1.0]
anchor_start_size: 32
max_level: 6
min_level: 2
num_chan: 256
rpn_target_assign:
rpn_batch_size_per_im: 256
rpn_fg_fraction: 0.5
rpn_negative_overlap: 0.3
rpn_positive_overlap: 0.7
rpn_straddle_thresh: 0.0
train_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 2000
post_nms_top_n: 2000
test_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 1000
post_nms_top_n: 1000
FPNRoIAlign:
canconical_level: 4
canonical_size: 224
max_level: 5
min_level: 2
sampling_ratio: 2
box_resolution: 7
mask_resolution: 14
MaskHead:
dilation: 1
num_chan_reduced: 256
num_classes: 81
num_convs: 4
resolution: 28
BBoxAssigner:
batch_size_per_im: 512
bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_hi: 0.5
bg_thresh_lo: 0.0
fg_fraction: 0.25
fg_thresh: 0.5
num_classes: 81
MaskAssigner:
resolution: 28
BBoxHead:
head: TwoFCHead
nms:
keep_top_k: 100
nms_threshold: 0.5
score_threshold: 0.05
num_classes: 81
TwoFCHead:
num_chan: 1024
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [240000, 320000]
values: null
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 500
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2
MaskRCNNTrainFeed:
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
num_workers: 2
use_padded_im_info: False
MaskRCNNEvalFeed:
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
num_workers: 2
use_padded_im_info: True
MaskRCNNTestFeed:
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
test_file: val2017.txt
samples: 5
num_workers: 2
use_padded_im_info: True
architecture: MaskRCNN
train_feed: MaskRCNNTrainFeed
eval_feed: MaskRCNNEvalFeed
test_feed: MaskRCNNTestFeed
use_gpu: True
max_iters: 360000
snapshot_iter: 10000
log_smooth_window: 20
save_dir: output
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar
metric: COCO
weights: output/mask_rcnn_r50_vd_fpn_2x/model_final/
MaskRCNN:
backbone: ResNet
fpn: FPN
rpn_head: FPNRPNHead
roi_extractor: FPNRoIAlign
bbox_head: BBoxHead
bbox_assigner: BBoxAssigner
ResNet:
depth: 50
feature_maps: [2, 3, 4, 5]
freeze_at: 2
norm_type: affine_channel
variant: d
FPN:
max_level: 6
min_level: 2
num_chan: 256
spatial_scale: [0.03125, 0.0625, 0.125, 0.25]
FPNRPNHead:
anchor_generator:
aspect_ratios: [0.5, 1.0, 2.0]
variance: [1.0, 1.0, 1.0, 1.0]
anchor_start_size: 32
max_level: 6
min_level: 2
num_chan: 256
rpn_target_assign:
rpn_batch_size_per_im: 256
rpn_fg_fraction: 0.5
rpn_negative_overlap: 0.3
rpn_positive_overlap: 0.7
rpn_straddle_thresh: 0.0
train_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 2000
post_nms_top_n: 2000
test_proposal:
min_size: 0.0
nms_thresh: 0.7
pre_nms_top_n: 1000
post_nms_top_n: 1000
FPNRoIAlign:
canconical_level: 4
canonical_size: 224
max_level: 5
min_level: 2
box_resolution: 7
sampling_ratio: 2
mask_resolution: 14
MaskHead:
dilation: 1
num_chan_reduced: 256
num_classes: 81
num_convs: 4
resolution: 28
BBoxAssigner:
batch_size_per_im: 512
bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
bg_thresh_hi: 0.5
bg_thresh_lo: 0.0
fg_fraction: 0.25
fg_thresh: 0.5
num_classes: 81
MaskAssigner:
resolution: 28
BBoxHead:
head: TwoFCHead
nms:
keep_top_k: 100
nms_threshold: 0.5
score_threshold: 0.05
num_classes: 81
TwoFCHead:
num_chan: 1024
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [240000, 320000]
values: null
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 500
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2
MaskRCNNTrainFeed:
# batch size per device
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
image_dir: train2017
annotation: annotations/instances_train2017.json
num_workers: 2
shuffle: True
use_padded_im_info: False
MaskRCNNEvalFeed:
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
num_workers: 2
use_padded_im_info: True
MaskRCNNTestFeed:
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 32
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
test_file: val2017.txt
num_workers: 2
use_padded_im_info: True
architecture: RetinaNet
train_feed: FasterRCNNTrainFeed
eval_feed: FasterRCNNEvalFeed
test_feed: FasterRCNNTestFeed
max_iters: 90000
use_gpu: yes
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar
weights: output/retinanet_r50_fpn_1x/model_final
log_smooth_window: 20
snapshot_iter: 10000
metric: COCO
save_dir: output
RetinaNet:
backbone: ResNet
fpn: FPN
retina_head: RetinaHead
ResNet:
norm_type: affine_channel
freeze_norm: true
norm_decay: 0.0001
depth: 50
feature_maps: [3, 4, 5]
freeze_at: 2
variant: b
FPN:
max_level: 7
min_level: 3
num_chan: 256
spatial_scale: [0.03125, 0.0625, 0.125]
has_extra_convs: true
RetinaHead:
num_convs_per_octave: 4
num_chan: 256
max_level: 7
min_level: 3
prior_prob: 0.01
base_scale: 4
num_scales_per_octave: 3
num_classes: 81
anchor_generator:
aspect_ratios: [1.0, 2.0, 0.5]
variance: [1.0, 1.0, 1.0, 1.0]
target_assign:
positive_overlap: 0.5
negative_overlap: 0.4
gamma: 2.0
alpha: 0.25
sigma: 3.0151134457776365
output_decoder:
score_thresh: 0.05
nms_thresh: 0.5
pre_nms_top_n: 1000
detections_per_im: 100
nms_eta: 1.0
LearningRate:
base_lr: 0.01
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones: [60000, 80000]
values: null
- !LinearWarmup
start_factor: 0.3333333333333333
steps: 500
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0001
type: L2
FasterRCNNTrainFeed:
batch_size: 2
batch_transforms:
- !PadBatch
pad_to_stride: 128
dataset:
dataset_dir: data/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
drop_last: false
image_shape: [3, 1333, 800]
num_workers: 2
sample_transforms:
- !DecodeImage
to_rgb: true
with_mixup: false
- !RandomFlipImage
is_mask_flip: false
is_normalized: false
prob: 0.5
- !NormalizeImage
is_channel_first: false
is_scale: true
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
- !ResizeImage
interp: 1
max_size: 1333
target_size: 800
use_cv2: true
- !Permute
channel_first: true
to_bgr: false
shuffle: true
FasterRCNNEvalFeed:
batch_size: 2
batch_transforms:
- !PadBatch
pad_to_stride: 128
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
shuffle: false
image_shape: [3, 1333, 800]
num_workers: 2
sample_transforms:
- !DecodeImage
to_rgb: true
with_mixup: false
- !NormalizeImage
is_channel_first: false
is_scale: true
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
- !ResizeImage
interp: 1
max_size: 1333
target_size: 800
use_cv2: true
- !Permute
channel_first: true
to_bgr: false
shuffle: false
FasterRCNNTestFeed:
batch_size: 1
batch_transforms:
- !PadBatch
pad_to_stride: 128
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
test_file: val2017.txt
drop_last: false
image_shape: [3, 1333, 800]
num_workers: 2
sample_transforms:
- !DecodeImage
to_rgb: true
with_mixup: false
- !NormalizeImage
is_channel_first: false
is_scale: true
mean: [0.485, 0.456, 0.406]
std: [0.229, 0.224, 0.225]
- !ResizeImage
interp: 1
max_size: 1333
target_size: 800
use_cv2: true
- !Permute
channel_first: true
to_bgr: false
shuffle: false
architecture: SSD
max_iters: 28000
train_feed: SSDTrainFeed
eval_feed: SSDEvalFeed
test_feed: SSDTestFeed
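# NOTE(review): './ssd3/' looks like a developer-local directory, unlike the published URLs used by the other configs; confirm and point this at a distributable pretrained-weights archive.
pretrain_weights: ./ssd3/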
use_gpu: true
snapshot_iter: 2000
log_smooth_window: 1
metric: VOC
save_dir: output
weights: output/ssd_mobilenet_v1_voc/model_final/
SSD:
backbone: MobileNet
multi_box_head: MultiBoxHead
num_classes: 21
metric:
ap_version: 11point
evaluate_difficult: false
overlap_threshold: 0.5
output_decoder:
background_label: 0
keep_top_k: 200
nms_eta: 1.0
nms_threshold: 0.45
nms_top_k: 400
score_threshold: 0.01
MobileNet:
norm_decay: 0.
conv_group_scale: 1
extra_block_filters: [[256, 512], [128, 256], [128, 256], [64, 128]]
with_extra_blocks: true
MultiBoxHead:
aspect_ratios: [[2.], [2., 3.], [2., 3.], [2., 3.], [2., 3.], [2., 3.]]
base_size: 300
flip: true
max_ratio: 90
max_sizes: [[], 150.0, 195.0, 240.0, 285.0, 300.0]
min_ratio: 20
min_sizes: [60.0, 105.0, 150.0, 195.0, 240.0, 285.0]
offset: 0.5
LearningRate:
schedulers:
- !PiecewiseDecay
milestones: [10000, 15000, 20000, 25000]
values: [0.001, 0.0005, 0.00025, 0.0001, 0.00001]
OptimizerBuilder:
optimizer:
momentum: 0.0
type: RMSPropOptimizer
regularizer:
factor: 0.00005
type: L2
SSDTrainFeed:
batch_size: 32
use_process: true
dataset:
dataset_dir: data/voc
annotation: VOCdevkit/VOC_all/ImageSets/Main/train.txt
image_dir: VOCdevkit/VOC_all/JPEGImages
SSDEvalFeed:
batch_size: 64
use_process: true
dataset:
dataset_dir: data/voc
annotation: VOCdevkit/VOC_all/ImageSets/Main/val.txt
image_dir: VOCdevkit/VOC_all/JPEGImages
use_default_label: false
drop_last: false
SSDTestFeed:
batch_size: 1
dataset:
dataset_dir: data/voc
annotation: VOCdevkit/VOC_all/ImageSets/Main/test.txt
image_dir: VOCdevkit/VOC_all/JPEGImages
use_default_label: false
drop_last: false
test_file: data/voc/VOCdevkit/VOC_all/ImageSets/Main/test.txt
# YOLOv3 detector with a DarkNet (depth 53) backbone, evaluated with the COCO
# metric.
architecture: YOLOv3
train_feed: YoloTrainFeed
eval_feed: YoloEvalFeed
test_feed: YoloTestFeed
# `yes` is a YAML 1.1 boolean.
use_gpu: yes
max_iters: 500200
log_smooth_window: 20
save_dir: output
snapshot_iter: 2000
metric: COCO
pretrain_weights: https://paddlemodels.bj.bcebos.com/yolo/darknet53.tar.gz
weights: https://paddlemodels.bj.bcebos.com/yolo/yolov3.tar.gz
# Components plugged into the YOLOv3 architecture.
YOLOv3:
backbone: DarkNet
yolo_head: YOLOv3Head
# Backbone settings.
DarkNet:
norm_type: sync_bn
norm_decay: 0.
depth: 53
# Head settings: nine anchors, referenced by index through three anchor masks,
# followed by the NMS settings and the class count.
YOLOv3Head:
anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
anchors: [[10, 13], [16, 30], [33, 23],
[30, 61], [62, 45], [59, 119],
[116, 90], [156, 198], [373, 326]]
norm_decay: 0.
ignore_thresh: 0.7
label_smooth: true
nms:
background_label: -1
keep_top_k: 100
nms_threshold: 0.45
nms_top_k: 1000
normalized: false
score_threshold: 0.01
num_classes: 80
# Learning rate: base 0.001, PiecewiseDecay (gamma 0.1) at milestones 400000
# and 450000, combined with a LinearWarmup of 4000 steps.
LearningRate:
base_lr: 0.001
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones:
- 400000
- 450000
- !LinearWarmup
start_factor: 0.
steps: 4000
# Optimizer and L2 regularizer.
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0005
type: L2
# Data feeds; all three read from data/coco.
YoloTrainFeed:
batch_size: 8
dataset:
dataset_dir: data/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
num_workers: 8
bufsize: 128
use_process: true
YoloEvalFeed:
batch_size: 8
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
YoloTestFeed:
batch_size: 1
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
# NOTE(review): test_file is a relative path; confirm which directory it is
# resolved against.
test_file: ../val2017.txt
samples: 5
# YOLOv3 detector with a MobileNet backbone, evaluated with the COCO metric.
architecture: YOLOv3
train_feed: YoloTrainFeed
eval_feed: YoloEvalFeed
test_feed: YoloTestFeed
# `yes` is a YAML 1.1 boolean.
use_gpu: yes
max_iters: 500200
log_smooth_window: 20
save_dir: output
snapshot_iter: 2000
metric: COCO
# Fetched over HTTPS, like the other pretrained weights served from this host.
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_pretrained.tar
weights: https://paddlemodels.bj.bcebos.com/yolo/yolo_mobilenet1.0.tar.gz
# Components plugged into the YOLOv3 architecture.
YOLOv3:
backbone: MobileNet
yolo_head: YOLOv3Head
# Backbone settings.
MobileNet:
norm_type: sync_bn
norm_decay: 0.
conv_group_scale: 1
with_extra_blocks: false
# Head settings: nine anchors, referenced by index through three anchor masks,
# followed by the NMS settings and the class count.
YOLOv3Head:
anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
anchors: [[10, 13], [16, 30], [33, 23],
[30, 61], [62, 45], [59, 119],
[116, 90], [156, 198], [373, 326]]
norm_decay: 0.
ignore_thresh: 0.7
label_smooth: true
nms:
background_label: -1
keep_top_k: 100
nms_threshold: 0.45
nms_top_k: 1000
normalized: false
score_threshold: 0.01
num_classes: 80
# Learning rate: base 0.001, PiecewiseDecay (gamma 0.1) at milestones 400000
# and 450000, combined with a LinearWarmup of 4000 steps.
LearningRate:
base_lr: 0.001
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones:
- 400000
- 450000
- !LinearWarmup
start_factor: 0.
steps: 4000
# Optimizer and L2 regularizer.
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0005
type: L2
# Data feeds; all three read from data/coco.
YoloTrainFeed:
batch_size: 8
dataset:
dataset_dir: data/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
num_workers: 8
bufsize: 128
use_process: true
YoloEvalFeed:
batch_size: 8
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
YoloTestFeed:
batch_size: 1
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
# NOTE(review): test_file is a relative path; confirm which directory it is
# resolved against.
test_file: ../val2017.txt
samples: 5
# YOLOv3 detector with a ResNet (depth 34) backbone, evaluated with the COCO
# metric.
architecture: YOLOv3
train_feed: YoloTrainFeed
eval_feed: YoloEvalFeed
test_feed: YoloTestFeed
# `yes` is a YAML 1.1 boolean.
use_gpu: yes
max_iters: 500200
log_smooth_window: 20
save_dir: output
snapshot_iter: 2000
metric: COCO
pretrain_weights: https://paddle-imagenet-models-name.bj.bcebos.com/ResNet34_pretrained.tar
weights: https://paddlemodels.bj.bcebos.com/yolo/yolo_resnet34.tar.gz
# Components plugged into the YOLOv3 architecture.
YOLOv3:
backbone: ResNet
yolo_head: YOLOv3Head
# Backbone settings; three feature maps (3, 4, 5) are requested.
ResNet:
norm_type: sync_bn
norm_decay: 0.
depth: 34
feature_maps: [3, 4, 5]
# Head settings: nine anchors, referenced by index through three anchor masks,
# followed by the NMS settings and the class count.
YOLOv3Head:
anchor_masks: [[6, 7, 8], [3, 4, 5], [0, 1, 2]]
anchors: [[10, 13], [16, 30], [33, 23],
[30, 61], [62, 45], [59, 119],
[116, 90], [156, 198], [373, 326]]
norm_decay: 0.
ignore_thresh: 0.7
label_smooth: true
nms:
background_label: -1
keep_top_k: 100
nms_threshold: 0.45
nms_top_k: 1000
normalized: false
score_threshold: 0.01
num_classes: 80
# Learning rate: base 0.001, PiecewiseDecay (gamma 0.1) at milestones 400000
# and 450000, combined with a LinearWarmup of 4000 steps.
LearningRate:
base_lr: 0.001
schedulers:
- !PiecewiseDecay
gamma: 0.1
milestones:
- 400000
- 450000
- !LinearWarmup
start_factor: 0.
steps: 4000
# Optimizer and L2 regularizer.
OptimizerBuilder:
optimizer:
momentum: 0.9
type: Momentum
regularizer:
factor: 0.0005
type: L2
# Data feeds; all three read from data/coco.
YoloTrainFeed:
batch_size: 8
dataset:
dataset_dir: data/coco
annotation: annotations/instances_train2017.json
image_dir: train2017
num_workers: 8
bufsize: 128
use_process: true
YoloEvalFeed:
batch_size: 8
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
YoloTestFeed:
batch_size: 1
dataset:
dataset_dir: data/coco
annotation: annotations/instances_val2017.json
image_dir: val2017
# NOTE(review): test_file is a relative path; confirm which directory it is
# resolved against.
test_file: ../val2017.txt
samples: 5
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import ppdet.modeling
import ppdet.optimizer
import ppdet.data.data_feed
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
import inspect
import importlib
import re
try:
    from docstring_parser import parse as doc_parse
except Exception:

    def doc_parse(*args):
        """
        Stand-in used when `docstring_parser` cannot be imported.

        Accepts any positional arguments and always returns None. The first
        call prints a one-time yellow notice; later calls are silent.
        """
        if doc_parse.__warning_sent__:
            return
        # imported lazily so the notice helper is only loaded when needed
        from ppdet.utils.cli import ColorTTY
        notice = ("docstring_parser is not installed, "
                  "argument description is not available")
        print(ColorTTY().yellow(notice))
        doc_parse.__warning_sent__ = True

    # flag stored on the function itself: has the notice been printed yet?
    doc_parse.__warning_sent__ = False
try:
    from typeguard import check_type
except Exception:

    def check_type(*args):
        """
        Stand-in used when `typeguard` cannot be imported.

        Accepts any positional arguments, performs no checking and returns
        None. The first call prints a one-time yellow notice.
        """
        if check_type.__warning_sent__:
            return
        # imported lazily so the notice helper is only loaded when needed
        from ppdet.utils.cli import ColorTTY
        notice = "typeguard is not installed, type checking is not available"
        print(ColorTTY().yellow(notice))
        check_type.__warning_sent__ = True

    # flag stored on the function itself: has the notice been printed yet?
    check_type.__warning_sent__ = False
__all__ = ['SchemaValue', 'SchemaDict', 'extract_schema']
class SchemaValue(object):
    """
    Description of a single schema entry.

    Args:
        name (str): entry name.
        doc (str): human readable description, empty by default.
        type: expected type of the value, or None when unspecified.

    The `default` attribute only exists after `set_default` has been called,
    which is how "no default" is told apart from "default is None".
    """

    def __init__(self, name, doc='', type=None):
        super(SchemaValue, self).__init__()
        self.name, self.doc, self.type = name, doc, type

    def set_default(self, value):
        """Record `value` as the default of this entry."""
        self.default = value

    def has_default(self):
        """Return True once a default has been recorded."""
        try:
            self.default
        except AttributeError:
            return False
        return True
class SchemaDict(dict):
    """
    Dict of configured values for one module, paired with its schema.

    The dict items are the explicitly configured values. `schema` maps each
    known key to a `SchemaValue`; reading a key that is not configured falls
    back to the schema (see `__missing__`).

    Attributes set here: `schema` (dict), `strict` (bool, reject unknown keys
    during `validate`), `doc` (str). A `name` attribute may be attached by
    whoever builds the schema; error messages fall back to the class name
    when it is absent.
    """

    def __init__(self, **kwargs):
        super(SchemaDict, self).__init__()
        self.schema = {}
        self.strict = False
        self.doc = ""
        self.update(kwargs)

    def __setitem__(self, key, value):
        # XXX also update regular dict to SchemaDict??
        # a plain dict assigned over an existing SchemaDict is merged into it
        # instead of replacing it
        if isinstance(value, dict) and key in self and isinstance(
                self[key], SchemaDict):
            self[key].update(value)
        else:
            super(SchemaDict, self).__setitem__(key, value)

    def __missing__(self, key):
        """
        Fallback for keys that are not configured: the schema default when
        there is one, otherwise the schema entry itself, otherwise KeyError.
        """
        if self.has_default(key):
            return self.schema[key].default
        elif key in self.schema:
            return self.schema[key]
        else:
            raise KeyError(key)

    def copy(self):
        """
        Return a new SchemaDict with the same items and attributes.

        Attributes are copied by reference, so the copy shares the `schema`
        mapping with the original.
        """
        newone = SchemaDict()
        newone.__dict__.update(self.__dict__)
        newone.update(self)
        return newone

    def set_schema(self, key, value):
        """Register the `SchemaValue` describing `key`."""
        assert isinstance(value, SchemaValue)
        self.schema[key] = value

    def set_strict(self, strict):
        """Enable or disable rejection of keys unknown to the schema."""
        self.strict = strict

    def has_default(self, key):
        """Return True if the schema knows `key` and it has a default."""
        return key in self.schema and self.schema[key].has_default()

    def is_default(self, key):
        """
        Return True if `key` currently resolves to its schema default.

        Values carrying a `__dict__` (arbitrary objects) are always reported
        as default rather than being compared.
        """
        if not self.has_default(key):
            return False
        if hasattr(self[key], '__dict__'):
            return True
        return key not in self or self[key] == self.schema[key].default

    def find_default_keys(self):
        """
        Return the keys (configured or schema-only) that are at their
        default, each listed once, configured keys first.
        """
        seen = set()
        default_keys = []
        for k in list(self.keys()) + list(self.schema.keys()):
            # a key present in both the items and the schema is visited once
            if k in seen:
                continue
            seen.add(k)
            if self.is_default(k):
                default_keys.append(k)
        return default_keys

    def mandatory(self):
        """Return True if at least one schema key has no default."""
        return any(not self.has_default(k) for k in self.schema.keys())

    def find_missing_keys(self):
        """
        Return keys required by the schema but not configured, followed by
        configured keys still holding a '<missing>' / '<value>' placeholder.
        """
        missing = [
            k for k in self.schema.keys()
            if k not in self and not self.has_default(k)
        ]
        placeholders = [k for k in self if self[k] in ('<missing>', '<value>')]
        return missing + placeholders

    def find_extra_keys(self):
        """Return configured keys that the schema does not know about."""
        return list(set(self.keys()) - set(self.schema.keys()))

    def find_mismatch_keys(self):
        """
        Return names of typed schema entries whose current value is rejected
        by `check_type`. Entries without a type are not checked.
        """
        # resolved outside the try block so that a missing `name` attribute
        # cannot be mistaken for a type mismatch
        label = self._label()
        mismatch_keys = []
        for arg in self.schema.values():
            if arg.type is not None:
                try:
                    check_type("{}.{}".format(label, arg.name),
                               self[arg.name], arg.type)
                except Exception:
                    mismatch_keys.append(arg.name)
        return mismatch_keys

    def validate(self):
        """
        Check the configured values against the schema.

        Raises:
            ValueError: a required key is missing or holds a placeholder, or
                `strict` is set and an unknown key is present.
            TypeError: a value does not match its declared type.
        """
        label = self._label()
        missing_keys = self.find_missing_keys()
        if missing_keys:
            raise ValueError("Missing param for class<{}>: {}".format(
                label, ", ".join(missing_keys)))
        extra_keys = self.find_extra_keys()
        if extra_keys and self.strict:
            raise ValueError("Extraneous param for class<{}>: {}".format(
                label, ", ".join(extra_keys)))
        mismatch_keys = self.find_mismatch_keys()
        if mismatch_keys:
            raise TypeError("Wrong param type for class<{}>: {}".format(
                label, ", ".join(mismatch_keys)))

    def _label(self):
        """Name used in messages: the `name` attribute, else the class name."""
        return getattr(self, 'name', type(self).__name__)
def extract_schema(cls):
    """
    Extract schema from a given class

    The schema is derived from the signature of `cls.__init__` (argument
    names, defaults, annotations) and from the class docstring (summary line
    and per-argument descriptions, when a docstring parser is available).

    Args:
        cls (type): Class from which to extract.

    Returns:
        schema (SchemaDict): Extracted schema.
    """
    ctor = cls.__init__
    # python 2 compatibility
    if hasattr(inspect, 'getfullargspec'):
        argspec = inspect.getfullargspec(ctor)
        annotations = argspec.annotations
        has_kwargs = argspec.varkw is not None
    else:
        argspec = inspect.getargspec(ctor)
        # python 2 type hinting workaround, see pep-3107
        # however, since `typeguard` does not support python 2, type checking
        # is still python 3 only for now
        annotations = getattr(ctor, '__annotations__', {})
        has_kwargs = argspec.keywords is not None

    names = [arg for arg in argspec.args if arg != 'self']
    defaults = argspec.defaults or ()
    # defaults always belong to the trailing arguments
    num_required = len(names) - len(defaults)

    docs = cls.__doc__
    if docs is None and getattr(cls, '__category__', None) == 'op':
        docs = cls.__call__.__doc__
    docstring = doc_parse(docs)
    comments = {}
    if docstring is not None:
        for p in docstring.params:
            # keep only the leading identifier of the documented argument name
            match_obj = re.match('^([a-zA-Z_]+[a-zA-Z_0-9]*).*', p.arg_name)
            if match_obj is not None:
                comments[match_obj.group(1)] = p.description

    schema = SchemaDict()
    schema.name = cls.__name__
    schema.doc = ""
    # `if docs` (rather than `is not None`) also covers an empty docstring,
    # for which indexing docs[0] would raise IndexError
    if docs:
        start_pos = 1 if docs[0] == '\n' else 0
        schema.doc = docs[start_pos:].split("\n")[0].strip()
    # XXX handle paddle's weird doc convention
    if '**' == schema.doc[:2] and '**' == schema.doc[-2:]:
        schema.doc = schema.doc[2:-2].strip()
    schema.category = getattr(cls, '__category__', None) or 'module'
    # a constructor taking **kwargs accepts keys unknown to the schema
    schema.strict = not has_kwargs
    schema.pymodule = importlib.import_module(cls.__module__)
    schema.inject = getattr(cls, '__inject__', [])

    for idx, name in enumerate(names):
        # fall back to the argument name when no description is available
        comment = comments.get(name) or name
        if name in schema.inject:
            # injected arguments are never type checked
            type_ = None
        else:
            type_ = annotations.get(name) or None
        value_schema = SchemaValue(name, comment, type_)
        if idx >= num_required:
            value_schema.set_default(defaults[idx - num_required])
        schema.set_schema(name, value_schema)

    return schema
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import importlib
import inspect
import yaml
__all__ = ['serializable', 'Callable']
def _make_python_constructor(cls):
def python_constructor(loader, node):
if isinstance(node, yaml.SequenceNode):
args = loader.construct_sequence(node, deep=True)
return cls(*args)
else:
kwargs = loader.construct_mapping(node, deep=True)
try:
return cls(**kwargs)
except Exception as ex:
print("Error when construct {} instance from yaml config".
format(cls.__name__))
raise ex
return python_constructor
def _make_python_representer(cls):
# python 2 compatibility
if hasattr(inspect, 'getfullargspec'):
argspec = inspect.getfullargspec(cls)
else:
argspec = inspect.getargspec(cls.__init__)
argnames = [arg for arg in argspec.args if arg != 'self']
def python_representer(dumper, obj):
if argnames:
data = {name: getattr(obj, name) for name in argnames}
else:
data = obj.__dict__
if '_id' in data:
del data['_id']
return dumper.represent_mapping(u'!{}'.format(cls.__name__), data)
return python_representer
def serializable(cls):
    """
    Class decorator registering a YAML loader and dumper for `cls`.

    The class is bound to the tag `!<ClassName>` and must be "trivially
    serializable".

    Args:
        cls: class to be serialized

    Returns: cls
    """
    yaml_tag = u'!{}'.format(cls.__name__)
    construct = _make_python_constructor(cls)
    yaml.add_constructor(yaml_tag, construct)
    represent = _make_python_representer(cls)
    yaml.add_representer(cls, represent)
    return cls
@serializable
class Callable(object):
    """
    Helper to be used in Yaml for creating arbitrary class objects

    Calling the instance imports the target named by `full_type` and invokes
    it with the stored arguments.

    Args:
        full_type (str): the full module path to target function; a name
            without any dot is looked up among the builtins
        args (list): positional arguments passed to the target
        kwargs (dict): keyword arguments passed to the target
    """

    def __init__(self, full_type, args=[], kwargs={}):
        super(Callable, self).__init__()
        self.full_type = full_type
        # store copies: the default containers are shared by every call of
        # this constructor and must never be handed out or mutated
        self.args = list(args or ())
        self.kwargs = dict(kwargs or {})

    def __call__(self):
        """Resolve the target and return `target(*args, **kwargs)`."""
        if '.' in self.full_type:
            idx = self.full_type.rfind('.')
            module = importlib.import_module(self.full_type[:idx])
            func_name = self.full_type[idx + 1:]
        else:
            # the builtins module is named differently on python 2
            try:
                module = importlib.import_module('builtins')
            except Exception:
                module = importlib.import_module('__builtin__')
            func_name = self.full_type

        func = getattr(module, func_name)
        return func(*self.args, **self.kwargs)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import print_function
from __future__ import division
import importlib
import os
import sys
import yaml
from .config.schema import SchemaDict, extract_schema
from .config.yaml_helpers import serializable
__all__ = [
'global_config', 'load_config', 'merge_config', 'get_registered_modules',
'create', 'register', 'serializable'
]
class AttrDict(dict):
    """
    Dict whose items can also be read as attributes.

    Only the top level is exposed this way; nested dicts are returned as they
    are (NOT recursive).
    """

    def __init__(self, **kwargs):
        super(AttrDict, self).__init__(**kwargs)

    def __getattr__(self, key):
        # only reached when normal attribute lookup has failed
        if key not in self:
            raise AttributeError("object has no attribute '{}'".format(key))
        return self[key]
global_config = AttrDict()
def load_config(file_path):
    """
    Load config from file.

    The parsed content is merged into the global config; an empty file leaves
    the global config unchanged.

    Args:
        file_path (str): Path of the config file to be loaded.

    Returns: global config
    """
    _, ext = os.path.splitext(file_path)
    assert ext in ['.yml', '.yaml'], "only support yaml files for now"

    # NOTE(security): yaml.Loader can construct arbitrary python objects, so
    # only load config files from trusted sources.
    with open(file_path) as f:
        loaded = yaml.load(f, Loader=yaml.Loader)
    # an empty document parses to None
    merge_config(loaded or {})
    return global_config
def merge_config(config):
    """
    Merge config into global config.

    A dict value is merged into an existing dict entry of the same key (one
    level deep); every other value replaces the existing entry.

    Args:
        config (dict): Config to be merged.

    Returns: global config
    """
    for key, value in config.items():
        current = global_config.get(key)
        # only merge when both sides are dicts; a dict overriding a scalar
        # (or a missing entry) simply replaces it
        if isinstance(value, dict) and isinstance(current, dict):
            current.update(value)
        else:
            global_config[key] = value
    return global_config
def get_registered_modules():
    """Return a new dict with the global config entries that are SchemaDict."""
    registered = {}
    for name, entry in global_config.items():
        if isinstance(entry, SchemaDict):
            registered[name] = entry
    return registered
def make_partial(cls):
    """
    Turn `cls` into a configurable wrapper around the operator `cls.__op__`.

    Instances become callable: calling one invokes the operator with the
    given positional arguments and with the instance attributes as keyword
    arguments (call-time keyword arguments take precedence).

    Unless the class sets `__append_doc__` to a false value, the operator's
    documentation is attached to the wrapper.

    Args:
        cls (type): class carrying an `__op__` attribute.

    Returns: cls
    """
    # re-resolve the operator through its module instead of using
    # `cls.__op__` directly
    op_module = importlib.import_module(cls.__op__.__module__)
    op = getattr(op_module, cls.__op__.__name__)
    cls.__category__ = getattr(cls, '__category__', None) or 'op'

    def partial_apply(self, *args, **kwargs):
        kwargs_ = self.__dict__.copy()
        kwargs_.update(kwargs)
        return op(*args, **kwargs_)

    if getattr(cls, '__append_doc__', True):  # XXX should default to True?
        # set on the plain function, which works on python 2 and 3 alike
        partial_apply.__doc__ = op.__doc__
        if sys.version_info[0] > 2:
            cls.__doc__ = "Wrapper for `{}` OP".format(op.__name__)
            try:
                cls.__init__.__doc__ = op.__doc__
            except (AttributeError, TypeError):
                # an inherited builtin __init__ has a read-only __doc__
                pass

    # install the call hook whether or not the documentation was appended
    cls.__call__ = partial_apply
    return cls
def register(cls):
    """
    Class decorator adding a module class to the global config.

    Classes carrying an `__op__` attribute are first turned into operator
    wrappers. The extracted schema is stored under the class name.

    Args:
        cls (type): Module class to be registered.

    Returns: cls

    Raises:
        ValueError: a class of the same name is already registered.
    """
    class_name = cls.__name__
    already_known = class_name in global_config
    if already_known:
        raise ValueError("Module class already registered: {}".format(
            class_name))
    wrapped = make_partial(cls) if hasattr(cls, '__op__') else cls
    global_config[wrapped.__name__] = extract_schema(wrapped)
    return wrapped
def create(cls_or_name, **kwargs):
    """
    Create an instance of given module class.

    `kwargs` are written into the registered config of the module before it
    is validated, so they persist for later calls. Arguments listed in the
    module's `inject` attribute and configured as the name of another global
    config entry are replaced by that entry: a registered module is created
    recursively, a serialized object is passed as it is.

    Args:
        cls_or_name (type or str): Class of which to create instance.

    Returns: instance of type `cls_or_name`

    Raises:
        ValueError: an injected argument names an unknown config entry, or
            has an unsupported type.
    """
    # isinstance also accepts classes built with a custom metaclass
    assert isinstance(cls_or_name, (type, str)), \
        "should be a class or name of a class"
    if isinstance(cls_or_name, str):
        name = cls_or_name
    else:
        name = cls_or_name.__name__
    assert name in global_config and isinstance(global_config[name], SchemaDict), \
        "the module {} is not registered".format(name)
    config = global_config[name]
    config.update(kwargs)
    config.validate()
    cls = getattr(config.pymodule, name)

    # only explicitly configured values are passed on; everything else is
    # left to the constructor defaults
    ctor_kwargs = {}
    ctor_kwargs.update(config)

    for k in getattr(config, 'inject', None) or []:
        target_key = config[k]
        # optional dependency
        if target_key is None:
            continue
        # also accept dictionaries and serialized objects
        if isinstance(target_key, dict) or hasattr(target_key, '__dict__'):
            continue
        elif isinstance(target_key, str):
            if target_key not in global_config:
                raise ValueError(
                    "Missing injection config: {}".format(target_key))
            target = global_config[target_key]
            if isinstance(target, SchemaDict):
                ctor_kwargs[k] = create(target_key)
            elif hasattr(target, '__dict__'):  # serialized object
                ctor_kwargs[k] = target
        else:
            raise ValueError(
                "Unsupported injection type: {}".format(target_key))
    return cls(**ctor_kwargs)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# function:
# module to prepare data for detection model training
#
# implementation notes:
# - Dataset
# basic interface for accessing data samples in stream mode
#
# - xxxSource (RoiDbSource)
# * subclass of 'Dataset'
# * load data from local files and other source data
#
# - xxxOperator (DecodeImage)
# * subclass of 'BaseOperator'
# * each op can transform a sample, eg: decode/resize/crop image
# * each op must obey basic rules defined in transform.operator.base
#
# - transformer
# * subclass of 'Dataset'
# * 'MappedDataset' accept a 'xxxSource' and a list of 'xxxOperator'
# to build a transformed 'Dataset'
from .dataset import Dataset
from .reader import Reader
from .data_feed import create_reader
__all__ = ['Dataset', 'Reader', 'create_reader']
此差异已折叠。
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# function:
# interface for accessing data samples in stream
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
class Dataset(object):
    """
    Base interface for a stream of data samples.

    The class is its own iterator: `__next__` delegates to `next`. Every
    accessor below raises NotImplementedError and is meant to be overridden.
    """

    def __init__(self):
        self._epoch = -1

    def __iter__(self):
        return self

    def __next__(self):
        return self.next()

    def __str__(self):
        # `_fname` and `_pos` are not set by this base class
        template = "{}(fname:{}, epoch:{:d}, size:{:d}, pos:{:d})"
        return template.format(
            type(self).__name__, self._fname, self._epoch,
            self.size(), self._pos)

    def next(self):
        """return the next sample"""
        raise NotImplementedError(
            '{}.next not available'.format(type(self).__name__))

    def reset(self):
        """go back to the initial state and start a new epoch"""
        raise NotImplementedError(
            '{}.reset not available'.format(type(self).__name__))

    def size(self):
        """return the number of samples in this dataset"""
        raise NotImplementedError(
            '{}.size not available'.format(type(self).__name__))

    def drained(self):
        """tell whether all samples of this epoch have been read out"""
        raise NotImplementedError(
            '{}.drained not available'.format(type(self).__name__))

    def epoch_id(self):
        """return the epoch id of the latest sample"""
        raise NotImplementedError(
            '{}.epoch_id not available'.format(type(self).__name__))
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# function:
# Interface to build readers for detection data like COCO or VOC
#
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
from __future__ import unicode_literals
from numbers import Integral
import logging
from .source import build_source
from .transform import build_mapper, map, batch, batch_map
logger = logging.getLogger(__name__)
class Reader(object):
    """
    Interface to make readers for training or evaluation

    Args:
        data_cf (dict): per-mode data source config, keyed by 'TRAIN',
            'VAL' and 'TEST'.
        trans_conf (dict): per-mode transform config, keyed the same way;
            each entry provides 'OPS' and 'BATCH_SIZE' and may provide
            'DROP_LAST', 'WORKER_CONF' and batch-mapping options.
        maxiter (int): number of batches a reader yields before stopping;
            a value <= 0 means a single pass over the data.
    """

    def __init__(self, data_cf, trans_conf, maxiter=-1):
        self._data_cf = data_cf
        self._trans_conf = trans_conf
        self._maxiter = maxiter
        self._cname2cid = None
        assert isinstance(self._maxiter, Integral), "maxiter should be int"

    def _make_reader(self, mode):
        """
        Build reader for training or validation

        Args:
            mode (str): key into the data and transform configs.

        Returns:
            a generator function yielding batches; it carries an `imid2path`
            attribute when the source offers `get_imid2path`.
        """
        file_conf = self._data_cf[mode]
        mode_conf = self._trans_conf[mode]
        is_train = mode.lower() == 'train'

        # 1, Build data source
        sc_conf = {'data_cf': file_conf, 'cname2cid': self._cname2cid}
        sc = build_source(sc_conf)

        # 2, Build a transformed dataset
        ops = mode_conf['OPS']
        batchsize = mode_conf['BATCH_SIZE']
        drop_last = mode_conf['DROP_LAST'] if 'DROP_LAST' in mode_conf \
            else False

        mapper = build_mapper(ops, {'is_train': is_train})

        worker_args = None
        if 'WORKER_CONF' in mode_conf:
            worker_args = mode_conf['WORKER_CONF']
            worker_args = {k.lower(): v for k, v in worker_args.items()}

        # NOTE: `map` and `batch` are the transform helpers imported by this
        # module, not the python builtins
        mapped_ds = map(sc, mapper, worker_args)
        batched_ds = batch(mapped_ds, batchsize, drop_last)

        # forward only the options understood by the batch mapper
        trans_conf = {k.lower(): v for k, v in mode_conf.items()}
        need_keys = {
            'is_padding',
            'coarsest_stride',
            'random_shapes',
            'multi_scales',
            'use_padded_im_info',
        }
        bm_config = {
            key: value
            for key, value in trans_conf.items() if key in need_keys
        }
        batched_ds = batch_map(batched_ds, bm_config)

        batched_ds.reset()
        if is_train:
            if self._cname2cid is not None:
                logger.warning('cname2cid already set, it will be overridden')
            # remember the label mapping of the training source so that later
            # sources are built with it
            self._cname2cid = sc.cname2cid

        # 3, Build a reader
        maxit = -1 if self._maxiter <= 0 else self._maxiter

        def _reader():
            n = 0
            while True:
                produced = False
                for _batch in batched_ds:
                    produced = True
                    yield _batch
                    n += 1
                    if maxit > 0 and n == maxit:
                        return
                batched_ds.reset()
                if maxit <= 0:
                    return
                if not produced:
                    # an epoch without any batch can never reach `maxit`;
                    # stop instead of spinning forever
                    logger.warning('dataset yields no batch, reader stops')
                    return

        if hasattr(sc, 'get_imid2path'):
            _reader.imid2path = sc.get_imid2path()

        return _reader

    def train(self):
        """Build reader for training"""
        return self._make_reader('TRAIN')

    def val(self):
        """Build reader for validation"""
        return self._make_reader('VAL')

    def test(self):
        """Build reader for inference"""
        return self._make_reader('TEST')
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import copy
from .roidb_source import RoiDbSource
from .simple_source import SimpleSource
def build_source(config):
    """
    Create a data source from its config; 'RoiDbSource' is the default type.

    Args:
        config (dict): either the source arguments themselves, or a wrapper
            of the following structure:
            {
                data_cf (dict): source arguments, keys are lower-cased
                    anno_file (str): label file or image list file path
                    image_dir (str): root directory for images
                    samples (int): number of samples to load, -1 means all
                    is_shuffle (bool): should samples be shuffled
                    load_img (bool): should images be loaded
                    mixup_epoch (int): parse mixup in first n epoch
                    with_background (bool): whether load background as a class
                cname2cid (dict): the label name to id dictionary
            }

    An optional 'type' entry selects the source class and is not forwarded to
    it; 'VOCSource' and 'COCOSource' are aliases of 'RoiDbSource'.

    Raises:
        ValueError: the requested source type is not supported.
    """
    if 'data_cf' not in config:
        settings = config
    else:
        settings = dict(
            (key.lower(), val) for key, val in config['data_cf'].items())
        settings['cname2cid'] = config['cname2cid']

    # work on a deep copy so the caller's config is left untouched
    ctor_kwargs = copy.deepcopy(settings)
    requested = 'RoiDbSource'
    if 'type' in settings:
        if settings['type'] not in ['VOCSource', 'COCOSource', 'RoiDbSource']:
            requested = settings['type']
        del ctor_kwargs['type']

    if requested == 'SimpleSource':
        return SimpleSource(**ctor_kwargs)
    if requested != 'RoiDbSource':
        raise ValueError('source type not supported: ' + requested)
    return RoiDbSource(**ctor_kwargs)
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import numpy as np
from pycocotools.coco import COCO
import logging
logger = logging.getLogger(__name__)
def load(anno_path, sample_num=-1, with_background=True):
    """
    Load COCO records with annotations in json file 'anno_path'

    Args:
        anno_path (str): json file path
        sample_num (int): number of samples to load; values <= 0
            (default -1) mean all
        with_background (bool): whether load background as a class.
            If True, class ids start at 1 and id 0 is left for the
            background (81 classes in total for the 80 COCO categories).
            default True

    Returns:
        (records, cname2cid)
        'records' is list of dict whose structure is:
        {
            'im_file': im_fname,    # image file name
            'im_id': img_id,        # image id, wrapped in a 1-element array
            'h': im_h,              # height of image
            'w': im_w,              # width
            'is_crowd': is_crowd,   # (num_bbox, 1) int32
            'gt_score': gt_score,   # (num_bbox, 1) float32, all ones
            'gt_class': gt_class,   # (num_bbox, 1) int32
            'gt_bbox': gt_bbox,     # (num_bbox, 4) float32, [x1, y1, x2, y2]
            'gt_poly': gt_poly,     # list, 'segmentation' entry or None
            'difficult': difficult, # (num_bbox, 1) int32, all zeros
        }
        'cname2cid' is a dict used to map category name to class id

    Raises:
        AssertionError: if 'anno_path' is not a '.json' file or no record
            could be loaded from it.
    """
    assert anno_path.endswith('.json'), 'invalid coco annotation file: ' \
        + anno_path

    coco = COCO(anno_path)
    img_ids = coco.getImgIds()
    cat_ids = coco.getCatIds()
    records = []
    ct = 0

    # when with_background = True, mapping category to classid, like:
    #   background:0, first_class:1, second_class:2, ...
    catid2clsid = {
        catid: i + int(with_background)
        for i, catid in enumerate(cat_ids)
    }
    cname2cid = {
        coco.loadCats(catid)[0]['name']: clsid
        for catid, clsid in catid2clsid.items()
    }

    for img_id in img_ids:
        img_anno = coco.loadImgs(img_id)[0]
        im_fname = img_anno['file_name']
        im_w = img_anno['width']
        im_h = img_anno['height']

        # Only annotations with iscrowd == 0 are requested here.
        ins_anno_ids = coco.getAnnIds(imgIds=img_id, iscrowd=False)
        instances = coco.loadAnns(ins_anno_ids)

        # Convert [x, y, w, h] to [x1, y1, x2, y2] clipped to the image and
        # keep only boxes with positive area and valid corners.
        bboxes = []
        for inst in instances:
            x, y, box_w, box_h = inst['bbox']
            x1 = max(0, x)
            y1 = max(0, y)
            x2 = min(im_w - 1, x1 + max(0, box_w - 1))
            y2 = min(im_h - 1, y1 + max(0, box_h - 1))
            if inst['area'] > 0 and x2 >= x1 and y2 >= y1:
                inst['clean_bbox'] = [x1, y1, x2, y2]
                bboxes.append(inst)

        num_bbox = len(bboxes)
        gt_bbox = np.zeros((num_bbox, 4), dtype=np.float32)
        gt_class = np.zeros((num_bbox, 1), dtype=np.int32)
        gt_score = np.ones((num_bbox, 1), dtype=np.float32)
        is_crowd = np.zeros((num_bbox, 1), dtype=np.int32)
        difficult = np.zeros((num_bbox, 1), dtype=np.int32)
        gt_poly = [None] * num_bbox

        for i, box in enumerate(bboxes):
            catid = box['category_id']
            gt_class[i][0] = catid2clsid[catid]
            gt_bbox[i, :] = box['clean_bbox']
            is_crowd[i][0] = box['iscrowd']
            # Detection-only annotation files carry no 'segmentation';
            # leave the pre-filled None instead of raising KeyError.
            gt_poly[i] = box.get('segmentation')

        coco_rec = {
            'im_file': im_fname,
            'im_id': np.array([img_id]),
            'h': im_h,
            'w': im_w,
            'is_crowd': is_crowd,
            'gt_class': gt_class,
            'gt_bbox': gt_bbox,
            'gt_score': gt_score,
            'gt_poly': gt_poly,
            'difficult': difficult
        }

        logger.debug('Load file: {}, im_id: {}, h: {}, w: {}.'.format(
            im_fname, img_id, im_h, im_w))
        records.append(coco_rec)
        ct += 1
        if sample_num > 0 and ct >= sample_num:
            break

    assert len(records) > 0, 'not found any coco record in %s' % (anno_path)
    logger.info('{} samples in file {}'.format(ct, anno_path))
    return records, cname2cid
此差异已折叠。
# Reader configuration used with the small 'coco.test' dataset: dataset
# sources (DATA) and per-split transform pipelines (TRANSFORM).
# NOTE(review): every key in this copy sits at column 0, so the nesting
# (split -> options, OPS list -> per-op options) appears to have been lost;
# restore the indentation before parsing.
DATA:
# Training source.
TRAIN:
ANNO_FILE: data/coco.test/train2017.roidb
IMAGE_DIR: data/coco.test/train2017
# Number of samples to load.
SAMPLES: 10
TYPE: RoiDbSource
# Validation source.
VAL:
ANNO_FILE: data/coco.test/val2017.roidb
IMAGE_DIR: data/coco.test/val2017
SAMPLES: 10
TYPE: RoiDbSource
TRANSFORM:
# Training pipeline: list of operators, presumably applied in the listed
# order - verify against the transform builder.
TRAIN:
OPS:
- OP: DecodeImage
TO_RGB: False
- OP: RandomFlipImage
PROB: 0.5
- OP: NormalizeImage
MEAN: [102.9801, 115.9465, 122.7717]
IS_SCALE: False
IS_CHANNEL_FIRST: False
- OP: ResizeImage
TARGET_SIZE: 800
MAX_SIZE: 1333
- OP: Rgb2Bgr
TO_BGR: False
- OP: ArrangeRCNN
BATCH_SIZE: 1
IS_PADDING: True
DROP_LAST: False
# Validation pipeline.
# NOTE(review): this split ends with ArrangeSSD and a 224 target size while
# TRAIN ends with ArrangeRCNN at 800/1333 - confirm the mismatch is
# intentional.
VAL:
OPS:
- OP: DecodeImage
TO_RGB: True
- OP: ResizeImage
TARGET_SIZE: 224
- OP: ArrangeSSD
BATCH_SIZE: 1
# Worker settings; presumably consumed by the parallel reader - TODO confirm
# which split(s) they apply to once indentation is restored.
WORKER_CONF:
BUFSIZE: 200
WORKER_NUM: 8
USE_PROCESS: False
#!/bin/bash

# function:
#   prepare coco data for testing
#
# usage:
#   <this script> [python2]
#
# Downloads the python2 archive when the first argument is "python2" and the
# python3 archive otherwise, unpacks it next to this script and links the
# unpacked directory as "coco.test".
#
# exit status:
#   0 when the data directory was generated, 1 otherwise.

# Directory containing this script; all work happens there.
root=$(dirname "$(readlink -f "${BASH_SOURCE[0]}")")
cwd=$(pwd)
if [[ "$cwd" != "$root" ]]; then
    # Silence only pushd's directory-stack output; keep its error messages.
    pushd "$root" >/dev/null || exit 1
fi

test_coco_python2_url="http://filecenter.matrix.baidu.com/api/v1/file/wanglong03/coco.test.python2.zip/20190603095315/download"
test_coco_python3_url="http://filecenter.matrix.baidu.com/api/v1/file/wanglong03/coco.test.python3.zip/20190603095447/download"

if [[ "$1" = "python2" ]]; then
    test_coco_data_url=${test_coco_python2_url}
    coco_zip_file="coco.test.python2.zip"
else
    test_coco_data_url=${test_coco_python3_url}
    coco_zip_file="coco.test.python3.zip"
fi

echo "download testing coco from url[${test_coco_data_url}]"
coco_root_dir=${coco_zip_file%.zip}

# clear already exist file or directory
rm -rf "${coco_root_dir}" "${coco_zip_file}"

# "wget -O" creates the output file even when the download fails, so rely on
# its exit status rather than on the file's existence.
if wget "${test_coco_data_url}" -O "${coco_zip_file}"; then
    echo "succeed to download ${coco_zip_file}, so unzip it"
    unzip "${coco_zip_file}" >/dev/null 2>&1 \
        || echo "failed to unzip ${coco_zip_file}" >&2
else
    echo "failed to download ${coco_zip_file}" >&2
    # Drop the empty/partial archive left behind by the failed download.
    rm -f "${coco_zip_file}"
fi

if [ -e "${coco_root_dir}" ]; then
    rm -rf coco.test
    ln -s "${coco_root_dir}" coco.test
    echo "succeed to generate coco data in[${coco_root_dir}] for testing"
    exit 0
else
    echo "failed to generate coco data"
    exit 1
fi
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册