From b58662910ee46e7e6df31ac7545e3a6af1f3fa8b Mon Sep 17 00:00:00 2001
From: QingshuChen
Date: Tue, 22 Dec 2020 15:31:09 +0800
Subject: [PATCH] support mask_rcnn for kunlun (#1945)

* support mask_rcnn for kunlun
---
 configs/mask_rcnn_r50_1x_cocome_kunlun.yml | 104 +++++++++++++++++++++
 configs/mask_reader_cocome.yml             |  95 +++++++++++++++++++
 docs/tutorials/train_on_kunlun.md          |  19 ++++
 3 files changed, 218 insertions(+)
 create mode 100644 configs/mask_rcnn_r50_1x_cocome_kunlun.yml
 create mode 100644 configs/mask_reader_cocome.yml

diff --git a/configs/mask_rcnn_r50_1x_cocome_kunlun.yml b/configs/mask_rcnn_r50_1x_cocome_kunlun.yml
new file mode 100644
index 000000000..58517fd4a
--- /dev/null
+++ b/configs/mask_rcnn_r50_1x_cocome_kunlun.yml
@@ -0,0 +1,104 @@
+architecture: MaskRCNN
+use_gpu: false
+use_xpu: true
+max_iters: 1200
+snapshot_iter: 100
+log_iter: 20
+save_dir: output
+pretrain_weights: https://paddlemodels.bj.bcebos.com/object_detection/mask_rcnn_r50_2x.tar
+metric: COCO
+weights: output/mask_rcnn_r50_1x_cocome_kunlun/model_final
+num_classes: 2
+finetune_exclude_pretrained_params: ['cls_score']
+
+MaskRCNN:
+  backbone: ResNet
+  rpn_head: RPNHead
+  roi_extractor: RoIAlign
+  bbox_assigner: BBoxAssigner
+  bbox_head: BBoxHead
+  mask_assigner: MaskAssigner
+  mask_head: MaskHead
+
+ResNet:
+  norm_type: affine_channel
+  norm_decay: 0.
+  depth: 50
+  feature_maps: 4
+  freeze_at: 2
+
+ResNetC5:
+  depth: 50
+  norm_type: affine_channel
+
+RPNHead:
+  anchor_generator:
+    anchor_sizes: [32, 64, 128, 256, 512]
+    aspect_ratios: [0.5, 1.0, 2.0]
+    stride: [16.0, 16.0]
+    variance: [1.0, 1.0, 1.0, 1.0]
+  rpn_target_assign:
+    rpn_batch_size_per_im: 256
+    rpn_fg_fraction: 0.5
+    rpn_negative_overlap: 0.3
+    rpn_positive_overlap: 0.7
+    rpn_straddle_thresh: 0.0
+  train_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 12000
+    post_nms_top_n: 2000
+  test_proposal:
+    min_size: 0.0
+    nms_thresh: 0.7
+    pre_nms_top_n: 6000
+    post_nms_top_n: 1000
+
+RoIAlign:
+  resolution: 14
+  spatial_scale: 0.0625
+  sampling_ratio: 0
+
+BBoxHead:
+  head: ResNetC5
+  nms:
+    keep_top_k: 100
+    nms_threshold: 0.5
+    normalized: false
+    score_threshold: 0.05
+
+MaskHead:
+  dilation: 1
+  conv_dim: 256
+  resolution: 14
+
+BBoxAssigner:
+  batch_size_per_im: 512
+  bbox_reg_weights: [0.1, 0.1, 0.2, 0.2]
+  bg_thresh_hi: 0.5
+  bg_thresh_lo: 0.0
+  fg_fraction: 0.25
+  fg_thresh: 0.5
+
+MaskAssigner:
+  resolution: 14
+
+LearningRate:
+  base_lr: 0.001
+  schedulers:
+  - !PiecewiseDecay
+    gamma: 0.1
+    milestones: [900, 1100]
+  - !LinearWarmup
+    start_factor: 0.1
+    steps: 300
+
+OptimizerBuilder:
+  optimizer:
+    momentum: 0.9
+    type: Momentum
+  regularizer:
+    factor: 0.0001
+    type: L2
+
+_READER_: 'mask_reader_cocome.yml'
diff --git a/configs/mask_reader_cocome.yml b/configs/mask_reader_cocome.yml
new file mode 100644
index 000000000..a7760a162
--- /dev/null
+++ b/configs/mask_reader_cocome.yml
@@ -0,0 +1,95 @@
+TrainReader:
+  inputs_def:
+    fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_crowd', 'gt_mask']
+  dataset:
+    !COCODataSet
+    image_dir: train
+    anno_path: annotations/instances_split_train.json
+    dataset_dir: dataset/cocome
+  sample_transforms:
+  - !DecodeImage
+    to_rgb: true
+  - !RandomFlipImage
+    prob: 0.5
+    is_mask_flip: true
+  - !NormalizeImage
+    is_channel_first: false
+    is_scale: true
+    mean: [0.485, 0.456, 0.406]
+    std: [0.229, 0.224, 0.225]
+  - !ResizeImage
+    target_size: 800
+    max_size: 1333
+    interp: 1
+    use_cv2: true
+  - !Permute
+    to_bgr: false
+    channel_first: true
+  batch_transforms:
+  - !PadBatch
+    pad_to_stride: -1.
+    use_padded_im_info: false
+  batch_size: 1
+  shuffle: true
+  worker_num: 2
+  drop_last: false
+  use_process: false
+
+EvalReader:
+  inputs_def:
+    fields: ['image', 'im_info', 'im_id', 'im_shape']
+    # for voc
+    #fields: ['image', 'im_info', 'im_id', 'gt_bbox', 'gt_class', 'is_difficult']
+  dataset:
+    !COCODataSet
+    image_dir: train
+    anno_path: annotations/instances_split_val.json
+    dataset_dir: dataset/cocome
+  sample_transforms:
+  - !DecodeImage
+    to_rgb: true
+  - !NormalizeImage
+    is_channel_first: false
+    is_scale: true
+    mean: [0.485, 0.456, 0.406]
+    std: [0.229, 0.224, 0.225]
+  - !ResizeImage
+    interp: 1
+    max_size: 1333
+    target_size: 800
+    use_cv2: true
+  - !Permute
+    channel_first: true
+    to_bgr: false
+  batch_size: 1
+  shuffle: false
+  drop_last: false
+  drop_empty: false
+  worker_num: 2
+
+TestReader:
+  inputs_def:
+    fields: ['image', 'im_info', 'im_id', 'im_shape']
+  dataset:
+    !ImageFolder
+    anno_path: dataset/cocome/annotations/instances_split_val.json
+  sample_transforms:
+  - !DecodeImage
+    to_rgb: true
+    with_mixup: false
+  - !NormalizeImage
+    is_channel_first: false
+    is_scale: true
+    mean: [0.485, 0.456, 0.406]
+    std: [0.229, 0.224, 0.225]
+  - !ResizeImage
+    interp: 1
+    max_size: 1333
+    target_size: 800
+    use_cv2: true
+  - !Permute
+    channel_first: true
+    to_bgr: false
+  batch_size: 1
+  shuffle: false
+  drop_last: false
diff --git a/docs/tutorials/train_on_kunlun.md b/docs/tutorials/train_on_kunlun.md
index f871cedb3..e0fb4ed3d 100644
--- a/docs/tutorials/train_on_kunlun.md
+++ b/docs/tutorials/train_on_kunlun.md
@@ -37,3 +37,22 @@ python3.7 -u tools/train.py --eval -c configs/ppyolo/ppyolo_roadsign_kunlun.yml
 ```shell
 python3.7 -u tools/eval.py -c configs/ppyolo/ppyolo_roadsign_kunlun.yml
 ```
+
+
+## mask_rcnn
+
+### Prepare data
+Download the dataset from https://dataset.bj.bcebos.com/PaddleDetection_demo/cocome.tar and extract it into the `dataset` directory, so that the images and annotations are available under `dataset/cocome`.
+
+
+
+### Train
+```shell
+python3.7 -u tools/train.py --eval -c configs/mask_rcnn_r50_1x_cocome_kunlun.yml
+```
+
+
+### Eval
+```shell
+python3.7 -u tools/eval.py -c configs/mask_rcnn_r50_1x_cocome_kunlun.yml
+```
-- 
GitLab
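Note on the "Prepare data" step added in `train_on_kunlun.md`: the reader config in this patch expects the images and the `instances_split_train.json` / `instances_split_val.json` annotation files under `dataset/cocome`. A minimal sketch of one way to fetch and unpack the demo dataset, assuming the tarball extracts to a `cocome/` folder with that layout:

```shell
# Run from the PaddleDetection root directory.
# Assumption: cocome.tar unpacks to a cocome/ folder containing train/ images
# and an annotations/ folder, matching dataset_dir: dataset/cocome in the reader config.
cd dataset
wget https://dataset.bj.bcebos.com/PaddleDetection_demo/cocome.tar
tar -xf cocome.tar
ls cocome/annotations   # should list instances_split_train.json and instances_split_val.json
cd ..
```

With the data in place, the train and eval commands shown in the tutorial section above can be run as-is.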