提交 6000abcd 编写于 作者: Z zhaoting

change some settings in SSD

上级 72fd4178
...@@ -7,6 +7,7 @@ ...@@ -7,6 +7,7 @@
* DeepFM: a factorization-machine based neural network for CTR prediction on Criteo dataset. * DeepFM: a factorization-machine based neural network for CTR prediction on Criteo dataset.
* DeepLabV3: significantly improves over our previous DeepLab versions without DenseCRF post-processing and attains comparable performance with other state-of-art models on the PASCAL VOC 2007 semantic image segmentation benchmark. * DeepLabV3: significantly improves over our previous DeepLab versions without DenseCRF post-processing and attains comparable performance with other state-of-art models on the PASCAL VOC 2007 semantic image segmentation benchmark.
* Faster-RCNN: towards real-time object detection with region proposal networks on COCO 2017 dataset. * Faster-RCNN: towards real-time object detection with region proposal networks on COCO 2017 dataset.
* SSD: a single-stage object detection method on COCO 2017 dataset.
* GoogLeNet: a deep convolutional neural network architecture codenamed Inception V1 for classification and detection on CIFAR-10 dataset. * GoogLeNet: a deep convolutional neural network architecture codenamed Inception V1 for classification and detection on CIFAR-10 dataset.
* Wide&Deep: jointly trained wide linear models and deep neural networks for recommender systems on Criteo dataset. * Wide&Deep: jointly trained wide linear models and deep neural networks for recommender systems on Criteo dataset.
* Frontend and User Interface * Frontend and User Interface
......
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Config parameters for SSD models."""
class ConfigSSD:
    """
    Config parameters for SSD.

    All settings are plain class attributes, so they can be read either from
    the class itself or from an instance.

    Examples:
        ConfigSSD().
    """
    IMG_SHAPE = [300, 300]
    # Equals sum(f * f * n for f, n in zip(FEATURE_SIZE, NUM_DEFAULT)).
    NUM_SSD_BOXES = 1917
    NEG_PRE_POSITIVE = 3
    MATCH_THRESHOLD = 0.5

    # Per-feature-map settings; every list below has one entry per feature map.
    NUM_DEFAULT = [3, 6, 6, 6, 6, 6]
    EXTRAS_IN_CHANNELS = [256, 576, 1280, 512, 256, 256]
    EXTRAS_OUT_CHANNELS = [576, 1280, 512, 256, 256, 128]
    EXTRAS_STRIDES = [1, 1, 2, 2, 2, 2]
    EXTRAS_RATIO = [0.2, 0.2, 0.2, 0.25, 0.5, 0.25]
    FEATURE_SIZE = [19, 10, 5, 3, 2, 1]
    SCALES = [21, 45, 99, 153, 207, 261, 315]
    ASPECT_RATIOS = [(1,), (2, 3), (2, 3), (2, 3), (2, 3), (2, 3)]
    STEPS = (16, 32, 64, 100, 150, 300)
    PRIOR_SCALING = (0.1, 0.2)

    # Prefer absolute paths for `MINDRECORD_DIR` and `COCO_ROOT`.
    MINDRECORD_DIR = "MindRecord_COCO"
    COCO_ROOT = "coco2017"
    TRAIN_DATA_TYPE = "train2017"
    VAL_DATA_TYPE = "val2017"
    # Formatted with TRAIN_DATA_TYPE / VAL_DATA_TYPE to get the annotation file.
    INSTANCES_SET = "annotations/instances_{}.json"

    # Index 0 is the background class. The names must match the category names
    # in the COCO annotation files exactly: 'fire hydrant' is a single category
    # and 'microwave' / 'oven' are two separate ones.
    # NOTE: this changes class indices 12..69 relative to the previous tuple,
    # so checkpoints trained with the old ordering need retraining.
    COCO_CLASSES = ('background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
                    'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
                    'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
                    'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
                    'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
                    'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
                    'kite', 'baseball bat', 'baseball glove', 'skateboard',
                    'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
                    'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
                    'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
                    'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
                    'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
                    'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
                    'refrigerator', 'book', 'clock', 'vase', 'scissors',
                    'teddy bear', 'hair drier', 'toothbrush')
    NUM_CLASSES = len(COCO_CLASSES)
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""metrics utils"""
import numpy as np
from config import ConfigSSD
from dataset import ssd_bboxes_decode
def calc_iou(bbox_pred, bbox_ground):
    """Calculate the IoU of one predicted box against every ground-truth box.

    Args:
        bbox_pred: 1-D array of four corner coordinates. Columns 0/2 are the
            min/max along one axis and columns 1/3 along the other.
        bbox_ground: 2-D array with one ground-truth box per row, same layout.

    Returns:
        1-D array holding one IoU value per row of `bbox_ground`.
    """
    pred = np.expand_dims(bbox_pred, axis=0)

    pred_area = (pred[:, 2] - pred[:, 0]) * (pred[:, 3] - pred[:, 1])
    gt_area = (bbox_ground[:, 2] - bbox_ground[:, 0]) * (bbox_ground[:, 3] - bbox_ground[:, 1])

    # Overlap extent along each axis; negative values mean the boxes are disjoint.
    overlap_w = np.minimum(pred[:, 2], bbox_ground[:, 2]) - np.maximum(pred[:, 0], bbox_ground[:, 0])
    overlap_h = np.minimum(pred[:, 3], bbox_ground[:, 3]) - np.maximum(pred[:, 1], bbox_ground[:, 1])
    overlap = np.maximum(overlap_w, 0) * np.maximum(overlap_h, 0)

    # Clamp the union so degenerate (zero-area) boxes cannot divide by zero.
    union = np.maximum(pred_area + gt_area - overlap, np.finfo(float).eps)
    return overlap * 1. / union
def apply_nms(all_boxes, all_scores, thres, max_boxes):
    """Apply greedy NMS to bboxes.

    Args:
        all_boxes: 2-D array, one box per row, read as [x1, y1, x2, y2].
        all_scores: 1-D array with one score per box.
        thres: boxes whose overlap with an already kept box exceeds this
            value are discarded.
        max_boxes: the loop stops once this many boxes have been kept.

    Returns:
        List of indices into `all_boxes`, highest score first.
    """
    left, top, right, bottom = (all_boxes[:, col] for col in range(4))
    # Widths and heights are measured as (max - min + 1).
    box_areas = (right - left + 1) * (bottom - top + 1)

    remaining = all_scores.argsort()[::-1]
    selected = []
    while remaining.size > 0:
        best, rest = remaining[0], remaining[1:]
        selected.append(best)
        if len(selected) >= max_boxes:
            break

        inter_w = np.maximum(0.0, np.minimum(right[best], right[rest]) - np.maximum(left[best], left[rest]) + 1)
        inter_h = np.maximum(0.0, np.minimum(bottom[best], bottom[rest]) - np.maximum(top[best], top[rest]) + 1)
        inter = inter_w * inter_h
        overlap = inter / (box_areas[best] + box_areas[rest] - inter)

        # Only candidates that do not overlap the kept box too much survive.
        remaining = rest[overlap <= thres]
    return selected
def calc_ap(recall, precision):
    """Calculate AP as the area under the precision envelope.

    Args:
        recall: 1-D array of recall values.
        precision: 1-D array of precision values, aligned with `recall`.

    Returns:
        The average precision as a scalar.
    """
    # Pad both curves so the integration covers recall from 0 to 1.
    padded_recall = np.concatenate(([0.], recall, [1.]))
    padded_precision = np.concatenate(([0.], precision, [0.]))

    # Replace every precision value by the maximum found at or after it.
    envelope = np.maximum.accumulate(padded_precision[::-1])[::-1]

    # Only positions where recall changes contribute area.
    steps = np.flatnonzero(padded_recall[1:] != padded_recall[:-1])
    return np.sum((padded_recall[steps + 1] - padded_recall[steps]) * envelope[steps + 1])
def metrics(pred_data):
    """Calculate mAP of predicted bboxes.

    For every sample and every foreground class that has at least one
    annotation in that sample, an AP is computed from the detections that
    survive NMS and the 0.5 score cut. The per-class APs are averaged over the
    samples in which the class was annotated, then averaged over those classes.

    Args:
        pred_data: iterable of dicts with the keys 'boxes', 'box_scores' and
            'annotation'. After squeezing axis 0, 'annotation' is indexed as
            rows of [box (4 values), class].

    Returns:
        The mAP, or 0.0 when no class has any annotation in any sample.
    """
    config = ConfigSSD()
    num_classes = config.NUM_CLASSES
    top_k = 50

    average_precisions = {}
    num = [0] * num_classes
    accurate_num = [0] * num_classes

    for sample in pred_data:
        pred_boxes = sample['boxes']
        boxes_scores = sample['box_scores']
        annotation = np.squeeze(sample['annotation'], axis=0)

        # Keep only the boxes whose best-scoring class is not background (0).
        pred_labels = np.argmax(boxes_scores, axis=-1)
        index = np.nonzero(pred_labels)
        pred_boxes = ssd_bboxes_decode(pred_boxes, index)
        pred_boxes = pred_boxes.clip(0, 1)
        boxes_scores = np.max(boxes_scores, axis=-1)[index]
        pred_labels = pred_labels[index]

        for c in range(1, num_classes):
            # Detections and annotations are rebuilt for every sample. Keeping
            # them in lists shared across samples reused the previous image's
            # data (or None on the first image) whenever an image had no
            # predictions or no annotations.
            if len(annotation) >= 1:
                annotations = annotation[annotation[:, 4] == c, :4]
            else:
                annotations = np.zeros((0, 4))

            num_annotations = annotations.shape[0]
            if num_annotations == 0:
                # Classes absent from this image do not contribute an AP.
                average_precisions.setdefault(c, 0)
                continue
            accurate_num[c] = 1

            if len(pred_labels) >= 1:
                class_mask = pred_labels == c
                class_boxes = pred_boxes[class_mask]
                class_box_scores = boxes_scores[class_mask]

                nms_index = apply_nms(class_boxes, class_box_scores, config.MATCH_THRESHOLD, top_k)
                class_boxes = class_boxes[nms_index]
                class_box_scores = class_box_scores[nms_index]

                cmask = class_box_scores > 0.5
                detections = class_boxes[cmask]
                scores = np.asarray(class_box_scores[cmask], dtype=np.float64)
            else:
                detections = np.zeros((0, 4))
                scores = np.zeros((0,))

            # A detection is a true positive when its best IoU with any
            # annotation of this class reaches 0.5.
            # NOTE(review): several detections may match the same annotation
            # and all count as true positives; confirm this is intended.
            true_positives = np.array(
                [float(np.max(calc_iou(detection, annotations)) >= 0.5) for detection in detections])
            false_positives = 1.0 - true_positives

            # Accumulate in order of decreasing confidence.
            indices = np.argsort(-scores)
            true_positives = np.cumsum(true_positives[indices])
            false_positives = np.cumsum(false_positives[indices])

            recall = true_positives * 1. / num_annotations
            precision = true_positives * 1. / np.maximum(true_positives + false_positives,
                                                         np.finfo(np.float64).eps)

            average_precisions[c] = average_precisions.get(c, 0) + calc_ap(recall, precision)
            num[c] += 1

    count = 0
    for key in average_precisions:
        if num[key] != 0:
            count += (average_precisions[key] / num[key])

    evaluated_classes = accurate_num.count(1)
    if evaluated_classes == 0:
        # Nothing was annotated at all; avoid dividing by zero.
        return 0.0
    mAP = count * 1. / evaluated_classes
    return mAP
...@@ -20,14 +20,17 @@ SSD network based on MobileNetV2, with support for training and evaluation. ...@@ -20,14 +20,17 @@ SSD network based on MobileNetV2, with support for training and evaluation.
pip install pycocotools pip install pycocotools
``` ```
And change the COCO_ROOT and other settings you need in `config.py`. The directory structure is as follows: And change the coco_root and other settings you need in `config.py`. The directory structure is as follows:
``` ```
└─coco2017 .
├── annotations # annotation jsons └─cocodataset
├── train2017 # train dataset ├─annotations
└── val2017 # infer dataset ├─instance_train2017.json
└─instance_val2017.json
├─val2017
└─train2017
``` ```
2. If your own dataset is used. **Select dataset to other when run script.** 2. If your own dataset is used. **Select dataset to other when run script.**
...@@ -37,14 +40,14 @@ SSD network based on MobileNetV2, with support for training and evaluation. ...@@ -37,14 +40,14 @@ SSD network based on MobileNetV2, with support for training and evaluation.
train2017/0000001.jpg 0,259,401,459,7 35,28,324,201,2 0,30,59,80,2 train2017/0000001.jpg 0,259,401,459,7 35,28,324,201,2 0,30,59,80,2
``` ```
Each row is an image annotation split by spaces; the first column is the relative path of the image, the others are box and class information in the format [xmin,ymin,xmax,ymax,class]. We read the image from a path formed by joining `IMAGE_DIR` (dataset directory) with the relative path in `ANNO_PATH` (the TXT file path); `IMAGE_DIR` and `ANNO_PATH` are set in `config.py`. Each row is an image annotation split by spaces; the first column is the relative path of the image, the others are box and class information in the format [xmin,ymin,xmax,ymax,class]. We read the image from a path formed by joining `image_dir` (dataset directory) with the relative path in `anno_path` (the TXT file path); `image_dir` and `anno_path` are set in `config.py`.
## Running the example ## Running the example
### Training ### Training
To train the model, run `train.py`. If the `MINDRECORD_DIR` is empty, it will generate [mindrecord](https://www.mindspore.cn/tutorial/en/master/use/data_preparation/converting_datasets.html) files by `COCO_ROOT`(coco dataset) or `IMAGE_DIR` and `ANNO_PATH`(own dataset). **Note if MINDRECORD_DIR isn't empty, it will use MINDRECORD_DIR instead of raw images.** To train the model, run `train.py`. If the `mindrecord_dir` is empty, it will generate [mindrecord](https://www.mindspore.cn/tutorial/en/master/use/data_preparation/converting_datasets.html) files by `coco_root`(coco dataset) or `image_dir` and `anno_path`(own dataset). **Note if mindrecord_dir isn't empty, it will use mindrecord_dir instead of raw images.**
- Stand alone mode - Stand alone mode
...@@ -60,29 +63,57 @@ To train the model, run `train.py`. If the `MINDRECORD_DIR` is empty, it will ge ...@@ -60,29 +63,57 @@ To train the model, run `train.py`. If the `MINDRECORD_DIR` is empty, it will ge
- Distribute mode - Distribute mode
``` ```
sh run_distribute_train.sh 8 150 coco /data/hccl.json sh run_distribute_train.sh 8 500 0.2 coco /data/hccl.json
``` ```
The input parameters are device numbers, epoch size, dataset mode and [hccl json configuration file](https://www.mindspore.cn/tutorial/en/master/advanced_use/distributed_training.html). **It is better to use absolute path.** The input parameters are device numbers, epoch size, learning rate, dataset mode and [hccl json configuration file](https://www.mindspore.cn/tutorial/en/master/advanced_use/distributed_training.html). **It is better to use absolute path.**
You will get the loss value of each step as following: You will get the loss value of each step as following:
``` ```
epoch: 1 step: 455, loss is 5.8653416 epoch: 1 step: 458, loss is 3.1681802
epoch: 2 step: 455, loss is 5.4292373 epoch time: 228752.4654865265, per step time: 499.4595316299705
epoch: 3 step: 455, loss is 5.458992 epoch: 2 step: 458, loss is 2.8847265
epoch time: 38912.93382644653, per step time: 84.96273761232868
epoch: 3 step: 458, loss is 2.8398118
epoch time: 38769.184827804565, per step time: 84.64887516987896
... ...
epoch: 148 step: 455, loss is 1.8340507
epoch: 149 step: 455, loss is 2.0876894 epoch: 498 step: 458, loss is 0.70908034
epoch: 150 step: 455, loss is 2.239692 epoch time: 38771.079778671265, per step time: 84.65301261718616
epoch: 499 step: 458, loss is 0.7974688
epoch time: 38787.413120269775, per step time: 84.68867493508685
epoch: 500 step: 458, loss is 0.5548882
epoch time: 39064.8467540741, per step time: 85.29442522723602
``` ```
### Evaluation ### Evaluation
For evaluation, run `eval.py` with `ckpt_path`. `ckpt_path` is the path of [checkpoint](https://www.mindspore.cn/tutorial/en/master/use/saving_and_loading_model_parameters.html) file. For evaluation, run `eval.py` with `checkpoint_path`. `checkpoint_path` is the path of [checkpoint](https://www.mindspore.cn/tutorial/en/master/use/saving_and_loading_model_parameters.html) file.
``` ```
python eval.py --ckpt_path ssd.ckpt --dataset coco python eval.py --checkpoint_path ssd.ckpt --dataset coco
``` ```
You can run ```python eval.py -h``` to get more information. You can run ```python eval.py -h``` to get more information.
You will get the result as following:
```
Average Precision (AP) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.189
Average Precision (AP) @[ IoU=0.50 | area= all | maxDets=100 ] = 0.341
Average Precision (AP) @[ IoU=0.75 | area= all | maxDets=100 ] = 0.183
Average Precision (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.040
Average Precision (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.181
Average Precision (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.326
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 1 ] = 0.213
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets= 10 ] = 0.348
Average Recall (AR) @[ IoU=0.50:0.95 | area= all | maxDets=100 ] = 0.380
Average Recall (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.124
Average Recall (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.412
Average Recall (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.588
========================================
mAP: 0.18937438355383837
```
...@@ -14,49 +14,51 @@ ...@@ -14,49 +14,51 @@
# ============================================================================ # ============================================================================
"""Evaluation for SSD""" """Evaluation for SSD"""
import os import os
import argparse import argparse
import time import time
import numpy as np
from mindspore import context, Tensor from mindspore import context, Tensor
from mindspore.train.serialization import load_checkpoint, load_param_into_net from mindspore.train.serialization import load_checkpoint, load_param_into_net
from mindspore.model_zoo.ssd import SSD300, ssd_mobilenet_v2 from src.ssd import SSD300, ssd_mobilenet_v2
from dataset import create_ssd_dataset, data_to_mindrecord_byte_image from src.dataset import create_ssd_dataset, data_to_mindrecord_byte_image
from config import ConfigSSD from src.config import config
from util import metrics from src.coco_eval import metrics
def ssd_eval(dataset_path, ckpt_path): def ssd_eval(dataset_path, ckpt_path):
"""SSD evaluation.""" """SSD evaluation."""
batch_size = 1
ds = create_ssd_dataset(dataset_path, batch_size=1, repeat_num=1, is_training=False) ds = create_ssd_dataset(dataset_path, batch_size=batch_size, repeat_num=1, is_training=False)
net = SSD300(ssd_mobilenet_v2(), ConfigSSD(), is_training=False) net = SSD300(ssd_mobilenet_v2(), config, is_training=False)
print("Load Checkpoint!") print("Load Checkpoint!")
param_dict = load_checkpoint(ckpt_path) param_dict = load_checkpoint(ckpt_path)
net.init_parameters_data() net.init_parameters_data()
load_param_into_net(net, param_dict) load_param_into_net(net, param_dict)
net.set_train(False) net.set_train(False)
i = 1. i = batch_size
total = ds.get_dataset_size() total = ds.get_dataset_size() * batch_size
start = time.time() start = time.time()
pred_data = [] pred_data = []
print("\n========================================\n") print("\n========================================\n")
print("total images num: ", total) print("total images num: ", total)
print("Processing, please wait a moment.") print("Processing, please wait a moment.")
for data in ds.create_dict_iterator(): for data in ds.create_dict_iterator():
img_id = data['img_id']
img_np = data['image'] img_np = data['image']
image_shape = data['image_shape'] image_shape = data['image_shape']
annotation = data['annotation']
output = net(Tensor(img_np)) output = net(Tensor(img_np))
for batch_idx in range(img_np.shape[0]): for batch_idx in range(img_np.shape[0]):
pred_data.append({"boxes": output[0].asnumpy()[batch_idx], pred_data.append({"boxes": output[0].asnumpy()[batch_idx],
"box_scores": output[1].asnumpy()[batch_idx], "box_scores": output[1].asnumpy()[batch_idx],
"annotation": annotation, "img_id": int(np.squeeze(img_id[batch_idx])),
"image_shape": image_shape}) "image_shape": image_shape[batch_idx]})
percent = round(i / total * 100, 2) percent = round(i / total * 100., 2)
print(f' {str(percent)} [{i}/{total}]', end='\r') print(f' {str(percent)} [{i}/{total}]', end='\r')
i += 1 i += batch_size
cost_time = int((time.time() - start) * 1000) cost_time = int((time.time() - start) * 1000)
print(f' 100% [{total}/{total}] cost {cost_time} ms') print(f' 100% [{total}/{total}] cost {cost_time} ms')
mAP = metrics(pred_data) mAP = metrics(pred_data)
...@@ -73,22 +75,21 @@ if __name__ == '__main__': ...@@ -73,22 +75,21 @@ if __name__ == '__main__':
context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args_opt.device_id) context.set_context(mode=context.GRAPH_MODE, device_target="Ascend", device_id=args_opt.device_id)
config = ConfigSSD()
prefix = "ssd_eval.mindrecord" prefix = "ssd_eval.mindrecord"
mindrecord_dir = config.MINDRECORD_DIR mindrecord_dir = config.mindrecord_dir
mindrecord_file = os.path.join(mindrecord_dir, prefix + "0") mindrecord_file = os.path.join(mindrecord_dir, prefix + "0")
if not os.path.exists(mindrecord_file): if not os.path.exists(mindrecord_file):
if not os.path.isdir(mindrecord_dir): if not os.path.isdir(mindrecord_dir):
os.makedirs(mindrecord_dir) os.makedirs(mindrecord_dir)
if args_opt.dataset == "coco": if args_opt.dataset == "coco":
if os.path.isdir(config.COCO_ROOT): if os.path.isdir(config.coco_root):
print("Create Mindrecord.") print("Create Mindrecord.")
data_to_mindrecord_byte_image("coco", False, prefix) data_to_mindrecord_byte_image("coco", False, prefix)
print("Create Mindrecord Done, at {}".format(mindrecord_dir)) print("Create Mindrecord Done, at {}".format(mindrecord_dir))
else: else:
print("COCO_ROOT not exits.") print("coco_root not exits.")
else: else:
if os.path.isdir(config.IMAGE_DIR) and os.path.exists(config.ANNO_PATH): if os.path.isdir(config.image_dir) and os.path.exists(config.anno_path):
print("Create Mindrecord.") print("Create Mindrecord.")
data_to_mindrecord_byte_image("other", False, prefix) data_to_mindrecord_byte_image("other", False, prefix)
print("Create Mindrecord Done, at {}".format(mindrecord_dir)) print("Create Mindrecord Done, at {}".format(mindrecord_dir))
......
...@@ -14,17 +14,16 @@ ...@@ -14,17 +14,16 @@
# limitations under the License. # limitations under the License.
# ============================================================================ # ============================================================================
echo "=================================================================================================================" echo "=============================================================================================================="
echo "Please run the scipt as: " echo "Please run the scipt as: "
echo "sh run_distribute_train.sh DEVICE_NUM EPOCH_SIZE DATASET MINDSPORE_HCCL_CONFIG_PATH PRE_TRAINED PRE_TRAINED_EPOCH_SIZE" echo "sh run_distribute_train.sh DEVICE_NUM EPOCH_SIZE LR DATASET MINDSPORE_HCCL_CONFIG_PATH PRE_TRAINED PRE_TRAINED_EPOCH_SIZE"
echo "for example: sh run_distribute_train.sh 8 350 coco /data/hccl.json /opt/ssd-300.ckpt(optional) 200(optional)" echo "for example: sh run_distribute_train.sh 8 500 0.2 coco /data/hccl.json /opt/ssd-300.ckpt(optional) 200(optional)"
echo "It is better to use absolute path." echo "It is better to use absolute path."
echo "The learning rate is 0.4 as default, if you want other lr, please change the value in this script."
echo "=================================================================================================================" echo "================================================================================================================="
if [ $# != 4 ] && [ $# != 6 ] if [ $# != 5 ] && [ $# != 7 ]
then then
echo "Usage: sh run_distribute_train.sh [DEVICE_NUM] [EPOCH_SIZE] [DATASET] \ echo "Usage: sh run_distribute_train.sh [DEVICE_NUM] [EPOCH_SIZE] [LR] [DATASET] \
[MINDSPORE_HCCL_CONFIG_PATH] [PRE_TRAINED](optional) [PRE_TRAINED_EPOCH_SIZE](optional)" [MINDSPORE_HCCL_CONFIG_PATH] [PRE_TRAINED](optional) [PRE_TRAINED_EPOCH_SIZE](optional)"
exit 1 exit 1
fi fi
...@@ -36,38 +35,39 @@ echo "After running the scipt, the network runs in the background. The log will ...@@ -36,38 +35,39 @@ echo "After running the scipt, the network runs in the background. The log will
export RANK_SIZE=$1 export RANK_SIZE=$1
EPOCH_SIZE=$2 EPOCH_SIZE=$2
DATASET=$3 LR=$3
PRE_TRAINED=$5 DATASET=$4
PRE_TRAINED_EPOCH_SIZE=$6 PRE_TRAINED=$6
export MINDSPORE_HCCL_CONFIG_PATH=$4 PRE_TRAINED_EPOCH_SIZE=$7
export MINDSPORE_HCCL_CONFIG_PATH=$5
for((i=0;i<RANK_SIZE;i++)) for((i=0;i<RANK_SIZE;i++))
do do
export DEVICE_ID=$i export DEVICE_ID=$i
rm -rf LOG$i rm -rf LOG$i
mkdir ./LOG$i mkdir ./LOG$i
cp *.py ./LOG$i cp ../*.py ./LOG$i
cp -r ../src ./LOG$i
cd ./LOG$i || exit cd ./LOG$i || exit
export RANK_ID=$i export RANK_ID=$i
echo "start training for rank $i, device $DEVICE_ID" echo "start training for rank $i, device $DEVICE_ID"
env > env.log env > env.log
if [ $# == 4 ] if [ $# == 5 ]
then then
python ../train.py \ python train.py \
--distribute=1 \ --distribute=1 \
--lr=0.4 \ --lr=$LR \
--dataset=$DATASET \ --dataset=$DATASET \
--device_num=$RANK_SIZE \ --device_num=$RANK_SIZE \
--device_id=$DEVICE_ID \ --device_id=$DEVICE_ID \
--epoch_size=$EPOCH_SIZE > log.txt 2>&1 & --epoch_size=$EPOCH_SIZE > log.txt 2>&1 &
fi fi
if [ $# == 6 ] if [ $# == 7 ]
then then
python ../train.py \ python train.py \
--distribute=1 \ --distribute=1 \
--lr=0.4 \ --lr=$LR \
--dataset=$DATASET \ --dataset=$DATASET \
--device_num=$RANK_SIZE \ --device_num=$RANK_SIZE \
--device_id=$DEVICE_ID \ --device_id=$DEVICE_ID \
......
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Bbox utils"""
import math
import itertools as it
import numpy as np
from .config import config
class GeneratDefaultBoxes():
    """
    Generate the default (anchor) boxes for SSD.

    Boxes are emitted feature map by feature map, then grid row, then grid
    column, then anchor size, and stored as flat 2-D float32 arrays:

    `self.default_boxes` has one row per box, laid out as [cy, cx, h, w].
    `self.default_boxes_ltrb` has the same rows as corners [y1, x1, y2, x2].
    """
    def __init__(self):
        cells_per_side = config.img_shape[0] / np.array(config.steps)
        num_layers = len(config.num_default)
        scale_step = (config.max_scale - config.min_scale) / (num_layers - 1)
        layer_scales = [config.min_scale + scale_step * k for k in range(num_layers)]
        layer_scales.append(1.0)

        boxes = []
        for layer, grid in enumerate(config.feature_size):
            scale = layer_scales[layer]
            # Geometric mean of this layer's scale and the next one's.
            between_scale = math.sqrt(scale * layer_scales[layer + 1])

            if layer == 0:
                w, h = scale * math.sqrt(2), scale / math.sqrt(2)
                sizes = [(0.1, 0.1), (w, h), (h, w)]
            else:
                sizes = [(scale, scale)]
                for ratio in config.aspect_ratios[layer]:
                    w, h = scale * math.sqrt(ratio), scale / math.sqrt(ratio)
                    sizes.extend([(w, h), (h, w)])
                sizes.append((between_scale, between_scale))

            assert len(sizes) == config.num_default[layer]

            for row in range(grid):
                for col in range(grid):
                    # Box centres sit in the middle of each grid cell.
                    cx = (col + 0.5) / cells_per_side[layer]
                    cy = (row + 0.5) / cells_per_side[layer]
                    for w, h in sizes:
                        boxes.append([cy, cx, h, w])

        # For IoU calculation
        corners = tuple((cy - h / 2, cx - w / 2, cy + h / 2, cx + w / 2) for cy, cx, h, w in boxes)
        self.default_boxes_ltrb = np.array(corners, dtype='float32')
        self.default_boxes = np.array(boxes, dtype='float32')
# Build the anchors once at import time and read both layouts from the same
# object instead of constructing the generator twice.
_default_box_generator = GeneratDefaultBoxes()
default_boxes_ltrb = _default_box_generator.default_boxes_ltrb
default_boxes = _default_box_generator.default_boxes
# Column vectors of the anchor corners, used for IoU against ground truth.
y1, x1, y2, x2 = np.split(default_boxes_ltrb[:, :4], 4, axis=-1)
vol_anchors = (x2 - x1) * (y2 - y1)
# The attribute name is read exactly as spelled here.
matching_threshold = config.match_thershold
def ssd_bboxes_encode(boxes):
    """
    Labels anchors with ground truth inputs.

    Args:
        boxes: ground truth boxes, one per row. Each row is indexed as
            [ymin, xmin, ymax, xmax, cls], i.e. corners in the same layout as
            `default_boxes_ltrb` (presumably normalized like the anchors;
            confirm against the caller).

    Returns:
        gt_loc: float32 array of shape [num_ssd_boxes, 4] with the encoded
            location targets; rows of anchors whose label is 0 stay zero.
        gt_label: int32 array of shape [num_ssd_boxes] with the class per anchor.
        num_matched_boxes: int32 array of shape [1], the number of anchors
            with a non-zero label.
    """

    def jaccard_with_anchors(bbox):
        """Compute jaccard score a box and the anchors."""
        # Intersection bbox and volume.
        ymin = np.maximum(y1, bbox[0])
        xmin = np.maximum(x1, bbox[1])
        ymax = np.minimum(y2, bbox[2])
        xmax = np.minimum(x2, bbox[3])
        w = np.maximum(xmax - xmin, 0.)
        h = np.maximum(ymax - ymin, 0.)

        # Volumes.
        inter_vol = h * w
        union_vol = vol_anchors + (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) - inter_vol
        jaccard = inter_vol / union_vol
        return np.squeeze(jaccard)

    pre_scores = np.zeros((config.num_ssd_boxes), dtype=np.float32)
    t_boxes = np.zeros((config.num_ssd_boxes, 4), dtype=np.float32)
    t_label = np.zeros((config.num_ssd_boxes), dtype=np.int64)
    for bbox in boxes:
        label = int(bbox[4])
        scores = jaccard_with_anchors(bbox)
        # Force the best anchor above any threshold so every ground-truth box
        # claims at least one anchor.
        idx = np.argmax(scores)
        scores[idx] = 2.0
        # An anchor is (re)assigned only if it passes the threshold and this
        # box overlaps it more than any box seen so far.
        mask = (scores > matching_threshold)
        mask = mask & (scores > pre_scores)
        pre_scores = np.maximum(pre_scores, scores * mask)
        t_label = mask * label + (1 - mask) * t_label
        for i in range(4):
            t_boxes[:, i] = mask * bbox[i] + (1 - mask) * t_boxes[:, i]

    index = np.nonzero(t_label)

    # Convert the matched corner boxes to [cy, cx, h, w].
    bboxes = np.zeros((config.num_ssd_boxes, 4), dtype=np.float32)
    bboxes[:, [0, 1]] = (t_boxes[:, [0, 1]] + t_boxes[:, [2, 3]]) / 2
    bboxes[:, [2, 3]] = t_boxes[:, [2, 3]] - t_boxes[:, [0, 1]]

    # Encode features.
    bboxes_t = bboxes[index]
    default_boxes_t = default_boxes[index]
    bboxes_t[:, :2] = (bboxes_t[:, :2] - default_boxes_t[:, :2]) / (default_boxes_t[:, 2:] * config.prior_scaling[0])
    bboxes_t[:, 2:4] = np.log(bboxes_t[:, 2:4] / default_boxes_t[:, 2:4]) / config.prior_scaling[1]
    bboxes[index] = bboxes_t

    # Reuse the indices computed above rather than scanning t_label again.
    num_match = np.array([len(index[0])], dtype=np.int32)
    return bboxes, t_label.astype(np.int32), num_match
def ssd_bboxes_decode(boxes):
    """Decode predicted offsets into corner boxes [y1, x1, y2, x2] clipped to [0, 1].

    Args:
        boxes: 2-D array of predicted offsets, one row per default box.

    Returns:
        float32 array with one decoded corner box per row.
    """
    anchors = default_boxes.copy()
    decoded = boxes.copy()

    # Undo the centre/size encoding relative to the default boxes.
    decoded[:, :2] = decoded[:, :2] * config.prior_scaling[0] * anchors[:, 2:] + anchors[:, :2]
    decoded[:, 2:4] = np.exp(decoded[:, 2:4] * config.prior_scaling[1]) * anchors[:, 2:4]

    # Centre/size -> corners.
    corners = np.zeros((len(decoded), 4), dtype=np.float32)
    centres, half_sizes = decoded[:, [0, 1]], decoded[:, [2, 3]] / 2
    corners[:, [0, 1]] = centres - half_sizes
    corners[:, [2, 3]] = centres + half_sizes

    return np.clip(corners, 0, 1)
def intersect(box_a, box_b):
    """Compute the intersection area of each box in `box_a` with `box_b`.

    Args:
        box_a: 2-D array, one corner box per row (columns 0-1 are the minimum
            corner, columns 2-3 the maximum corner).
        box_b: 1-D array holding a single box in the same layout.

    Returns:
        1-D array with one intersection area per row of `box_a`.
    """
    upper = np.minimum(box_a[:, 2:4], box_b[2:4])
    lower = np.maximum(box_a[:, :2], box_b[:2])
    # Disjoint boxes give a negative extent, which is clamped to zero.
    extent = np.clip((upper - lower), a_min=0, a_max=np.inf)
    first_side, second_side = extent[:, 0], extent[:, 1]
    return first_side * second_side
def jaccard_numpy(box_a, box_b):
    """Compute the jaccard overlap (IoU) of each box in `box_a` with `box_b`.

    Args:
        box_a: 2-D array, one corner box per row.
        box_b: 1-D array holding a single box in the same layout.

    Returns:
        1-D array with one overlap ratio per row of `box_a`.
    """
    overlap = intersect(box_a, box_b)

    side0_a = box_a[:, 2] - box_a[:, 0]
    side1_a = box_a[:, 3] - box_a[:, 1]
    area_a = side0_a * side1_a

    area_b = (box_b[2] - box_b[0]) * (box_b[3] - box_b[1])

    return overlap / (area_a + area_b - overlap)
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Coco metrics utils"""
import os
import json
import numpy as np
from .config import config
from .box_utils import ssd_bboxes_decode
def apply_nms(all_boxes, all_scores, thres, max_boxes):
    """Apply greedy NMS to bboxes.

    Args:
        all_boxes: 2-D array, one box per row, read as [y1, x1, y2, x2].
        all_scores: 1-D array with one score per box.
        thres: boxes whose overlap with an already kept box exceeds this
            value are discarded.
        max_boxes: the loop stops once this many boxes have been kept.

    Returns:
        List of indices into `all_boxes`, highest score first.
    """
    top, left, bottom, right = (all_boxes[:, col] for col in range(4))
    # Widths and heights are measured as (max - min + 1).
    box_areas = (right - left + 1) * (bottom - top + 1)

    candidates = all_scores.argsort()[::-1]
    kept = []
    while candidates.size > 0:
        current, others = candidates[0], candidates[1:]
        kept.append(current)
        if len(kept) >= max_boxes:
            break

        inter_left = np.maximum(left[current], left[others])
        inter_top = np.maximum(top[current], top[others])
        inter_right = np.minimum(right[current], right[others])
        inter_bottom = np.minimum(bottom[current], bottom[others])

        inter_w = np.maximum(0.0, inter_right - inter_left + 1)
        inter_h = np.maximum(0.0, inter_bottom - inter_top + 1)
        inter = inter_w * inter_h
        overlap = inter / (box_areas[current] + box_areas[others] - inter)

        # Drop every candidate that overlaps the kept box too much.
        candidates = others[overlap <= thres]
    return kept
def metrics(pred_data):
    """Calculate mAP of predicted bboxes with the COCO evaluation tools.

    The detections are written to `predictions.json` in the current working
    directory and then evaluated against the validation annotation file.

    Args:
        pred_data: iterable of dicts with the keys 'boxes', 'box_scores',
            'img_id' and 'image_shape' (unpacked as height, width).

    Returns:
        The first entry of the COCOeval `stats` array.
    """
    from pycocotools.coco import COCO
    from pycocotools.cocoeval import COCOeval

    num_classes = config.num_classes

    # Classes need to train or test: class index -> class name.
    index_to_name = dict(enumerate(config.coco_classes))

    anno_json = os.path.join(config.coco_root, config.instances_set.format(config.val_data_type))
    coco_gt = COCO(anno_json)
    # Category name -> category id as stored in the annotation file.
    name_to_cat_id = {cat["name"]: cat["id"] for cat in coco_gt.loadCats(coco_gt.getCatIds())}

    predictions = []
    img_ids = []
    for sample in pred_data:
        box_scores = sample['box_scores']
        img_id = sample['img_id']
        h, w = sample['image_shape']
        pred_boxes = ssd_bboxes_decode(sample['boxes'])
        img_ids.append(img_id)

        for c in range(1, num_classes):
            class_box_scores = box_scores[:, c]
            score_mask = class_box_scores > config.min_score
            if not score_mask.any():
                continue

            # Scale the decoded boxes to pixel coordinates before NMS.
            class_box_scores = class_box_scores[score_mask]
            class_boxes = pred_boxes[score_mask] * [h, w, h, w]
            nms_index = apply_nms(class_boxes, class_box_scores, config.nms_thershold, config.max_boxes)

            category_id = name_to_cat_id[index_to_name[c]]
            for loc, score in zip(class_boxes[nms_index].tolist(), class_box_scores[nms_index].tolist()):
                # Convert [y1, x1, y2, x2] to [x, y, width, height].
                predictions.append({
                    'image_id': img_id,
                    'bbox': [loc[1], loc[0], loc[3] - loc[1], loc[2] - loc[0]],
                    'score': score,
                    'category_id': category_id,
                })

    with open('predictions.json', 'w') as f:
        json.dump(predictions, f)

    coco_dt = coco_gt.loadRes('predictions.json')
    E = COCOeval(coco_gt, coco_dt, iouType='bbox')
    E.params.imgIds = img_ids
    E.evaluate()
    E.accumulate()
    E.summarize()
    return E.stats[0]
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Config parameters for SSD models."""
from easydict import EasyDict as ed
# Shared settings object for the SSD scripts. EasyDict lets other modules read
# the entries as attributes (e.g. `config.coco_root`, `config.nms_thershold`).
# NOTE: the key spellings below (including "match_thershold" and
# "nms_thershold") are runtime lookup names used by other modules; do not
# "correct" them here without updating every reader.
config = ed({
"img_shape": [300, 300],
# For the values below, 1917 == sum(feature_size[i] ** 2 * num_default[i]);
# presumably these three entries must be kept in sync -- TODO confirm.
"num_ssd_boxes": 1917,
"neg_pre_positive": 3,
"match_thershold": 0.5,
# `nms_thershold`, `min_score` and `max_boxes` are read by the evaluation code
# to filter low-score boxes and to run NMS.
"nms_thershold": 0.6,
"min_score": 0.1,
"max_boxes": 100,
# learning rate settings
"global_step": 0,
"lr_init": 0.001,
"lr_end_rate": 0.001,
"warmup_epochs": 2,
"momentum": 0.9,
"weight_decay": 1.5e-4,
# network
"num_default": [3, 6, 6, 6, 6, 6],
"extras_in_channels": [256, 576, 1280, 512, 256, 256],
"extras_out_channels": [576, 1280, 512, 256, 256, 128],
"extras_srides": [1, 1, 2, 2, 2, 2],
"extras_ratio": [0.2, 0.2, 0.2, 0.25, 0.5, 0.25],
"feature_size": [19, 10, 5, 3, 2, 1],
"min_scale": 0.2,
"max_scale": 0.95,
"aspect_ratios": [(2,), (2, 3), (2, 3), (2, 3), (2, 3), (2, 3)],
"steps": (16, 32, 64, 100, 150, 300),
"prior_scaling": (0.1, 0.2),
"gamma": 2.0,
"alpha": 0.75,
# Prefer absolute paths for `mindrecord_dir` and `coco_root`.
"mindrecord_dir": "/data/MindRecord_COCO",
"coco_root": "/data/coco2017",
"train_data_type": "train2017",
"val_data_type": "val2017",
# Joined onto `coco_root`; "{}" is filled with the train/val data type.
"instances_set": "annotations/instances_{}.json",
# The position in this tuple is the class index used by the evaluation code;
# index 0 is 'background' and is skipped there.
"coco_classes": ('background', 'person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus',
'train', 'truck', 'boat', 'traffic light', 'fire hydrant',
'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog',
'horse', 'sheep', 'cow', 'elephant', 'bear', 'zebra',
'giraffe', 'backpack', 'umbrella', 'handbag', 'tie',
'suitcase', 'frisbee', 'skis', 'snowboard', 'sports ball',
'kite', 'baseball bat', 'baseball glove', 'skateboard',
'surfboard', 'tennis racket', 'bottle', 'wine glass', 'cup',
'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza',
'donut', 'cake', 'chair', 'couch', 'potted plant', 'bed',
'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote',
'keyboard', 'cell phone', 'microwave', 'oven', 'toaster', 'sink',
'refrigerator', 'book', 'clock', 'vase', 'scissors',
'teddy bear', 'hair drier', 'toothbrush'),
# Expected to equal len(coco_classes), background included -- TODO confirm
# whenever the class list is edited.
"num_classes": 81,
# `image_dir` and `anno_path` are ignored when the COCO dataset is used.
"image_dir": "",
"anno_path": "",
})
...@@ -14,176 +14,98 @@ ...@@ -14,176 +14,98 @@
# ============================================================================ # ============================================================================
"""SSD dataset""" """SSD dataset"""
from __future__ import division from __future__ import division
import os import os
import math
import itertools as it
import numpy as np
import cv2 import cv2
import numpy as np
import mindspore.dataset as de import mindspore.dataset as de
import mindspore.dataset.transforms.vision.c_transforms as C import mindspore.dataset.transforms.vision.c_transforms as C
from mindspore.mindrecord import FileWriter from mindspore.mindrecord import FileWriter
from config import ConfigSSD from .config import config
from .box_utils import jaccard_numpy, ssd_bboxes_encode
config = ConfigSSD()
class GeneratDefaultBoxes():
"""
Generate Default boxes for SSD, follows the order of (W, H, archor_sizes).
`self.default_boxes` has a shape of [archor_sizes, H, W, 4], the last dimension is [x, y, w, h].
`self.default_boxes_ltrb` has a shape as `self.default_boxes`, the last dimension is [x1, y1, x2, y2].
"""
def __init__(self):
fk = config.IMG_SHAPE[0] / np.array(config.STEPS)
self.default_boxes = []
for idex, feature_size in enumerate(config.FEATURE_SIZE):
sk1 = config.SCALES[idex] / config.IMG_SHAPE[0]
sk2 = config.SCALES[idex + 1] / config.IMG_SHAPE[0]
sk3 = math.sqrt(sk1 * sk2)
if config.NUM_DEFAULT[idex] == 3:
all_sizes = [(0.5, 1.0), (1.0, 1.0), (1.0, 0.5)]
else:
all_sizes = [(sk1, sk1), (sk3, sk3)]
for aspect_ratio in config.ASPECT_RATIOS[idex]:
w, h = sk1 * math.sqrt(aspect_ratio), sk1 / math.sqrt(aspect_ratio)
all_sizes.append((w, h))
all_sizes.append((h, w))
assert len(all_sizes) == config.NUM_DEFAULT[idex]
for i, j in it.product(range(feature_size), repeat=2):
for w, h in all_sizes:
cx, cy = (j + 0.5) / fk[idex], (i + 0.5) / fk[idex]
box = [np.clip(k, 0, 1) for k in (cx, cy, w, h)]
self.default_boxes.append(box)
def to_ltrb(cx, cy, w, h):
return cx - w / 2, cy - h / 2, cx + w / 2, cy + h / 2
# For IoU calculation
self.default_boxes_ltrb = np.array(tuple(to_ltrb(*i) for i in self.default_boxes), dtype='float32')
self.default_boxes = np.array(self.default_boxes, dtype='float32')
default_boxes_ltrb = GeneratDefaultBoxes().default_boxes_ltrb
default_boxes = GeneratDefaultBoxes().default_boxes
x1, y1, x2, y2 = np.split(default_boxes_ltrb[:, :4], 4, axis=-1)
vol_anchors = (x2 - x1) * (y2 - y1)
matching_threshold = config.MATCH_THRESHOLD
def ssd_bboxes_encode(boxes):
"""
Labels anchors with ground truth inputs.
Args:
boxex: ground truth with shape [N, 5], for each row, it stores [x, y, w, h, cls].
Returns:
gt_loc: location ground truth with shape [num_anchors, 4].
gt_label: class ground truth with shape [num_anchors, 1].
num_matched_boxes: number of positives in an image.
"""
def jaccard_with_anchors(bbox):
"""Compute jaccard score a box and the anchors."""
# Intersection bbox and volume.
xmin = np.maximum(x1, bbox[0])
ymin = np.maximum(y1, bbox[1])
xmax = np.minimum(x2, bbox[2])
ymax = np.minimum(y2, bbox[3])
w = np.maximum(xmax - xmin, 0.)
h = np.maximum(ymax - ymin, 0.)
# Volumes.
inter_vol = h * w
union_vol = vol_anchors + (bbox[2] - bbox[0]) * (bbox[3] - bbox[1]) - inter_vol
jaccard = inter_vol / union_vol
return np.squeeze(jaccard)
pre_scores = np.zeros((config.NUM_SSD_BOXES), dtype=np.float32)
t_boxes = np.zeros((config.NUM_SSD_BOXES, 4), dtype=np.float32)
t_label = np.zeros((config.NUM_SSD_BOXES), dtype=np.int64)
for bbox in boxes:
label = int(bbox[4])
scores = jaccard_with_anchors(bbox)
mask = (scores > matching_threshold)
if not np.any(mask):
mask[np.argmax(scores)] = True
mask = mask & (scores > pre_scores)
pre_scores = np.maximum(pre_scores, scores)
t_label = mask * label + (1 - mask) * t_label
for i in range(4):
t_boxes[:, i] = mask * bbox[i] + (1 - mask) * t_boxes[:, i]
index = np.nonzero(t_label)
# Transform to ltrb.
bboxes = np.zeros((config.NUM_SSD_BOXES, 4), dtype=np.float32)
bboxes[:, [0, 1]] = (t_boxes[:, [0, 1]] + t_boxes[:, [2, 3]]) / 2
bboxes[:, [2, 3]] = t_boxes[:, [2, 3]] - t_boxes[:, [0, 1]]
# Encode features.
bboxes_t = bboxes[index]
default_boxes_t = default_boxes[index]
bboxes_t[:, :2] = (bboxes_t[:, :2] - default_boxes_t[:, :2]) / (default_boxes_t[:, 2:] * config.PRIOR_SCALING[0])
bboxes_t[:, 2:4] = np.log(bboxes_t[:, 2:4] / default_boxes_t[:, 2:4]) / config.PRIOR_SCALING[1]
bboxes[index] = bboxes_t
num_match_num = np.array([len(np.nonzero(t_label)[0])], dtype=np.int32)
return bboxes, t_label.astype(np.int32), num_match_num
def ssd_bboxes_decode(boxes, index):
"""Decode predict boxes to [x, y, w, h]"""
boxes_t = boxes[index]
default_boxes_t = default_boxes[index]
boxes_t[:, :2] = boxes_t[:, :2] * config.PRIOR_SCALING[0] * default_boxes_t[:, 2:] + default_boxes_t[:, :2]
boxes_t[:, 2:4] = np.exp(boxes_t[:, 2:4] * config.PRIOR_SCALING[1]) * default_boxes_t[:, 2:4]
bboxes = np.zeros((len(boxes_t), 4), dtype=np.float32)
bboxes[:, [0, 1]] = boxes_t[:, [0, 1]] - boxes_t[:, [2, 3]] / 2
bboxes[:, [2, 3]] = boxes_t[:, [0, 1]] + boxes_t[:, [2, 3]] / 2
return bboxes
def preprocess_fn(image, box, is_training):
"""Preprocess function for dataset."""
def _rand(a=0., b=1.):
"""Generate random."""
return np.random.rand() * (b - a) + a
def _infer_data(image, input_shape, box): def _rand(a=0., b=1.):
img_h, img_w, _ = image.shape """Generate random."""
input_h, input_w = input_shape return np.random.rand() * (b - a) + a
def random_sample_crop(image, boxes):
"""Random Crop the image and boxes"""
height, width, _ = image.shape
min_iou = np.random.choice([None, 0.1, 0.3, 0.5, 0.7, 0.9])
if min_iou is None:
return image, boxes
# max trails (50)
for _ in range(50):
image_t = image
w = _rand(0.3, 1.0) * width
h = _rand(0.3, 1.0) * height
# aspect ratio constraint b/t .5 & 2
if h / w < 0.5 or h / w > 2:
continue
left = _rand() * (width - w)
top = _rand() * (height - h)
rect = np.array([int(top), int(left), int(top+h), int(left+w)])
overlap = jaccard_numpy(boxes, rect)
# dropout some boxes
drop_mask = overlap > 0
if not drop_mask.any():
continue
if overlap[drop_mask].min() < min_iou:
continue
image_t = image_t[rect[0]:rect[2], rect[1]:rect[3], :]
centers = (boxes[:, :2] + boxes[:, 2:4]) / 2.0
m1 = (rect[0] < centers[:, 0]) * (rect[1] < centers[:, 1])
m2 = (rect[2] > centers[:, 0]) * (rect[3] > centers[:, 1])
# mask in that both m1 and m2 are true
mask = m1 * m2 * drop_mask
# have any valid boxes? try again if not
if not mask.any():
continue
scale = min(float(input_w) / float(img_w), float(input_h) / float(img_h)) # take only matching gt boxes
nw = int(img_w * scale) boxes_t = boxes[mask, :].copy()
nh = int(img_h * scale)
image = cv2.resize(image, (nw, nh)) boxes_t[:, :2] = np.maximum(boxes_t[:, :2], rect[:2])
boxes_t[:, :2] -= rect[:2]
boxes_t[:, 2:4] = np.minimum(boxes_t[:, 2:4], rect[2:4])
boxes_t[:, 2:4] -= rect[:2]
new_image = np.zeros((input_h, input_w, 3), np.float32) return image_t, boxes_t
dh = (input_h - nh) // 2 return image, boxes
dw = (input_w - nw) // 2
new_image[dh: (nh + dh), dw: (nw + dw), :] = image
image = new_image def preprocess_fn(img_id, image, box, is_training):
"""Preprocess function for dataset."""
def _infer_data(image, input_shape):
img_h, img_w, _ = image.shape
input_h, input_w = input_shape
image = cv2.resize(image, (input_w, input_h))
#When the channels of image is 1 #When the channels of image is 1
if len(image.shape) == 2: if len(image.shape) == 2:
image = np.expand_dims(image, axis=-1) image = np.expand_dims(image, axis=-1)
image = np.concatenate([image, image, image], axis=-1) image = np.concatenate([image, image, image], axis=-1)
box = box.astype(np.float32) return img_id, image, np.array((img_h, img_w), np.float32)
box[:, [0, 2]] = (box[:, [0, 2]] * scale + dw) / input_w
box[:, [1, 3]] = (box[:, [1, 3]] * scale + dh) / input_h
return image, np.array((img_h, img_w), np.float32), box
def _data_aug(image, box, is_training, image_size=(300, 300)): def _data_aug(image, box, is_training, image_size=(300, 300)):
"""Data augmentation function.""" """Data augmentation function."""
...@@ -191,72 +113,53 @@ def preprocess_fn(image, box, is_training): ...@@ -191,72 +113,53 @@ def preprocess_fn(image, box, is_training):
w, h = image_size w, h = image_size
if not is_training: if not is_training:
return _infer_data(image, image_size, box) return _infer_data(image, image_size)
# Random settings
scale_w = _rand(0.75, 1.25)
scale_h = _rand(0.75, 1.25)
flip = _rand() < .5 # Random crop
nw = iw * scale_w box = box.astype(np.float32)
nh = ih * scale_h image, box = random_sample_crop(image, box)
scale = min(w / nw, h / nh) ih, iw, _ = image.shape
nw = int(scale * nw)
nh = int(scale * nh)
# Resize image # Resize image
image = cv2.resize(image, (nw, nh)) image = cv2.resize(image, (w, h))
# place image
new_image = np.zeros((h, w, 3), dtype=np.float32)
dw = (w - nw) // 2
dh = (h - nh) // 2
new_image[dh:dh + nh, dw:dw + nw, :] = image
image = new_image
# Flip image or not # Flip image or not
flip = _rand() < .5
if flip: if flip:
image = cv2.flip(image, 1, dst=None) image = cv2.flip(image, 1, dst=None)
# Convert image to gray or not
gray = _rand() < .25
if gray:
image = cv2.cvtColor(image, cv2.COLOR_BGR2GRAY)
# When the channels of image is 1 # When the channels of image is 1
if len(image.shape) == 2: if len(image.shape) == 2:
image = np.expand_dims(image, axis=-1) image = np.expand_dims(image, axis=-1)
image = np.concatenate([image, image, image], axis=-1) image = np.concatenate([image, image, image], axis=-1)
box = box.astype(np.float32) box[:, [0, 2]] = box[:, [0, 2]] / ih
box[:, [1, 3]] = box[:, [1, 3]] / iw
# Transform box with shape[x1, y1, x2, y2].
box[:, [0, 2]] = (box[:, [0, 2]] * scale * scale_w + dw) / w
box[:, [1, 3]] = (box[:, [1, 3]] * scale * scale_h + dh) / h
if flip: if flip:
box[:, [0, 2]] = 1 - box[:, [2, 0]] box[:, [1, 3]] = 1 - box[:, [3, 1]]
box, label, num_match_num = ssd_bboxes_encode(box) box, label, num_match = ssd_bboxes_encode(box)
return image, box, label, num_match_num return image, box, label, num_match
return _data_aug(image, box, is_training, image_size=config.IMG_SHAPE) return _data_aug(image, box, is_training, image_size=config.img_shape)
def create_coco_label(is_training): def create_coco_label(is_training):
"""Get image path and annotation from COCO.""" """Get image path and annotation from COCO."""
from pycocotools.coco import COCO from pycocotools.coco import COCO
coco_root = config.COCO_ROOT coco_root = config.coco_root
data_type = config.VAL_DATA_TYPE data_type = config.val_data_type
if is_training: if is_training:
data_type = config.TRAIN_DATA_TYPE data_type = config.train_data_type
#Classes need to train or test. #Classes need to train or test.
train_cls = config.COCO_CLASSES train_cls = config.coco_classes
train_cls_dict = {} train_cls_dict = {}
for i, cls in enumerate(train_cls): for i, cls in enumerate(train_cls):
train_cls_dict[cls] = i train_cls_dict[cls] = i
anno_json = os.path.join(coco_root, config.INSTANCES_SET.format(data_type)) anno_json = os.path.join(coco_root, config.instances_set.format(data_type))
coco = COCO(anno_json) coco = COCO(anno_json)
classs_dict = {} classs_dict = {}
...@@ -265,7 +168,8 @@ def create_coco_label(is_training): ...@@ -265,7 +168,8 @@ def create_coco_label(is_training):
classs_dict[cat["id"]] = cat["name"] classs_dict[cat["id"]] = cat["name"]
image_ids = coco.getImgIds() image_ids = coco.getImgIds()
image_files = [] images = []
image_path_dict = {}
image_anno_dict = {} image_anno_dict = {}
for img_id in image_ids: for img_id in image_ids:
...@@ -275,17 +179,24 @@ def create_coco_label(is_training): ...@@ -275,17 +179,24 @@ def create_coco_label(is_training):
anno = coco.loadAnns(anno_ids) anno = coco.loadAnns(anno_ids)
image_path = os.path.join(coco_root, data_type, file_name) image_path = os.path.join(coco_root, data_type, file_name)
annos = [] annos = []
iscrowd = False
for label in anno: for label in anno:
bbox = label["bbox"] bbox = label["bbox"]
class_name = classs_dict[label["category_id"]] class_name = classs_dict[label["category_id"]]
iscrowd = iscrowd or label["iscrowd"]
if class_name in train_cls: if class_name in train_cls:
x_min, x_max = bbox[0], bbox[0] + bbox[2] x_min, x_max = bbox[0], bbox[0] + bbox[2]
y_min, y_max = bbox[1], bbox[1] + bbox[3] y_min, y_max = bbox[1], bbox[1] + bbox[3]
annos.append(list(map(round, [x_min, y_min, x_max, y_max])) + [train_cls_dict[class_name]]) annos.append(list(map(round, [y_min, x_min, y_max, x_max])) + [train_cls_dict[class_name]])
if not is_training and iscrowd:
continue
if len(annos) >= 1: if len(annos) >= 1:
image_files.append(image_path) images.append(img_id)
image_anno_dict[image_path] = np.array(annos) image_path_dict[img_id] = image_path
return image_files, image_anno_dict image_anno_dict[img_id] = np.array(annos)
return images, image_path_dict, image_anno_dict
def anno_parser(annos_str): def anno_parser(annos_str):
...@@ -299,7 +210,8 @@ def anno_parser(annos_str): ...@@ -299,7 +210,8 @@ def anno_parser(annos_str):
def filter_valid_data(image_dir, anno_path): def filter_valid_data(image_dir, anno_path):
"""Filter valid image file, which both in image_dir and anno_path.""" """Filter valid image file, which both in image_dir and anno_path."""
image_files = [] images = []
image_path_dict = {}
image_anno_dict = {} image_anno_dict = {}
if not os.path.isdir(image_dir): if not os.path.isdir(image_dir):
raise RuntimeError("Path given is not valid.") raise RuntimeError("Path given is not valid.")
...@@ -308,38 +220,43 @@ def filter_valid_data(image_dir, anno_path): ...@@ -308,38 +220,43 @@ def filter_valid_data(image_dir, anno_path):
with open(anno_path, "rb") as f: with open(anno_path, "rb") as f:
lines = f.readlines() lines = f.readlines()
for line in lines: for img_id, line in enumerate(lines):
line_str = line.decode("utf-8").strip() line_str = line.decode("utf-8").strip()
line_split = str(line_str).split(' ') line_split = str(line_str).split(' ')
file_name = line_split[0] file_name = line_split[0]
image_path = os.path.join(image_dir, file_name) image_path = os.path.join(image_dir, file_name)
if os.path.isfile(image_path): if os.path.isfile(image_path):
image_anno_dict[image_path] = anno_parser(line_split[1:]) images.append(img_id)
image_files.append(image_path) image_path_dict[img_id] = image_path
return image_files, image_anno_dict image_anno_dict[img_id] = anno_parser(line_split[1:])
return images, image_path_dict, image_anno_dict
def data_to_mindrecord_byte_image(dataset="coco", is_training=True, prefix="ssd.mindrecord", file_num=8): def data_to_mindrecord_byte_image(dataset="coco", is_training=True, prefix="ssd.mindrecord", file_num=8):
"""Create MindRecord file.""" """Create MindRecord file."""
mindrecord_dir = config.MINDRECORD_DIR mindrecord_dir = config.mindrecord_dir
mindrecord_path = os.path.join(mindrecord_dir, prefix) mindrecord_path = os.path.join(mindrecord_dir, prefix)
writer = FileWriter(mindrecord_path, file_num) writer = FileWriter(mindrecord_path, file_num)
if dataset == "coco": if dataset == "coco":
image_files, image_anno_dict = create_coco_label(is_training) images, image_path_dict, image_anno_dict = create_coco_label(is_training)
else: else:
image_files, image_anno_dict = filter_valid_data(config.IMAGE_DIR, config.ANNO_PATH) images, image_path_dict, image_anno_dict = filter_valid_data(config.image_dir, config.anno_path)
ssd_json = { ssd_json = {
"img_id": {"type": "int32", "shape": [1]},
"image": {"type": "bytes"}, "image": {"type": "bytes"},
"annotation": {"type": "int32", "shape": [-1, 5]}, "annotation": {"type": "int32", "shape": [-1, 5]},
} }
writer.add_schema(ssd_json, "ssd_json") writer.add_schema(ssd_json, "ssd_json")
for image_name in image_files: for img_id in images:
with open(image_name, 'rb') as f: image_path = image_path_dict[img_id]
with open(image_path, 'rb') as f:
img = f.read() img = f.read()
annos = np.array(image_anno_dict[image_name], dtype=np.int32) annos = np.array(image_anno_dict[img_id], dtype=np.int32)
row = {"image": img, "annotation": annos} img_id = np.array([img_id], dtype=np.int32)
row = {"img_id": img_id, "image": img, "annotation": annos}
writer.write_raw_data([row]) writer.write_raw_data([row])
writer.commit() writer.commit()
...@@ -347,29 +264,26 @@ def data_to_mindrecord_byte_image(dataset="coco", is_training=True, prefix="ssd. ...@@ -347,29 +264,26 @@ def data_to_mindrecord_byte_image(dataset="coco", is_training=True, prefix="ssd.
def create_ssd_dataset(mindrecord_file, batch_size=32, repeat_num=10, device_num=1, rank=0, def create_ssd_dataset(mindrecord_file, batch_size=32, repeat_num=10, device_num=1, rank=0,
is_training=True, num_parallel_workers=4): is_training=True, num_parallel_workers=4):
"""Creatr SSD dataset with MindDataset.""" """Creatr SSD dataset with MindDataset."""
ds = de.MindDataset(mindrecord_file, columns_list=["image", "annotation"], num_shards=device_num, shard_id=rank, ds = de.MindDataset(mindrecord_file, columns_list=["img_id", "image", "annotation"], num_shards=device_num,
num_parallel_workers=num_parallel_workers, shuffle=is_training) shard_id=rank, num_parallel_workers=num_parallel_workers, shuffle=is_training)
decode = C.Decode() decode = C.Decode()
ds = ds.map(input_columns=["image"], operations=decode) ds = ds.map(input_columns=["image"], operations=decode)
compose_map_func = (lambda image, annotation: preprocess_fn(image, annotation, is_training)) change_swap_op = C.HWC2CHW()
normalize_op = C.Normalize(mean=[0.485*255, 0.456*255, 0.406*255], std=[0.229*255, 0.224*255, 0.225*255])
color_adjust_op = C.RandomColorAdjust(brightness=0.4, contrast=0.4, saturation=0.4)
compose_map_func = (lambda img_id, image, annotation: preprocess_fn(img_id, image, annotation, is_training))
if is_training: if is_training:
hwc_to_chw = C.HWC2CHW() output_columns = ["image", "box", "label", "num_match"]
ds = ds.map(input_columns=["image", "annotation"], trans = [color_adjust_op, normalize_op, change_swap_op]
output_columns=["image", "box", "label", "num_match_num"],
columns_order=["image", "box", "label", "num_match_num"],
operations=compose_map_func, python_multiprocessing=True, num_parallel_workers=num_parallel_workers)
ds = ds.map(input_columns=["image"], operations=hwc_to_chw, python_multiprocessing=True,
num_parallel_workers=num_parallel_workers)
ds = ds.batch(batch_size, drop_remainder=True)
ds = ds.repeat(repeat_num)
else: else:
hwc_to_chw = C.HWC2CHW() output_columns = ["img_id", "image", "image_shape"]
ds = ds.map(input_columns=["image", "annotation"], trans = [normalize_op, change_swap_op]
output_columns=["image", "image_shape", "annotation"], ds = ds.map(input_columns=["img_id", "image", "annotation"],
columns_order=["image", "image_shape", "annotation"], output_columns=output_columns, columns_order=output_columns,
operations=compose_map_func) operations=compose_map_func, python_multiprocessing=is_training,
ds = ds.map(input_columns=["image"], operations=hwc_to_chw, num_parallel_workers=num_parallel_workers) num_parallel_workers=num_parallel_workers)
ds = ds.batch(batch_size, drop_remainder=True) ds = ds.map(input_columns=["image"], operations=trans, python_multiprocessing=is_training,
ds = ds.repeat(repeat_num) num_parallel_workers=num_parallel_workers)
ds = ds.batch(batch_size, drop_remainder=True)
ds = ds.repeat(repeat_num)
return ds return ds
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Parameters utils"""
from mindspore import Tensor
from mindspore.common.initializer import initializer, TruncatedNormal
def init_net_param(network, initialize_mode='TruncatedNormal'):
    """Re-initialize the trainable weights of `network` in place.

    Parameters whose data is not a `Tensor`, or whose name contains 'beta',
    'gamma' or 'bias', are left untouched.

    Args:
        network: object exposing `trainable_params()`.
        initialize_mode: 'TruncatedNormal' (default) uses
            `TruncatedNormal(0.03)`; any other value is handed to
            `initializer` as the init specification.
    """
    skipped_markers = ('beta', 'gamma', 'bias')
    for p in network.trainable_params():
        if not isinstance(p.data, Tensor) or any(marker in p.name for marker in skipped_markers):
            continue
        if initialize_mode == 'TruncatedNormal':
            init = TruncatedNormal(0.03)
        else:
            init = initialize_mode
        # Both modes must go through `initializer`: `set_parameter_data` expects
        # the new data, not an (init, shape, dtype) triple as the original
        # non-default branch passed.
        p.set_parameter_data(initializer(init, p.data.shape(), p.data.dtype()))
def load_backbone_params(network, param_dict):
    """Copy pre-trained backbone weights from `param_dict` into `network`.

    Each parameter name is translated to the checkpoint's naming scheme before
    lookup; parameters without a match in `param_dict` are left unchanged.
    The original docstring states the default pre-trained model is
    mobilenetv2.

    Args:
        network: object exposing `parameters_and_names()`, yielding
            (name, parameter) pairs.
        param_dict: mapping from checkpoint parameter name to an object whose
            `.data` is the value to load.
    """
    # Fix: iterate the `network` argument; the original referenced an
    # undefined name `net` and raised NameError on every call.
    for _, param in network.parameters_and_names():
        param_name = param.name.replace('network.backbone.', '')
        name_split = param_name.split('.')
        if 'features_1' in param_name:
            param_name = param_name.replace('features_1', 'features')
        if 'features_2' in param_name:
            # Layers under `features_2` are renumbered by +14, presumably the
            # number of layers held by `features_1` -- TODO confirm.
            param_name = '.'.join(['features', str(int(name_split[1]) + 14)] + name_split[2:])
        if param_name in param_dict:
            param.set_parameter_data(param_dict[param_name].data)
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
"""Learning rate schedule"""
import math
import numpy as np
def get_lr(global_step, lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch):
    """
    Build the per-step learning rates: linear warmup followed by cosine decay.

    Args:
        global_step(int): index of the first step to return; earlier steps are dropped
        lr_init(float): learning rate at step 0 of the warmup
        lr_end(float): learning rate the cosine decay heads towards
        lr_max(float): learning rate reached at the end of the warmup
        warmup_epochs(float): number of warmup epochs
        total_epochs(int): total number of training epochs
        steps_per_epoch(int): number of steps in one epoch

    Returns:
        np.array, float32 learning rates from step `global_step` to the last step
    """
    total_steps = steps_per_epoch * total_epochs
    warmup_steps = steps_per_epoch * warmup_epochs

    def _rate_at(step):
        """Learning rate of a single step, clamped so it is never negative."""
        if step < warmup_steps:
            rate = lr_init + (lr_max - lr_init) * step / warmup_steps
        else:
            cosine = math.cos(math.pi * (step - warmup_steps) / (total_steps - warmup_steps))
            rate = lr_end + (lr_max - lr_end) * (1. + cosine) / 2.
        return 0.0 if rate < 0.0 else rate

    schedule = np.array([_rate_at(step) for step in range(total_steps)]).astype(np.float32)
    return schedule[global_step:]
...@@ -14,25 +14,17 @@ ...@@ -14,25 +14,17 @@
# ============================================================================ # ============================================================================
"""SSD net based MobilenetV2.""" """SSD net based MobilenetV2."""
import mindspore.common.dtype as mstype import mindspore.common.dtype as mstype
import mindspore as ms import mindspore as ms
import mindspore.nn as nn import mindspore.nn as nn
from mindspore import context from mindspore import Parameter, context, Tensor
from mindspore.parallel._auto_parallel_context import auto_parallel_context from mindspore.parallel._auto_parallel_context import auto_parallel_context
from mindspore.communication.management import get_group_size from mindspore.communication.management import get_group_size
from mindspore.ops import operations as P from mindspore.ops import operations as P
from mindspore.ops import functional as F from mindspore.ops import functional as F
from mindspore.ops import composite as C from mindspore.ops import composite as C
from mindspore.common.initializer import initializer from mindspore.common.initializer import initializer
from mindspore.ops.operations import TensorAdd
from mindspore import Parameter
def _conv2d(in_channel, out_channel, kernel_size=3, stride=1, pad_mod='same'):
weight_shape = (out_channel, in_channel, kernel_size, kernel_size)
weight = initializer('XavierUniform', shape=weight_shape, dtype=mstype.float32).to_tensor()
return nn.Conv2d(in_channel, out_channel, kernel_size=kernel_size, stride=stride,
padding=0, pad_mode=pad_mod, weight_init=weight)
def _make_divisible(v, divisor, min_value=None): def _make_divisible(v, divisor, min_value=None):
...@@ -46,6 +38,55 @@ def _make_divisible(v, divisor, min_value=None): ...@@ -46,6 +38,55 @@ def _make_divisible(v, divisor, min_value=None):
return new_v return new_v
def _conv2d(in_channel, out_channel, kernel_size=3, stride=1, pad_mod='same'):
    """Create an `nn.Conv2d` with a bias term, `padding=0` and pad mode `pad_mod`."""
    conv_options = dict(kernel_size=kernel_size, stride=stride,
                        padding=0, pad_mode=pad_mod, has_bias=True)
    return nn.Conv2d(in_channel, out_channel, **conv_options)
def _bn(channel):
    """Create the `nn.BatchNorm2d` configuration shared by the SSD layers."""
    bn_options = dict(eps=1e-3, momentum=0.97,
                      gamma_init=1, beta_init=0,
                      moving_mean_init=0, moving_var_init=1)
    return nn.BatchNorm2d(channel, **bn_options)
def _last_conv2d(in_channel, out_channel, kernel_size=3, stride=1, pad_mod='same', pad=0):
    """Create a depthwise conv -> BN -> ReLU6 -> 1x1 conv stack.

    The depthwise stage keeps `in_channel` channels; the final 1x1 convolution
    maps them to `out_channel`.
    NOTE(review): `pad_mod` is accepted but not used -- the depthwise stage
    always receives pad_mode='same'.
    """
    # Construction order is kept: depthwise, pointwise, then BN and activation.
    depthwise = DepthwiseConv(in_channel, kernel_size, stride, pad_mode='same', pad=pad)
    pointwise = _conv2d(in_channel, out_channel, kernel_size=1)
    stages = [depthwise, _bn(in_channel), nn.ReLU6(), pointwise]
    return nn.SequentialCell(stages)
class ConvBNReLU(nn.Cell):
    """
    Convolution (regular or depthwise) followed by BatchNorm and ReLU6.

    Args:
        in_planes (int): Input channel.
        out_planes (int): Output channel.
        kernel_size (int): Kernel size of the convolution. Default: 3.
        stride (int): Stride of the convolution. Default: 1.
        groups (int): 1 selects a regular convolution; any other value selects
            a depthwise convolution over `in_planes`. Default: 1.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> ConvBNReLU(16, 256, kernel_size=1, stride=1, groups=1)
    """

    def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
        super(ConvBNReLU, self).__init__()
        # 'same' padding is used in both branches, so no explicit padding.
        no_padding = 0
        if groups != 1:
            first_stage = DepthwiseConv(in_planes, kernel_size, stride, pad_mode='same', pad=no_padding)
        else:
            first_stage = nn.Conv2d(in_planes, out_planes, kernel_size, stride, pad_mode='same',
                                    padding=no_padding)
        self.features = nn.SequentialCell([first_stage, _bn(out_planes), nn.ReLU6()])

    def construct(self, x):
        return self.features(x)
class DepthwiseConv(nn.Cell): class DepthwiseConv(nn.Cell):
""" """
Depthwise Convolution warpper definition. Depthwise Convolution warpper definition.
...@@ -64,6 +105,7 @@ class DepthwiseConv(nn.Cell): ...@@ -64,6 +105,7 @@ class DepthwiseConv(nn.Cell):
Examples: Examples:
>>> DepthwiseConv(16, 3, 1, 'pad', 1, channel_multiplier=1) >>> DepthwiseConv(16, 3, 1, 'pad', 1, channel_multiplier=1)
""" """
def __init__(self, in_planes, kernel_size, stride, pad_mode, pad, channel_multiplier=1, has_bias=False): def __init__(self, in_planes, kernel_size, stride, pad_mode, pad, channel_multiplier=1, has_bias=False):
super(DepthwiseConv, self).__init__() super(DepthwiseConv, self).__init__()
self.has_bias = has_bias self.has_bias = has_bias
...@@ -91,42 +133,9 @@ class DepthwiseConv(nn.Cell): ...@@ -91,42 +133,9 @@ class DepthwiseConv(nn.Cell):
return output return output
class ConvBNReLU(nn.Cell):
    """
    Convolution (regular or depthwise) followed by batch normalization and ReLU6.

    Args:
        in_planes (int): Input channel.
        out_planes (int): Output channel.
        kernel_size (int): Input kernel size. Default: 3.
        stride (int): Stride size for the convolutional layer. Default: 1.
        groups (int): Channel group. A value of 1 builds a regular convolution,
            any other value builds a depthwise convolution. Default: 1.

    Returns:
        Tensor, output tensor.

    Examples:
        >>> ConvBNReLU(16, 256, kernel_size=1, stride=1, groups=1)
    """
    def __init__(self, in_planes, out_planes, kernel_size=3, stride=1, groups=1):
        super(ConvBNReLU, self).__init__()
        # Explicit padding of half the kernel extent, passed with pad_mode='pad'.
        half_kernel = (kernel_size - 1) // 2
        if groups != 1:
            conv_layer = DepthwiseConv(in_planes, kernel_size, stride, pad_mode='pad', pad=half_kernel)
        else:
            conv_layer = nn.Conv2d(in_planes, out_planes, kernel_size, stride, pad_mode='pad',
                                   padding=half_kernel)
        self.features = nn.SequentialCell([conv_layer, nn.BatchNorm2d(out_planes), nn.ReLU6()])

    def construct(self, x):
        return self.features(x)
class InvertedResidual(nn.Cell): class InvertedResidual(nn.Cell):
""" """
Mobilenetv2 residual block definition. Residual block definition.
Args: Args:
inp (int): Input channel. inp (int): Input channel.
...@@ -140,7 +149,7 @@ class InvertedResidual(nn.Cell): ...@@ -140,7 +149,7 @@ class InvertedResidual(nn.Cell):
Examples: Examples:
>>> ResidualBlock(3, 256, 1, 1) >>> ResidualBlock(3, 256, 1, 1)
""" """
def __init__(self, inp, oup, stride, expand_ratio): def __init__(self, inp, oup, stride, expand_ratio, last_relu=False):
super(InvertedResidual, self).__init__() super(InvertedResidual, self).__init__()
assert stride in [1, 2] assert stride in [1, 2]
...@@ -155,17 +164,21 @@ class InvertedResidual(nn.Cell): ...@@ -155,17 +164,21 @@ class InvertedResidual(nn.Cell):
ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim), ConvBNReLU(hidden_dim, hidden_dim, stride=stride, groups=hidden_dim),
# pw-linear # pw-linear
nn.Conv2d(hidden_dim, oup, kernel_size=1, stride=1, has_bias=False), nn.Conv2d(hidden_dim, oup, kernel_size=1, stride=1, has_bias=False),
nn.BatchNorm2d(oup), _bn(oup),
]) ])
self.conv = nn.SequentialCell(layers) self.conv = nn.SequentialCell(layers)
self.add = TensorAdd() self.add = P.TensorAdd()
self.cast = P.Cast() self.cast = P.Cast()
self.last_relu = last_relu
self.relu = nn.ReLU6()
def construct(self, x): def construct(self, x):
identity = x identity = x
x = self.conv(x) x = self.conv(x)
if self.use_res_connect: if self.use_res_connect:
return self.add(identity, x) x = self.add(identity, x)
if self.last_relu:
x = self.relu(x)
return x return x
...@@ -174,14 +187,14 @@ class FlattenConcat(nn.Cell): ...@@ -174,14 +187,14 @@ class FlattenConcat(nn.Cell):
Concatenate predictions into a single tensor. Concatenate predictions into a single tensor.
Args: Args:
config (Class): The default config of SSD. config (dict): The default config of SSD.
Returns: Returns:
Tensor, flatten predictions. Tensor, flatten predictions.
""" """
def __init__(self, config): def __init__(self, config):
super(FlattenConcat, self).__init__() super(FlattenConcat, self).__init__()
self.num_ssd_boxes = config.NUM_SSD_BOXES self.num_ssd_boxes = config.num_ssd_boxes
self.concat = P.Concat(axis=1) self.concat = P.Concat(axis=1)
self.transpose = P.Transpose() self.transpose = P.Transpose()
def construct(self, inputs): def construct(self, inputs):
...@@ -199,7 +212,7 @@ class MultiBox(nn.Cell): ...@@ -199,7 +212,7 @@ class MultiBox(nn.Cell):
Multibox conv layers. Each multibox layer contains class conf scores and localization predictions. Multibox conv layers. Each multibox layer contains class conf scores and localization predictions.
Args: Args:
config (Class): The default config of SSD. config (dict): The default config of SSD.
Returns: Returns:
Tensor, localization predictions. Tensor, localization predictions.
...@@ -207,17 +220,17 @@ class MultiBox(nn.Cell): ...@@ -207,17 +220,17 @@ class MultiBox(nn.Cell):
""" """
def __init__(self, config): def __init__(self, config):
super(MultiBox, self).__init__() super(MultiBox, self).__init__()
num_classes = config.NUM_CLASSES num_classes = config.num_classes
out_channels = config.EXTRAS_OUT_CHANNELS out_channels = config.extras_out_channels
num_default = config.NUM_DEFAULT num_default = config.num_default
loc_layers = [] loc_layers = []
cls_layers = [] cls_layers = []
for k, out_channel in enumerate(out_channels): for k, out_channel in enumerate(out_channels):
loc_layers += [_conv2d(out_channel, 4 * num_default[k], loc_layers += [_last_conv2d(out_channel, 4 * num_default[k],
kernel_size=3, stride=1, pad_mod='same')] kernel_size=3, stride=1, pad_mod='same', pad=0)]
cls_layers += [_conv2d(out_channel, num_classes * num_default[k], cls_layers += [_last_conv2d(out_channel, num_classes * num_default[k],
kernel_size=3, stride=1, pad_mod='same')] kernel_size=3, stride=1, pad_mod='same', pad=0)]
self.multi_loc_layers = nn.layer.CellList(loc_layers) self.multi_loc_layers = nn.layer.CellList(loc_layers)
self.multi_cls_layers = nn.layer.CellList(cls_layers) self.multi_cls_layers = nn.layer.CellList(cls_layers)
...@@ -238,7 +251,7 @@ class SSD300(nn.Cell): ...@@ -238,7 +251,7 @@ class SSD300(nn.Cell):
Args: Args:
backbone (Cell): Backbone Network. backbone (Cell): Backbone Network.
config (Class): The default config of SSD. config (dict): The default config of SSD.
Returns: Returns:
Tensor, localization predictions. Tensor, localization predictions.
...@@ -246,25 +259,26 @@ class SSD300(nn.Cell): ...@@ -246,25 +259,26 @@ class SSD300(nn.Cell):
Examples:backbone Examples:backbone
SSD300(backbone=resnet34(num_classes=None), SSD300(backbone=resnet34(num_classes=None),
config=ConfigSSDResNet34()). config=config).
""" """
def __init__(self, backbone, config, is_training=True): def __init__(self, backbone, config, is_training=True):
super(SSD300, self).__init__() super(SSD300, self).__init__()
self.backbone = backbone self.backbone = backbone
in_channels = config.EXTRAS_IN_CHANNELS in_channels = config.extras_in_channels
out_channels = config.EXTRAS_OUT_CHANNELS out_channels = config.extras_out_channels
ratios = config.EXTRAS_RATIO ratios = config.extras_ratio
strides = config.EXTRAS_STRIDES strides = config.extras_srides
residual_list = [] residual_list = []
for i in range(2, len(in_channels)): for i in range(2, len(in_channels)):
residual = InvertedResidual(in_channels[i], out_channels[i], stride=strides[i], expand_ratio=ratios[i]) residual = InvertedResidual(in_channels[i], out_channels[i], stride=strides[i],
expand_ratio=ratios[i], last_relu=True)
residual_list.append(residual) residual_list.append(residual)
self.multi_residual = nn.layer.CellList(residual_list) self.multi_residual = nn.layer.CellList(residual_list)
self.multi_box = MultiBox(config) self.multi_box = MultiBox(config)
self.is_training = is_training self.is_training = is_training
if not is_training: if not is_training:
self.softmax = P.Softmax() self.activation = P.Sigmoid()
def construct(self, x): def construct(self, x):
layer_out_13, output = self.backbone(x) layer_out_13, output = self.backbone(x)
...@@ -275,77 +289,42 @@ class SSD300(nn.Cell): ...@@ -275,77 +289,42 @@ class SSD300(nn.Cell):
multi_feature += (feature,) multi_feature += (feature,)
pred_loc, pred_label = self.multi_box(multi_feature) pred_loc, pred_label = self.multi_box(multi_feature)
if not self.is_training: if not self.is_training:
pred_label = self.softmax(pred_label) pred_label = self.activation(pred_label)
return pred_loc, pred_label return pred_loc, pred_label
class LocalizationLoss(nn.Cell): class SigmoidFocalClassificationLoss(nn.Cell):
"""" """"
Computes the localization loss with SmoothL1Loss. Sigmoid focal-loss for classification.
Returns:
Tensor, box regression loss.
"""
def __init__(self):
super(LocalizationLoss, self).__init__()
self.reduce_sum = P.ReduceSum()
self.reduce_mean = P.ReduceMean()
self.loss = nn.SmoothL1Loss()
self.expand_dims = P.ExpandDims()
self.less = P.Less()
def construct(self, pred_loc, gt_loc, gt_label, num_matched_boxes):
mask = F.cast(self.less(0, gt_label), mstype.float32)
mask = self.expand_dims(mask, -1)
smooth_l1 = self.loss(gt_loc, pred_loc) * mask
box_loss = self.reduce_sum(smooth_l1, 1)
return self.reduce_mean(box_loss / F.cast(num_matched_boxes, mstype.float32), (0, 1))
class ClassificationLoss(nn.Cell):
""""
Computes the classification loss with hard example mining.
Args: Args:
config (Class): The default config of SSD. gamma (float): Hyper-parameter to balance the easy and hard examples. Default: 2.0
alpha (float): Hyper-parameter to balance the positive and negative example. Default: 0.25
Returns: Returns:
Tensor, classification loss. Tensor, the focal loss.
""" """
def __init__(self, config): def __init__(self, gamma=2.0, alpha=0.25):
super(ClassificationLoss, self).__init__() super(SigmoidFocalClassificationLoss, self).__init__()
self.num_classes = config.NUM_CLASSES self.sigmiod_cross_entropy = P.SigmoidCrossEntropyWithLogits()
self.num_boxes = config.NUM_SSD_BOXES self.sigmoid = P.Sigmoid()
self.neg_pre_positive = config.NEG_PRE_POSITIVE self.pow = P.Pow()
self.minimum = P.Minimum() self.onehot = P.OneHot()
self.less = P.Less() self.on_value = Tensor(1.0, mstype.float32)
self.sort = P.TopK() self.off_value = Tensor(0.0, mstype.float32)
self.tile = P.Tile() self.gamma = gamma
self.reduce_sum = P.ReduceSum() self.alpha = alpha
self.reduce_mean = P.ReduceMean()
self.expand_dims = P.ExpandDims() def construct(self, logits, label):
self.sort_descend = P.TopK(True) label = self.onehot(label, F.shape(logits)[-1], self.on_value, self.off_value)
self.cross_entropy = nn.SoftmaxCrossEntropyWithLogits(sparse=True) sigmiod_cross_entropy = self.sigmiod_cross_entropy(logits, label)
sigmoid = self.sigmoid(logits)
def construct(self, pred_label, gt_label, num_matched_boxes): label = F.cast(label, mstype.float32)
gt_label = F.cast(gt_label, mstype.int32) p_t = label * sigmoid + (1 - label) * (1 - sigmoid)
mask = F.cast(self.less(0, gt_label), mstype.float32) modulating_factor = self.pow(1 - p_t, self.gamma)
gt_label_shape = F.shape(gt_label) alpha_weight_factor = label * self.alpha + (1 - label) * (1 - self.alpha)
pred_label = F.reshape(pred_label, (-1, self.num_classes)) focal_loss = modulating_factor * alpha_weight_factor * sigmiod_cross_entropy
gt_label = F.reshape(gt_label, (-1,)) return focal_loss
cross_entropy = self.cross_entropy(pred_label, gt_label)
cross_entropy = F.reshape(cross_entropy, gt_label_shape)
# Hard example mining
num_matched_boxes = F.reshape(num_matched_boxes, (-1,))
neg_masked_cross_entropy = F.cast(cross_entropy * (1- mask), mstype.float16)
_, loss_idx = self.sort_descend(neg_masked_cross_entropy, self.num_boxes)
_, relative_position = self.sort(F.cast(loss_idx, mstype.float16), self.num_boxes)
num_neg_boxes = self.minimum(num_matched_boxes * self.neg_pre_positive, self.num_boxes)
tile_num_neg_boxes = self.tile(self.expand_dims(num_neg_boxes, -1), (1, self.num_boxes))
top_k_neg_mask = F.cast(self.less(relative_position, tile_num_neg_boxes), mstype.float32)
class_loss = self.reduce_sum(cross_entropy * (mask + top_k_neg_mask), 1)
return self.reduce_mean(class_loss / F.cast(num_matched_boxes, mstype.float32), 0)
class SSDWithLossCell(nn.Cell): class SSDWithLossCell(nn.Cell):
...@@ -354,7 +333,7 @@ class SSDWithLossCell(nn.Cell): ...@@ -354,7 +333,7 @@ class SSDWithLossCell(nn.Cell):
Args: Args:
network (Cell): The training network. network (Cell): The training network.
config (Class): SSD config. config (dict): SSD config.
Returns: Returns:
Tensor, the loss of the network. Tensor, the loss of the network.
...@@ -362,14 +341,29 @@ class SSDWithLossCell(nn.Cell): ...@@ -362,14 +341,29 @@ class SSDWithLossCell(nn.Cell):
def __init__(self, network, config): def __init__(self, network, config):
super(SSDWithLossCell, self).__init__() super(SSDWithLossCell, self).__init__()
self.network = network self.network = network
self.class_loss = ClassificationLoss(config) self.less = P.Less()
self.box_loss = LocalizationLoss() self.tile = P.Tile()
self.reduce_sum = P.ReduceSum()
self.reduce_mean = P.ReduceMean()
self.expand_dims = P.ExpandDims()
self.class_loss = SigmoidFocalClassificationLoss(config.gamma, config.alpha)
self.loc_loss = nn.SmoothL1Loss()
def construct(self, x, gt_loc, gt_label, num_matched_boxes): def construct(self, x, gt_loc, gt_label, num_matched_boxes):
pred_loc, pred_label = self.network(x) pred_loc, pred_label = self.network(x)
loss_cls = self.class_loss(pred_label, gt_label, num_matched_boxes) mask = F.cast(self.less(0, gt_label), mstype.float32)
loss_loc = self.box_loss(pred_loc, gt_loc, gt_label, num_matched_boxes) num_matched_boxes = self.reduce_sum(F.cast(num_matched_boxes, mstype.float32))
return loss_cls + loss_loc
# Localization Loss
mask_loc = self.tile(self.expand_dims(mask, -1), (1, 1, 4))
smooth_l1 = self.loc_loss(pred_loc, gt_loc) * mask_loc
loss_loc = self.reduce_sum(self.reduce_mean(smooth_l1, -1), -1)
# Classification Loss
loss_cls = self.class_loss(pred_label, gt_label)
loss_cls = self.reduce_sum(loss_cls, (1, 2))
return self.reduce_sum((loss_cls + loss_loc) / num_matched_boxes)
class TrainingWrapper(nn.Cell): class TrainingWrapper(nn.Cell):
...@@ -415,7 +409,6 @@ class TrainingWrapper(nn.Cell): ...@@ -415,7 +409,6 @@ class TrainingWrapper(nn.Cell):
return F.depend(loss, self.optimizer(grads)) return F.depend(loss, self.optimizer(grads))
class SSDWithMobileNetV2(nn.Cell): class SSDWithMobileNetV2(nn.Cell):
""" """
MobileNetV2 architecture for SSD backbone. MobileNetV2 architecture for SSD backbone.
......
...@@ -13,83 +13,38 @@ ...@@ -13,83 +13,38 @@
# limitations under the License. # limitations under the License.
# ============================================================================ # ============================================================================
"""train SSD and get checkpoint files.""" """Train SSD and get checkpoint files."""
import os import os
import math
import argparse import argparse
import numpy as np
import mindspore.nn as nn import mindspore.nn as nn
from mindspore import context, Tensor from mindspore import context, Tensor
from mindspore.communication.management import init from mindspore.communication.management import init
from mindspore.train.callback import CheckpointConfig, ModelCheckpoint, LossMonitor, TimeMonitor from mindspore.train.callback import CheckpointConfig, ModelCheckpoint, LossMonitor, TimeMonitor
from mindspore.train import Model, ParallelMode from mindspore.train import Model, ParallelMode
from mindspore.train.serialization import load_checkpoint, load_param_into_net from mindspore.train.serialization import load_checkpoint, load_param_into_net
from mindspore.common.initializer import initializer from src.ssd import SSD300, SSDWithLossCell, TrainingWrapper, ssd_mobilenet_v2
from src.config import config
from mindspore.model_zoo.ssd import SSD300, SSDWithLossCell, TrainingWrapper, ssd_mobilenet_v2 from src.dataset import create_ssd_dataset, data_to_mindrecord_byte_image
from config import ConfigSSD from src.lr_schedule import get_lr
from dataset import create_ssd_dataset, data_to_mindrecord_byte_image from src.init_params import init_net_param
def get_lr(global_step, lr_init, lr_end, lr_max, warmup_epochs, total_epochs, steps_per_epoch):
    """
    Generate the learning rate array: linear warmup followed by cosine decay.

    Args:
        global_step(int): step to resume from; earlier entries are dropped from the result
        lr_init(float): init learning rate
        lr_end(float): end learning rate
        lr_max(float): max learning rate
        warmup_epochs(int): number of warmup epochs
        total_epochs(int): total epoch of training
        steps_per_epoch(int): steps of one epoch

    Returns:
        np.array, float32 learning rates for steps global_step .. total_steps - 1
    """
    total_steps = steps_per_epoch * total_epochs
    warmup_steps = steps_per_epoch * warmup_epochs

    def _rate_at(step):
        """Return the learning rate of a single step, clamped to be non-negative."""
        if step < warmup_steps:
            # Linear ramp from lr_init towards lr_max.
            rate = lr_init + (lr_max - lr_init) * step / warmup_steps
        else:
            # Cosine decay from lr_max down to lr_end over the remaining steps.
            angle = math.pi * (step - warmup_steps) / (total_steps - warmup_steps)
            rate = lr_end + (lr_max - lr_end) * (1. + math.cos(angle)) / 2.
        return 0.0 if rate < 0.0 else rate

    schedule = np.array([_rate_at(step) for step in range(total_steps)]).astype(np.float32)
    return schedule[global_step:]
def init_net_param(network, initialize_mode='XavierUniform'):
    """Re-initialize the trainable parameters of the network with the given initializer.

    Parameters whose name contains 'beta', 'gamma' or 'bias' are left untouched.
    """
    skipped_tags = ('beta', 'gamma', 'bias')
    for param in network.trainable_params():
        if not isinstance(param.data, Tensor):
            continue
        if any(tag in param.name for tag in skipped_tags):
            continue
        new_data = initializer(initialize_mode, param.data.shape(), param.data.dtype())
        param.set_parameter_data(new_data)
def main(): def main():
parser = argparse.ArgumentParser(description="SSD training") parser = argparse.ArgumentParser(description="SSD training")
parser.add_argument("--only_create_dataset", type=bool, default=False, help="If set it true, only create " parser.add_argument("--only_create_dataset", type=bool, default=False, help="If set it true, only create "
"Mindrecord, default is false.") "Mindrecord, default is False.")
parser.add_argument("--distribute", type=bool, default=False, help="Run distribute, default is false.") parser.add_argument("--distribute", type=bool, default=False, help="Run distribute, default is False.")
parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.") parser.add_argument("--device_id", type=int, default=0, help="Device id, default is 0.")
parser.add_argument("--device_num", type=int, default=1, help="Use device nums, default is 1.") parser.add_argument("--device_num", type=int, default=1, help="Use device nums, default is 1.")
parser.add_argument("--lr", type=float, default=0.25, help="Learning rate, default is 0.25.") parser.add_argument("--lr", type=float, default=0.05, help="Learning rate, default is 0.05.")
parser.add_argument("--mode", type=str, default="sink", help="Run sink mode or not, default is sink.") parser.add_argument("--mode", type=str, default="sink", help="Run sink mode or not, default is sink.")
parser.add_argument("--dataset", type=str, default="coco", help="Dataset, defalut is coco.") parser.add_argument("--dataset", type=str, default="coco", help="Dataset, defalut is coco.")
parser.add_argument("--epoch_size", type=int, default=70, help="Epoch size, default is 70.") parser.add_argument("--epoch_size", type=int, default=250, help="Epoch size, default is 250.")
parser.add_argument("--batch_size", type=int, default=32, help="Batch size, default is 32.") parser.add_argument("--batch_size", type=int, default=32, help="Batch size, default is 32.")
parser.add_argument("--pre_trained", type=str, default=None, help="Pretrained Checkpoint file path.") parser.add_argument("--pre_trained", type=str, default=None, help="Pretrained Checkpoint file path.")
parser.add_argument("--pre_trained_epoch_size", type=int, default=0, help="Pretrained epoch size.") parser.add_argument("--pre_trained_epoch_size", type=int, default=0, help="Pretrained epoch size.")
parser.add_argument("--save_checkpoint_epochs", type=int, default=5, help="Save checkpoint epochs, default is 5.") parser.add_argument("--save_checkpoint_epochs", type=int, default=10, help="Save checkpoint epochs, default is 5.")
parser.add_argument("--loss_scale", type=int, default=1024, help="Loss scale, default is 1024.") parser.add_argument("--loss_scale", type=int, default=1024, help="Loss scale, default is 1024.")
args_opt = parser.parse_args() args_opt = parser.parse_args()
...@@ -111,27 +66,26 @@ def main(): ...@@ -111,27 +66,26 @@ def main():
# It will generate mindrecord file in args_opt.mindrecord_dir, # It will generate mindrecord file in args_opt.mindrecord_dir,
# and the file name is ssd.mindrecord0, 1, ... file_num. # and the file name is ssd.mindrecord0, 1, ... file_num.
config = ConfigSSD()
prefix = "ssd.mindrecord" prefix = "ssd.mindrecord"
mindrecord_dir = config.MINDRECORD_DIR mindrecord_dir = config.mindrecord_dir
mindrecord_file = os.path.join(mindrecord_dir, prefix + "0") mindrecord_file = os.path.join(mindrecord_dir, prefix + "0")
if not os.path.exists(mindrecord_file): if not os.path.exists(mindrecord_file):
if not os.path.isdir(mindrecord_dir): if not os.path.isdir(mindrecord_dir):
os.makedirs(mindrecord_dir) os.makedirs(mindrecord_dir)
if args_opt.dataset == "coco": if args_opt.dataset == "coco":
if os.path.isdir(config.COCO_ROOT): if os.path.isdir(config.coco_root):
print("Create Mindrecord.") print("Create Mindrecord.")
data_to_mindrecord_byte_image("coco", True, prefix) data_to_mindrecord_byte_image("coco", True, prefix)
print("Create Mindrecord Done, at {}".format(mindrecord_dir)) print("Create Mindrecord Done, at {}".format(mindrecord_dir))
else: else:
print("COCO_ROOT not exits.") print("coco_root not exits.")
else: else:
if os.path.isdir(config.IMAGE_DIR) and os.path.exists(config.ANNO_PATH): if os.path.isdir(config.image_dir) and os.path.exists(config.anno_path):
print("Create Mindrecord.") print("Create Mindrecord.")
data_to_mindrecord_byte_image("other", True, prefix) data_to_mindrecord_byte_image("other", True, prefix)
print("Create Mindrecord Done, at {}".format(mindrecord_dir)) print("Create Mindrecord Done, at {}".format(mindrecord_dir))
else: else:
print("IMAGE_DIR or ANNO_PATH not exits.") print("image_dir or anno_path not exits.")
if not args_opt.only_create_dataset: if not args_opt.only_create_dataset:
loss_scale = float(args_opt.loss_scale) loss_scale = float(args_opt.loss_scale)
...@@ -143,7 +97,8 @@ def main(): ...@@ -143,7 +97,8 @@ def main():
dataset_size = dataset.get_dataset_size() dataset_size = dataset.get_dataset_size()
print("Create dataset done!") print("Create dataset done!")
ssd = SSD300(backbone=ssd_mobilenet_v2(), config=config) backbone = ssd_mobilenet_v2()
ssd = SSD300(backbone=backbone, config=config)
net = SSDWithLossCell(ssd, config) net = SSDWithLossCell(ssd, config)
init_net_param(net) init_net_param(net)
...@@ -157,12 +112,13 @@ def main(): ...@@ -157,12 +112,13 @@ def main():
param_dict = load_checkpoint(args_opt.pre_trained) param_dict = load_checkpoint(args_opt.pre_trained)
load_param_into_net(net, param_dict) load_param_into_net(net, param_dict)
lr = Tensor(get_lr(global_step=args_opt.pre_trained_epoch_size * dataset_size, lr = Tensor(get_lr(global_step=config.global_step,
lr_init=0, lr_end=0, lr_max=args_opt.lr, lr_init=config.lr_init, lr_end=config.lr_end_rate * args_opt.lr, lr_max=args_opt.lr,
warmup_epochs=max(350 // 20, 1), warmup_epochs=config.warmup_epochs,
total_epochs=350, total_epochs=args_opt.epoch_size,
steps_per_epoch=dataset_size)) steps_per_epoch=dataset_size))
opt = nn.Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr, 0.9, 0.0001, loss_scale) opt = nn.Momentum(filter(lambda x: x.requires_grad, net.get_parameters()), lr,
config.momentum, config.weight_decay, loss_scale)
net = TrainingWrapper(net, opt, loss_scale) net = TrainingWrapper(net, opt, loss_scale)
callback = [TimeMonitor(data_size=dataset_size), LossMonitor(), ckpoint_cb] callback = [TimeMonitor(data_size=dataset_size), LossMonitor(), ckpoint_cb]
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册