diff --git a/README.md b/README.md
index 96537cbb918a611f11416c9a2d92a5726c351a3f..0cd889e85e9816f1d698bf3fb3622c7f11cc0da6 100644
--- a/README.md
+++ b/README.md
@@ -70,16 +70,18 @@ export PYTHONPATH=/path/to/models:$PYTHONPATH
 | ShuffleNetV2 x1.5 | 72.806 | 90.792 |
 | ShuffleNetV2 x2.0 | 75.074 | 92.278 |
 
-
 ### Object Detection
 
 Object detection is likewise a common computer-vision task. We provide two classic detection models, [RetinaNet](./official/vision/detection/model/retinanet) and [Faster R-CNN](./official/vision/detection/model/faster_rcnn); their results on the **COCO validation set** are listed below:
 
-| Model | mAP<br>@5-95 |
-| :---: | :---: |
-| retinanet-res50-1x-800size | 36.4 |
-| faster-rcnn-res50-1x-800size | 38.8 |
-
+| Model | mAP<br>@5-95 |
+| :---: | :---: |
+| retinanet-res50-coco-1x-800size | 36.4 |
+| retinanet-res50-coco-1x-800size-syncbn | 37.1 |
+| retinanet-res101-coco-2x-800size | 40.8 |
+| faster-rcnn-res50-coco-1x-800size | 38.8 |
+| faster-rcnn-res50-coco-1x-800size-syncbn | 39.3 |
+| faster-rcnn-res101-coco-2x-800size | 43.0 |
 
 ### Image Segmentation
 
@@ -117,7 +119,6 @@ export PYTHONPATH=/path/to/models:$PYTHONPATH
 | chinese_L-12_H-768_A-12| [link](https://data.megengine.org.cn/models/weights/bert/chinese_L-12_H-768_A-12/vocab.txt) | [link](https://data.megengine.org.cn/models/weights/bert/chinese_L-12_H-768_A-12/bert_config.json)
 | multi_cased_L-12_H-768_A-12| [link](https://data.megengine.org.cn/models/weights/bert/multi_cased_L-12_H-768_A-12/vocab.txt) | [link](https://data.megengine.org.cn/models/weights/bert/multi_cased_L-12_H-768_A-12/bert_config.json)
 
-
 Fine-tuning and evaluating on the glue_data/MRPC dataset with the default hyper-parameters yields an accuracy between 84% and 88%.
 
 | Dataset | pretrained_bert | acc |
diff --git a/hubconf.py b/hubconf.py
index 04cc0cd10abca172e6ae1085496e8a2d13ba9f16..588552d87753b4db1225892eeeb9106e5a38651a 100644
--- a/hubconf.py
+++ b/hubconf.py
@@ -30,8 +30,10 @@ from official.vision.classification.shufflenet.model import (
 from official.vision.detection.configs import (
     faster_rcnn_res50_coco_1x_800size,
     faster_rcnn_res50_coco_1x_800size_syncbn,
+    faster_rcnn_res101_coco_2x_800size,
     retinanet_res50_coco_1x_800size,
     retinanet_res50_coco_1x_800size_syncbn,
+    retinanet_res101_coco_2x_800size,
 )
 from official.vision.detection.models import FasterRCNN, RetinaNet
 from official.vision.detection.tools.utils import DetEvaluator
diff --git a/official/vision/detection/README.md b/official/vision/detection/README.md
index f704897f35622baff231c4a21273778a6d7fe6eb..c872657a5c0231e72abe1c052775d37aa1021e11 100644
--- a/official/vision/detection/README.md
+++ b/official/vision/detection/README.md
@@ -10,10 +10,12 @@
 | --- | :---: | :---: | :---: | :---: |
 | retinanet-res50-coco-1x-800size | 36.4 | 2 | 2080Ti | 3.1(it/s) |
 | retinanet-res50-coco-1x-800size-syncbn | 37.1 | 2 | 2080Ti | 1.7(it/s) |
+| retinanet-res101-coco-2x-800size | 40.8 | 2 | 2080Ti | 2.1(it/s) |
 | faster-rcnn-res50-coco-1x-800size | 38.8 | 2 | 2080Ti | 3.3(it/s) |
 | faster-rcnn-res50-coco-1x-800size-syncbn | 39.3 | 2 | 2080Ti | 1.8(it/s) |
+| faster-rcnn-res101-coco-2x-800size | 43.0 | 2 | 2080Ti | 2.3(it/s) |
 
-* MegEngine v0.4.0
+* MegEngine v0.5.1
 
 ## How to Use
 
diff --git a/official/vision/detection/configs/__init__.py b/official/vision/detection/configs/__init__.py
index f9a258c1f603cbd564fa6f76f9e4f73e2e192fda..9eadf38e8cbbde333866aa153d69e8495598526b 100644
--- a/official/vision/detection/configs/__init__.py
+++ b/official/vision/detection/configs/__init__.py
@@ -1,7 +1,9 @@
 from .faster_rcnn_res50_coco_1x_800size import faster_rcnn_res50_coco_1x_800size
 from .faster_rcnn_res50_coco_1x_800size_syncbn import faster_rcnn_res50_coco_1x_800size_syncbn
+from .faster_rcnn_res101_coco_2x_800size import faster_rcnn_res101_coco_2x_800size
 from .retinanet_res50_coco_1x_800size import retinanet_res50_coco_1x_800size
 from .retinanet_res50_coco_1x_800size_syncbn import retinanet_res50_coco_1x_800size_syncbn
+from .retinanet_res101_coco_2x_800size import retinanet_res101_coco_2x_800size
 
 _EXCLUDE = {}
 __all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")]
diff --git a/official/vision/detection/configs/faster_rcnn_res101_coco_2x_800size.py b/official/vision/detection/configs/faster_rcnn_res101_coco_2x_800size.py
new file mode 100644
index 0000000000000000000000000000000000000000..da6c3fd07fab58d51c6923b24807dc26ce1b3a17
--- /dev/null
+++ b/official/vision/detection/configs/faster_rcnn_res101_coco_2x_800size.py
@@ -0,0 +1,40 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+from megengine import hub
+
+from official.vision.detection import models
+
+
+class CustomFasterRCNNConfig(models.FasterRCNNConfig):
+    def __init__(self):
+        super().__init__()
+
+        self.backbone = "resnet101"
+
+        # ------------------------ training cfg ---------------------- #
+        self.max_epoch = 36
+        self.lr_decay_stages = [24, 32, 34]
+
+
+@hub.pretrained(
+    "https://data.megengine.org.cn/models/weights/"
+    "faster_rcnn_res101_coco_2x_800size_43dot0_ee249359.pkl"
+)
+def faster_rcnn_res101_coco_2x_800size(batch_size=1, **kwargs):
+    r"""
+    Faster-RCNN FPN trained from COCO dataset.
+    `"Faster-RCNN" <https://arxiv.org/abs/1506.01497>`_
+    `"FPN" <https://arxiv.org/abs/1612.03144>`_
+    `"COCO" <https://arxiv.org/abs/1405.0312>`_
+    """
+    return models.FasterRCNN(CustomFasterRCNNConfig(), batch_size=batch_size, **kwargs)
+
+
+Net = models.FasterRCNN
+Cfg = CustomFasterRCNNConfig
diff --git a/official/vision/detection/configs/retinanet_res101_coco_2x_800size.py b/official/vision/detection/configs/retinanet_res101_coco_2x_800size.py
new file mode 100644
index 0000000000000000000000000000000000000000..456f7059383e7e8959c14d49efdc4177d5d1250d
--- /dev/null
+++ b/official/vision/detection/configs/retinanet_res101_coco_2x_800size.py
@@ -0,0 +1,40 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+from megengine import hub
+
+from official.vision.detection import models
+
+
+class CustomRetinaNetConfig(models.RetinaNetConfig):
+    def __init__(self):
+        super().__init__()
+
+        self.backbone = "resnet101"
+
+        # ------------------------ training cfg ---------------------- #
+        self.max_epoch = 36
+        self.lr_decay_stages = [24, 32, 34]
+
+
+@hub.pretrained(
+    "https://data.megengine.org.cn/models/weights/"
+    "retinanet_res101_coco_2x_800size_40dot8_661c3608.pkl"
+)
+def retinanet_res101_coco_2x_800size(batch_size=1, **kwargs):
+    r"""
+    RetinaNet trained from COCO dataset.
+    `"RetinaNet" <https://arxiv.org/abs/1708.02002>`_
+    `"FPN" <https://arxiv.org/abs/1612.03144>`_
+    `"COCO" <https://arxiv.org/abs/1405.0312>`_
+    """
+    return models.RetinaNet(CustomRetinaNetConfig(), batch_size=batch_size, **kwargs)
+
+
+Net = models.RetinaNet
+Cfg = CustomRetinaNetConfig
diff --git a/official/vision/detection/tools/inference.py b/official/vision/detection/tools/inference.py
index 04ca91f4d73761515b423f9bbec99738a0b68f01..d91cbd55ddde56141f4e726a6b890575321d4e54 100644
--- a/official/vision/detection/tools/inference.py
+++ b/official/vision/detection/tools/inference.py
@@ -48,7 +48,9 @@ def main():
     sys.path.insert(0, os.path.dirname(args.file))
     current_network = importlib.import_module(os.path.basename(args.file).split(".")[0])
 
-    model = current_network.Net(current_network.Cfg(), batch_size=1)
+    cfg = current_network.Cfg()
+    cfg.backbone_pretrained = False
+    model = current_network.Net(cfg, batch_size=1)
     model.eval()
     state_dict = mge.load(args.weight_file)
     if "state_dict" in state_dict:
diff --git a/official/vision/detection/tools/test.py b/official/vision/detection/tools/test.py
index 20dc2cd8a7f32726d2009f9b6e58cfcab8fcb9ab..90710fb87670da65c99b580c3e9a1baea036b720 100644
--- a/official/vision/detection/tools/test.py
+++ b/official/vision/detection/tools/test.py
@@ -37,9 +37,6 @@ def make_parser():
     parser.add_argument(
         "-n", "--ngpus", default=1, type=int, help="total number of gpus for testing",
     )
-    parser.add_argument(
-        "-b", "--batch_size", default=1, type=int, help="batchsize for testing",
-    )
     parser.add_argument(
         "-d", "--dataset_dir", default="/data/datasets", type=str,
     )
@@ -56,6 +53,9 @@ def main():
     parser = make_parser()
     args = parser.parse_args()
 
+    sys.path.insert(0, os.path.dirname(args.file))
+    current_network = importlib.import_module(os.path.basename(args.file).split(".")[0])
+
     if args.end_epoch == -1:
         args.end_epoch = args.start_epoch
 
@@ -75,7 +75,7 @@ def main():
         proc = Process(
             target=worker,
             args=(
-                args.file,
+                current_network,
                 model_file,
                 args.dataset_dir,
                 i,
@@ -86,10 +86,6 @@ def main():
         proc.start()
         procs.append(proc)
 
-    sys.path.insert(0, os.path.dirname(args.file))
-    current_network = importlib.import_module(
-        os.path.basename(args.file).split(".")[0]
-    )
     cfg = current_network.Cfg()
 
     num_imgs = dict(coco=5000, objects365=30000)
@@ -139,7 +135,7 @@ def main():
 
 
 def worker(
-    net_file, model_file, data_dir, worker_id, total_worker, result_queue,
+    current_network, model_file, data_dir, worker_id, total_worker, result_queue,
 ):
     """
     :param net_file: network description file
@@ -156,9 +152,9 @@ def worker(
         pred = model(model.inputs)
         return pred
 
-    sys.path.insert(0, os.path.dirname(net_file))
-    current_network = importlib.import_module(os.path.basename(net_file).split(".")[0])
-    model = current_network.Net(current_network.Cfg(), batch_size=1)
+    cfg = current_network.Cfg()
+    cfg.backbone_pretrained = False
+    model = current_network.Net(cfg, batch_size=1)
     model.eval()
     evaluator = DetEvaluator(model)
     state_dict = mge.load(model_file)
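Usage note (not part of the diff itself): the two ResNet-101 2x configs are re-exported through `configs/__init__.py` and `hubconf.py` above, so they can be constructed the same way as the existing detection entry points. Below is a minimal sketch, assuming MegEngine v0.5.x, network access to the weight URLs, and that `pretrained=True` is wired up by the `@hub.pretrained` decorator shown in the new config files; the `"megengine/models"` repo string follows the pattern used for the classification models and may need adjusting to your setup.

```python
# Illustrative only: load one of the newly added ResNet-101 2x detectors.
from megengine import hub

# hubconf.py re-exports retinanet_res101_coco_2x_800size, so megengine.hub
# can resolve it by name; pretrained=True fetches the .pkl referenced by
# @hub.pretrained in the new config file.
model = hub.load(
    "megengine/models", "retinanet_res101_coco_2x_800size", pretrained=True
)
model.eval()

# Equivalently, with this repo on PYTHONPATH, skip hub.load and import
# the entry point directly:
# from official.vision.detection.configs import faster_rcnn_res101_coco_2x_800size
# model = faster_rcnn_res101_coco_2x_800size(pretrained=True)
```

For evaluation, the pattern introduced in `tools/test.py` and `tools/inference.py` (setting `cfg.backbone_pretrained = False` before building the model) avoids first downloading ImageNet backbone weights, since the detector checkpoint loaded afterwards supplies those weights anyway.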