diff --git a/README.md b/README.md
index 96537cbb918a611f11416c9a2d92a5726c351a3f..0cd889e85e9816f1d698bf3fb3622c7f11cc0da6 100644
--- a/README.md
+++ b/README.md
@@ -70,16 +70,18 @@ export PYTHONPATH=/path/to/models:$PYTHONPATH
| ShuffleNetV2 x1.5 | 72.806 | 90.792 |
| ShuffleNetV2 x2.0 | 75.074 | 92.278 |
-
### Object Detection
Object detection is another common computer-vision task. We provide two classic detection models, [Retinanet](./official/vision/detection/model/retinanet) and [Faster R-CNN](./official/vision/detection/model/faster_rcnn). Their results on the **COCO validation set** are as follows:
-| Model | mAP@5-95 |
-| :---: | :---: |
-| retinanet-res50-1x-800size | 36.4 |
-| faster-rcnn-res50-1x-800size | 38.8 |
-
+| Model | mAP@5-95 |
+| :---: | :---: |
+| retinanet-res50-coco-1x-800size | 36.4 |
+| retinanet-res50-coco-1x-800size-syncbn | 37.1 |
+| retinanet-res101-coco-2x-800size | 40.8 |
+| faster-rcnn-res50-coco-1x-800size | 38.8 |
+| faster-rcnn-res50-coco-1x-800size-syncbn | 39.3 |
+| faster-rcnn-res101-coco-2x-800size | 43.0 |
### Image Segmentation
@@ -117,7 +119,6 @@ export PYTHONPATH=/path/to/models:$PYTHONPATH
| chinese_L-12_H-768_A-12| [link](https://data.megengine.org.cn/models/weights/bert/chinese_L-12_H-768_A-12/vocab.txt) | [link](https://data.megengine.org.cn/models/weights/bert/chinese_L-12_H-768_A-12/bert_config.json)
| multi_cased_L-12_H-768_A-12| [link](https://data.megengine.org.cn/models/weights/bert/multi_cased_L-12_H-768_A-12/vocab.txt) | [link](https://data.megengine.org.cn/models/weights/bert/multi_cased_L-12_H-768_A-12/bert_config.json)
-
Fine-tuning and evaluating on the glue_data/MRPC dataset with the default hyper-parameters gives an evaluation accuracy between 84% and 88%.
| Dataset | pretrained_bert | acc |
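For context, the detection entries registered in `hubconf.py` below can be pulled in through `megengine.hub`. A minimal loading sketch, assuming the repository is published under the `megengine/models` hub spec and that the `pretrained` keyword injected by `@hub.pretrained` downloads the listed checkpoint:

```python
# Hedged sketch: loading one of the new ResNet-101 detectors via megengine.hub.
# The "megengine/models" repo spec and the pretrained=True behaviour are
# assumptions based on the @hub.pretrained entries in this diff.
from megengine import hub

model = hub.load("megengine/models", "retinanet_res101_coco_2x_800size", pretrained=True)
model.eval()  # switch the module to inference mode before evaluation
```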
diff --git a/hubconf.py b/hubconf.py
index 04cc0cd10abca172e6ae1085496e8a2d13ba9f16..588552d87753b4db1225892eeeb9106e5a38651a 100644
--- a/hubconf.py
+++ b/hubconf.py
@@ -30,8 +30,10 @@ from official.vision.classification.shufflenet.model import (
from official.vision.detection.configs import (
faster_rcnn_res50_coco_1x_800size,
faster_rcnn_res50_coco_1x_800size_syncbn,
+ faster_rcnn_res101_coco_2x_800size,
retinanet_res50_coco_1x_800size,
retinanet_res50_coco_1x_800size_syncbn,
+ retinanet_res101_coco_2x_800size,
)
from official.vision.detection.models import FasterRCNN, RetinaNet
from official.vision.detection.tools.utils import DetEvaluator
diff --git a/official/vision/detection/README.md b/official/vision/detection/README.md
index f704897f35622baff231c4a21273778a6d7fe6eb..c872657a5c0231e72abe1c052775d37aa1021e11 100644
--- a/official/vision/detection/README.md
+++ b/official/vision/detection/README.md
@@ -10,10 +10,12 @@
| --- | :---: | :---: | :---: | :---: |
| retinanet-res50-coco-1x-800size | 36.4 | 2 | 2080Ti | 3.1(it/s) |
| retinanet-res50-coco-1x-800size-syncbn | 37.1 | 2 | 2080Ti | 1.7(it/s) |
+| retinanet-res101-coco-2x-800size | 40.8 | 2 | 2080Ti | 2.1(it/s) |
| faster-rcnn-res50-coco-1x-800size | 38.8 | 2 | 2080Ti | 3.3(it/s) |
| faster-rcnn-res50-coco-1x-800size-syncbn | 39.3 | 2 | 2080Ti | 1.8(it/s) |
+| faster-rcnn-res101-coco-2x-800size | 43.0 | 2 | 2080Ti | 2.3(it/s) |
-* MegEngine v0.4.0
+* MegEngine v0.5.1
## How to Use
diff --git a/official/vision/detection/configs/__init__.py b/official/vision/detection/configs/__init__.py
index f9a258c1f603cbd564fa6f76f9e4f73e2e192fda..9eadf38e8cbbde333866aa153d69e8495598526b 100644
--- a/official/vision/detection/configs/__init__.py
+++ b/official/vision/detection/configs/__init__.py
@@ -1,7 +1,9 @@
from .faster_rcnn_res50_coco_1x_800size import faster_rcnn_res50_coco_1x_800size
from .faster_rcnn_res50_coco_1x_800size_syncbn import faster_rcnn_res50_coco_1x_800size_syncbn
+from .faster_rcnn_res101_coco_2x_800size import faster_rcnn_res101_coco_2x_800size
from .retinanet_res50_coco_1x_800size import retinanet_res50_coco_1x_800size
from .retinanet_res50_coco_1x_800size_syncbn import retinanet_res50_coco_1x_800size_syncbn
+from .retinanet_res101_coco_2x_800size import retinanet_res101_coco_2x_800size
_EXCLUDE = {}
__all__ = [k for k in globals().keys() if k not in _EXCLUDE and not k.startswith("_")]
diff --git a/official/vision/detection/configs/faster_rcnn_res101_coco_2x_800size.py b/official/vision/detection/configs/faster_rcnn_res101_coco_2x_800size.py
new file mode 100644
index 0000000000000000000000000000000000000000..da6c3fd07fab58d51c6923b24807dc26ce1b3a17
--- /dev/null
+++ b/official/vision/detection/configs/faster_rcnn_res101_coco_2x_800size.py
@@ -0,0 +1,40 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+from megengine import hub
+
+from official.vision.detection import models
+
+
+class CustomFasterRCNNConfig(models.FasterRCNNConfig):
+ def __init__(self):
+ super().__init__()
+
+ self.backbone = "resnet101"
+
+ # ------------------------ training cfg ---------------------- #
+ self.max_epoch = 36
+ self.lr_decay_stages = [24, 32, 34]
+
+
+@hub.pretrained(
+ "https://data.megengine.org.cn/models/weights/"
+ "faster_rcnn_res101_coco_2x_800size_43dot0_ee249359.pkl"
+)
+def faster_rcnn_res101_coco_2x_800size(batch_size=1, **kwargs):
+ r"""
+ Faster-RCNN FPN trained from COCO dataset.
+ `"Faster-RCNN" `_
+ `"FPN" `_
+ `"COCO" `_
+ """
+ return models.FasterRCNN(CustomFasterRCNNConfig(), batch_size=batch_size, **kwargs)
+
+
+Net = models.FasterRCNN
+Cfg = CustomFasterRCNNConfig
diff --git a/official/vision/detection/configs/retinanet_res101_coco_2x_800size.py b/official/vision/detection/configs/retinanet_res101_coco_2x_800size.py
new file mode 100644
index 0000000000000000000000000000000000000000..456f7059383e7e8959c14d49efdc4177d5d1250d
--- /dev/null
+++ b/official/vision/detection/configs/retinanet_res101_coco_2x_800size.py
@@ -0,0 +1,40 @@
+# -*- coding: utf-8 -*-
+# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
+#
+# Copyright (c) 2014-2020 Megvii Inc. All rights reserved.
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT ARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+from megengine import hub
+
+from official.vision.detection import models
+
+
+class CustomRetinaNetConfig(models.RetinaNetConfig):
+ def __init__(self):
+ super().__init__()
+
+ self.backbone = "resnet101"
+
+ # ------------------------ training cfg ---------------------- #
+ self.max_epoch = 36
+ self.lr_decay_stages = [24, 32, 34]
+
+
+@hub.pretrained(
+ "https://data.megengine.org.cn/models/weights/"
+ "retinanet_res101_coco_2x_800size_40dot8_661c3608.pkl"
+)
+def retinanet_res101_coco_2x_800size(batch_size=1, **kwargs):
+ r"""
+ RetinaNet trained from COCO dataset.
+ `"RetinaNet" `_
+ `"FPN" `_
+ `"COCO" `_
+ """
+ return models.RetinaNet(CustomRetinaNetConfig(), batch_size=batch_size, **kwargs)
+
+
+Net = models.RetinaNet
+Cfg = CustomRetinaNetConfig
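The two entry points defined above can also be imported directly. A minimal sketch, assuming the `pretrained` keyword added by `@hub.pretrained` fetches the checkpoint URLs given in the decorators:

```python
# Hedged sketch: building the new detectors straight from their config entries.
# pretrained=True is assumed to be handled by the @hub.pretrained decorator.
from official.vision.detection.configs import (
    faster_rcnn_res101_coco_2x_800size,
    retinanet_res101_coco_2x_800size,
)

retinanet = retinanet_res101_coco_2x_800size(batch_size=1, pretrained=True)
faster_rcnn = faster_rcnn_res101_coco_2x_800size(batch_size=1, pretrained=True)
retinanet.eval()
faster_rcnn.eval()
```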
diff --git a/official/vision/detection/tools/inference.py b/official/vision/detection/tools/inference.py
index 04ca91f4d73761515b423f9bbec99738a0b68f01..d91cbd55ddde56141f4e726a6b890575321d4e54 100644
--- a/official/vision/detection/tools/inference.py
+++ b/official/vision/detection/tools/inference.py
@@ -48,7 +48,9 @@ def main():
sys.path.insert(0, os.path.dirname(args.file))
current_network = importlib.import_module(os.path.basename(args.file).split(".")[0])
- model = current_network.Net(current_network.Cfg(), batch_size=1)
+ cfg = current_network.Cfg()
+ cfg.backbone_pretrained = False
+ model = current_network.Net(cfg, batch_size=1)
model.eval()
state_dict = mge.load(args.weight_file)
if "state_dict" in state_dict:
diff --git a/official/vision/detection/tools/test.py b/official/vision/detection/tools/test.py
index 20dc2cd8a7f32726d2009f9b6e58cfcab8fcb9ab..90710fb87670da65c99b580c3e9a1baea036b720 100644
--- a/official/vision/detection/tools/test.py
+++ b/official/vision/detection/tools/test.py
@@ -37,9 +37,6 @@ def make_parser():
parser.add_argument(
"-n", "--ngpus", default=1, type=int, help="total number of gpus for testing",
)
- parser.add_argument(
- "-b", "--batch_size", default=1, type=int, help="batchsize for testing",
- )
parser.add_argument(
"-d", "--dataset_dir", default="/data/datasets", type=str,
)
@@ -56,6 +53,9 @@ def main():
parser = make_parser()
args = parser.parse_args()
+ sys.path.insert(0, os.path.dirname(args.file))
+ current_network = importlib.import_module(os.path.basename(args.file).split(".")[0])
+
if args.end_epoch == -1:
args.end_epoch = args.start_epoch
@@ -75,7 +75,7 @@ def main():
proc = Process(
target=worker,
args=(
- args.file,
+ current_network,
model_file,
args.dataset_dir,
i,
@@ -86,10 +86,6 @@ def main():
proc.start()
procs.append(proc)
- sys.path.insert(0, os.path.dirname(args.file))
- current_network = importlib.import_module(
- os.path.basename(args.file).split(".")[0]
- )
cfg = current_network.Cfg()
num_imgs = dict(coco=5000, objects365=30000)
@@ -139,7 +135,7 @@ def main():
def worker(
- net_file, model_file, data_dir, worker_id, total_worker, result_queue,
+ current_network, model_file, data_dir, worker_id, total_worker, result_queue,
):
"""
-    :param net_file: network description file
+    :param current_network: imported network module
@@ -156,9 +152,9 @@ def worker(
pred = model(model.inputs)
return pred
- sys.path.insert(0, os.path.dirname(net_file))
- current_network = importlib.import_module(os.path.basename(net_file).split(".")[0])
- model = current_network.Net(current_network.Cfg(), batch_size=1)
+ cfg = current_network.Cfg()
+ cfg.backbone_pretrained = False
+ model = current_network.Net(cfg, batch_size=1)
model.eval()
evaluator = DetEvaluator(model)
state_dict = mge.load(model_file)
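Taken together, the tool changes settle on one loading pattern: import the config module once from its file path, switch off the ImageNet backbone download (the detector checkpoint already carries those weights), and then restore the checkpoint. A standalone sketch of that flow, with placeholder paths:

```python
# Standalone sketch of the config-loading flow used by tools/inference.py and
# tools/test.py after this change; file paths below are placeholders.
import importlib
import os
import sys

import megengine as mge

config_file = "official/vision/detection/configs/retinanet_res101_coco_2x_800size.py"
weight_file = "/path/to/retinanet_res101_coco_2x_800size.pkl"  # placeholder path

# Make the config module importable by its bare name, as the tools do.
sys.path.insert(0, os.path.dirname(config_file))
current_network = importlib.import_module(os.path.basename(config_file).split(".")[0])

cfg = current_network.Cfg()
cfg.backbone_pretrained = False  # checkpoint already carries the backbone weights
model = current_network.Net(cfg, batch_size=1)
model.eval()

state_dict = mge.load(weight_file)
if "state_dict" in state_dict:
    state_dict = state_dict["state_dict"]
model.load_state_dict(state_dict)
```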