From 98939299e55ab2c98e2bf6a4d26739e686bc77c9 Mon Sep 17 00:00:00 2001 From: Jianfeng Wang Date: Tue, 14 Jul 2020 14:09:57 +0800 Subject: [PATCH] fix(hub): fix detection models and links (#7) --- models/megengine_nlp_bert.md | 42 +++++++++++------------ models/megengine_vision_deeplabv3plus.md | 14 ++++---- models/megengine_vision_faster_rcnn.md | 28 ++++++++------- models/megengine_vision_mspn.md | 10 +++--- models/megengine_vision_resnet.md | 22 ++++++------ models/megengine_vision_retinanet.md | 29 +++++++++------- models/megengine_vision_shufflenet_v2.md | 24 ++++++------- models/megengine_vision_simplebaseline.md | 8 ++--- 8 files changed, 91 insertions(+), 86 deletions(-) diff --git a/models/megengine_nlp_bert.md b/models/megengine_nlp_bert.md index bbdba8b..4439ff6 100644 --- a/models/megengine_nlp_bert.md +++ b/models/megengine_nlp_bert.md @@ -6,7 +6,7 @@ summary: zh_CN: BERT author: MegEngine Team tags: [nlp] -github-link: https://github.com/megengine/models +github-link: https://github.com/MegEngine/Models/tree/master/official/nlp/bert --- ```python @@ -23,7 +23,7 @@ model = megengine.hub.load("megengine/models", "wwm_cased_L-24_H-1024_A-16", pre # model = megengine.hub.load("megengine/models", "multi_cased_L-12_H-768_A-12", pretrained=True) ``` - + 这个项目中, 我们用MegEngine重新实现了Google开源的BERT模型. @@ -72,21 +72,21 @@ def create_hub_bert(model_name, pretrained): vocab_url = '{}/{}/{}'.format(DATA_URL, model_name, VOCAB_NAME) config_url = '{}/{}/{}'.format(DATA_URL, model_name, CONFIG_NAME) - + vocab_file = './{}/{}'.format(model_name, VOCAB_NAME) config_file = './{}/{}'.format(model_name, CONFIG_NAME) - + download_file(vocab_url, vocab_file) download_file(config_url, config_file) config = BertConfig(config_file) model = hub.load( - "megengine/models", - MODEL_NAME[model_name], + "megengine/models", + MODEL_NAME[model_name], pretrained=pretrained, ) - + return model, config, vocab_file ``` @@ -102,14 +102,14 @@ class BertForSequenceClassification(Module): def forward(self, input_ids, token_type_ids=None, attention_mask=None, labels=None): _, pooled_output = self.bert( - input_ids, token_type_ids, + input_ids, token_type_ids, attention_mask, output_all_encoded_layers=False) pooled_output = self.dropout(pooled_output) logits = self.classifier(pooled_output) if labels is not None: loss = cross_entropy_with_softmax( - logits.reshape(-1, self.num_labels), + logits.reshape(-1, self.num_labels), labels.reshape(-1)) return logits, loss else: @@ -123,7 +123,7 @@ model = BertForSequenceClassification(config, num_labels=2, bert=bert) ### 模型描述 -我们在[models](https://github.com/megengine/models/official/nlp/bert)中提供了简单的示例代码. +我们在[models](https://github.com/megengine/models/official/nlp/bert)中提供了简单的示例代码. 此示例代码在Microsoft Research Paraphrase(MRPC)数据集上对预训练的`uncased_L-12_H-768_A-12`模型进行微调. 我们的样例代码中使用了原始的超参进行微调, 在测试集中可以得到84%到88%的正确率. @@ -131,9 +131,9 @@ model = BertForSequenceClassification(config, num_labels=2, bert=bert) ### 参考文献 - [BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding](https://arxiv.org/abs/1810.04805), Jacob Devlin, Ming-Wei Chang, Kenton Lee, Kristina Toutanova; - - + + This repository contains reimplemented Google's BERT by MegEngine. We provide the following pre-trained models for users to finetune in different tasks. 
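As a rough sketch of how the pieces shown above fit together, the snippet below loads a pre-trained backbone with the `create_hub_bert` helper, wraps it in the `BertForSequenceClassification` module from the same doc, and runs a forward pass on dummy token ids. This is illustrative only: the padded length of 128, the zero-filled ids, the all-ones attention mask, and the int32 dtype are placeholder assumptions; real usage would tokenize text with the vocabulary file returned by `create_hub_bert`.

```python
# Illustrative sketch only (assumptions: dummy zero token ids, padded length 128).
# Real inputs come from tokenizing text with the downloaded vocab file.
import numpy as np
import megengine

bert, config, vocab_file = create_hub_bert("uncased_L-12_H-768_A-12", pretrained=True)
model = BertForSequenceClassification(config, num_labels=2, bert=bert)
model.eval()

input_ids = megengine.tensor(np.zeros((1, 128), dtype=np.int32))       # padded token ids
token_type_ids = megengine.tensor(np.zeros((1, 128), dtype=np.int32))  # single-segment input
attention_mask = megengine.tensor(np.ones((1, 128), dtype=np.int32))   # nothing masked out

logits = model(input_ids, token_type_ids, attention_mask)  # no labels -> returns logits only
print(logits)
```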
@@ -181,21 +181,21 @@ def create_hub_bert(model_name, pretrained): vocab_url = '{}/{}/{}'.format(DATA_URL, model_name, VOCAB_NAME) config_url = '{}/{}/{}'.format(DATA_URL, model_name, CONFIG_NAME) - + vocab_file = './{}/{}'.format(model_name, VOCAB_NAME) config_file = './{}/{}'.format(model_name, CONFIG_NAME) - + download_file(vocab_url, vocab_file) download_file(config_url, config_file) config = BertConfig(config_file) model = hub.load( - "megengine/models", - MODEL_NAME[model_name], + "megengine/models", + MODEL_NAME[model_name], pretrained=pretrained, ) - + return model, config, vocab_file ``` @@ -212,14 +212,14 @@ class BertForSequenceClassification(Module): def forward(self, input_ids, token_type_ids=None, attention_mask=None, labels=None): _, pooled_output = self.bert( - input_ids, token_type_ids, + input_ids, token_type_ids, attention_mask, output_all_encoded_layers=False) pooled_output = self.dropout(pooled_output) logits = self.classifier(pooled_output) if labels is not None: loss = cross_entropy_with_softmax( - logits.reshape(-1, self.num_labels), + logits.reshape(-1, self.num_labels), labels.reshape(-1)) return logits, loss else: @@ -234,11 +234,11 @@ All pre-trained models expect the data to be pre-processed correctly. The requir ### Model Description We provide example code in [models](https://github.com/megengine/models/official/nlp/bert). -This example code fine-tunes the pre-trained `uncased_L-12_H-768_A-12` model on the Microsoft Research Paraphrase (MRPC) dataset. +This example code fine-tunes the pre-trained `uncased_L-12_H-768_A-12` model on the Microsoft Research Paraphrase (MRPC) dataset. Our test ran on the original implementation hyper-parameters gave evaluation results between 84% and 88%. ### References - [BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding](https://arxiv.org/abs/1810.04805), Jacob Devlin, Ming-Wei Chang, Kenton Lee, Kristina Toutanova; - + diff --git a/models/megengine_vision_deeplabv3plus.md b/models/megengine_vision_deeplabv3plus.md index 29d31c7..6d7cc50 100644 --- a/models/megengine_vision_deeplabv3plus.md +++ b/models/megengine_vision_deeplabv3plus.md @@ -6,7 +6,7 @@ summary: zh_CN: DeepLabV3plus (VOC预训练权重) author: MegEngine Team tags: [vision] -github-link: https://github.com/megengine/models +github-link: https://github.com/MegEngine/Models/tree/master/official/vision/segmentation --- ```python @@ -18,15 +18,15 @@ model = hub.load( ) model.eval() ``` - + 所有预训练模型希望数据被正确预处理。模型要求输入BGR的图片, 建议缩放到512x512,最后做归一化处理 (均值为: `[103.530, 116.280, 123.675]`, 标准差为: `[57.375, 57.120, 58.395]`)。 下面是一段处理一张图片的样例代码。 -```python -# Download an example image from the megengine data website +```python +# Download an example image from the megengine data website import urllib url, filename = ("https://data.megengine.org.cn/images/cat.jpg", "cat.jpg") try: urllib.URLopener().retrieve(url, filename) @@ -72,14 +72,14 @@ pred = cv2.resize(pred.astype("uint8"), (oriw, orih), interpolation=cv2.INTER_LI - [Encoder-Decoder with Atrous Separable Convolution for Semantic Image Segmentation](https://arxiv.org/abs/1802.02611.pdf), Liang-Chieh Chen, Yukun Zhu, George Papandreou, Florian Schroff, and Hartwig Adam; ECCV, 2018 - + All pre-trained models expect input images normalized in the same way. Input images must be 3-channel BGR images of shape (H x W x 3), reszied to (512 x 512), then normalized using mean = [103.530, 116.280, 123.675] and std = [57.375, 57.120, 58.395]). Here's a sample execution. 
-```python -# Download an example image from the megengine data website +```python +# Download an example image from the megengine data website import urllib url, filename = ("https://data.megengine.org.cn/images/cat.jpg", "cat.jpg") try: urllib.URLopener().retrieve(url, filename) diff --git a/models/megengine_vision_faster_rcnn.md b/models/megengine_vision_faster_rcnn.md index 80125cb..8ed5b31 100644 --- a/models/megengine_vision_faster_rcnn.md +++ b/models/megengine_vision_faster_rcnn.md @@ -6,14 +6,14 @@ summary: zh_CN: Faster-RCNN (COCO预训练权重) author: MegEngine Team tags: [vision, detection] -github-link: https://github.com/megengine/models +github-link: https://github.com/MegEngine/Models/tree/master/official/vision/detection --- ```python from megengine import hub model = hub.load( "megengine/models", - "faster_rcnn_fpn_res50_coco_1x_800size", + "faster_rcnn_res50_coco_1x_800size", pretrained=True, use_cache=False, ) @@ -24,7 +24,7 @@ models_api = hub.import_module( git_host="github.com", ) ``` - + 所有预训练模型希望数据被正确预处理。 模型要求输入BGR的图片, 同时需要等比例缩放到:短边和长边分别不超过800/1333 @@ -52,7 +52,7 @@ from megengine import jit def infer(): predictions = model(model.inputs) return predictions - + print(infer()) ``` @@ -60,9 +60,10 @@ print(infer()) 目前我们提供了retinanet的预训练模型, 在coco验证集上的结果如下: -| model | mAP
@5-95 | -| --- | --- | -| faster-rcnn-res50-1x-800size | 37.3 | +| model | mAP
@5-95 |
+| --- | :---: |
+| faster-rcnn-res50-1x-800size | 38.8 |
+| faster-rcnn-res50-1x-800size-syncbn | 39.3 |
 
 ### 参考文献
 
@@ -70,8 +71,8 @@ print(infer())
 - [Feature Pyramid Networks for Object Detection](https://arxiv.org/pdf/1612.03144.pdf) T. Lin, P. Dollár, R. Girshick, K. He, B. Hariharan and S. Belongie. 2017 IEEE Conference on Computer Vision and Pattern Recognition (CVPR), Honolulu, HI, 2017, pp. 936-944, doi: 10.1109/CVPR.2017.106.
 - [Microsoft COCO: Common Objects in Context](https://arxiv.org/pdf/1405.0312.pdf) Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Dollár, Piotr and Zitnick, C Lawrence, Lin T Y, Maire M, Belongie S, et al. European conference on computer vision. Springer, Cham, 2014: 740-755.
- 
- 
+ 
+ 
 All pre-trained models expect input images normalized in the same way,
 i.e. input images must be 3-channel BGR images of shape `(H x W x 3)`, resized so that the short edge/long edge is no more than `800/1333`.
 
@@ -99,7 +100,7 @@ from megengine import jit
 def infer():
     predictions = model(model.inputs)
     return predictions
- 
+ 
 print(infer())
 ```
 
 ### Model Description
 
 Currently we provide `faster-rcnn` models pretrained on the `COCO2017` training set. The mAP on the `COCO2017` val set can be found in the following table.
 
-| model | mAP
@5-95 | -| --- | --- | -| faster-rcnn-res50-1x-800size | 37.3 | +| model | mAP
@5-95 | +| --- | :---: | +| faster-rcnn-res50-1x-800size | 38.8 | +| faster-rcnn-res50-1x-800size-syncbn | 39.3 | ### References diff --git a/models/megengine_vision_mspn.md b/models/megengine_vision_mspn.md index 29d7f73..3737345 100644 --- a/models/megengine_vision_mspn.md +++ b/models/megengine_vision_mspn.md @@ -6,7 +6,7 @@ summary: zh_CN: MSPN(COCO 预训练权重) author: MegEngine Team tags: [vision, keypoints] -github-link: https://github.com/megengine/models +github-link: https://github.com/MegEngine/Models/tree/master/official/vision/keypoints --- ```python3 @@ -14,7 +14,7 @@ import megengine.hub model = megengine.hub.load('megengine/models', 'mspn_4stage', pretrained=True) model.eval() ``` - + MSPN是单人关节点检测模型,在多人场景下需要配合人体检测器使用。详细的多人检测代码示例可以参考[inference.py](https://github.com/MegEngine/Models/blob/master/official/vision/keypoints/inference.py)。 针对单张图片,这里提供使用retinanet做人体检测,然后用MSPN检测关节点的示例: @@ -75,7 +75,7 @@ cv2.imwrite("vis_skeleton.jpg", canvas) ### 参考文献 - [Rethinking on Multi-Stage Networks for Human Pose Estimation](https://arxiv.org/pdf/1901.00148.pdf) Wenbo Li1, Zhicheng Wang, Binyi Yin, Qixiang Peng, Yuming Du, Tianzi Xiao, Gang Yu, Hongtao Lu, Yichen Wei and Jian Sun - + SimpleBaseline is classical network for single person pose estimation. It can also be applied to multi-person cases when combined with a human detector. The details of this pipline can be referred to [inference.py](https://github.com/MegEngine/Models/blob/master/official/vision/keypoints/inference.py). For single image, here is a sample execution when SimpleBaseline is combined with retinanet @@ -128,11 +128,11 @@ cv2.imwrite("vis_skeleton.jpg", canvas) ``` ### Model Desription -With the AP human detectoin results being 56.4 on COCO val2017 dataset, the performances of simplebline on COCO val2017 dataset is +With the AP human detectoin results being 56.4 on COCO val2017 dataset, the performances of simplebline on COCO val2017 dataset is |Methods|Backbone|Input Size| AP | Ap .5 | AP .75 | AP (M) | AP (L) | AR | AR .5 | AR .75 | AR (M) | AR (L) | |---|:---:|---|---|---|---|---|---|---|---|---|---|---| | MSPN_4stage |MSPN|256x192| 0.752 | 0.900 | 0.819 | 0.716 | 0.825 | 0.819 | 0.943 | 0.875 | 0.770 | 0.887 | ### References -- [Rethinking on Multi-Stage Networks for Human Pose Estimation](https://arxiv.org/pdf/1901.00148.pdf) Wenbo Li1, Zhicheng Wang, Binyi Yin, Qixiang Peng, Yuming Du, Tianzi Xiao, Gang Yu, Hongtao Lu, Yichen Wei and Jian Sun \ No newline at end of file +- [Rethinking on Multi-Stage Networks for Human Pose Estimation](https://arxiv.org/pdf/1901.00148.pdf) Wenbo Li1, Zhicheng Wang, Binyi Yin, Qixiang Peng, Yuming Du, Tianzi Xiao, Gang Yu, Hongtao Lu, Yichen Wei and Jian Sun diff --git a/models/megengine_vision_resnet.md b/models/megengine_vision_resnet.md index eb1ba56..4f389c4 100644 --- a/models/megengine_vision_resnet.md +++ b/models/megengine_vision_resnet.md @@ -6,7 +6,7 @@ summary: zh_CN: 深度残差网络(ImageNet 预训练权重) author: MegEngine Team tags: [vision, classification] -github-link: https://github.com/megengine/models +github-link: https://github.com/MegEngine/Models/tree/master/official/vision/classification --- ```python @@ -20,7 +20,7 @@ model = megengine.hub.load('megengine/models', 'resnet18', pretrained=True) # model = megengine.hub.load('megengine/models', 'resnext50_32x4d', pretrained=True) model.eval() ``` - + 所有预训练模型希望数据被正确预处理。 模型要求输入BGR的图片, 短边缩放到`256`, 并中心裁剪至`(224 x 224)`的大小,最后做归一化处理 (均值为: `[103.530, 116.280, 123.675]`, 标准差为: `[57.375, 57.120, 58.395]`)。 @@ -59,10 +59,10 @@ print(probs) | 模型 
| Top1 acc | Top5 acc | | --- | --- | --- | -| ResNet18 | 70.312 | 89.430 | -| ResNet34 | 73.960 | 91.630 | -| ResNet50 | 76.254 | 93.056 | -| ResNet101| 77.944 | 93.844 | +| ResNet18 | 70.312 | 89.430 | +| ResNet34 | 73.960 | 91.630 | +| ResNet50 | 76.254 | 93.056 | +| ResNet101| 77.944 | 93.844 | | ResNet152| 78.582 | 94.130 | | ResNeXt50 32x4d | 77.592 | 93.644 | @@ -71,7 +71,7 @@ print(probs) - [Deep Residual Learning for Image Recognition](http://openaccess.thecvf.com/content_cvpr_2016/papers/He_Deep_Residual_Learning_CVPR_2016_paper.pdf), Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun; The IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2016, pp. 770-778 - [Aggregated Residual Transformation for Deep Neural Networks](http://openaccess.thecvf.com/content_cvpr_2017/papers/Xie_Aggregated_Residual_Transformations_CVPR_2017_paper.pdf), Saining Xie, Ross Girshick, Piotr Dollar, Zhuowen Tu, Kaiming He; The IEEE Conference on Computer Vision and Pattern Recognition (CVPR), 2017, pp. 1492-1500 - + All pre-trained models expect input images normalized in the same way, i.e. input images must be 3-channel BGR images of shape `(H x W x 3)`, and reszied shortedge to `256`, center-cropped to `(224 x 224)`. @@ -111,10 +111,10 @@ Currently we provide these pretrained models: `resnet18`, `resnet34`, `resnet50` | model | Top1 acc | Top5 acc | | --- | --- | --- | -| ResNet18 | 70.312 | 89.430 | -| ResNet34 | 73.960 | 91.630 | -| ResNet50 | 76.254 | 93.056 | -| ResNet101| 77.944 | 93.844 | +| ResNet18 | 70.312 | 89.430 | +| ResNet34 | 73.960 | 91.630 | +| ResNet50 | 76.254 | 93.056 | +| ResNet101| 77.944 | 93.844 | | ResNet152| 78.582 | 94.130 | | ResNeXt50 32x4d | 77.592 | 93.644 | diff --git a/models/megengine_vision_retinanet.md b/models/megengine_vision_retinanet.md index 221d0ef..283df28 100644 --- a/models/megengine_vision_retinanet.md +++ b/models/megengine_vision_retinanet.md @@ -6,15 +6,16 @@ summary: zh_CN: RetinaNet (COCO预训练权重) author: MegEngine Team tags: [vision, detection] -github-link: https://github.com/megengine/models +github-link: https://github.com/MegEngine/Models/tree/master/official/vision/detection --- ```python from megengine import hub model = hub.load( "megengine/models", - "retinanet_res50_1x_800size", + "retinanet_res50_coco_1x_800size", pretrained=True, + use_cache=False, ) model.eval() @@ -23,7 +24,7 @@ models_api = hub.import_module( git_host="github.com", ) ``` - + 所有预训练模型希望数据被正确预处理。 模型要求输入BGR的图片, 同时需要等比例缩放到:短边和长边分别不超过800/1333 @@ -51,7 +52,7 @@ from megengine import jit def infer(): predictions = model(model.inputs) return predictions - + print(infer()) ``` @@ -59,9 +60,10 @@ print(infer()) 目前我们提供了retinanet的预训练模型, 在coco验证集上的结果如下: -| model | mAP
@5-95 | -| --- | --- | -| retinanet-res50-1x-800size | 36.0 | +| model | mAP
@5-95 |
+| --- | :---: |
+| retinanet-res50-coco1x-800size | 36.4 |
+| retinanet-res50-coco1x-800size-syncbn | 37.1 |
 
 ### 参考文献
 
@@ -69,8 +71,8 @@ print(infer())
 - [Microsoft COCO: Common Objects in Context](https://arxiv.org/pdf/1405.0312.pdf) Lin, Tsung-Yi and Maire, Michael and Belongie, Serge and Hays, James and Perona, Pietro and Ramanan, Deva and Dollár, Piotr and Zitnick, C Lawrence, Lin T Y, Maire M, Belongie S, et al. European conference on computer vision. Springer, Cham, 2014: 740-755.
- 
- 
+ 
+ 
 All pre-trained models expect input images normalized in the same way,
 i.e. input images must be 3-channel BGR images of shape `(H x W x 3)`, resized so that the short edge/long edge is no more than `800/1333`.
 
@@ -98,7 +100,7 @@ from megengine import jit
 def infer():
     predictions = model(model.inputs)
     return predictions
- 
+ 
 print(infer())
 ```
 
 ### Model Description
 
 Currently we provide `retinanet` models pretrained on the `COCO2017` training set. The mAP on the `COCO2017` val set can be found in the following table.
 
-| model | mAP
@5-95 | -| --- | --- | -| retinanet-res50-1x-800size | 36.0 | +| model | mAP
@5-95 | +| --- | :---: | +| retinanet-res50-coco1x-800size | 36.4 | +| retinanet-res50-coco1x-800size-syncbn | 37.1 | ### References diff --git a/models/megengine_vision_shufflenet_v2.md b/models/megengine_vision_shufflenet_v2.md index aea733b..69ba3a3 100644 --- a/models/megengine_vision_shufflenet_v2.md +++ b/models/megengine_vision_shufflenet_v2.md @@ -6,7 +6,7 @@ summary: zh_CN: ShuffleNet V2(ImageNet 预训练权重) author: MegEngine Team tags: [vision, classification] -github-link: https://github.com/megengine/models +github-link: https://github.com/MegEngine/Models/tree/master/official/vision/classification --- ```python @@ -17,7 +17,7 @@ model = megengine.hub.load('megengine/models', 'shufflenet_v2_x1_0', pretrained= # model = megengine.hub.load('megengine/models', 'shufflenet_v2_x2_0', pretrained=True) model.eval() ``` - + 所有预训练模型希望数据被正确预处理。 模型要求输入BGR的图片, 短边缩放到`256`, 并中心裁剪至`(224 x 224)`的大小,最后做归一化处理 (均值为: `[103.530, 116.280, 123.675]`, 标准差为: `[57.375, 57.120, 58.395]`)。 @@ -56,16 +56,16 @@ print(probs) | 模型 | top1 acc | top5 acc | | --- | --- | --- | -| ShuffleNetV2 x0.5 | 60.696 | 82.190 | -| ShuffleNetV2 x1.0 | 69.372 | 88.764 | -| ShuffleNetV2 x1.5 | 72.806 | 90.792 | -| ShuffleNetV2 x2.0 | 75.074 | 92.278 | +| ShuffleNetV2 x0.5 | 60.696 | 82.190 | +| ShuffleNetV2 x1.0 | 69.372 | 88.764 | +| ShuffleNetV2 x1.5 | 72.806 | 90.792 | +| ShuffleNetV2 x2.0 | 75.074 | 92.278 | ### 参考文献 - [ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design](https://arxiv.org/abs/1807.11164), Ma, Ningning, et al. "Shufflenet v2: Practical guidelines for efficient cnn architecture design." Proceedings of the European Conference on Computer Vision (ECCV). 2018. - + All pre-trained models expect input images normalized in the same way, i.e. input images must be 3-channel BGR images of shape `(H x W x 3)`, and reszied shortedge to `256`, center-cropped to `(224 x 224)`. @@ -105,11 +105,11 @@ Currently we provide several pretrained models(see the table below), Their 1-cro | model | top1 acc | top5 acc | | --- | --- | --- | -| ShuffleNetV2 x0.5 | 60.696 | 82.190 | -| ShuffleNetV2 x1.0 | 69.372 | 88.764 | -| ShuffleNetV2 x1.5 | 72.806 | 90.792 | -| ShuffleNetV2 x2.0 | 75.074 | 92.278 | +| ShuffleNetV2 x0.5 | 60.696 | 82.190 | +| ShuffleNetV2 x1.0 | 69.372 | 88.764 | +| ShuffleNetV2 x1.5 | 72.806 | 90.792 | +| ShuffleNetV2 x2.0 | 75.074 | 92.278 | ### References - - [ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design](https://arxiv.org/abs/1807.11164), Ma, Ningning, et al. "Shufflenet v2: Practical guidelines for efficient cnn architecture design." Proceedings of the European Conference on Computer Vision (ECCV). 2018. \ No newline at end of file + - [ShuffleNet V2: Practical Guidelines for Efficient CNN Architecture Design](https://arxiv.org/abs/1807.11164), Ma, Ningning, et al. "Shufflenet v2: Practical guidelines for efficient cnn architecture design." Proceedings of the European Conference on Computer Vision (ECCV). 2018. 
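The classification cards above (ResNet and ShuffleNet V2) describe the same input pipeline: a BGR image, short edge resized to 256, a 224 x 224 center crop, and normalization with the listed mean/std before `softmax` over the logits. A minimal sketch of that pipeline follows; it assumes a `model` already loaded via `megengine.hub.load` as shown earlier, a local `cat.jpg` as a placeholder image, and that the `megengine.data.transform` names below match the installed MegEngine version.

```python
# Minimal preprocessing sketch for the classification models above.
# Assumptions: `model` was loaded via megengine.hub.load(...) and "cat.jpg" exists locally.
import cv2
import numpy as np
import megengine
import megengine.data.transform as T
import megengine.functional as F

image = cv2.imread("cat.jpg")  # OpenCV reads images in BGR order, as the models expect

transform = T.Compose([
    T.Resize(256),                 # resize as described above (short edge to 256)
    T.CenterCrop(224),             # central 224 x 224 crop
    T.Normalize(mean=[103.530, 116.280, 123.675],
                std=[57.375, 57.120, 58.395]),  # BGR mean/std quoted in the docs above
    T.ToMode("CHW"),               # HWC -> CHW
])

processed = transform.apply(image)[np.newaxis, :].astype(np.float32)  # add batch dimension
model.eval()
logits = model(megengine.tensor(processed))
probs = F.softmax(logits)
print(probs)
```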
diff --git a/models/megengine_vision_simplebaseline.md b/models/megengine_vision_simplebaseline.md index 24baac1..c4f2e3c 100644 --- a/models/megengine_vision_simplebaseline.md +++ b/models/megengine_vision_simplebaseline.md @@ -6,7 +6,7 @@ summary: zh_CN: SimpleBaeline(COCO 预训练权重) author: MegEngine Team tags: [vision, keypoints] -github-link: https://github.com/megengine/models +github-link: https://github.com/MegEngine/Models/tree/master/official/vision/keypoints --- ```python3 @@ -17,7 +17,7 @@ model = megengine.hub.load('megengine/models', 'simplebaseline_res50', pretraine # model = megengine.hub.load('megengine/models', 'simplebaseline_res152', pretrained=True) model.eval() ``` - + SimpleBaseline是单人关节点检测模型,在多人场景下需要配合人体检测器使用。详细的多人检测代码示例可以参考[inference.py](https://github.com/MegEngine/Models/blob/master/official/vision/keypoints/inference.py)。 针对单张图片,这里提供使用retinanet做人体检测,然后用SimpleBaseline检测关节点的示例: @@ -79,7 +79,7 @@ cv2.imwrite("vis_skeleton.jpg", canvas) ### 参考文献 - [Simple Baselines for Human Pose Estimation and Tracking](https://arxiv.org/pdf/1804.06208.pdf), Bin Xiao, Haiping Wu, and Yichen Wei - + SimpleBaseline is classical network for single person pose estimation. It can also be applied to multi-person cases when combined with a human detector. The details of this pipline can be referred to [inference.py](https://github.com/MegEngine/Models/blob/master/official/vision/keypoints/inference.py). For single image, here is a sample execution when SimpleBaseline is combined with retinanet @@ -141,4 +141,4 @@ With the AP human detectoin results being 56.4 on COCO val2017 dataset, the perf | SimpleBaseline |Res152|256x192| 0.724 | 0.888 | 0.794 | 0.688 | 0.795 | 0.795 | 0.934 | 0.856 | 0.746 | 0.863 | ### References -- [Simple Baselines for Human Pose Estimation and Tracking](https://arxiv.org/pdf/1804.06208.pdf), Bin Xiao, Haiping Wu, and Yichen Wei \ No newline at end of file +- [Simple Baselines for Human Pose Estimation and Tracking](https://arxiv.org/pdf/1804.06208.pdf), Bin Xiao, Haiping Wu, and Yichen Wei -- GitLab