Commit 1f3a3218 authored by huanghaian

add gather_models.py and format code

Parent 28222305
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import glob
import os
import os.path as osp
import shutil
import subprocess
import time
from collections import OrderedDict

import torch
import yaml
from mmengine.config import Config
from mmengine.fileio import dump
from mmengine.utils import digit_version, mkdir_or_exist, scandir


def ordered_yaml_dump(data, stream=None, Dumper=yaml.SafeDumper, **kwds):

    class OrderedDumper(Dumper):
        pass

    def _dict_representer(dumper, data):
        return dumper.represent_mapping(
            yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, data.items())

    OrderedDumper.add_representer(OrderedDict, _dict_representer)
    return yaml.dump(data, stream, OrderedDumper, **kwds)


def process_checkpoint(in_file, out_file):
    checkpoint = torch.load(in_file, map_location='cpu')
    # remove optimizer for smaller file size
    if 'optimizer' in checkpoint:
        del checkpoint['optimizer']
    if 'message_hub' in checkpoint:
        del checkpoint['message_hub']
    if 'ema_state_dict' in checkpoint:
        del checkpoint['ema_state_dict']

    # remove buffers that are not needed at inference time
    for key in list(checkpoint['state_dict']):
        if key.startswith('data_preprocessor'):
            checkpoint['state_dict'].pop(key)
        elif 'priors_base_sizes' in key:
            checkpoint['state_dict'].pop(key)
        elif 'grid_offset' in key:
            checkpoint['state_dict'].pop(key)
        elif 'prior_inds' in key:
            checkpoint['state_dict'].pop(key)

    # if it is necessary to remove some sensitive data in checkpoint['meta'],
    # add the code here.
    # use digit_version instead of a raw string comparison, which would
    # misorder versions such as '1.10' and '1.6'
    if digit_version(torch.__version__) >= digit_version('1.6'):
        torch.save(checkpoint, out_file, _use_new_zipfile_serialization=False)
    else:
        torch.save(checkpoint, out_file)
    sha = subprocess.check_output(['sha256sum', out_file]).decode()
    # splitext avoids the rstrip('.pth') pitfall of stripping arbitrary
    # trailing 'p'/'t'/'h' characters from the file name
    final_file = osp.splitext(out_file)[0] + f'-{sha[:8]}.pth'
    # move synchronously so later steps always see the renamed file
    shutil.move(out_file, final_file)
    return final_file


def is_by_epoch(config):
    cfg = Config.fromfile('./configs/' + config)
    return cfg.train_cfg.type == 'EpochBasedTrainLoop'


def get_final_epoch_or_iter(config):
    cfg = Config.fromfile('./configs/' + config)
    if cfg.train_cfg.type == 'EpochBasedTrainLoop':
        return cfg.train_cfg.max_epochs
    else:
        return cfg.train_cfg.max_iters


def get_best_epoch_or_iter(exp_dir):
    best_epoch_iter_full_path = list(
        sorted(glob.glob(osp.join(exp_dir, 'best_*.pth'))))[-1]
    best_epoch_or_iter_model_path = best_epoch_iter_full_path.split('/')[-1]
    best_epoch_or_iter = best_epoch_or_iter_model_path. \
        split('_')[-1].split('.')[0]
    return best_epoch_or_iter_model_path, int(best_epoch_or_iter)


def get_real_epoch_or_iter(config):
    cfg = Config.fromfile('./configs/' + config)
    if cfg.train_cfg.type == 'EpochBasedTrainLoop':
        epoch = cfg.train_cfg.max_epochs
        return epoch
    else:
        # MMEngine-style configs keep max_iters in train_cfg, not in runner
        return cfg.train_cfg.max_iters


def get_final_results(log_json_path,
                      epoch_or_iter,
                      results_lut='coco/bbox_mAP',
                      by_epoch=True):
    result_dict = dict()
    with open(log_json_path) as f:
        # parse the last evaluation record in the log
        r = f.readlines()[-1]
        last_metric = r.split(',')[0].split(': ')[-1].strip()
    result_dict[results_lut] = last_metric
    return result_dict


def get_dataset_name(config):
    # If there are more datasets, add them here.
    name_map = dict(
        CityscapesDataset='Cityscapes',
        CocoDataset='COCO',
        YOLOv5CocoDataset='COCO',
        CocoPanopticDataset='COCO',
        DeepFashionDataset='Deep Fashion',
        LVISV05Dataset='LVIS v0.5',
        LVISV1Dataset='LVIS v1',
        VOCDataset='Pascal VOC',
        WIDERFaceDataset='WIDER Face',
        OpenImagesDataset='OpenImagesDataset',
        OpenImagesChallengeDataset='OpenImagesChallengeDataset')
    cfg = Config.fromfile('./configs/' + config)
    return name_map[cfg.dataset_type]


def find_last_dir(model_dir):
    dst_times = []
    for time_stamp in os.scandir(model_dir):
        if osp.isdir(time_stamp):
            dst_time = time.mktime(
                time.strptime(time_stamp.name, '%Y%m%d_%H%M%S'))
            dst_times.append([dst_time, time_stamp.name])
    return max(dst_times, key=lambda x: x[0])[1]


def convert_model_info_to_pwc(model_infos):
    pwc_files = {}
    for model in model_infos:
        cfg_folder_name = osp.split(model['config'])[-2]
        pwc_model_info = OrderedDict()
        pwc_model_info['Name'] = osp.split(model['config'])[-1].split('.')[0]
        pwc_model_info['In Collection'] = 'Please fill in Collection name'
        pwc_model_info['Config'] = osp.join('configs', model['config'])

        # get metadata
        meta_data = OrderedDict()
        if 'epochs' in model:
            meta_data['Epochs'] = get_real_epoch_or_iter(model['config'])
        else:
            meta_data['Iterations'] = get_real_epoch_or_iter(model['config'])
        pwc_model_info['Metadata'] = meta_data

        # get dataset name
        dataset_name = get_dataset_name(model['config'])

        # get results
        results = []
        # if there are more metrics, add them here.
        if 'bbox_mAP' in model['results']:
            metric = round(model['results']['bbox_mAP'] * 100, 1)
            results.append(
                OrderedDict(
                    Task='Object Detection',
                    Dataset=dataset_name,
                    Metrics={'box AP': metric}))
        if 'segm_mAP' in model['results']:
            metric = round(model['results']['segm_mAP'] * 100, 1)
            results.append(
                OrderedDict(
                    Task='Instance Segmentation',
                    Dataset=dataset_name,
                    Metrics={'mask AP': metric}))
        if 'PQ' in model['results']:
            metric = round(model['results']['PQ'], 1)
            results.append(
                OrderedDict(
                    Task='Panoptic Segmentation',
                    Dataset=dataset_name,
                    Metrics={'PQ': metric}))
        pwc_model_info['Results'] = results

        link_string = 'https://download.openmmlab.com/mmdetection/v2.0/'
        # splitext avoids rstrip('.py') mangling names ending in 'p' or 'y'
        link_string += '{}/{}'.format(
            osp.splitext(model['config'])[0],
            osp.split(model['model_path'])[-1])
        pwc_model_info['Weights'] = link_string
        if cfg_folder_name in pwc_files:
            pwc_files[cfg_folder_name].append(pwc_model_info)
        else:
            pwc_files[cfg_folder_name] = [pwc_model_info]
    return pwc_files


def parse_args():
    parser = argparse.ArgumentParser(description='Gather benchmarked models')
    parser.add_argument(
        'root',
        type=str,
        help='root path of benchmarked models to be gathered')
    parser.add_argument(
        'out', type=str, help='output path of gathered models to be stored')
    parser.add_argument(
        '--best',
        action='store_true',
        help='whether to gather the best model.')

    args = parser.parse_args()
    return args


# TODO: Refine
def main():
    args = parse_args()
    models_root = args.root
    models_out = args.out
    mkdir_or_exist(models_out)

    # find all models in the root directory to be gathered
    raw_configs = list(scandir('./configs', '.py', recursive=True))

    # filter configs that are not trained in the experiments dir
    used_configs = []
    for raw_config in raw_configs:
        if osp.exists(osp.join(models_root, raw_config)):
            used_configs.append(raw_config)
    print(f'Find {len(used_configs)} models to be gathered')

    # find the final checkpoint and log file for each trained config
    # and parse the best performance
    model_infos = []
    for used_config in used_configs:
        exp_dir = osp.join(models_root, used_config)
        by_epoch = is_by_epoch(used_config)
        # check whether the experiment is finished
        if args.best:
            final_model, final_epoch_or_iter = get_best_epoch_or_iter(exp_dir)
        else:
            final_epoch_or_iter = get_final_epoch_or_iter(used_config)
            final_model = '{}_{}.pth'.format('epoch' if by_epoch else 'iter',
                                             final_epoch_or_iter)

        model_path = osp.join(exp_dir, final_model)
        # skip if the model is still training
        if not osp.exists(model_path):
            continue

        # get the latest logs
        latest_exp_name = find_last_dir(exp_dir)
        latest_exp_json = osp.join(exp_dir, latest_exp_name, 'vis_data',
                                   latest_exp_name + '.json')

        model_performance = get_final_results(
            latest_exp_json, final_epoch_or_iter, by_epoch=by_epoch)
        if model_performance is None:
            continue

        model_info = dict(
            config=used_config,
            results=model_performance,
            final_model=final_model,
            latest_exp_json=latest_exp_json,
            latest_exp_name=latest_exp_name)
        model_info['epochs' if by_epoch else 'iterations'] = \
            final_epoch_or_iter
        model_infos.append(model_info)

    # publish the model for each checkpoint
    publish_model_infos = []
    for model in model_infos:
        model_publish_dir = osp.join(models_out,
                                     osp.splitext(model['config'])[0])
        mkdir_or_exist(model_publish_dir)

        model_name = osp.split(model['config'])[-1].split('.')[0]
        model_name += '_' + model['latest_exp_name']
        publish_model_path = osp.join(model_publish_dir, model_name)
        trained_model_path = osp.join(models_root, model['config'],
                                      model['final_model'])

        # convert model
        final_model_path = process_checkpoint(trained_model_path,
                                              publish_model_path)

        # copy log
        shutil.copy(model['latest_exp_json'],
                    osp.join(model_publish_dir, f'{model_name}.log.json'))

        # copy config to guarantee reproducibility
        config_path = model['config']
        config_path = osp.join(
            'configs',
            config_path) if 'configs' not in config_path else config_path
        target_config_path = osp.split(config_path)[-1]
        shutil.copy(config_path,
                    osp.join(model_publish_dir, target_config_path))

        model['model_path'] = final_model_path
        publish_model_infos.append(model)

    models = dict(models=publish_model_infos)
    print(f'Totally gathered {len(publish_model_infos)} models')
    dump(models, osp.join(models_out, 'model_info.json'))

    pwc_files = convert_model_info_to_pwc(publish_model_infos)
    for name in pwc_files:
        # pass the encoding via open() so yaml emits str, not bytes,
        # into the text-mode stream
        with open(
                osp.join(models_out, name + '_metafile.yml'),
                'w',
                encoding='utf-8') as f:
            ordered_yaml_dump(pwc_files[name], f)


if __name__ == '__main__':
    main()
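For reference, assuming the script above is saved as `tools/misc/gather_models.py` (the path is an assumption), a typical invocation would be `python tools/misc/gather_models.py work_dirs/ gathered_models/ --best`, where `work_dirs/` is the root of the benchmarked experiments and `gathered_models/` receives the published checkpoints, logs, configs and generated metafiles.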
# Mixed image data augmentation update
Mixed image data augmentations, such as Mosaic and MixUp, need to fuse the annotation information of multiple images at runtime. In the standard OpenMMLab data augmentation pipeline, a transform generally has no access to other indexes of the dataset. To implement this function, YOLOX proposed the concept of a dataset wrapper, which is reproduced in MMDetection as [MultiImageMixDataset](https://github.com/open-mmlab/mmdetection/blob/master/mmdet/datasets/dataset_wrappers.py#L338).
The `MultiImageMixDataset` wrapper holds the mixed-image transforms such as `Mosaic` and `RandomAffine`, while the wrapped `CocoDataset` keeps the `pipeline` that loads images and annotations. Combining the two in this way enables mixed data augmentation. The configuration method is as follows:
```python
train_pipeline = [
dict(type='Mosaic', img_scale=img_scale, pad_val=114.0),
dict(
type='RandomAffine',
scaling_ratio_range=(0.1, 2),
border=(-img_scale[0] // 2, -img_scale[1] // 2)),
dict(
type='MixUp',
img_scale=img_scale,
ratio_range=(0.8, 1.6),
pad_val=114.0),
...
]
train_dataset = dict(
# use MultiImageMixDataset wrapper to support mosaic and mixup
type='MultiImageMixDataset',
dataset=dict(
type='CocoDataset',
pipeline=[
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True)
]),
pipeline=train_pipeline)
```
However, this method causes a problem: users who are not familiar with MMDetection may forget to pair data augmentation methods like Mosaic with `MultiImageMixDataset`, which greatly increases complexity and can be hard to understand.
To solve this problem, MMYOLO simplifies the design by passing the `dataset` object directly to the `pipeline`, so that mixed-image augmentations such as `Mosaic` can be implemented and used just like random flip, with no dataset wrapper anymore. The new configuration method is as follows:
```python
pre_transform = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True)
]
train_pipeline = [
*pre_transform,
dict(
type='Mosaic',
img_scale=img_scale,
pad_val=114.0,
pre_transform=pre_transform),
dict(
type='mmdet.RandomAffine',
scaling_ratio_range=(0.1, 2),
border=(-img_scale[0] // 2, -img_scale[1] // 2)),
dict(
type='YOLOXMixUp',
img_scale=img_scale,
ratio_range=(0.8, 1.6),
pad_val=114.0,
pre_transform=pre_transform),
...
]
```
A more complex YOLOv5-m configuration including MixUp is shown below:
```python
mosaic_affine_pipeline = [
dict(
type='Mosaic',
img_scale=img_scale,
pad_val=114.0,
pre_transform=pre_transform),
dict(
type='YOLOv5RandomAffine',
max_rotate_degree=0.0,
max_shear_degree=0.0,
scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
border=(-img_scale[0] // 2, -img_scale[1] // 2),
border_val=(114, 114, 114))
]
# enable mixup
train_pipeline = [
*pre_transform, *mosaic_affine_pipeline,
dict(
type='YOLOv5MixUp',
prob=0.1,
pre_transform=[*pre_transform, *mosaic_affine_pipeline]),
dict(
type='mmdet.Albu',
transforms=albu_train_transforms,
bbox_params=dict(
type='BboxParams',
format='pascal_voc',
label_fields=['gt_bboxes_labels', 'gt_ignore_flags']),
keymap={
'img': 'image',
'gt_bboxes': 'bboxes'
}),
dict(type='YOLOv5HSVRandomAug'),
dict(type='mmdet.RandomFlip', prob=0.5),
dict(
type='mmdet.PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip',
'flip_direction'))
]
```
This is very easy to implement: just pass the dataset object to the pipeline.
```python
from typing import Any

# Method of the MMYOLO dataset class (excerpt).
def prepare_data(self, idx) -> Any:
    """Pass the dataset to the pipeline during training to support mixed
    data augmentation, such as Mosaic and MixUp."""
    if self.test_mode is False:
        data_info = self.get_data_info(idx)
        data_info['dataset'] = self
        return self.pipeline(data_info)
    else:
        return super().prepare_data(idx)
```
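To make the mechanism concrete, below is a minimal sketch (not the actual MMYOLO implementation; the class and helper names are illustrative) of how a mixed-image transform can use the injected `dataset` object to fetch and fuse extra samples:

```python
import random


class ToyMixTransform:
    """Minimal sketch of a mixed-image transform, illustrative only."""

    def __init__(self, pre_transform=None):
        # `pre_transform` is assumed to be a callable (e.g. a Compose of
        # LoadImageFromFile + LoadAnnotations) that loads the extra sample
        self.pre_transform = pre_transform

    def __call__(self, results: dict) -> dict:
        dataset = results['dataset']  # injected by `prepare_data` above
        # draw one extra sample; the real Mosaic draws three
        extra = dataset.get_data_info(random.randint(0, len(dataset) - 1))
        if self.pre_transform is not None:
            extra = self.pre_transform(extra)
        # ... fuse the images and annotations of `results` and `extra` here ...
        return results
```

Because the dataset travels with the data dict, the transform needs no reference back to a wrapper, which is exactly what removes `MultiImageMixDataset` from the configuration.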
# Model design related instructions
## YOLO series model basic class
The structural graph below is provided by @RangeKing. Thank you, RangeKing!
![base class](https://user-images.githubusercontent.com/33799979/190382319-6b4e1fcb-cc3f-4fbe-9d6b-3c9c4e57472c.png)
Most YOLO series algorithms adopt a unified building structure, typically Darknet + PAFPN. To help users quickly understand the YOLO series architecture, we deliberately designed the BaseBackbone + BaseYOLONeck structure shown in the graph above.
The benefits of the abstract BaseBackbone include:
1. Subclasses do not need to care about the forward process; they only build the model, following the builder pattern.
2. It can be configured with custom plug-ins, so users can easily insert modules such as attention blocks.
3. All subclasses automatically support freezing certain stages and freezing BN layers.
BaseYOLONeck shares the same benefits as BaseBackbone.
### BaseBackbone
As shown in the graph above, for the P5 setting, BaseBackbone consists of 1 stem layer and 4 stage layers, similar to the basic structure of ResNet. Different backbone algorithms inherit BaseBackbone, and users can build each layer of the network from custom basic modules by overriding the `build_xx` methods.
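To make this concrete, a subclass could look roughly like the sketch below (a minimal sketch: the import path and the exact `build_xx` method names and signatures are assumptions, and the layers are toy stand-ins):

```python
import torch.nn as nn

from mmyolo.models import BaseBackbone  # import path is an assumption


class ToyBackbone(BaseBackbone):
    """Only builds layers; the parent class owns the forward pass,
    stage freezing and BN freezing."""

    def build_stem_layer(self) -> nn.Module:
        # stem: 3-channel input image -> first feature map
        return nn.Conv2d(3, 32, 3, stride=2, padding=1)

    def build_stage_layer(self, stage_idx: int, setting: list) -> list:
        # one downsampling conv per stage; real backbones return richer blocks
        in_c, out_c = setting[0], setting[1]
        return [nn.Conv2d(in_c, out_c, 3, stride=2, padding=1), nn.SiLU()]
```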
### BaseYOLONeck
We reproduce the YOLO series neck components with the same approach as BaseBackbone: the neck is divided into a Reduce layer, an UpSample layer, a TopDown layer, a DownSample layer, a BottomUp layer and an output convolution layer. Each layer can customize its internal construction by inheriting and overriding the corresponding `build_xx` method.
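A similarly hedged sketch for a custom neck, assuming one `build_xx` hook per layer named above (the import path, method names and signatures are illustrative):

```python
import torch.nn as nn

from mmyolo.models import BaseYOLONeck  # import path is an assumption


class ToyNeck(BaseYOLONeck):
    """Each build method returns one sub-module; the parent class wires
    them into the top-down / bottom-up forward pass."""

    def build_reduce_layer(self, idx: int) -> nn.Module:
        return nn.Identity()

    def build_upsample_layer(self, idx: int) -> nn.Module:
        return nn.Upsample(scale_factor=2, mode='nearest')

    def build_top_down_layer(self, idx: int) -> nn.Module:
        return nn.Identity()

    def build_downsample_layer(self, idx: int) -> nn.Module:
        return nn.MaxPool2d(kernel_size=2, stride=2)

    def build_bottom_up_layer(self, idx: int) -> nn.Module:
        return nn.Identity()

    def build_out_layer(self, idx: int) -> nn.Module:
        return nn.Identity()
```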
### BaseDenseHead
The YOLO series uses BaseDenseHead, designed in MMDetection, as the base class of the head structure. The forward function of the [HeadModule](#headmodule) base class replaces the original forward method. Different algorithms only need to inherit it and override the `loss_by_feat` and `predict_by_feat` methods.
## HeadModule
<div align=center>
<img src="https://user-images.githubusercontent.com/33799979/190407754-c725fe85-a71b-4e45-912b-34513d1ff128.png" width=800>
</div>
The method implementation in [MMDetection](https://github.com/open-mmlab/mmdetection) is shown in the graph above; the solid line is the implementation in MMYOLO, which has the following advantages over the original implementation:
1. In MMDetection, `bbox_head` is split into three large components: assigner + box coder + sampler. Because the interfaces between these three components must stay generic, the model has to encapsulate extra internal data for them to pass around. After unifying them, users no longer need to separate the components. The benefits of not forcibly dividing them: internal data no longer needs extra encapsulation, the code logic is simplified, and both the difficulty of use and the difficulty of algorithm implementation are reduced.
2. MMYOLO is faster: when customizing an algorithm, users can deeply optimize part of the code without depending on the original framework.
In general, in MMYOLO only the decoupled model + `loss_by_feat` parts need to be implemented, and users can combine any model with any `loss_by_feat` computation by modifying the configuration, for example applying the YOLOX `loss_by_feat` to the YOLOv5 model.
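For instance, such a pairing might be expressed directly in the config, roughly as below (a hypothetical sketch: combining `YOLOXHead` with `YOLOv5HeadModule` is for illustration only, not an officially benchmarked setup):

```python
bbox_head = dict(
    type='YOLOXHead',  # supplies the YOLOX-style loss_by_feat logic
    head_module=dict(
        type='YOLOv5HeadModule',  # supplies the YOLOv5 network layers
        num_classes=80,
        in_channels=[256, 512, 1024]))
```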
Taking the YOLOX configuration in MMDet as an example, the Head module configuration is written as follows:
```python
bbox_head=dict(
type='YOLOXHead',
num_classes=80,
in_channels=128,
feat_channels=128,
stacked_convs=2,
strides=(8, 16, 32),
use_depthwise=False,
norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
act_cfg=dict(type='Swish'),
...
loss_obj=dict(
type='CrossEntropyLoss',
use_sigmoid=True,
reduction='sum',
loss_weight=1.0),
loss_l1=dict(type='L1Loss', reduction='sum', loss_weight=1.0)),
train_cfg=dict(assigner=dict(type='SimOTAAssigner', center_radius=2.5)),
```
After extracting `head_module` in MMYOLO, the new configuration is written as follows:
```python
bbox_head=dict(
type='YOLOXHead',
head_module=dict(
type='YOLOXHeadModule',
num_classes=80,
in_channels=256,
feat_channels=256,
widen_factor=widen_factor,
stacked_convs=2,
featmap_strides=(8, 16, 32),
use_depthwise=False,
norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
act_cfg=dict(type='SiLU', inplace=True),
),
...
loss_obj=dict(
type='mmdet.CrossEntropyLoss',
use_sigmoid=True,
reduction='sum',
loss_weight=1.0),
loss_bbox_aux=dict(type='mmdet.L1Loss', reduction='sum', loss_weight=1.0)),
train_cfg=dict(
assigner=dict(
type='mmdet.SimOTAAssigner',
center_radius=2.5,
iou_calculator=dict(type='mmdet.BboxOverlaps2D'))),
```
# Overview
This chapter introduces you to the overall framework of MMYOLO and provides links to detailed tutorials.
## What is MMYOLO
![pic](https://user-images.githubusercontent.com/12907710/137271636-56ba1cd2-b110-4812-8221-b4c120320aa9.png)
MMYOLO is a YOLO series algorithm toolbox. It currently implements only the object detection task; support for tasks such as instance segmentation, panoptic segmentation and keypoint detection will follow. It includes a rich set of object detection algorithms and related components and modules, and the following is its overall framework.
The MMYOLO file structure is identical to MMDetection's. To fully reuse the MMDetection code, MMYOLO includes only custom content, organized into 3 main parts: datasets, models, and engine.
- **datasets** supports a variety of datasets for object detection.
  - **transforms** contains various data augmentation transforms.
- **models** is the most important part of the detector and contains its different components.
  - **detectors** defines all detection model classes.
  - **data_preprocessors** preprocesses the input data of the model.
  - **backbones** contains various backbone networks.
  - **necks** contains various neck components.
  - **dense_heads** contains various dense heads for different tasks.
  - **losses** contains various loss functions.
  - **task_modules** provides components for detection tasks, such as assigners, samplers, box coders and prior generators.
  - **layers** provides some basic network layers.
- **engine** is a component for runtime.
  - **optimizers** provides optimizers and optimizer wrappers.
  - **hooks** provides various hooks for the runner.
## How to use this tutorial
The detailed instructions for using MMYOLO are as follows:
1. Look up the installation instructions in [start your first step](get_started.md).
2. The basic usage of MMYOLO can be found here:
- [Training and testing](https://mmyolo.readthedocs.io/en/latest/user_guides/index.html#train-test)
- [From getting started to deployment tutorial](https://mmyolo.readthedocs.io/en/latest/user_guides/index.html#from-getting-started-to-deployment-tutorial)
- [Useful Tools](https://mmyolo.readthedocs.io/en/latest/user_guides/index.html#useful-tools)
3. Tutorials on YOLO series algorithm implementations and full analyses:
- [Essential Basics](https://mmyolo.readthedocs.io/en/latest/algorithm_descriptions/index.html#essential-basics)
- [Full explanation of the model and implementation](https://mmyolo.readthedocs.io/en/latest/algorithm_descriptions/index.html#algorithm-principles-and-implementation)
4. Refer to the following tutorials for an in-depth look:
- [How to](https://mmyolo.readthedocs.io/en/latest/advanced_guides/index.html#how-to)
# Learn about Configs with YOLOv5
MMYOLO and other OpenMMLab repositories use [MMEngine's config system](https://mmengine.readthedocs.io/en/latest/tutorials/config.html). It has a modular and inheritance design, which is convenient to conduct various experiments.
@@ -46,7 +46,7 @@ model = dict(
```python
        norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),  # The config of normalization layers.
        act_cfg=dict(type='SiLU', inplace=True)),  # The config of activation function
    neck=dict(
        type='YOLOv5PAFPN',  # The neck of the detector is YOLOv5PAFPN. We also support 'YOLOv6RepPAFPN', 'YOLOXPAFPN'.
        deepen_factor=deepen_factor,  # The scaling factor that controls the depth of the network structure
        widen_factor=widen_factor,  # The scaling factor that controls the width of the network structure
        in_channels=[256, 512, 1024],  # The input channels, consistent with the output channels of the backbone
```
@@ -71,7 +71,7 @@ model = dict(
```python
    test_cfg=dict(
        multi_label=True,  # The config of multi-label for multi-class prediction. The default setting is True.
        nms_pre=30000,  # The number of boxes before NMS
        score_thr=0.001,  # Threshold to filter out boxes.
        nms=dict(type='nms',  # Type of NMS
                 iou_threshold=0.65),  # NMS threshold
        max_per_img=300))  # Max number of detections in each image
```
@@ -151,7 +151,7 @@ train_dataloader = dict(  # Train dataloader config
```python
        pipeline=train_pipeline))
```
In the testing phase of YOLOv5, the letterbox resize method resizes all the test images to the same scale, which preserves the aspect ratio of all testing images. Therefore, the validation and testing phases share the same data pipeline.
```python
test_pipeline = [ # Validation/ Testing dataloader config
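    # Illustrative completion (assumed, based on the YOLOv5 configs): the
    # letterbox test pipeline keeps the aspect ratio and pads to img_scale.
    dict(type='LoadImageFromFile'),
    dict(type='YOLOv5KeepRatioResize', scale=img_scale),
    dict(
        type='LetterResize',
        scale=img_scale,
        allow_scale_up=False,
        pad_val=dict(img=114)),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='mmdet.PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor', 'pad_param'))
]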
```
@@ -187,7 +187,7 @@ val_dataloader = dict(
```python
        test_mode=True,  # Turn on the test mode of the dataset to avoid filtering annotations or images
        data_prefix=dict(img='val2017/'),  # Prefix of image path
        ann_file='annotations/instances_val2017.json',  # Path of annotation file
        pipeline=test_pipeline,
        batch_shapes_cfg=dict(  # Config of batch shapes
            type='BatchShapePolicy',  # Policy that makes paddings with least pixels during batch inference process, which does not require the image scales of all batches to be the same throughout validation.
            batch_size=val_batch_size_per_gpu,  # Batch size for the batch shapes strategy, equals the validation batch size on a single GPU
```
@@ -253,7 +253,7 @@
```python
val_cfg = dict(type='ValLoop')  # The validation loop type
test_cfg = dict(type='TestLoop') # The testing loop type
```
MMEngine also supports dynamic intervals for evaluation. For example, you can run validation every 10 epochs for the first 280 epochs and then every epoch for the final 20 epochs. The configuration is as follows.
```python
max_epochs = 300 # Maximum training epochs: 300 epochs
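# Illustrative completion (assumed names and values): validate every 10
# epochs until 20 epochs before the end, then validate every epoch.
train_cfg = dict(
    type='EpochBasedTrainLoop',
    max_epochs=max_epochs,
    val_interval=10,
    dynamic_intervals=[(max_epochs - 20, 1)])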
```
@@ -352,7 +352,7 @@ resume = False  # Whether to resume from the checkpoint defined in `load_from`.
## Config file inheritance
`config/_base_` contains the default runtime settings. The configs composed of components from `_base_` are called _primitive_.
For all configs under the same folder, it is recommended to have only **one** _primitive_ config. All other configs should inherit from the _primitive_ config. In this way, the maximum inheritance level is 3.
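For example, a child config typically inherits a _primitive_ config and overrides only what differs, roughly as in the following sketch (the file name and the overridden field are illustrative assumptions):

```python
_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py'

# Override only the fields that differ from the primitive config;
# everything else is inherited unchanged.
model = dict(bbox_head=dict(head_module=dict(num_classes=20)))
```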
@@ -421,7 +421,7 @@ model = dict(
### Use intermediate variables in configs
Some intermediate variables are used in the config files, like `train_pipeline`/`test_pipeline` in datasets. It's worth noting that when modifying intermediate variables in the child configs, users need to pass the intermediate variables into the corresponding fields again.
For example, we would like to change the `image_scale` during training and add `YOLOv5MixUp` data augmentation; `img_scale`/`train_pipeline`/`test_pipeline` are the intermediate variables we would like to modify.
**Notice**: `YOLOv5MixUp` requires adding `pre_transform` and `mosaic_affine_pipeline` to its own `pre_transform` field. Please refer to [The description of YOLOv5 algorithm and its implementation](../algorithm_descriptions/yolov5_description.md) for the detailed process and diagrams.
@@ -430,7 +430,7 @@
```python
_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py'

img_scale = (1280, 1280)  # image height, image width
affine_scale = 0.9

mosaic_affine_pipeline = [
    dict(
```
@@ -511,7 +511,6 @@ model = dict(
If users want to reuse variables in the base file, they can get a copy of the corresponding variable by using `{{_base_.xxx}}`. The latest version of MMEngine also supports reusing variables without the `{{}}` syntax. E.g.:
```python
_base_ = '../_base_/default_runtime.py'
```
@@ -550,7 +549,7 @@
The file name is divided into 8 name fields, which have 4 required parts and 4 optional ones.
- `{algorithm name}`: The name of the algorithm. It can be a detector name such as `yolov5`, `yolov6`, `yolox` etc.
- `{component names}`: Names of the components used in the algorithm such as backbone, neck, etc. For example, `yolov5_s` means its `deepen_factor` is `0.33` and its `widen_factor` is `0.5`.
- `[version_id]` (optional): Since the evolution of the YOLO series is much faster than traditional object detection algorithms, `version id` is used to distinguish the differences between sub-versions. E.g., YOLOv5-3.0 uses the `Focus` layer as the stem layer, while YOLOv5-6.0 uses the `Conv` layer as the stem layer.
- `[norm_setting]` (optional): `bn` indicates `Batch Normalization`, `syncbn` indicates `Synchronized Batch Normalization`.
- `[data preprocessor type]` (optional): `fast` incorporates `YOLOv5DetDataPreprocessor` and `yolov5_collate` to preprocess data. The training speed is faster than with the default `mmdet.DetDataPreprocessor`, while extending the overall pipeline to multi-task learning.
- `{training settings}`: Information of training settings such as batch size, augmentations, loss trick, scheduler, and epochs/iterations. For example: `8xb16-300e_coco` means using 8-gpus x 16-images-per-gpu, and train 300 epochs.
@@ -12,10 +12,10 @@ In MMYOLO, the `Visualizer` provided by MMEngine is used for feature map visualization. Its features include:
- Support for the basic drawing interfaces as well as feature map visualization.
- Support for selecting different layers of the model to obtain feature maps, with three display modes: `squeeze_mean`, `select_max` and `topk`. Users can also customize the layout of the displayed feature maps with `arrangement`.
## Feature map drawing
You can call `demo/featmap_vis_demo.py` to obtain visualization results quickly and easily. To make it easier to understand, its main parameters are described below:
- `img`: the image to visualize; a single image or a list of image paths is supported
- `config`: the config file of the algorithm
@@ -31,6 +31,7 @@ In MMYOLO, the `Visualizer` provided by MMEngine is used for feature map visualization.
- `--preview-model`: preview the model, helping users understand its feature-layer structure
- `--target-layers`: the layers from which to obtain feature maps for visualization
  - The feature map of a single layer can be output, e.g. `--target-layers backbone`, `--target-layers neck`, `--target-layers backbone.stage4`
  - When the argument is a list, the feature maps of multiple layers can be output at once, e.g. `--target-layers backbone.stage4 neck` outputs the stage4 layer of the backbone plus the three neck layers, four feature maps in total
@@ -48,6 +49,7 @@ In MMYOLO, the `Visualizer` provided by MMEngine is used for feature map visualization.
- Since the input feature maps are usually very small, they are upsampled by default for easier visualization.
## Usage examples
Take the pretrained yolov5_s model as an example:
(1) Compress the multi-channel feature maps into a single channel with `select_max` and display them. Visualizing the outputs of the backbone yields the feature maps of its three output layers:
@@ -72,7 +74,6 @@ python demo/featmap_vis_demo.py demo/dog.jpg configs/yolov5/yolov5_s-v61_syncbn_
(3) Compress the multi-channel feature maps into a single channel with `squeeze_mean` and display them. Visualizing the outputs of backbone.stage4 and backbone.stage3 yields the feature maps of the two output layers:
```shell
python demo/featmap_vis_demo.py demo/dog.jpg configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py mmyolov5s.pt --target-layers backbone.stage4 backbone.stage3 --channel-reduction squeeze_mean
```
@@ -91,7 +92,7 @@ python demo/featmap_vis_demo.py demo/dog.jpg configs/yolov5/yolov5_s-v61_syncbn_
<img src="https://user-images.githubusercontent.com/89863442/190939723-911c5e9b-dd33-42eb-be4a-ba45f03110a0.jpg" width="1200"/>
</div>
(5) Save the drawn image. After drawing, the result can either be shown in a local window or saved to disk by adding the argument `--out-file xxx.jpg`:
```shell
python demo/featmap_vis_demo.py demo/dog.jpg configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py mmyolov5s.pt --target-layers backbone --channel-reduction select_max --out-file featmap_backbone
```
@@ -165,4 +165,3 @@ python tools/test.py configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.p
Running the above inference command saves the result images automatically to the `work_dirs/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon/{timestamp}/show_results` folder. Below is one of the result images: the left half shows the ground-truth annotations, and the right half shows the model predictions.
![result_img](https://user-images.githubusercontent.com/27466624/190913272-f99709e5-c798-46b8-aede-30f4e91683a3.jpg)