Commit 1f3a3218 authored by huanghaian

add gather_models.py and format code

Parent 28222305
# Copyright (c) OpenMMLab. All rights reserved.
import argparse
import glob
import os
import os.path as osp
import shutil
import subprocess
import time
from collections import OrderedDict

import torch
import yaml
from mmengine.config import Config
from mmengine.fileio import dump
from mmengine.utils import digit_version, mkdir_or_exist, scandir


def ordered_yaml_dump(data, stream=None, Dumper=yaml.SafeDumper, **kwds):

    class OrderedDumper(Dumper):
        pass

    def _dict_representer(dumper, data):
        return dumper.represent_mapping(
            yaml.resolver.BaseResolver.DEFAULT_MAPPING_TAG, data.items())

    OrderedDumper.add_representer(OrderedDict, _dict_representer)
    return yaml.dump(data, stream, OrderedDumper, **kwds)


def process_checkpoint(in_file, out_file):
    checkpoint = torch.load(in_file, map_location='cpu')
    # remove optimizer for smaller file size
    if 'optimizer' in checkpoint:
        del checkpoint['optimizer']
    if 'message_hub' in checkpoint:
        del checkpoint['message_hub']
    if 'ema_state_dict' in checkpoint:
        del checkpoint['ema_state_dict']

    # remove buffers that are not needed at inference time
    for key in list(checkpoint['state_dict']):
        if key.startswith('data_preprocessor'):
            checkpoint['state_dict'].pop(key)
        elif 'priors_base_sizes' in key:
            checkpoint['state_dict'].pop(key)
        elif 'grid_offset' in key:
            checkpoint['state_dict'].pop(key)
        elif 'prior_inds' in key:
            checkpoint['state_dict'].pop(key)

    # if it is necessary to remove some sensitive data in checkpoint['meta'],
    # add the code here.
    # use digit_version instead of a raw string comparison, which would
    # misorder versions such as '1.10' and '1.6'
    if digit_version(torch.__version__) >= digit_version('1.6'):
        torch.save(checkpoint, out_file, _use_new_zipfile_serialization=False)
    else:
        torch.save(checkpoint, out_file)
    sha = subprocess.check_output(['sha256sum', out_file]).decode()
    # splitext avoids the rstrip('.pth') pitfall of stripping arbitrary
    # trailing 'p'/'t'/'h' characters from the file name
    final_file = osp.splitext(out_file)[0] + f'-{sha[:8]}.pth'
    # move synchronously so later steps always see the renamed file
    shutil.move(out_file, final_file)
    return final_file


def is_by_epoch(config):
    cfg = Config.fromfile('./configs/' + config)
    return cfg.train_cfg.type == 'EpochBasedTrainLoop'


def get_final_epoch_or_iter(config):
    cfg = Config.fromfile('./configs/' + config)
    if cfg.train_cfg.type == 'EpochBasedTrainLoop':
        return cfg.train_cfg.max_epochs
    else:
        return cfg.train_cfg.max_iters


def get_best_epoch_or_iter(exp_dir):
    best_epoch_iter_full_path = list(
        sorted(glob.glob(osp.join(exp_dir, 'best_*.pth'))))[-1]
    best_epoch_or_iter_model_path = best_epoch_iter_full_path.split('/')[-1]
    best_epoch_or_iter = best_epoch_or_iter_model_path. \
        split('_')[-1].split('.')[0]
    return best_epoch_or_iter_model_path, int(best_epoch_or_iter)


def get_real_epoch_or_iter(config):
    cfg = Config.fromfile('./configs/' + config)
    if cfg.train_cfg.type == 'EpochBasedTrainLoop':
        epoch = cfg.train_cfg.max_epochs
        return epoch
    else:
        # MMEngine-style configs keep max_iters in train_cfg, not in runner
        return cfg.train_cfg.max_iters


def get_final_results(log_json_path,
                      epoch_or_iter,
                      results_lut='coco/bbox_mAP',
                      by_epoch=True):
    result_dict = dict()
    with open(log_json_path) as f:
        # parse the last evaluation record in the log
        r = f.readlines()[-1]
        last_metric = r.split(',')[0].split(': ')[-1].strip()
    result_dict[results_lut] = last_metric
    return result_dict


def get_dataset_name(config):
    # If there are more datasets, add them here.
    name_map = dict(
        CityscapesDataset='Cityscapes',
        CocoDataset='COCO',
        YOLOv5CocoDataset='COCO',
        CocoPanopticDataset='COCO',
        DeepFashionDataset='Deep Fashion',
        LVISV05Dataset='LVIS v0.5',
        LVISV1Dataset='LVIS v1',
        VOCDataset='Pascal VOC',
        WIDERFaceDataset='WIDER Face',
        OpenImagesDataset='OpenImagesDataset',
        OpenImagesChallengeDataset='OpenImagesChallengeDataset')
    cfg = Config.fromfile('./configs/' + config)
    return name_map[cfg.dataset_type]


def find_last_dir(model_dir):
    dst_times = []
    for time_stamp in os.scandir(model_dir):
        if osp.isdir(time_stamp):
            dst_time = time.mktime(
                time.strptime(time_stamp.name, '%Y%m%d_%H%M%S'))
            dst_times.append([dst_time, time_stamp.name])
    return max(dst_times, key=lambda x: x[0])[1]


def convert_model_info_to_pwc(model_infos):
    pwc_files = {}
    for model in model_infos:
        cfg_folder_name = osp.split(model['config'])[-2]
        pwc_model_info = OrderedDict()
        pwc_model_info['Name'] = osp.split(model['config'])[-1].split('.')[0]
        pwc_model_info['In Collection'] = 'Please fill in Collection name'
        pwc_model_info['Config'] = osp.join('configs', model['config'])

        # get metadata
        meta_data = OrderedDict()
        if 'epochs' in model:
            meta_data['Epochs'] = get_real_epoch_or_iter(model['config'])
        else:
            meta_data['Iterations'] = get_real_epoch_or_iter(model['config'])
        pwc_model_info['Metadata'] = meta_data

        # get dataset name
        dataset_name = get_dataset_name(model['config'])

        # get results
        results = []
        # if there are more metrics, add them here.
        if 'bbox_mAP' in model['results']:
            metric = round(model['results']['bbox_mAP'] * 100, 1)
            results.append(
                OrderedDict(
                    Task='Object Detection',
                    Dataset=dataset_name,
                    Metrics={'box AP': metric}))
        if 'segm_mAP' in model['results']:
            metric = round(model['results']['segm_mAP'] * 100, 1)
            results.append(
                OrderedDict(
                    Task='Instance Segmentation',
                    Dataset=dataset_name,
                    Metrics={'mask AP': metric}))
        if 'PQ' in model['results']:
            metric = round(model['results']['PQ'], 1)
            results.append(
                OrderedDict(
                    Task='Panoptic Segmentation',
                    Dataset=dataset_name,
                    Metrics={'PQ': metric}))
        pwc_model_info['Results'] = results

        link_string = 'https://download.openmmlab.com/mmdetection/v2.0/'
        # splitext avoids rstrip('.py') mangling names ending in 'p' or 'y'
        link_string += '{}/{}'.format(
            osp.splitext(model['config'])[0],
            osp.split(model['model_path'])[-1])
        pwc_model_info['Weights'] = link_string
        if cfg_folder_name in pwc_files:
            pwc_files[cfg_folder_name].append(pwc_model_info)
        else:
            pwc_files[cfg_folder_name] = [pwc_model_info]
    return pwc_files


def parse_args():
    parser = argparse.ArgumentParser(description='Gather benchmarked models')
    parser.add_argument(
        'root',
        type=str,
        help='root path of benchmarked models to be gathered')
    parser.add_argument(
        'out', type=str, help='output path of gathered models to be stored')
    parser.add_argument(
        '--best',
        action='store_true',
        help='whether to gather the best model.')

    args = parser.parse_args()
    return args


# TODO: Refine
def main():
    args = parse_args()
    models_root = args.root
    models_out = args.out
    mkdir_or_exist(models_out)

    # find all models in the root directory to be gathered
    raw_configs = list(scandir('./configs', '.py', recursive=True))

    # filter configs that are not trained in the experiments dir
    used_configs = []
    for raw_config in raw_configs:
        if osp.exists(osp.join(models_root, raw_config)):
            used_configs.append(raw_config)
    print(f'Find {len(used_configs)} models to be gathered')

    # find the final checkpoint and log file for each trained config
    # and parse the best performance
    model_infos = []
    for used_config in used_configs:
        exp_dir = osp.join(models_root, used_config)
        by_epoch = is_by_epoch(used_config)
        # check whether the experiment is finished
        if args.best:
            final_model, final_epoch_or_iter = get_best_epoch_or_iter(exp_dir)
        else:
            final_epoch_or_iter = get_final_epoch_or_iter(used_config)
            final_model = '{}_{}.pth'.format('epoch' if by_epoch else 'iter',
                                             final_epoch_or_iter)

        model_path = osp.join(exp_dir, final_model)
        # skip if the model is still training
        if not osp.exists(model_path):
            continue

        # get the latest logs
        latest_exp_name = find_last_dir(exp_dir)
        latest_exp_json = osp.join(exp_dir, latest_exp_name, 'vis_data',
                                   latest_exp_name + '.json')

        model_performance = get_final_results(
            latest_exp_json, final_epoch_or_iter, by_epoch=by_epoch)
        if model_performance is None:
            continue

        model_info = dict(
            config=used_config,
            results=model_performance,
            final_model=final_model,
            latest_exp_json=latest_exp_json,
            latest_exp_name=latest_exp_name)
        model_info['epochs' if by_epoch else 'iterations'] = \
            final_epoch_or_iter
        model_infos.append(model_info)

    # publish the model for each checkpoint
    publish_model_infos = []
    for model in model_infos:
        model_publish_dir = osp.join(models_out,
                                     osp.splitext(model['config'])[0])
        mkdir_or_exist(model_publish_dir)

        model_name = osp.split(model['config'])[-1].split('.')[0]
        model_name += '_' + model['latest_exp_name']
        publish_model_path = osp.join(model_publish_dir, model_name)
        trained_model_path = osp.join(models_root, model['config'],
                                      model['final_model'])

        # convert model
        final_model_path = process_checkpoint(trained_model_path,
                                              publish_model_path)

        # copy log
        shutil.copy(model['latest_exp_json'],
                    osp.join(model_publish_dir, f'{model_name}.log.json'))

        # copy config to guarantee reproducibility
        config_path = model['config']
        config_path = osp.join(
            'configs',
            config_path) if 'configs' not in config_path else config_path
        target_config_path = osp.split(config_path)[-1]
        shutil.copy(config_path,
                    osp.join(model_publish_dir, target_config_path))

        model['model_path'] = final_model_path
        publish_model_infos.append(model)

    models = dict(models=publish_model_infos)
    print(f'Totally gathered {len(publish_model_infos)} models')
    dump(models, osp.join(models_out, 'model_info.json'))

    pwc_files = convert_model_info_to_pwc(publish_model_infos)
    for name in pwc_files:
        # pass the encoding via open() so yaml emits str, not bytes,
        # into the text-mode stream
        with open(
                osp.join(models_out, name + '_metafile.yml'),
                'w',
                encoding='utf-8') as f:
            ordered_yaml_dump(pwc_files[name], f)


if __name__ == '__main__':
    main()
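For reference, assuming the script above is saved as `tools/misc/gather_models.py` (the path is an assumption), a typical invocation would be `python tools/misc/gather_models.py work_dirs/ gathered_models/ --best`, where `work_dirs/` is the root of the benchmarked experiments and `gathered_models/` receives the published checkpoints, logs, configs and generated metafiles.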
# Mixed image data augmentation update
Mixed image data augmentations, such as Mosaic and MixUp, need to fuse the annotation information of multiple images at runtime. In the standard OpenMMLab data augmentation pipeline, a transform generally has no access to other indexes of the dataset. To implement this function, YOLOX proposed the concept of a dataset wrapper, which is reproduced in MMDetection as [MultiImageMixDataset](https://github.com/open-mmlab/mmdetection/blob/master/mmdet/datasets/dataset_wrappers.py#L338).
The `MultiImageMixDataset` wrapper holds the mixed-image transforms such as `Mosaic` and `RandomAffine`, while the wrapped `CocoDataset` keeps the `pipeline` that loads images and annotations. Combining the two in this way enables mixed data augmentation. The configuration method is as follows:
```python
train_pipeline = [
dict(type='Mosaic', img_scale=img_scale, pad_val=114.0),
dict(
type='RandomAffine',
scaling_ratio_range=(0.1, 2),
border=(-img_scale[0] // 2, -img_scale[1] // 2)),
dict(
type='MixUp',
img_scale=img_scale,
ratio_range=(0.8, 1.6),
pad_val=114.0),
...
]
train_dataset = dict(
# use MultiImageMixDataset wrapper to support mosaic and mixup
type='MultiImageMixDataset',
dataset=dict(
type='CocoDataset',
pipeline=[
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True)
]),
pipeline=train_pipeline)
```
However, this method causes a problem: users who are not familiar with MMDetection may forget to pair data augmentation methods like Mosaic with `MultiImageMixDataset`, which greatly increases complexity and can be hard to understand.
To solve this problem, MMYOLO simplifies the design by passing the `dataset` object directly to the `pipeline`, so that mixed-image augmentations such as `Mosaic` can be implemented and used just like random flip, with no dataset wrapper anymore. The new configuration method is as follows:
```python
pre_transform = [
dict(type='LoadImageFromFile'),
dict(type='LoadAnnotations', with_bbox=True)
]
train_pipeline = [
*pre_transform,
dict(
type='Mosaic',
img_scale=img_scale,
pad_val=114.0,
pre_transform=pre_transform),
dict(
type='mmdet.RandomAffine',
scaling_ratio_range=(0.1, 2),
border=(-img_scale[0] // 2, -img_scale[1] // 2)),
dict(
type='YOLOXMixUp',
img_scale=img_scale,
ratio_range=(0.8, 1.6),
pad_val=114.0,
pre_transform=pre_transform),
...
]
```
A more complex YOLOv5-m configuration including MixUp is shown below:
```python
mosaic_affine_pipeline = [
dict(
type='Mosaic',
img_scale=img_scale,
pad_val=114.0,
pre_transform=pre_transform),
dict(
type='YOLOv5RandomAffine',
max_rotate_degree=0.0,
max_shear_degree=0.0,
scaling_ratio_range=(1 - affine_scale, 1 + affine_scale),
border=(-img_scale[0] // 2, -img_scale[1] // 2),
border_val=(114, 114, 114))
]
# enable mixup
train_pipeline = [
*pre_transform, *mosaic_affine_pipeline,
dict(
type='YOLOv5MixUp',
prob=0.1,
pre_transform=[*pre_transform, *mosaic_affine_pipeline]),
dict(
type='mmdet.Albu',
transforms=albu_train_transforms,
bbox_params=dict(
type='BboxParams',
format='pascal_voc',
label_fields=['gt_bboxes_labels', 'gt_ignore_flags']),
keymap={
'img': 'image',
'gt_bboxes': 'bboxes'
}),
dict(type='YOLOv5HSVRandomAug'),
dict(type='mmdet.RandomFlip', prob=0.5),
dict(
type='mmdet.PackDetInputs',
meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape', 'flip',
'flip_direction'))
]
```
This is very easy to implement: just pass the dataset object to the pipeline.
```python
from typing import Any

# Method of the MMYOLO dataset class (excerpt).
def prepare_data(self, idx) -> Any:
    """Pass the dataset to the pipeline during training to support mixed
    data augmentation, such as Mosaic and MixUp."""
    if self.test_mode is False:
        data_info = self.get_data_info(idx)
        data_info['dataset'] = self
        return self.pipeline(data_info)
    else:
        return super().prepare_data(idx)
```
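To make the mechanism concrete, below is a minimal sketch (not the actual MMYOLO implementation; the class and helper names are illustrative) of how a mixed-image transform can use the injected `dataset` object to fetch and fuse extra samples:

```python
import random


class ToyMixTransform:
    """Minimal sketch of a mixed-image transform, illustrative only."""

    def __init__(self, pre_transform=None):
        # `pre_transform` is assumed to be a callable (e.g. a Compose of
        # LoadImageFromFile + LoadAnnotations) that loads the extra sample
        self.pre_transform = pre_transform

    def __call__(self, results: dict) -> dict:
        dataset = results['dataset']  # injected by `prepare_data` above
        # draw one extra sample; the real Mosaic draws three
        extra = dataset.get_data_info(random.randint(0, len(dataset) - 1))
        if self.pre_transform is not None:
            extra = self.pre_transform(extra)
        # ... fuse the images and annotations of `results` and `extra` here ...
        return results
```

Because the dataset travels with the data dict, the transform needs no reference back to a wrapper, which is exactly what removes `MultiImageMixDataset` from the configuration.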
# Model design related instructions
## YOLO series model basic class
The structural graph below is provided by @RangeKing. Thank you, RangeKing!
![base class](https://user-images.githubusercontent.com/33799979/190382319-6b4e1fcb-cc3f-4fbe-9d6b-3c9c4e57472c.png)
Most YOLO series algorithms adopt a unified building structure, typically Darknet + PAFPN. To help users quickly understand the YOLO series architecture, we deliberately designed the BaseBackbone + BaseYOLONeck structure shown in the graph above.
The benefits of the abstract BaseBackbone include:
1. Subclasses do not need to care about the forward process; they only build the model, following the builder pattern.
2. It can be configured with custom plug-ins, so users can easily insert modules such as attention blocks.
3. All subclasses automatically support freezing certain stages and freezing BN layers.
BaseYOLONeck shares the same benefits as BaseBackbone.
### BaseBackbone
As shown in the graph above, for the P5 setting, BaseBackbone consists of 1 stem layer and 4 stage layers, similar to the basic structure of ResNet. Different backbone algorithms inherit BaseBackbone, and users can build each layer of the network from custom basic modules by overriding the `build_xx` methods.
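To make this concrete, a subclass could look roughly like the sketch below (a minimal sketch: the import path and the exact `build_xx` method names and signatures are assumptions, and the layers are toy stand-ins):

```python
import torch.nn as nn

from mmyolo.models import BaseBackbone  # import path is an assumption


class ToyBackbone(BaseBackbone):
    """Only builds layers; the parent class owns the forward pass,
    stage freezing and BN freezing."""

    def build_stem_layer(self) -> nn.Module:
        # stem: 3-channel input image -> first feature map
        return nn.Conv2d(3, 32, 3, stride=2, padding=1)

    def build_stage_layer(self, stage_idx: int, setting: list) -> list:
        # one downsampling conv per stage; real backbones return richer blocks
        in_c, out_c = setting[0], setting[1]
        return [nn.Conv2d(in_c, out_c, 3, stride=2, padding=1), nn.SiLU()]
```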
### BaseYOLONeck
We reproduce the YOLO series neck components with the same approach as BaseBackbone: the neck is divided into a Reduce layer, an UpSample layer, a TopDown layer, a DownSample layer, a BottomUp layer and an output convolution layer. Each layer can customize its internal construction by inheriting and overriding the corresponding `build_xx` method.
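A similarly hedged sketch for a custom neck, assuming one `build_xx` hook per layer named above (the import path, method names and signatures are illustrative):

```python
import torch.nn as nn

from mmyolo.models import BaseYOLONeck  # import path is an assumption


class ToyNeck(BaseYOLONeck):
    """Each build method returns one sub-module; the parent class wires
    them into the top-down / bottom-up forward pass."""

    def build_reduce_layer(self, idx: int) -> nn.Module:
        return nn.Identity()

    def build_upsample_layer(self, idx: int) -> nn.Module:
        return nn.Upsample(scale_factor=2, mode='nearest')

    def build_top_down_layer(self, idx: int) -> nn.Module:
        return nn.Identity()

    def build_downsample_layer(self, idx: int) -> nn.Module:
        return nn.MaxPool2d(kernel_size=2, stride=2)

    def build_bottom_up_layer(self, idx: int) -> nn.Module:
        return nn.Identity()

    def build_out_layer(self, idx: int) -> nn.Module:
        return nn.Identity()
```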
### BaseDenseHead
The YOLO series uses BaseDenseHead, designed in MMDetection, as the base class of the head structure. The forward function of the [HeadModule](#headmodule) base class replaces the original forward method. Different algorithms only need to inherit it and override the `loss_by_feat` and `predict_by_feat` methods.
## HeadModule
<div align=center>
<img src="https://user-images.githubusercontent.com/33799979/190407754-c725fe85-a71b-4e45-912b-34513d1ff128.png" width=800>
</div>
The method implementation in [MMDetection](https://github.com/open-mmlab/mmdetection) is shown in the graph above; the solid line is the implementation in MMYOLO, which has the following advantages over the original implementation:
1. In MMDetection, `bbox_head` is split into three large components: assigner + box coder + sampler. Because the interfaces between these three components must stay generic, the model has to encapsulate extra internal data for them to pass around. After unifying them, users no longer need to separate the components. The benefits of not forcibly dividing them: internal data no longer needs extra encapsulation, the code logic is simplified, and both the difficulty of use and the difficulty of algorithm implementation are reduced.
2. MMYOLO is faster: when customizing an algorithm, users can deeply optimize part of the code without depending on the original framework.
In general, in MMYOLO only the decoupled model + `loss_by_feat` parts need to be implemented, and users can combine any model with any `loss_by_feat` computation by modifying the configuration, for example applying the YOLOX `loss_by_feat` to the YOLOv5 model.
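For instance, such a pairing might be expressed directly in the config, roughly as below (a hypothetical sketch: combining `YOLOXHead` with `YOLOv5HeadModule` is for illustration only, not an officially benchmarked setup):

```python
bbox_head = dict(
    type='YOLOXHead',  # supplies the YOLOX-style loss_by_feat logic
    head_module=dict(
        type='YOLOv5HeadModule',  # supplies the YOLOv5 network layers
        num_classes=80,
        in_channels=[256, 512, 1024]))
```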
Taking the YOLOX configuration in MMDet as an example, the Head module configuration is written as follows:
```python
bbox_head=dict(
type='YOLOXHead',
num_classes=80,
in_channels=128,
feat_channels=128,
stacked_convs=2,
strides=(8, 16, 32),
use_depthwise=False,
norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
act_cfg=dict(type='Swish'),
...
loss_obj=dict(
type='CrossEntropyLoss',
use_sigmoid=True,
reduction='sum',
loss_weight=1.0),
loss_l1=dict(type='L1Loss', reduction='sum', loss_weight=1.0)),
train_cfg=dict(assigner=dict(type='SimOTAAssigner', center_radius=2.5)),
```
After extracting `head_module` in MMYOLO, the new configuration is written as follows:
```python
bbox_head=dict(
type='YOLOXHead',
head_module=dict(
type='YOLOXHeadModule',
num_classes=80,
in_channels=256,
feat_channels=256,
widen_factor=widen_factor,
stacked_convs=2,
featmap_strides=(8, 16, 32),
use_depthwise=False,
norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),
act_cfg=dict(type='SiLU', inplace=True),
),
...
loss_obj=dict(
type='mmdet.CrossEntropyLoss',
use_sigmoid=True,
reduction='sum',
loss_weight=1.0),
loss_bbox_aux=dict(type='mmdet.L1Loss', reduction='sum', loss_weight=1.0)),
train_cfg=dict(
assigner=dict(
type='mmdet.SimOTAAssigner',
center_radius=2.5,
iou_calculator=dict(type='mmdet.BboxOverlaps2D'))),
```
# Overview
This chapter introduces you to the overall framework of MMYOLO and provides links to detailed tutorials.
## What is MMYOLO
![pic](https://user-images.githubusercontent.com/12907710/137271636-56ba1cd2-b110-4812-8221-b4c120320aa9.png)
MMYOLO is a YOLO series algorithm toolbox. It currently implements only the object detection task; support for tasks such as instance segmentation, panoptic segmentation and keypoint detection will follow. It includes a rich set of object detection algorithms and related components and modules, and the following is its overall framework.
The MMYOLO file structure is identical to MMDetection's. To fully reuse the MMDetection code, MMYOLO includes only custom content, organized into 3 main parts: datasets, models, and engine.
- **datasets** supports a variety of datasets for object detection.
  - **transforms** contains various data augmentation transforms.
- **models** is the most important part of the detector and contains its different components.
  - **detectors** defines all detection model classes.
  - **data_preprocessors** preprocesses the input data of the model.
  - **backbones** contains various backbone networks.
  - **necks** contains various neck components.
  - **dense_heads** contains various dense heads for different tasks.
  - **losses** contains various loss functions.
  - **task_modules** provides components for detection tasks, such as assigners, samplers, box coders and prior generators.
  - **layers** provides some basic network layers.
- **engine** is a component for runtime.
  - **optimizers** provides optimizers and optimizer wrappers.
  - **hooks** provides various hooks for the runner.
## How to use this tutorial
The detailed instructions for using MMYOLO are as follows:
1. Look up the installation instructions in [start your first step](get_started.md).
2. The basic usage of MMYOLO can be found here:
- [Training and testing](https://mmyolo.readthedocs.io/en/latest/user_guides/index.html#train-test)
- [From getting started to deployment tutorial](https://mmyolo.readthedocs.io/en/latest/user_guides/index.html#from-getting-started-to-deployment-tutorial)
- [Useful Tools](https://mmyolo.readthedocs.io/en/latest/user_guides/index.html#useful-tools)
3. Tutorials on YOLO series algorithm implementations and full analyses:
- [Essential Basics](https://mmyolo.readthedocs.io/en/latest/algorithm_descriptions/index.html#essential-basics)
- [Full explanation of the model and implementation](https://mmyolo.readthedocs.io/en/latest/algorithm_descriptions/index.html#algorithm-principles-and-implementation)
4. Refer to the following tutorials for an in-depth look:
- [How to](https://mmyolo.readthedocs.io/en/latest/advanced_guides/index.html#how-to)
# Learn about Configs with YOLOv5
MMYOLO and other OpenMMLab repositories use [MMEngine's config system](https://mmengine.readthedocs.io/en/latest/tutorials/config.html). It has a modular and inheritance design, which is convenient to conduct various experiments.
@@ -46,7 +46,7 @@ model = dict(
```python
        norm_cfg=dict(type='BN', momentum=0.03, eps=0.001),  # The config of normalization layers.
        act_cfg=dict(type='SiLU', inplace=True)),  # The config of activation function
    neck=dict(
        type='YOLOv5PAFPN',  # The neck of the detector is YOLOv5PAFPN. We also support 'YOLOv6RepPAFPN', 'YOLOXPAFPN'.
        deepen_factor=deepen_factor,  # The scaling factor that controls the depth of the network structure
        widen_factor=widen_factor,  # The scaling factor that controls the width of the network structure
        in_channels=[256, 512, 1024],  # The input channels, consistent with the output channels of the backbone
```
@@ -71,7 +71,7 @@ model = dict(
```python
    test_cfg=dict(
        multi_label=True,  # The config of multi-label for multi-class prediction. The default setting is True.
        nms_pre=30000,  # The number of boxes before NMS
        score_thr=0.001,  # Threshold to filter out boxes.
        nms=dict(type='nms',  # Type of NMS
                 iou_threshold=0.65),  # NMS threshold
        max_per_img=300))  # Max number of detections in each image
```
@@ -151,7 +151,7 @@ train_dataloader = dict(  # Train dataloader config
```python
        pipeline=train_pipeline))
```
In the testing phase of YOLOv5, the letterbox resize method resizes all the test images to the same scale, which preserves the aspect ratio of all testing images. Therefore, the validation and testing phases share the same data pipeline.
```python
test_pipeline = [ # Validation/ Testing dataloader config
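    # Illustrative completion (assumed, based on the YOLOv5 configs): the
    # letterbox test pipeline keeps the aspect ratio and pads to img_scale.
    dict(type='LoadImageFromFile'),
    dict(type='YOLOv5KeepRatioResize', scale=img_scale),
    dict(
        type='LetterResize',
        scale=img_scale,
        allow_scale_up=False,
        pad_val=dict(img=114)),
    dict(type='LoadAnnotations', with_bbox=True),
    dict(
        type='mmdet.PackDetInputs',
        meta_keys=('img_id', 'img_path', 'ori_shape', 'img_shape',
                   'scale_factor', 'pad_param'))
]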
```
@@ -187,7 +187,7 @@ val_dataloader = dict(
```python
        test_mode=True,  # Turn on the test mode of the dataset to avoid filtering annotations or images
        data_prefix=dict(img='val2017/'),  # Prefix of image path
        ann_file='annotations/instances_val2017.json',  # Path of annotation file
        pipeline=test_pipeline,
        batch_shapes_cfg=dict(  # Config of batch shapes
            type='BatchShapePolicy',  # Policy that makes paddings with least pixels during batch inference process, which does not require the image scales of all batches to be the same throughout validation.
            batch_size=val_batch_size_per_gpu,  # Batch size for the batch shapes strategy, equals the validation batch size on a single GPU
```
@@ -253,7 +253,7 @@
```python
val_cfg = dict(type='ValLoop')  # The validation loop type
test_cfg = dict(type='TestLoop') # The testing loop type
```
MMEngine also supports dynamic intervals for evaluation. For example, you can run validation every 10 epochs for the first 280 epochs and then every epoch for the final 20 epochs. The configuration is as follows.
```python
max_epochs = 300 # Maximum training epochs: 300 epochs
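# Illustrative completion (assumed names and values): validate every 10
# epochs until 20 epochs before the end, then validate every epoch.
train_cfg = dict(
    type='EpochBasedTrainLoop',
    max_epochs=max_epochs,
    val_interval=10,
    dynamic_intervals=[(max_epochs - 20, 1)])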
```
@@ -352,7 +352,7 @@ resume = False  # Whether to resume from the checkpoint defined in `load_from`.
## Config file inheritance
`config/_base_` contains the default runtime settings. The configs composed of components from `_base_` are called _primitive_.
For all configs under the same folder, it is recommended to have only **one** _primitive_ config. All other configs should inherit from the _primitive_ config. In this way, the maximum inheritance level is 3.
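For example, a child config typically inherits a _primitive_ config and overrides only what differs, roughly as in the following sketch (the file name and the overridden field are illustrative assumptions):

```python
_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py'

# Override only the fields that differ from the primitive config;
# everything else is inherited unchanged.
model = dict(bbox_head=dict(head_module=dict(num_classes=20)))
```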
@@ -421,7 +421,7 @@ model = dict(
### Use intermediate variables in configs
Some intermediate variables are used in the config files, like `train_pipeline`/`test_pipeline` in datasets. It's worth noting that when modifying intermediate variables in the child configs, users need to pass the intermediate variables into the corresponding fields again.
For example, we would like to change the `image_scale` during training and add `YOLOv5MixUp` data augmentation; `img_scale`/`train_pipeline`/`test_pipeline` are the intermediate variables we would like to modify.
**Notice**: `YOLOv5MixUp` requires adding `pre_transform` and `mosaic_affine_pipeline` to its own `pre_transform` field. Please refer to [The description of YOLOv5 algorithm and its implementation](../algorithm_descriptions/yolov5_description.md) for the detailed process and diagrams.
@@ -430,7 +430,7 @@
```python
_base_ = './yolov5_s-v61_syncbn_8xb16-300e_coco.py'

img_scale = (1280, 1280)  # image height, image width
affine_scale = 0.9

mosaic_affine_pipeline = [
    dict(
```
@@ -511,7 +511,6 @@ model = dict(
If users want to reuse variables in the base file, they can get a copy of the corresponding variable by using `{{_base_.xxx}}`. The latest version of MMEngine also supports reusing variables without the `{{}}` syntax. E.g.:
```python
_base_ = '../_base_/default_runtime.py'
```
@@ -550,7 +549,7 @@
The file name is divided into 8 name fields, which have 4 required parts and 4 optional ones.
- `{algorithm name}`: The name of the algorithm. It can be a detector name such as `yolov5`, `yolov6`, `yolox` etc.
- `{component names}`: Names of the components used in the algorithm such as backbone, neck, etc. For example, `yolov5_s` means its `deepen_factor` is `0.33` and its `widen_factor` is `0.5`.
- `[version_id]` (optional): Since the evolution of the YOLO series is much faster than traditional object detection algorithms, `version id` is used to distinguish the differences between sub-versions. E.g., YOLOv5-3.0 uses the `Focus` layer as the stem layer, while YOLOv5-6.0 uses the `Conv` layer as the stem layer.
- `[norm_setting]` (optional): `bn` indicates `Batch Normalization`, `syncbn` indicates `Synchronized Batch Normalization`.
- `[data preprocessor type]` (optional): `fast` incorporates `YOLOv5DetDataPreprocessor` and `yolov5_collate` to preprocess data. The training speed is faster than with the default `mmdet.DetDataPreprocessor`, while extending the overall pipeline to multi-task learning.
- `{training settings}`: Information of training settings such as batch size, augmentations, loss trick, scheduler, and epochs/iterations. For example: `8xb16-300e_coco` means using 8-gpus x 16-images-per-gpu, and train 300 epochs.
@@ -12,10 +12,10 @@ In MMYOLO, the `Visualizer` provided by MMEngine is used for feature map visualization. Its features include:
- Support for the basic drawing interfaces as well as feature map visualization.
- Support for selecting different layers of the model to obtain feature maps, with three display modes: `squeeze_mean`, `select_max` and `topk`. Users can also customize the layout of the displayed feature maps with `arrangement`.
## Feature map drawing
You can call `demo/featmap_vis_demo.py` to obtain visualization results quickly and easily. To make it easier to understand, its main parameters are described below:
- `img`: the image to visualize; a single image or a list of image paths is supported
- `config`: the config file of the algorithm
@@ -31,6 +31,7 @@ In MMYOLO, the `Visualizer` provided by MMEngine is used for feature map visualization.
- `--preview-model`: preview the model, helping users understand its feature-layer structure
- `--target-layers`: the layers from which to obtain feature maps for visualization
  - The feature map of a single layer can be output, e.g. `--target-layers backbone`, `--target-layers neck`, `--target-layers backbone.stage4`
  - When the argument is a list, the feature maps of multiple layers can be output at once, e.g. `--target-layers backbone.stage4 neck` outputs the stage4 layer of the backbone plus the three neck layers, four feature maps in total
@@ -48,6 +49,7 @@ In MMYOLO, the `Visualizer` provided by MMEngine is used for feature map visualization.
- Since the input feature maps are usually very small, they are upsampled by default for easier visualization.
## Usage examples
Take the pretrained yolov5_s model as an example:
(1) Compress the multi-channel feature maps into a single channel with `select_max` and display them. Visualizing the outputs of the backbone yields the feature maps of its three output layers:
@@ -72,7 +74,6 @@ python demo/featmap_vis_demo.py demo/dog.jpg configs/yolov5/yolov5_s-v61_syncbn_
(3) Compress the multi-channel feature maps into a single channel with `squeeze_mean` and display them. Visualizing the outputs of backbone.stage4 and backbone.stage3 yields the feature maps of the two output layers:
```shell
python demo/featmap_vis_demo.py demo/dog.jpg configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py mmyolov5s.pt --target-layers backbone.stage4 backbone.stage3 --channel-reduction squeeze_mean
```
@@ -91,7 +92,7 @@ python demo/featmap_vis_demo.py demo/dog.jpg configs/yolov5/yolov5_s-v61_syncbn_
<img src="https://user-images.githubusercontent.com/89863442/190939723-911c5e9b-dd33-42eb-be4a-ba45f03110a0.jpg" width="1200"/>
</div>
(5) Save the drawn image. After drawing, the result can either be shown in a local window or saved to disk by adding the argument `--out-file xxx.jpg`:
```shell
python demo/featmap_vis_demo.py demo/dog.jpg configs/yolov5/yolov5_s-v61_syncbn_8xb16-300e_coco.py mmyolov5s.pt --target-layers backbone --channel-reduction select_max --out-file featmap_backbone
```
@@ -165,4 +165,3 @@ python tools/test.py configs/yolov5/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon.p
Running the above inference command saves the result images automatically to the `work_dirs/yolov5_s-v61_syncbn_fast_1xb4-300e_balloon/{timestamp}/show_results` folder. Below is one of the result images: the left half shows the ground-truth annotations, and the right half shows the model predictions.
![result_img](https://user-images.githubusercontent.com/27466624/190913272-f99709e5-c798-46b8-aede-30f4e91683a3.jpg)