diff --git a/README.md b/README.md
index 1c53dc7743f4cb28a99e44687df5abae05846e2c..56f1733a32f9cd4e0efb19b362e4ef43490fdb65 100644
--- a/README.md
+++ b/README.md
@@ -1,4 +1,4 @@
-# MMAction: Action understanding Toolbox
+# MMAction: Action Understanding Toolbox
 
 [![docs](https://img.shields.io/badge/docs-latest-blue)](http://open-mmlab.pages.gitlab.sz.sensetime.com/mmaction-lite/)
 [![codecov](https://codecov.io/gh/open-mmlab/mmaction/branch/master/graph/badge.svg)](https://codecov.io/gh/open-mmlab/mmaction)
@@ -41,33 +41,33 @@ This project is released under the [Apache 2.0 license](LICENSE).
 
 ## Benchmark and Model Zoo
 
-Benchmark with other repos are available on [benchmark.md](docs/benchmark.md).
+Benchmark results with other repos are available in [benchmark.md](/docs/benchmark.md).
 
 Results and models are available in the **README.md** of each method's config directory.
 
 Supported methods for action recognition:
 
-- [x] [TSN](configs/recognition/tsn/README.md)
-- [x] [TSM](configs/recognition/tsm/README.md)
-- [x] [R(2+1)d](configs/recognition/r2plus1d/README.md)
-- [x] [I3D](configs/recognition/i3d/README.md)
-- [x] [SlowFast](configs/recognition/slowfast/README.md)
-- [x] [SlowOnly](configs/recognition/slowonly/README.md)
+- [x] [TSN](/configs/recognition/tsn/README.md)
+- [x] [TSM](/configs/recognition/tsm/README.md)
+- [x] [R(2+1)d](/configs/recognition/r2plus1d/README.md)
+- [x] [I3D](/configs/recognition/i3d/README.md)
+- [x] [SlowOnly](/configs/recognition/slowonly/README.md)
+- [x] [SlowFast](/configs/recognition/slowfast/README.md)
 
 Supported methods for action localization:
 
-- [x] [BMN](configs/localization/bmn/README.md)
-- [x] [BSN](configs/localization/bsn/README.md)
+- [x] [BMN](/configs/localization/bmn/README.md)
+- [x] [BSN](/configs/localization/bsn/README.md)
 
 ## Installation
 
-Please refer to [install.md](docs/install.md) for installation.
+Please refer to [install.md](/docs/install.md) for installation.
 
 ## Data Preparation
 
-Please refer to [data_preparation.md](docs/data_preparation.md) for a general knowledge of data preparation.
+Please refer to [data_preparation.md](/docs/data_preparation.md) for a general introduction to data preparation.
 
 ## Get Started
 
-Please see [getting_started.md](docs/getting_started.md) for the basic usage of MMAction.
+Please see [getting_started.md](/docs/getting_started.md) for the basic usage of MMAction.
 ## Contributing
diff --git a/demo/demo.ipynb b/demo/demo.ipynb
index 7a4e5099083b83cfbbae632b873b722ef296e697..340081a7735c53597d80b79842341df843be8516 100644
--- a/demo/demo.ipynb
+++ b/demo/demo.ipynb
@@ -25,7 +25,7 @@
   "source": [
    "config_file = '../configs/recognition/tsn/tsn_r50_video_inference_1x1x3_100e_kinetics400_rgb.py'\n",
    "# download the checkpoint from model zoo and put it in `checkpoints/`\n",
-   "checkpoint_file = '../checkpoints/tsn.pth'"
+   "checkpoint_file = '../checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'"
   ]
  },
  {
@@ -73,11 +73,11 @@
  {
   "name": "stdout",
   "text": [
-   "arm wrestling: 7.166297\n",
-   "balloon blowing: 4.2894025\n",
-   "getting a haircut: 4.2547274\n",
-   "waxing legs: 4.2272677\n",
-   "eating burger: 4.148987\n"
+   "arm wrestling: 29.61644\n",
+   "rock scissors paper: 10.754839\n",
+   "shaking hands: 9.9084\n",
+   "clapping: 9.189912\n",
+   "massaging feet: 8.305307\n"
   ],
   "output_type": "stream"
  }
diff --git a/demo/demo.py b/demo/demo.py
index 66b59f0789ce1cff409e2489f63954801abadd70..4b17ae59bc5e62f8e8ce1e032bf4295842c30f65 100644
--- a/demo/demo.py
+++ b/demo/demo.py
@@ -26,7 +26,7 @@ def main():
     # test a single video
     results = inference_recognizer(model, args.video, args.label)
 
-    # print(f'The top-5 labels with corresponding scores are:')
+    print('The top-5 labels with corresponding scores are:')
     for result in results:
         print(f'{result[0]}: ', result[1])
diff --git a/docs/benchmark.md b/docs/benchmark.md
index 3f66690741c43ed33ef52fbac9acd7e708091702..39c78ecd56fef2c3fcbe57aedbd338420f38f8d1 100644
--- a/docs/benchmark.md
+++ b/docs/benchmark.md
@@ -7,9 +7,9 @@ We compare our results with some popular frameworks and official releases in ter
 Here we compare our MMAction repo with other video understanding toolboxes in the same data and model settings by the training time per iteration.
 
-To ensure the fairness of the comparison, the comparison experiments will be conducted under the same hardware environment and using the same dataset.
-For each model setting, we keep the same data preprocessing methods to make sure the same feature input.
-In addition, we also use MemCache, a distributed cached system, to load the data for the same IO time.
+To ensure a fair comparison, the experiments were conducted under the same hardware environment and using the same dataset.
+For each model setting, we kept the same data preprocessing methods to ensure the same feature input.
+In addition, we used MemCache, a distributed caching system, to load the data so that the IO time is the same.
 The time we measured is the average training time for an iteration, including data processing and model training.
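For context on the `demo/demo.py` hunk above, which restores the top-5 print statement: below is a minimal sketch of the flow that statement sits in, pieced together from the `mmaction.apis` usage shown later in `docs/getting_started.md`. The argparse argument names for the config, checkpoint and device are assumptions for illustration, not copied from `demo.py`.

```python
# Hedged sketch of the demo flow around the restored print statement.
# `init_recognizer`/`inference_recognizer` usage follows docs/getting_started.md;
# the argument names `config`, `checkpoint` and `--device` are assumed here.
import argparse

from mmaction.apis import inference_recognizer, init_recognizer


def main():
    parser = argparse.ArgumentParser(description='MMAction demo')
    parser.add_argument('config', help='config file path')
    parser.add_argument('checkpoint', help='checkpoint file path')
    parser.add_argument('video', help='video file to recognize')
    parser.add_argument('label', help='label map file')
    parser.add_argument('--device', default='cuda:0', help='device to run on')
    args = parser.parse_args()

    # build the recognizer from a config file and a checkpoint file
    model = init_recognizer(args.config, args.checkpoint, device=args.device)

    # test a single video; each result is a (label, score) pair
    results = inference_recognizer(model, args.video, args.label)

    print('The top-5 labels with corresponding scores are:')
    for result in results:
        print(f'{result[0]}: ', result[1])


if __name__ == '__main__':
    main()
```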
@@ -17,7 +17,7 @@ The time we measured is the average training time for an iteration, including da ## Recognizers | Model | MMAction (s/iter) | MMAction V0.1 (s/iter) | [Temporal-Shift-Module](https://github.com/mit-han-lab/temporal-shift-module) (s/iter) | [PySlowFast](https://github.com/facebookresearch/SlowFast) (s/iter) | -| :---: | :---------------: | :--------------------: | :----------------------------: | :-----------------: | +| :--- | :---------------: | :--------------------: | :----------------------------: | :-----------------: | | TSN ([tsn_r50_1x1x3_100e_kinetics400_rgb](/configs/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb.py)) | **0.29** | 0.36 | 0.45 | x | | I3D ([i3d_r50_32x2x1_100e_kinetics400_rgb](/configs/recognition/i3d/i3d_r50_32x2x1_100e_kinetics400_rgb.py)) | **0.45** | 0.58 | x | x | | I3D ([i3d_r50_8x8x1_100e_kinetics400_rgb](/configs/recognition/i3d/i3d_r50_8x8x1_100e_kinetics400_rgb.py)) | **0.32** | x | x | 0.56 | @@ -31,7 +31,7 @@ The time we measured is the average training time for an iteration, including da ## Localizers | Model | MMAction (s/iter) | [BSN(boundary sensitive network)](https://github.com/wzmsltw/BSN-boundary-sensitive-network) (s/iter) | -| :---: | :---------------: | :-------------------------------------: | +| :--- | :---------------: | :-------------------------------------: | | BSN ([TEM + PEM + PGM](/configs/localization/bsn)) | **0.074(TEM)+0.040(PEM)** | 0.101(TEM)+0.040(PEM) | | BMN ([bmn_400x100_2x8_9e_activitynet_feature](/configs/localization/bmn/bmn_400x100_2x8_9e_activitynet_feature.py)) | **3.27** | 3.30 | diff --git a/docs/config.md b/docs/config.md index 1a2cc9606894fb5e209f359b52f901ea1e857492..5b3ac2c8398ce980543dcdf625e687c8fb30b600 100644 --- a/docs/config.md +++ b/docs/config.md @@ -11,10 +11,10 @@ We follow the style below to name config files. Contributors are advised to foll `{xxx}` is required field and `[yyy]` is optional. -- `{model}`: model type, e.g. `tin`, `i3d`, etc. +- `{model}`: model type, e.g. `tsn`, `i3d`, etc. - `[model setting]`: specific setting for some models. - `{backbone}`: backbone type, e.g. `r50` (ResNet-50), etc. -- `[misc]`: miscellaneous setting/plugins of model, e.g. `dense`, `2d`, `3d`, etc. +- `[misc]`: miscellaneous setting/plugins of model, e.g. `dense`, `320p`, `video`, etc. - `{data setting}`: frame sample setting in `{clip_len}x{frame_interval}x{num_clips}` format. - `[gpu x batch_per_gpu]`: GPUs and samples per GPU. - `{schedule}`: training schedule, e.g. `20e` means 20 epochs. @@ -34,7 +34,7 @@ Please refer to the corresponding pages for config file structure for different ### Use intermediate variables in configs Some intermediate variables are used in the config files, like `train_pipeline`/`val_pipeline`/`test_pipeline`, -`ann_file_train`/`ann_file_val`/`ann_file_test`, `img_norm_cfg`/`mc_cfg` etc. +`ann_file_train`/`ann_file_val`/`ann_file_test`, `img_norm_cfg` etc. For Example, we would like to first define `train_pipeline`/`val_pipeline`/`test_pipeline` and pass them into `data`. Thus, `train_pipeline`/`val_pipeline`/`test_pipeline` are intermediate variable. @@ -42,7 +42,7 @@ Thus, `train_pipeline`/`val_pipeline`/`test_pipeline` are intermediate variable. we also define `ann_file_train`/`ann_file_val`/`ann_file_test` and `data_root`/`data_root_val` to provide data pipeline some basic information. -In addition, we use `img_norm_cfg`/`mc_cfg` as intermediate variables to construct data augmentation components. 
+In addition, we use `img_norm_cfg` as an intermediate variable to construct data augmentation components.
 
 ```python
 ...
@@ -55,14 +55,10 @@ ann_file_test = 'data/kinetics400/kinetics_val_list.txt'
 img_norm_cfg = dict(
     mean=[123.675, 116.28, 103.53], std=[58.395, 57.12, 57.375], to_bgr=False)
-mc_cfg = dict(
-    server_list_cfg='/mnt/lustre/share/memcached_client/server_list.conf',
-    client_cfg='/mnt/lustre/share/memcached_client/client.conf',
-    sys_path='/mnt/lustre/share/pymc/py3')
 train_pipeline = [
     dict(type='SampleFrames', clip_len=32, frame_interval=2, num_clips=1),
-    dict(type='FrameSelector', io_backend='memcached', **mc_cfg),
+    dict(type='FrameSelector'),
     dict(type='Resize', scale=(-1, 256)),
     dict(
         type='MultiScaleCrop',
@@ -84,7 +80,7 @@ val_pipeline = [
         frame_interval=2,
         num_clips=1,
         test_mode=True),
-    dict(type='FrameSelector', io_backend='memcached', **mc_cfg),
+    dict(type='FrameSelector'),
     dict(type='Resize', scale=(-1, 256)),
     dict(type='CenterCrop', crop_size=224),
     dict(type='Flip', flip_ratio=0),
@@ -100,7 +96,7 @@ test_pipeline = [
         frame_interval=2,
         num_clips=10,
         test_mode=True),
-    dict(type='FrameSelector', io_backend='memcached', **mc_cfg),
+    dict(type='FrameSelector'),
     dict(type='Resize', scale=(-1, 256)),
     dict(type='ThreeCrop', crop_size=256),
     dict(type='Flip', flip_ratio=0),
diff --git a/docs/config_localization.md b/docs/config_localization.md
index 814233fca8bf2208a18347d81ae72bd2d37949a8..2ed33907a7bfb47bf76443fb875a44ffa76f0a58 100644
--- a/docs/config_localization.md
+++ b/docs/config_localization.md
@@ -1,6 +1,6 @@
 # Config System for Action localization
 
-We incorporate modular and inheritance design into our config system,
+We incorporate modular design into our config system,
 which is convenient to conduct various experiments.
 
 ## An Example of BMN
diff --git a/docs/config_recognition.md b/docs/config_recognition.md
index 731e916b4d20e313303dbae816b9a17658f7f386..9467a5c8638e72681352644a1d8fcec563860017 100644
--- a/docs/config_recognition.md
+++ b/docs/config_recognition.md
@@ -1,6 +1,6 @@
 # Config System for Action Recognition
 
-We incorporate modular and inheritance design into our config system,
+We incorporate modular design into our config system,
 which is convenient to conduct various experiments.
## An Example of TSN @@ -41,10 +41,7 @@ img_norm_cfg = dict( # Config of image normalition used in data pipeline mean=[123.675, 116.28, 103.53], # Mean values of different channels to normalize std=[58.395, 57.12, 57.375], # Std values of different channels to normalize to_bgr=False) # Whether to convert channels from RGB to BGR -mc_cfg = dict( # Config of memcached setting - server_list_cfg='/mnt/lustre/share/memcached_client/server_list.conf', # Path to server list config - client_cfg='/mnt/lustre/share/memcached_client/client.conf', # Path to client config - sys_path='/mnt/lustre/share/pymc/py3') # Path to `pymc` in python3 version + train_pipeline = [ # List of training pipeline steps dict( # Config of SampleFrames type='SampleFrames', # Sample frames pipeline, sampling frames from video @@ -52,9 +49,7 @@ train_pipeline = [ # List of training pipeline steps frame_interval=1, # Temporal interval of adjacent sampled frames num_clips=3), # Number of clips to be sampled dict( # Config of FrameSelector - type='FrameSelector', # Frame selector pipeline, selecting raw frames with given indices - io_backend='memcached', # Storage backend type - **mc_cfg), # Config of memcached + type='FrameSelector'), # Frame selector pipeline, selecting raw frames with given indices dict( # Config of Resize type='Resize', # Resize pipeline scale=(-1, 256)), # The scale to resize images @@ -93,9 +88,7 @@ val_pipeline = [ # List of validation pipeline steps num_clips=3, # Number of clips to be sampled test_mode=True), # Whether to set test mode in sampling dict( # Config of FrameSelector - type='FrameSelector', # Frame selector pipeline, selecting raw frames with given indices - io_backend='memcached', # Storage backend type - **mc_cfg), # Config of memcached + type='FrameSelector'), # Frame selector pipeline, selecting raw frames with given indices dict( # Config of Resize type='Resize', # Resize pipeline scale=(-1, 256)), # The scale to resize images @@ -127,9 +120,7 @@ test_pipeline = [ # List of testing pipeline steps num_clips=25, # Number of clips to be sampled test_mode=True), # Whether to set test mode in sampling dict( # Config of FrameSelector - type='FrameSelector', # Frame selector pipeline, selecting raw frames with given indices - io_backend='memcached', # Storage backend type - **mc_cfg), # Config of memcached + type='FrameSelector'), # Frame selector pipeline, selecting raw frames with given indices dict( # Config of Resize type='Resize', # Resize pipeline scale=(-1, 256)), # The scale to resize images diff --git a/docs/data_preparation.md b/docs/data_preparation.md index 66d564e9c6e9779f480ab1e3a030d975968e777c..fc11842f19d808f0c311d63a3894b25b26c9df0e 100644 --- a/docs/data_preparation.md +++ b/docs/data_preparation.md @@ -21,7 +21,7 @@ To ease usage, we provide tutorials of data deployment for each dataset. 
 - [Something-Something V2](https://20bn.com/datasets/something-something): See [preparing_sthv2.md](/tools/data/sthv2/preparing_sthv2.md)
 - [Moments in Time](http://moments.csail.mit.edu/): See [preparing_mit.md](/tools/data/mit/preparing_mit.md)
 - [Multi-Moments in Time](http://moments.csail.mit.edu/challenge_iccv_2019.html): See [preparing_mmit.md](/tools/data/mmit/preparing_mmit.md)
-- [ActivityNet_feature](): See[praparing_activitynet.md](/tools/data/activitynet/praparing_activitynet.md)
+- [ActivityNet_feature](): See [preparing_activitynet.md](/tools/data/activitynet/preparing_activitynet.md)
 
 Now, you can switch to [getting_started.md](/docs/getting_started.md) to train and test the model.
 
@@ -66,7 +66,7 @@ python build_rawframes.py ${SRC_FOLDER} ${OUT_FOLDER} [--task ${TASK}] [--level
 The recommended practice is
 
-1. set `$OUT_FOLDER` to be an folder located in SSD.
+1. set `$OUT_FOLDER` to be a folder located on an SSD.
 2. symlink the link `$OUT_FOLDER` to `$MMACTION/data/$DATASET/rawframes`.
 
 ```shell
diff --git a/docs/getting_started.md b/docs/getting_started.md
index 936472491588e03946cd807cc55c53ab5aed74d0..1c1f8084ba6484588428fb72f2491bd0dad079e3 100644
--- a/docs/getting_started.md
+++ b/docs/getting_started.md
@@ -1,7 +1,7 @@
 # Getting Started
 
 This page provides basic tutorials about the usage of MMAction.
-For installation instructions, please see [install.md](install.md).
+For installation instructions, please see [install.md](/docs/install.md).
 
 ## Datasets
 
@@ -26,9 +26,9 @@ mmaction
 │ │ ├── ucf101_val_list.txt
 ```
 
-For more information on data preparation, please see [data_preparation.md](data_preparation.md)
+For more information on data preparation, please see [data_preparation.md](/docs/data_preparation.md)
 
-For using custom datasets, please refer to [Tutorial 2: Adding New Dataset](tutorials/new_dataset.md)
+For using custom datasets, please refer to [Tutorial 2: Adding New Dataset](/docs/tutorials/new_dataset.md)
 
 ## Inference with Pre-Trained Models
 
@@ -49,12 +49,14 @@ python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} [--out ${RESULT_FILE}] [-
     [--proc_per_gpu ${NUM_PROC_PER_GPU}] [--gpu_collect] [--tmpdir ${TMPDIR}] [--average_clips ${AVG_TYPE}] \
     [--launcher ${JOB_LAUNCHER}] [--local_rank ${LOCAL_RANK}]
 
+# multi-gpu testing
 python tools/test.py ${CONFIG_FILE} ${CHECKPOINT_FILE} ${GPU_NUM} [--out ${RESULT_FILE}] [--eval ${EVAL_METRICS}] \
     [--proc_per_gpu ${NUM_PROC_PER_GPU}] [--gpu_collect] [--tmpdir ${TMPDIR}] [--average_clips ${AVG_TYPE}] \
     [--launcher ${JOB_LAUNCHER}] [--local_rank ${LOCAL_RANK}]
 ```
 
 Optional arguments:
+- `GPU_NUM`: Number of GPUs used to test the model. If not specified, it will be set to 1.
 - `RESULT_FILE`: Filename of the output results. If not specified, the results will not be saved to a file.
 - `EVAL_METRICS`: Items to be evaluated on the results. Allowed values depend on the dataset, e.g., `top_k_accuracy`, `mean_class_accuracy` are available for all datasets in recognition, `mean_average_precision` for Multi-Moments in Time, `AR@AN` for ActivityNet, etc.
 - `NUM_PROC_PER_GPU`: Number of processes per GPU. If not specified, only one process will be assigned for a single gpu.
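As a reading aid for the `## Datasets` layout touched in the getting_started.md hunk above: a hypothetical config excerpt showing how the intermediate variables described in `docs/config.md` (`data_root`, `ann_file_train`, and friends) would point at such a `data/ucf101` tree. Every name and path below is illustrative and not part of the patch.

```python
# Hypothetical config excerpt wiring the documented intermediate variables
# to the data/ucf101 layout shown in getting_started.md. The annotation
# file names here are illustrative assumptions.
dataset_type = 'RawframeDataset'  # dataset class mentioned in the tutorials
data_root = 'data/ucf101/rawframes'
data_root_val = 'data/ucf101/rawframes'
ann_file_train = 'data/ucf101/ucf101_train_list.txt'
ann_file_val = 'data/ucf101/ucf101_val_list.txt'
ann_file_test = 'data/ucf101/ucf101_val_list.txt'
```

These variables are then passed into the `data` dict together with `train_pipeline`/`val_pipeline`/`test_pipeline`, as described in the config documentation.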
@@ -103,7 +105,7 @@ python demo/demo.py ${CONFIG_FILE} ${CHECKPOINT_FILE} ${VIDEO_FILE} [--device ${
 Examples:
 
 ```shell
-python demo/demo.py configs/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb.py checkpoints/tsn.pth demo/demo.mp4
+python demo/demo.py configs/recognition/tsn/tsn_r50_video_inference_1x1x3_100e_kinetics400_rgb.py checkpoints/tsn.pth demo/demo.mp4
 ```
 
 ### High-level APIs for testing a video.
@@ -111,11 +113,11 @@ python demo/demo.py configs/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb.p
 Here is an example of building the model and test a given video.
 
 ```python
-from mmaction.core import init_recognizer, inference_recognizer
+from mmaction.apis import init_recognizer, inference_recognizer
 
 config_file = 'configs/recognition/tsn/tsn_r50_video_inference_1x1x3_100e_kinetics400_rgb.py'
 # download the checkpoint from model zoo and put it in `checkpoints/`
-checkpoint_file = 'checkpoints/tsn.pth'
+checkpoint_file = 'checkpoints/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth'
 
 # build the model from a config file and a checkpoint file
 model = init_recognizer(config_file, checkpoint_file, device='cpu')
@@ -123,14 +125,15 @@ model = init_recognizer(config_file, checkpoint_file, device='cpu')
 # test a single video and show the result:
 video = 'demo/demo.mp4'
 labels = 'demo/label_map.txt'
-result = inference_recognizer(model, video, labels)
+results = inference_recognizer(model, video, labels)
 
 # show the results
-for key in result:
-    print(f'{key}: ', result[key])
+print('The top-5 labels with corresponding scores are:')
+for result in results:
+    print(f'{result[0]}: ', result[1])
 ```
 
-A notebook demo can be found in [demo/demo.ipynb](../demo/demo.ipynb)
+A notebook demo can be found in [demo/demo.ipynb](/demo/demo.ipynb)
 
 ## Build a Model
 
@@ -206,7 +209,7 @@ which defines the following abstract methods.
 - `forward_train()`: forward method of the training mode.
 - `forward_test()`: forward method of the testing mode.
 
-[Recognizer2D](../mmaction/models/recognizers/recognizer2d.py) and [Recognizer3D](../mmaction/models/recognizers/recognizer3d.py)
+[Recognizer2D](/mmaction/models/recognizers/recognizer2d.py) and [Recognizer3D](/mmaction/models/recognizers/recognizer3d.py)
 are good examples which show how to do that.
 
@@ -288,7 +291,7 @@ Here is an example of using 16 GPUs to train TSN on the dev partition in a slurm
 GPUS=16 ./tools/slurm_train.sh dev tsn_r50_k400 configs/recognition/tsn/tsn_r50_1x1x3_100e_kinetics400_rgb.py work_dirs/tsn_r50_1x1x3_100e_kinetics400_rgb
 ```
 
-You can check [slurm_train.sh](../tools/slurm_train.sh) for full arguments and environment variables.
+You can check [slurm_train.sh](/tools/slurm_train.sh) for full arguments and environment variables.
 
 If you have just multiple machines connected with ethernet, you can refer to pytorch [launch utility](https://pytorch.org/docs/stable/distributed_deprecated.html#launch-utility).
 
@@ -333,7 +336,7 @@ We provide lots of useful tools under `tools/` directory.
 You can plot loss/top-k acc curves given a training log file.
 Run `pip install seaborn` first to install the dependency.
 
-![acc_curve_image](imgs/acc_curve.png)
+![acc_curve_image](/docs/imgs/acc_curve.png)
 
 ```shell
 python tools/analyze_logs.py plot_curve ${JSON_LOGS} [--keys ${KEYS}] [--title ${TITLE}] [--legend ${LEGEND}] [--backend ${BACKEND}] [--style ${STYLE}] [--out ${OUT_FILE}]
@@ -402,7 +405,7 @@ Params: 28.04 M
 
 You may well use the result for simple comparisons, but double check it before you adopt it in technical reports or papers.
 (1) FLOPs are related to the input shape while parameters are not. The default input shape is (1, 3, 340, 256) for 2D recognizer, (1, 3, 32, 340, 256) for 3D recognizer.
-(2) Some custom operators are not counted into FLOPs. You can add support for new operators by modifying [`mmaction/utils/flops_counter.py`](../mmaction/utils/file_client.py).
+(2) Some custom operators are not counted into FLOPs. You can add support for new operators by modifying [`mmaction/utils/flops_counter.py`](/mmaction/utils/flops_counter.py).
 
 ### Publish a model
 
@@ -540,9 +543,9 @@ There are two ways to work with custom datasets.
 
 - online conversion
 
-  You can write a new Dataset class inherited from [BaseDataset](../mmaction/datasets/base.py), and overwrite two methods
+  You can write a new Dataset class inherited from [BaseDataset](/mmaction/datasets/base.py), and overwrite two methods
   `load_annotations(self)` and `evaluate(self, results, metrics, logger)`,
-  like [RawframeDataset](../mmaction/datasets/rawframe_dataset.py), [VideoDataset](../mmaction/datasets/video_dataset.py) or [ActivityNetDataset](../mmaction/datasets/activitynet_dataset.py).
+  like [RawframeDataset](/mmaction/datasets/rawframe_dataset.py), [VideoDataset](/mmaction/datasets/video_dataset.py) or [ActivityNetDataset](/mmaction/datasets/activitynet_dataset.py).
 
 - offline conversion
 
@@ -551,7 +554,7 @@ There are two ways to work with custom datasets.
 
 ### Customize optimizer
 
-An example of customized optimizer is [CopyOfSGD](../mmaction/core/optimizer/copy_of_sgd.py).
+An example of customized optimizer is [CopyOfSGD](/mmaction/core/optimizer/copy_of_sgd.py).
 More generally, a customized optimizer could be defined as following.
 
 In `mmaction/core/optimizer/my_optimizer.py`:
@@ -574,10 +577,10 @@ from .my_optimizer import MyOptimizer
 Then you can use `MyOptimizer` in `optimizer` field of config files.
 
 Especially, If you want to construct a optimizer based on a specified model and param-wise config,
-You can write a new optimizer constructor inherit from [DefaultOptimizerConstructor](../mmaction/core/optimizer/default_constructor.py)
+You can write a new optimizer constructor inheriting from [DefaultOptimizerConstructor](https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/optimizer/default_constructor.py)
 and overwrite the `add_params(self, params, module)` method.
 
-An example of customized optimizer constructor is [TSMOptimizerConstructor](../mmaction/core/optimizer/tsm_optimizer_constructor.py).
+An example of customized optimizer constructor is [TSMOptimizerConstructor](/mmaction/core/optimizer/tsm_optimizer_constructor.py).
 More generally, a customized optimizer constructor could be defined as following.
 
 In `mmaction/core/optimizer/my_optimizer_constructor.py`:
@@ -638,7 +641,7 @@ Here we show how to develop new components with an example of TSN.
 
 3. Create a new file `mmaction/models/heads/tsn_head.py`.
 
-   You can write a new classification head inherit from [BaseHead](../mmaction/models/heads/base.py),
+   You can write a new classification head inheriting from [BaseHead](/mmaction/models/heads/base.py),
    and overwrite `init_weights(self)` and `forward(self, x)` method.
 
 ```python
@@ -684,5 +687,5 @@ Here we show how to develop new components with an example of TSN.
 
 ## Tutorials
 
-Currently, we provide some tutorials for users to [finetune model](tutorials/finetune.md),
-[add new dataset](tutorials/new_dataset.md), [add new modules](tutorials/new_modules.md).
+Currently, we provide some tutorials for users to [finetune model](/docs/tutorials/finetune.md), +[add new dataset](/docs/tutorials/new_dataset.md), [add new modules](/docs/tutorials/new_modules.md). diff --git a/docs/install.md b/docs/install.md index 2e0d22001b4690e85f670d408f42b511aaf4c53c..713d0c3e759d20a3f448e2f98f96ed50dc8e86ba 100644 --- a/docs/install.md +++ b/docs/install.md @@ -124,8 +124,8 @@ conda create -n open-mmlab python=3.7 -y conda activate open-mmlab conda install -c pytorch pytorch torchvision -y -git clone git@gitlab.sz.sensetime.com:open-mmlab/mmaction-lite.git -cd mmaction-lite +git clone https://github.com/open-mmlab/mmaction.git +cd mmaction pip install -r requirements/build.txt python setup.py develop diff --git a/docs/tutorials/data_pipeline.md b/docs/tutorials/data_pipeline.md index c419429188badd960263a8daf3b5e1e92b7c0862..85e8359d1747b207dea119e019341206ba42c48e 100644 --- a/docs/tutorials/data_pipeline.md +++ b/docs/tutorials/data_pipeline.md @@ -15,7 +15,7 @@ A pipeline consists of a sequence of operations. Each operation takes a dict as We present a typical pipeline in the following figure. The blue blocks are pipeline operations. With the pipeline going on, each operator can add new keys (marked as green) to the result dict or update the existing keys (marked as orange). -![pipeline figure](../imgs/data_pipeline.png) +![pipeline figure](/docs/imgs/data_pipeline.png) The operations are categorized into data loading, pre-processing and formatting. diff --git a/docs/tutorials/finetune.md b/docs/tutorials/finetune.md index 989f13a2abb7482e969a4be67e52cda280a85724..022fda3d01ed4a7bb4dc9c9cfcc9c383820d0aaf 100644 --- a/docs/tutorials/finetune.md +++ b/docs/tutorials/finetune.md @@ -1,11 +1,11 @@ # Tutorial 1: Finetuning Models -This tutorial provides instructions for users to use the pre-trained models (see [Model zoo](../model_zoo.md)) +This tutorial provides instructions for users to use the pre-trained models to finetune them on other datasets, so that better performance can be get. There are two steps to finetune a model on a new dataset. -1. Add support for the new dataset. See [Tutorial 2: Adding New Dataset](new_dataset.md). +1. Add support for the new dataset. See [Tutorial 2: Adding New Dataset](/docs/tutorials/new_dataset.md). 1. Modify the configs. This will be discussed in this tutorial. For example, if the user want to finetune models pre-trained on Kinetics-400 Dataset to another dataset, say UCF101, @@ -80,5 +80,5 @@ To use the pre-trained model for the whole network, the new config adds the link ```python # use the pre-trained model for the whole TSN network -load_from = 'https://s3.ap-northeast-2.amazonaws.com/open-mmlab/mmaction-lite/models/tsn_r50_1x1x3_100e_kinetics400_rgb_xxx.pth' # model path can be found in model zoo +load_from = 'https://open-mmlab.s3.ap-northeast-2.amazonaws.com/mmaction/mmaction-v1/recognition/tsn_r50_1x1x3_100e_kinetics400_rgb/tsn_r50_1x1x3_100e_kinetics400_rgb_20200614-e508be42.pth' # model path can be found in model zoo ``` diff --git a/docs/tutorials/new_dataset.md b/docs/tutorials/new_dataset.md index 46201b2ea136d791719f5326af0cd361857cde2d..7e2d1818b5e74926b677bd6f1294ee6210197f9e 100644 --- a/docs/tutorials/new_dataset.md +++ b/docs/tutorials/new_dataset.md @@ -84,9 +84,9 @@ There are two ways to work with custom datasets. 
 - online conversion
 
-  You can write a new Dataset class inherited from [BaseDataset](../mmaction/datasets/base.py), and overwrite three methods
+  You can write a new Dataset class inherited from [BaseDataset](/mmaction/datasets/base.py), and overwrite three methods
   `load_annotations(self)`, `evaluate(self, results, metrics, logger)` and `dump_results(self, results, out)`,
-  like [RawframeDataset](../mmaction/datasets/rawframe_dataset.py), [VideoDataset](../mmaction/datasets/video_dataset.py) or [ActivityNetDataset](../mmaction/datasets/activitynet_dataset.py).
+  like [RawframeDataset](/mmaction/datasets/rawframe_dataset.py), [VideoDataset](/mmaction/datasets/video_dataset.py) or [ActivityNetDataset](/mmaction/datasets/activitynet_dataset.py).
 
 - offline conversion
 
@@ -197,7 +197,7 @@ dataset_A_train = dict(
 
 ## Customize Dataset by Mixing Dataset
 
-MMAction also supports to mix dataset for training. Currently it supports to concat and repeat dataset.
+MMAction also supports mixing datasets for training. Currently it supports repeating a dataset.
 
 ### Repeat dataset
 
@@ -215,5 +215,3 @@ dataset_A_train = dict(
     )
 )
 ```
-
-### Concatenate dataset (TODO)
diff --git a/docs/tutorials/new_modules.md b/docs/tutorials/new_modules.md
index cd24b57fd928d8145a994eca3daeec531c6902a5..e3b2bb786778c3299004789a78df10e15972d97e 100644
--- a/docs/tutorials/new_modules.md
+++ b/docs/tutorials/new_modules.md
@@ -2,7 +2,7 @@
 
 ## Customize Optimizer
 
-An example of customized optimizer is [CopyOfSGD](../mmaction/core/optimizer/copy_of_sgd.py) is defined in `mmaction/core/optimizer/copy_of_sgd.py`.
+An example of a customized optimizer is [CopyOfSGD](/mmaction/core/optimizer/copy_of_sgd.py), defined in `mmaction/core/optimizer/copy_of_sgd.py`.
 More generally, a customized optimizer could be defined as following.
 
 Assume you want to add an optimizer named as `MyOptimizer`, which has arguments `a`, `b` and `c`.
@@ -51,10 +51,10 @@ The users can directly set arguments following the [API doc](https://pytorch.org
 Some models may have some parameter-specific settings for optimization, e.g. weight decay for BatchNorm layers.
 The users can do those fine-grained parameter tuning through customizing optimizer constructor.
 
-You can write a new optimizer constructor inherit from [DefaultOptimizerConstructor](../mmaction/core/optimizer/default_constructor.py)
+You can write a new optimizer constructor inheriting from [DefaultOptimizerConstructor](https://github.com/open-mmlab/mmcv/blob/master/mmcv/runner/optimizer/default_constructor.py)
 and overwrite the `add_params(self, params, module)` method.
 
-An example of customized optimizer constructor is [TSMOptimizerConstructor](../mmaction/core/optimizer/tsm_optimizer_constructor.py).
+An example of customized optimizer constructor is [TSMOptimizerConstructor](/mmaction/core/optimizer/tsm_optimizer_constructor.py).
 More generally, a customized optimizer constructor could be defined as following.
 
 In `mmaction/core/optimizer/my_optimizer_constructor.py`:
@@ -144,7 +144,7 @@ Here we show how to develop a new head with the example of TSNHead as the follow
 
 1. Create a new file `mmaction/models/heads/tsn_head.py`.
 
-   You can write a new classification head inheriting from [BaseHead](../mmaction/models/heads/base.py),
+   You can write a new classification head inheriting from [BaseHead](/mmaction/models/heads/base.py),
    and overwrite `init_weights(self)` and `forward(self, x)` method.
```python diff --git a/tools/data/build_file_list.py b/tools/data/build_file_list.py index 464e2d269fedb15efe51d171311f0c5b624fd5e3..ad17d7e3ed6b8dbe7f222fa653bce830babc9476 100644 --- a/tools/data/build_file_list.py +++ b/tools/data/build_file_list.py @@ -20,7 +20,7 @@ def parse_args(): ], help='dataset to be built file list') parser.add_argument( - 'frame_path', type=str, help='root directory for the frames or videos') + 'src_folder', type=str, help='root directory for the frames or videos') parser.add_argument( '--rgb_prefix', type=str, default='img_', help='prefix of rgb frames') parser.add_argument( @@ -160,7 +160,7 @@ def main(): if args.format == 'rawframes': frame_info = parse_directory( - args.frame_path, + args.src_folder, key_func=key_func, rgb_prefix=args.rgb_prefix, flow_x_prefix=args.flow_x_prefix, @@ -169,15 +169,15 @@ def main(): elif args.format == 'videos': if args.level == 1: # search for one-level directory - video_list = glob.glob(osp.join(args.frame_path, '*')) + video_list = glob.glob(osp.join(args.src_folder, '*')) elif args.level == 2: # search for two-level directory - video_list = glob.glob(osp.join(args.frame_path, '*', '*')) + video_list = glob.glob(osp.join(args.src_folder, '*', '*')) else: raise ValueError(f'level must be 1 or 2, but got {args.level}') frame_info = {} for video in video_list: - video_path = osp.relpath(video, args.frame_path) + video_path = osp.relpath(video, args.src_folder) # video_id: (video_relative_path, -1, -1) frame_info['.'.join(video_path.split('.')[:-1])] = (video_path, -1, -1)
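A note on the `frame_path` → `src_folder` rename above: the keying scheme in `main()` is unchanged, it simply reads from the renamed argument. The self-contained sketch below shows what one `frame_info` entry looks like for the `videos` format; the folder and file names are made up for illustration.

```python
# Illustrates the frame_info entries built in tools/data/build_file_list.py
# for the 'videos' format: a video id (relative path without extension)
# mapped to (relative_path, -1, -1). Paths below are hypothetical.
import os.path as osp

src_folder = 'data/kinetics400/videos_train'             # hypothetical src_folder
video = osp.join(src_folder, 'abseiling/video_001.mp4')  # as found by glob at level 2

video_path = osp.relpath(video, src_folder)      # 'abseiling/video_001.mp4'
video_id = '.'.join(video_path.split('.')[:-1])  # 'abseiling/video_001'

frame_info = {video_id: (video_path, -1, -1)}
print(frame_info)  # {'abseiling/video_001': ('abseiling/video_001.mp4', -1, -1)}
```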