From 26e5e540f624429bf23b8af5a4dca0f0a8c56057 Mon Sep 17 00:00:00 2001 From: littletomatodonkey Date: Wed, 10 Mar 2021 14:41:23 +0000 Subject: [PATCH] improve doc, test=document_fix --- docs/en/tutorials/install_en.md | 20 ++++--- docs/en/tutorials/quick_start_en.md | 84 ++++++++++++++--------------- docs/zh_CN/tutorials/install.md | 28 ++++++---- docs/zh_CN/tutorials/quick_start.md | 80 +++++++++++++-------------- 4 files changed, 105 insertions(+), 107 deletions(-) diff --git a/docs/en/tutorials/install_en.md b/docs/en/tutorials/install_en.md index 28e77f75..c2140ad0 100644 --- a/docs/en/tutorials/install_en.md +++ b/docs/en/tutorials/install_en.md @@ -16,13 +16,13 @@ Python 3.x, CUDA 10.0, CUDNN7.6.4 nccl2.1.2 and later version are required at fi If you want to use PaddlePaddle on GPU, you can use the following command to install PaddlePaddle. ```bash -pip install paddlepaddle-gpu==2.0.0 --upgrade +pip3 install paddlepaddle-gpu==2.0.0 --upgrade -i https://mirror.baidu.com/pypi/simple ``` If you want to use PaddlePaddle on CPU, you can use the following command to install PaddlePaddle. ```bash -pip install paddlepaddle==2.0.0 --upgrade +pip3 install paddlepaddle==2.0.0 --upgrade -i https://mirror.baidu.com/pypi/simple ``` ### Install PaddlePaddle from source code @@ -39,7 +39,7 @@ paddle.utils.run_check() Check PaddlePaddle version: ```bash -python -c "import paddle; print(paddle.__version__)" +python3 -c "import paddle; print(paddle.__version__)" ``` Note: @@ -53,21 +53,25 @@ Note: **Clone PaddleClas: ** ``` -cd path_to_clone_PaddleClas -git clone https://github.com/PaddlePaddle/PaddleClas.git +git clone https://github.com/PaddlePaddle/PaddleClas.git -b release/2.0 ``` -**Install requirements** +If it is too slow for you to download from github, you can download PaddleClas from gitee. The command is as follows. 
+```bash +git clone https://gitee.com/paddlepaddle/PaddleClas.git -b release/2.0 +``` + +**Install requirements** ``` -pip install --upgrade -r requirements.txt +pip3 install --upgrade -r requirements.txt -i https://mirror.baidu.com/pypi/simple ``` If the install process of visualdl failed, you can try the following commands. ``` -pip3 install --upgrade visualdl==2.0.0b3 -i https://mirror.baidu.com/pypi/simple +pip3 install --upgrade visualdl==2.1.1 -i https://mirror.baidu.com/pypi/simple ``` diff --git a/docs/en/tutorials/quick_start_en.md b/docs/en/tutorials/quick_start_en.md index 42c8fdfc..809f3d47 100644 --- a/docs/en/tutorials/quick_start_en.md +++ b/docs/en/tutorials/quick_start_en.md @@ -1,6 +1,6 @@ # Trial in 30mins -Based on the flowers102 dataset, it takes only 30 mins to experience PaddleClas, include training varieties of backbone and pretrained model, SSLD distillation, and multiple data augmentation, Please refer to [Installation](install.md) to install at first. +Based on the flowers102 dataset, it takes only 30 mins to experience PaddleClas, include training varieties of backbone and pretrained model, SSLD distillation, and multiple data augmentation, Please refer to [Installation](install_en.md) to install at first. ## Preparation @@ -11,28 +11,17 @@ Based on the flowers102 dataset, it takes only 30 mins to experience PaddleClas, cd path_to_PaddleClas ``` -* enter `dataset/flowers102`, download and decompress flowers102 dataset. +* Enter `dataset/flowers102`, download and decompress flowers102 dataset. ```shell cd dataset/flowers102 -wget https://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz -wget https://www.robots.ox.ac.uk/~vgg/data/flowers/102/imagelabels.mat -wget https://www.robots.ox.ac.uk/~vgg/data/flowers/102/setid.mat -tar -xf 102flowers.tgz +# If you want to download from the browser, you can copy the link, visit it +# in the browser, download and then decompress. 
+wget https://paddle-imagenet-models-name.bj.bcebos.com/data/flowers102.zip +unzip flowers102.zip ``` -* create train/val/test label files - -```shell -python generate_flowers102_list.py jpg train > train_list.txt -python generate_flowers102_list.py jpg valid > val_list.txt -python generate_flowers102_list.py jpg test > extra_list.txt -cat train_list.txt extra_list.txt > train_extra_list.txt -``` - -**Note:** In order to offer more data to SSLD training task, train_list.txt and extra_list.txt will merge into train_extra_list.txft - -* return `PaddleClas` dir +* Return `PaddleClas` dir ``` cd ../../ @@ -67,11 +56,7 @@ cd ../ ```shell export CUDA_VISIBLE_DEVICES=0 -python -m paddle.distributed.launch \ -    --gpus="0" \ -    tools/train.py \ -        -c ./configs/quick_start/ResNet50_vd.yaml - +python3 tools/train.py -c ./configs/quick_start/ResNet50_vd.yaml ``` The validation `Top1 Acc` curve is shown below. @@ -85,11 +70,7 @@ The validation `Top1 Acc` curve is shown below. ```shell export CUDA_VISIBLE_DEVICES=0 -python -m paddle.distributed.launch \ -    --gpus="0" \ -    tools/train.py \ -        -c ./configs/quick_start/ResNet50_vd_finetune.yaml - +python3 tools/train.py -c ./configs/quick_start/ResNet50_vd_finetune.yaml ``` The validation `Top1 Acc` curve is shown below @@ -99,6 +80,31 @@ The validation `Top1 Acc` curve is shown below Compare with training from scratch, it improve by 65\% to 94.02\% +You can use the trained model to infer the result of image `docs/images/quick_start/flowers102/image_06739.jpg`. The command is as follows. + + +```shell +python3 tools/infer/infer.py \ + -i docs/images/quick_start/flowers102/image_06739.jpg \ + --model=ResNet50_vd \ + --pretrained_model="output/ResNet50_vd/best_model/ppcls" \ + --class_num=102 +``` + +The output is as follows. Top-5 class ids and their scores are printed. 
+ +``` +Current image file: docs/images/quick_start/flowers102/image_06739.jpg + top1, class id: 0, probability: 0.5129 + top2, class id: 50, probability: 0.0671 + top3, class id: 18, probability: 0.0377 + top4, class id: 82, probability: 0.0238 + top5, class id: 54, probability: 0.0231 +``` + +* Note: Results are different for different models, so you might get different results for the command. + + ### SSLD finetune - ResNet50_vd_ssld pretrained model (Acc 82.39\%) Note: when finetuning model, which has been trained by SSLD, please use smaller learning rate in the middle of net. @@ -115,10 +121,7 @@ Tringing script ```shell export CUDA_VISIBLE_DEVICES=0 -python -m paddle.distributed.launch \ -    --gpus="0" \ -    tools/train.py \ -        -c ./configs/quick_start/ResNet50_vd_ssld_finetune.yaml +python3 tools/train.py -c ./configs/quick_start/ResNet50_vd_ssld_finetune.yaml ``` Compare with finetune on the 79.12% pretrained model, it improve by 0.9% to 95%. @@ -130,10 +133,7 @@ Training script ```shell export CUDA_VISIBLE_DEVICES=0 -python -m paddle.distributed.launch \ -    --gpus="0" \ -    tools/train.py \ -        -c ./configs/quick_start/MobileNetV3_large_x1_0_finetune.yaml +python3 tools/train.py -c ./configs/quick_start/MobileNetV3_large_x1_0_finetune.yaml ``` Compare with ResNet50_vd pretrained model, it decrease by 5% to 90%. Different architecture generates different performance, actually it is a task-oriented decision to apply the best performance model, should consider the inference time, storage, heterogeneous device, etc. 
@@ -147,10 +147,7 @@ Training script ```shell export CUDA_VISIBLE_DEVICES=0 -python -m paddle.distributed.launch \ -    --gpus="0" \ -    tools/train.py \ -        -c ./configs/quick_start/ResNet50_vd_ssld_random_erasing_finetune.yaml +python3 tools/train.py -c ./configs/quick_start/ResNet50_vd_ssld_random_erasing_finetune.yaml ``` It improves by 1.27\% to 96.27\% @@ -184,10 +181,7 @@ Final training script ```shell export CUDA_VISIBLE_DEVICES=0 -python -m paddle.distributed.launch \ -    --gpus="0" \ -    tools/train.py \ -        -c ./configs/quick_start/R50_vd_distill_MV3_large_x1_0.yaml +python3 tools/train.py -c ./configs/quick_start/R50_vd_distill_MV3_large_x1_0.yaml ``` It significantly imporve by 6.47% to 96.47% with more unlabeled data and teacher model. @@ -214,4 +208,4 @@ The whole accuracy curves are shown below * **NOTE**: As flowers102 is a small dataset, validatation accuracy maybe float 1%. -* Please refer to [Getting_started](./getting_started) for more details +* Please refer to [Getting_started](./getting_started_en.md) for more details diff --git a/docs/zh_CN/tutorials/install.md b/docs/zh_CN/tutorials/install.md index a26d7bef..eaf4af14 100644 --- a/docs/zh_CN/tutorials/install.md +++ b/docs/zh_CN/tutorials/install.md @@ -16,13 +16,13 @@ 如果已经安装好了cuda、cudnn、nccl或者安装好了docker、nvidia-docker运行环境,可以pip安装最新GPU版本PaddlePaddle ```bash -pip install paddlepaddle-gpu==2.0.0 --upgrade +pip3 install paddlepaddle-gpu==2.0.0 --upgrade -i https://mirror.baidu.com/pypi/simple ``` 如果希望在CPU环境中使用PaddlePaddle,可以运行下面的命令安装PaddlePaddle。 ```bash -pip install paddlepaddle==2.0.0 --upgrade +pip3 install paddlepaddle==2.0.0 --upgrade -i https://mirror.baidu.com/pypi/simple ``` ### 源码编译PaddlePaddle @@ -40,7 +40,7 @@ paddle.utils.run_check() 查看PaddlePaddle版本的命令如下: ```bash -python -c "import paddle; print(paddle.__version__)" +python3 -c "import paddle; print(paddle.__version__)" ``` 注意: @@ -60,24 +60,30 @@ python -c "import paddle; print(paddle.__version__)" 
**克隆PaddleClas模型库:** +```bash +git clone https://github.com/PaddlePaddle/PaddleClas.git -b release/2.0 ``` -cd path_to_clone_PaddleClas -git clone https://github.com/PaddlePaddle/PaddleClas.git + +如果从github上网速太慢,可以从gitee下载,下载命令如下: + +```bash +git clone https://gitee.com/paddlepaddle/PaddleClas.git -b release/2.0 ``` + + **安装Python依赖库:** -Python依赖库在[requirements.txt](https://github.com/PaddlePaddle/PaddleClas/blob/master/requirements.txt)中给出,可通过如下命令安装: +Python依赖库在[requirements.txt](https://github.com/PaddlePaddle/PaddleClas/blob/release/2.0/requirements.txt)中给出,可通过如下命令安装: -``` -pip install --upgrade -r requirements.txt +```bash +pip3 install --upgrade -r requirements.txt -i https://mirror.baidu.com/pypi/simple ``` visualdl可能出现安装失败,请尝试 -``` -pip3 install --upgrade visualdl==2.0.0b3 -i https://mirror.baidu.com/pypi/simple - +```bash +pip3 install --upgrade visualdl==2.1.1 -i https://mirror.baidu.com/pypi/simple ``` 此外,visualdl目前只支持在python3下运行,因此如果希望使用visualdl,需要使用python3。 diff --git a/docs/zh_CN/tutorials/quick_start.md b/docs/zh_CN/tutorials/quick_start.md index 09c35a9d..1a04858b 100644 --- a/docs/zh_CN/tutorials/quick_start.md +++ b/docs/zh_CN/tutorials/quick_start.md @@ -11,27 +11,16 @@ cd path_to_PaddleClas ``` -* 进入`dataset/flowers102`目录,下载并解压flowers102数据集. 
+* 进入`dataset/flowers102`目录,下载并解压flowers102数据集。 -```shell -cd dataset/flowers102 -wget https://www.robots.ox.ac.uk/~vgg/data/flowers/102/102flowers.tgz -wget https://www.robots.ox.ac.uk/~vgg/data/flowers/102/imagelabels.mat -wget https://www.robots.ox.ac.uk/~vgg/data/flowers/102/setid.mat -tar -xf 102flowers.tgz -``` - -* 制作train/val/test标签文件 ```shell -python generate_flowers102_list.py jpg train > train_list.txt -python generate_flowers102_list.py jpg valid > val_list.txt -python generate_flowers102_list.py jpg test > extra_list.txt -cat train_list.txt extra_list.txt > train_extra_list.txt +cd dataset/flowers102 +# 如果希望从浏览器中直接下载,可以复制该链接并访问,然后下载解压即可 +wget https://paddle-imagenet-models-name.bj.bcebos.com/data/flowers102.zip +unzip flowers102.zip ``` -**注意**:这里将train_list.txt和extra_list.txt合并成train_extra_list.txt,是为了之后在进行知识蒸馏时,使用更多的数据提升无标签知识蒸馏任务的效果。 - * 返回`PaddleClas`根目录 ``` @@ -50,7 +39,6 @@ cd pretrained wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_pretrained.pdparams wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/ResNet50_vd_ssld_pretrained.pdparams wget https://paddle-imagenet-models-name.bj.bcebos.com/dygraph/MobileNetV3_large_x1_0_pretrained.pdparams - cd ../ ``` @@ -69,11 +57,7 @@ cd ../ ```shell export CUDA_VISIBLE_DEVICES=0 -python -m paddle.distributed.launch \ -    --gpus="0" \ -    tools/train.py \ -        -c ./configs/quick_start/ResNet50_vd.yaml - +python3 tools/train.py -c ./configs/quick_start/ResNet50_vd.yaml ``` 验证集的`Top1 Acc`曲线如下所示,最高准确率为0.2735。 @@ -87,19 +71,39 @@ python -m paddle.distributed.launch \ ```shell export CUDA_VISIBLE_DEVICES=0 -python -m paddle.distributed.launch \ -    --gpus="0" \ -    tools/train.py \ -        -c ./configs/quick_start/ResNet50_vd_finetune.yaml - +python3 tools/train.py -c ./configs/quick_start/ResNet50_vd_finetune.yaml ``` 验证集的`Top1 Acc`曲线如下所示,最高准确率为0.9402,加载预训练模型之后,flowers102数据集精度大幅提升,绝对精度涨幅超过65\%。 ![](../../images/quick_start/r50_vd_pretrained_acc.png) -### 3.3 
SSLD模型微调-基于ResNet50_vd_ssld预训练模型(准确率82.39\%) +使用训练完的预训练模型对图片`docs/images/quick_start/flowers102/image_06739.jpg`进行预测,预测命令为 + +```shell +python3 tools/infer/infer.py \ + -i docs/images/quick_start/flowers102/image_06739.jpg \ + --model=ResNet50_vd \ + --pretrained_model="output/ResNet50_vd/best_model/ppcls" \ + --class_num=102 +``` + +最终可以得到如下结果,打印出了Top-5对应的class id以及score。 + +``` +Current image file: docs/images/quick_start/flowers102/image_06739.jpg + top1, class id: 0, probability: 0.5129 + top2, class id: 50, probability: 0.0671 + top3, class id: 18, probability: 0.0377 + top4, class id: 82, probability: 0.0238 + top5, class id: 54, probability: 0.0231 +``` + +* 注意:这里每个模型的训练结果都不相同,因此结果可能稍有不同。 + + +### 3.3 SSLD模型微调-基于ResNet50_vd_ssld预训练模型(准确率82.39\%) 需要注意的是,在使用通过知识蒸馏得到的预训练模型进行微调时,我们推荐使用相对较小的网络中间层学习率。 @@ -113,12 +117,10 @@ pretrained_model: "./pretrained/ResNet50_vd_ssld_pretrained" ``` 训练脚本如下。 + ```shell export CUDA_VISIBLE_DEVICES=0 -python -m paddle.distributed.launch \ -    --gpus="0" \ -    tools/train.py \ -        -c ./configs/quick_start/ResNet50_vd_ssld_finetune.yaml +python3 tools/train.py -c ./configs/quick_start/ResNet50_vd_ssld_finetune.yaml ``` 最终flowers102验证集上精度指标为0.95,相对于79.12\%预训练模型的微调结构,新数据集指标可以再次提升0.9\%。 @@ -130,10 +132,7 @@ python -m paddle.distributed.launch \ ```shell export CUDA_VISIBLE_DEVICES=0 -python -m paddle.distributed.launch \ -    --gpus="0" \ -    tools/train.py \ -        -c ./configs/quick_start/MobileNetV3_large_x1_0_finetune.yaml +python3 tools/train.py -c ./configs/quick_start/MobileNetV3_large_x1_0_finetune.yaml ``` 最终flowers102验证集上的精度为0.90,比加载了预训练模型的ResNet50_vd的精度差了5\%。不同模型结构的网络在相同数据集上的性能表现不同,需要根据预测耗时以及存储的需求选择合适的模型。 @@ -146,10 +145,7 @@ python -m paddle.distributed.launch \ ```shell export CUDA_VISIBLE_DEVICES=0 -python -m paddle.distributed.launch \ -    --gpus="0" \ -    tools/train.py \ -        -c ./configs/quick_start/ResNet50_vd_ssld_random_erasing_finetune.yaml +python3 tools/train.py -c 
./configs/quick_start/ResNet50_vd_ssld_random_erasing_finetune.yaml ``` 最终flowers102验证集上的精度为0.9627,使用数据增广可以使得模型精度再次提升1.27\%。 @@ -185,9 +181,7 @@ TRAIN: ```shell export CUDA_VISIBLE_DEVICES=0 -python -m paddle.distributed.launch \ -    --gpus="0" \ -    tools/train.py \ +python3 tools/train.py \         -c ./configs/quick_start/R50_vd_distill_MV3_large_x1_0.yaml ``` -- GitLab