From d6c651114ac0e58ad214dc39a76f099c5d0d9dd0 Mon Sep 17 00:00:00 2001 From: ruri Date: Thu, 5 Sep 2019 21:16:45 +0800 Subject: [PATCH] Refine Image classification (#2974) * Refine Image classification --- PaddleCV/image_classification/README.md | 520 +++++---- .../{README_cn.md => README_en.md} | 342 +++--- PaddleCV/image_classification/build_model.py | 119 ++ PaddleCV/image_classification/eval.py | 51 +- PaddleCV/image_classification/infer.py | 73 +- .../image_classification/legacy/README.md | 6 + .../{ => legacy}/dist_train/README.md | 4 +- .../{ => legacy}/dist_train/__init__.py | 0 .../{ => legacy}/dist_train/batch_merge.py | 16 +- .../{ => legacy}/dist_train/dist_train.py | 0 .../{ => legacy}/dist_train/dist_utils.py | 0 .../{ => legacy}/dist_train/env.py | 4 +- .../{ => legacy}/dist_train/run_mp_mode.sh | 0 .../{ => legacy}/dist_train/run_nccl2_mode.sh | 0 .../{ => legacy}/dist_train/run_ps_mode.sh | 0 .../images/alexnet_imagenet1k_acc1.png | Bin .../{ => legacy}/images/curve.jpg | Bin .../images/imagenet_dist_performance.png | Bin .../images/imagenet_dist_speedup.png | Bin .../images/mobielenetv1_imagenet1k_acc1.png | Bin .../images/resnet101_imagenet1k_acc1.png | Bin .../images/resnet50_32gpus-acc1.png | Bin .../images/resnet50_imagenet1k_acc1.png | Bin .../{ => legacy}/images/resnet_dgc.png | Bin .../images/vgg11_imagenet1k_acc1.png | Bin .../image_classification/legacy/reader_pil.py | 219 ++++ .../image_classification/models/__init__.py | 27 +- .../image_classification/models/alexnet.py | 14 +- .../image_classification/models/darknet.py | 101 +- .../image_classification/models/densenet.py | 213 ++-- PaddleCV/image_classification/models/dpn.py | 13 - .../image_classification/models/googlenet.py | 23 +- .../models/inception_v4.py | 15 +- .../image_classification/models/mobilenet.py | 21 +- .../models/mobilenet_v2.py | 49 +- .../image_classification/models/resnet.py | 122 +- .../image_classification/models/resnet_vd.py | 160 +-- 
.../image_classification/models/resnext.py | 48 +- .../models/resnext101_wsl.py | 77 +- .../image_classification/models/resnext_vd.py | 199 ++-- .../image_classification/models/se_resnext.py | 17 +- .../models/se_resnext_vd.py | 215 ++-- .../models/shufflenet_v2.py | 265 +++-- .../models/shufflenet_v2_swish.py | 272 +++-- .../image_classification/models/squeezenet.py | 144 +-- PaddleCV/image_classification/models/vgg.py | 16 +- .../image_classification/models/xception.py | 232 ++-- PaddleCV/image_classification/reader.py | 390 ++++--- PaddleCV/image_classification/reader_cv2.py | 307 ----- PaddleCV/image_classification/run.sh | 808 +------------ .../scripts/train/AlexNet.sh | 19 + .../scripts/train/DarkNet53.sh | 17 + .../scripts/train/DenseNet121.sh | 13 + .../scripts/train/DenseNet161.sh | 13 + .../scripts/train/DenseNet169.sh | 13 + .../scripts/train/DenseNet201.sh | 13 + .../scripts/train/DenseNet264.sh | 13 + .../scripts/train/GoogLeNet.sh | 19 + .../scripts/train/InceptionV4.sh | 23 + .../scripts/train/MobileNetV1.sh | 19 + .../scripts/train/MobileNetV2.sh | 19 + .../scripts/train/MobileNetV2_x0_25.sh | 14 + .../scripts/train/MobileNetV2_x0_5.sh | 14 + .../scripts/train/MobileNetV2_x1_5.sh | 12 + .../scripts/train/MobileNetV2_x2_0.sh | 12 + .../scripts/train/ResNeXt101_32x4d.sh | 14 + .../scripts/train/ResNeXt101_64x4d.sh | 19 + .../scripts/train/ResNeXt101_vd_64x4d.sh | 22 + .../scripts/train/ResNeXt152_32x4d.sh | 12 + .../scripts/train/ResNeXt152_64x4d.sh | 18 + .../scripts/train/ResNeXt50_32x4d.sh | 11 + .../scripts/train/ResNeXt50_vd_64x4d.sh | 15 + .../scripts/train/ResNet101.sh | 19 + .../scripts/train/ResNet101_vd.sh | 22 + .../scripts/train/ResNet152.sh | 19 + .../scripts/train/ResNet152_vd.sh | 21 + .../scripts/train/ResNet18.sh | 19 + .../scripts/train/ResNet200_vd.sh | 22 + .../scripts/train/ResNet34.sh | 18 + .../scripts/train/ResNet50.sh | 19 + .../scripts/train/ResNet50_vc.sh | 19 + .../scripts/train/ResNet50_vd.sh | 21 + 
.../scripts/train/SE_154_vd.sh | 22 + .../scripts/train/SE_ResNeXt101_32x4d.sh | 19 + .../scripts/train/SE_ResNeXt50_32x4d.sh | 20 + .../scripts/train/ShuffleNetV2.sh | 18 + .../scripts/train/ShuffleNetV2_swish.sh | 18 + .../scripts/train/ShuffleNetV2_x0_25.sh | 15 + .../scripts/train/ShuffleNetV2_x0_33.sh | 15 + .../scripts/train/ShuffleNetV2_x0_5.sh | 15 + .../scripts/train/ShuffleNetV2_x1_5.sh | 13 + .../scripts/train/ShuffleNetV2_x2_0.sh | 12 + .../scripts/train/SqueezeNet1_0.sh | 12 + .../scripts/train/SqueezeNet1_1.sh | 12 + .../scripts/train/VGG11.sh | 19 + .../scripts/train/VGG13.sh | 19 + .../scripts/train/VGG16.sh | 19 + .../scripts/train/VGG19.sh | 19 + .../scripts/train/Xception_41.sh | 12 + PaddleCV/image_classification/train.py | 648 +++-------- .../image_classification/utils/__init__.py | 18 +- .../image_classification/utils/fp16_utils.py | 133 --- .../utils/learning_rate.py | 93 -- .../image_classification/utils/optimizer.py | 242 ++++ .../utils/tools/readable_label.txt | 1000 +++++++++++++++++ .../image_classification/utils/utility.py | 327 +++++- 106 files changed, 4912 insertions(+), 3473 deletions(-) rename PaddleCV/image_classification/{README_cn.md => README_en.md} (52%) create mode 100644 PaddleCV/image_classification/build_model.py rename PaddleCV/image_classification/{ => legacy}/dist_train/README.md (99%) rename PaddleCV/image_classification/{ => legacy}/dist_train/__init__.py (100%) rename PaddleCV/image_classification/{ => legacy}/dist_train/batch_merge.py (89%) rename PaddleCV/image_classification/{ => legacy}/dist_train/dist_train.py (100%) rename PaddleCV/image_classification/{ => legacy}/dist_train/dist_utils.py (100%) rename PaddleCV/image_classification/{ => legacy}/dist_train/env.py (95%) rename PaddleCV/image_classification/{ => legacy}/dist_train/run_mp_mode.sh (100%) rename PaddleCV/image_classification/{ => legacy}/dist_train/run_nccl2_mode.sh (100%) rename PaddleCV/image_classification/{ => legacy}/dist_train/run_ps_mode.sh 
(100%) rename PaddleCV/image_classification/{ => legacy}/images/alexnet_imagenet1k_acc1.png (100%) rename PaddleCV/image_classification/{ => legacy}/images/curve.jpg (100%) rename PaddleCV/image_classification/{ => legacy}/images/imagenet_dist_performance.png (100%) rename PaddleCV/image_classification/{ => legacy}/images/imagenet_dist_speedup.png (100%) rename PaddleCV/image_classification/{ => legacy}/images/mobielenetv1_imagenet1k_acc1.png (100%) rename PaddleCV/image_classification/{ => legacy}/images/resnet101_imagenet1k_acc1.png (100%) rename PaddleCV/image_classification/{ => legacy}/images/resnet50_32gpus-acc1.png (100%) rename PaddleCV/image_classification/{ => legacy}/images/resnet50_imagenet1k_acc1.png (100%) rename PaddleCV/image_classification/{ => legacy}/images/resnet_dgc.png (100%) rename PaddleCV/image_classification/{ => legacy}/images/vgg11_imagenet1k_acc1.png (100%) create mode 100755 PaddleCV/image_classification/legacy/reader_pil.py mode change 100755 => 100644 PaddleCV/image_classification/reader.py delete mode 100644 PaddleCV/image_classification/reader_cv2.py create mode 100644 PaddleCV/image_classification/scripts/train/AlexNet.sh create mode 100644 PaddleCV/image_classification/scripts/train/DarkNet53.sh create mode 100644 PaddleCV/image_classification/scripts/train/DenseNet121.sh create mode 100644 PaddleCV/image_classification/scripts/train/DenseNet161.sh create mode 100644 PaddleCV/image_classification/scripts/train/DenseNet169.sh create mode 100644 PaddleCV/image_classification/scripts/train/DenseNet201.sh create mode 100644 PaddleCV/image_classification/scripts/train/DenseNet264.sh create mode 100644 PaddleCV/image_classification/scripts/train/GoogLeNet.sh create mode 100644 PaddleCV/image_classification/scripts/train/InceptionV4.sh create mode 100644 PaddleCV/image_classification/scripts/train/MobileNetV1.sh create mode 100644 PaddleCV/image_classification/scripts/train/MobileNetV2.sh create mode 100644 
PaddleCV/image_classification/scripts/train/MobileNetV2_x0_25.sh create mode 100644 PaddleCV/image_classification/scripts/train/MobileNetV2_x0_5.sh create mode 100644 PaddleCV/image_classification/scripts/train/MobileNetV2_x1_5.sh create mode 100644 PaddleCV/image_classification/scripts/train/MobileNetV2_x2_0.sh create mode 100644 PaddleCV/image_classification/scripts/train/ResNeXt101_32x4d.sh create mode 100644 PaddleCV/image_classification/scripts/train/ResNeXt101_64x4d.sh create mode 100644 PaddleCV/image_classification/scripts/train/ResNeXt101_vd_64x4d.sh create mode 100644 PaddleCV/image_classification/scripts/train/ResNeXt152_32x4d.sh create mode 100644 PaddleCV/image_classification/scripts/train/ResNeXt152_64x4d.sh create mode 100644 PaddleCV/image_classification/scripts/train/ResNeXt50_32x4d.sh create mode 100644 PaddleCV/image_classification/scripts/train/ResNeXt50_vd_64x4d.sh create mode 100644 PaddleCV/image_classification/scripts/train/ResNet101.sh create mode 100644 PaddleCV/image_classification/scripts/train/ResNet101_vd.sh create mode 100644 PaddleCV/image_classification/scripts/train/ResNet152.sh create mode 100644 PaddleCV/image_classification/scripts/train/ResNet152_vd.sh create mode 100644 PaddleCV/image_classification/scripts/train/ResNet18.sh create mode 100644 PaddleCV/image_classification/scripts/train/ResNet200_vd.sh create mode 100644 PaddleCV/image_classification/scripts/train/ResNet34.sh create mode 100644 PaddleCV/image_classification/scripts/train/ResNet50.sh create mode 100644 PaddleCV/image_classification/scripts/train/ResNet50_vc.sh create mode 100644 PaddleCV/image_classification/scripts/train/ResNet50_vd.sh create mode 100644 PaddleCV/image_classification/scripts/train/SE_154_vd.sh create mode 100644 PaddleCV/image_classification/scripts/train/SE_ResNeXt101_32x4d.sh create mode 100644 PaddleCV/image_classification/scripts/train/SE_ResNeXt50_32x4d.sh create mode 100644 PaddleCV/image_classification/scripts/train/ShuffleNetV2.sh 
create mode 100644 PaddleCV/image_classification/scripts/train/ShuffleNetV2_swish.sh create mode 100644 PaddleCV/image_classification/scripts/train/ShuffleNetV2_x0_25.sh create mode 100644 PaddleCV/image_classification/scripts/train/ShuffleNetV2_x0_33.sh create mode 100644 PaddleCV/image_classification/scripts/train/ShuffleNetV2_x0_5.sh create mode 100644 PaddleCV/image_classification/scripts/train/ShuffleNetV2_x1_5.sh create mode 100644 PaddleCV/image_classification/scripts/train/ShuffleNetV2_x2_0.sh create mode 100644 PaddleCV/image_classification/scripts/train/SqueezeNet1_0.sh create mode 100644 PaddleCV/image_classification/scripts/train/SqueezeNet1_1.sh create mode 100644 PaddleCV/image_classification/scripts/train/VGG11.sh create mode 100644 PaddleCV/image_classification/scripts/train/VGG13.sh create mode 100644 PaddleCV/image_classification/scripts/train/VGG16.sh create mode 100644 PaddleCV/image_classification/scripts/train/VGG19.sh create mode 100644 PaddleCV/image_classification/scripts/train/Xception_41.sh delete mode 100644 PaddleCV/image_classification/utils/fp16_utils.py delete mode 100644 PaddleCV/image_classification/utils/learning_rate.py create mode 100644 PaddleCV/image_classification/utils/optimizer.py create mode 100644 PaddleCV/image_classification/utils/tools/readable_label.txt diff --git a/PaddleCV/image_classification/README.md b/PaddleCV/image_classification/README.md index a3ddec56..758aecbf 100644 --- a/PaddleCV/image_classification/README.md +++ b/PaddleCV/image_classification/README.md @@ -1,294 +1,359 @@ -# Image Classification and Model Zoo +中文 | [English](README_en.md) + +# 图像分类以及模型库 + +## 内容 +- [简介](#简介) +- [快速开始](#快速开始) + - [安装说明](#安装说明) + - [数据准备](#数据准备) + - [模型训练](#模型训练) + - [参数微调](#参数微调) + - [模型评估](#模型评估) + - [模型预测](#模型预测) +- [进阶使用](#进阶使用) + - [Mixup训练](#mixup训练) + - [混合精度训练](#混合精度训练) + - [自定义数据集](#自定义数据集) +- [已发布模型及其性能](#已发布模型及其性能) +- [FAQ](#faq) +- [参考文献](#参考文献) +- [版本更新](#版本更新) +- [如何贡献代码](#如何贡献代码) --- -## Table of Contents 
-- [Introduction](#introduction) -- [Quick Start](#quick-start) - - [Installation](#installation) - - [Data preparation](#data-preparation) - - [Training](#training) - - [Finetuning](#finetuning) - - [Evaluation](#evaluation) - - [Inference](#inference) -- [Advanced Usage](#advanced-usage) - - [Using Mixed-Precision Training](#using-mixed-precision-training) - - [CE](#ce) -- [Supported Models and Performances](#supported-models-and-performances) -- [Reference](#reference) -- [Update](#update) -- [Contribute](#contribute) +## 简介 +图像分类是计算机视觉的重要领域,它的目标是将图像分类到预定义的标签。近期,许多研究者提出很多不同种类的神经网络,并且极大的提升了分类算法的性能。本页将介绍如何使用PaddlePaddle进行图像分类。 -## Introduction +## 快速开始 -Image classification, which is an important field of computer vision, is to classify an image into pre-defined labels. Recently, many researchers developed different kinds of neural networks and highly improve the classification performance. This page introduces how to do image classification with PaddlePaddle Fluid. +### 安装说明 -## Quick Start +在当前目录下运行样例代码需要python 2.7及以上版本,PaddlePaddle Fluid v1.5.1或以上的版本。如果你的运行环境中的PaddlePaddle低于此版本,请根据 [安装文档](http://paddlepaddle.org/documentation/docs/zh/1.5/beginners_guide/install/index_cn.html) 中的说明来更新PaddlePaddle。 -### Installation +#### 环境依赖 -Running sample code in this directory requires Python 2.7 and later, PaddelPaddle Fluid v1.5 and later, the latest release version is recommended, If the PaddlePaddle on your device is lower than v1.5, please follow the instructions in [installation document](http://paddlepaddle.org/documentation/docs/zh/1.4/beginners_guide/install/index_cn.html) and make an update. +python >= 2.7,CUDA >= 8.0,CUDNN >= 7.0 +运行训练代码需要安装numpy,cv2 -### Data preparation +```bash +pip install opencv-python +pip install numpy +``` + +### 数据准备 -An example for ImageNet classification is as follows. 
First of all, preparation of imagenet data can be done as: +下面给出了ImageNet分类任务的样例,首先,通过如下的方式进行数据的准备: ``` cd data/ILSVRC2012/ sh download_imagenet2012.sh ``` +在```download_imagenet2012.sh```脚本中,通过下面三步来准备数据: -In the shell script ```download_imagenet2012.sh```, there are three steps to prepare data: +**步骤一:** 首先在```image-net.org```网站上完成注册,用于获得一对```Username```和```AccessKey```。 -**step-1:** Register at ```image-net.org``` first in order to get a pair of ```Username``` and ```AccessKey```, which are used to download ImageNet data. +**步骤二:** 从ImageNet官网下载ImageNet-2012的图像数据。训练以及验证数据集会分别被下载到"train" 和 "val" 目录中。注意,ImageNet数据的大小超过140GB,下载非常耗时;已经自行下载ImageNet的用户可以直接将数据组织放置到```data/ILSVRC2012```。 -**step-2:** Download ImageNet-2012 dataset from website. The training and validation data will be downloaded into folder "train" and "val" respectively. Please note that the size of data is more than 40 GB, it will take much time to download. Users who have downloaded the ImageNet data can organize it into ```data/ILSVRC2012``` directly. +**步骤三:** 下载训练与验证集合对应的标签文件。下面两个文件分别包含了训练集合与验证集合中图像的标签: -**step-3:** Download training and validation label files. There are two label files which contain train and validation image labels respectively: - -* train_list.txt: label file of imagenet-2012 training set, with each line seperated by ```SPACE```, like: +* train_list.txt: ImageNet-2012训练集合的标签文件,每一行采用"空格"分隔图像路径与标注,例如: ``` train/n02483708/n02483708_2436.jpeg 369 ``` -* val_list.txt: label file of imagenet-2012 validation set, with each line seperated by ```SPACE```, like. +* val_list.txt: ImageNet-2012验证集合的标签文件,每一行采用"空格"分隔图像路径与标注,例如: ``` val/ILSVRC2012_val_00000001.jpeg 65 ``` +注意:可能需要根据本地环境调整reader.py中相关路径来正确读取数据。 -You may need to modify the path in reader.py to load data correctly. 
- -### Training - -After data preparation, one can start the training step by: +### 模型训练 +数据准备完毕后,可以通过如下的方式启动训练: ``` python train.py \ - --model=SE_ResNeXt50_32x4d \ - --batch_size=32 \ + --model=ResNet50 \ + --batch_size=256 \ --total_images=1281167 \ --class_dim=1000 \ --image_shape=3,224,224 \ --model_save_dir=output/ \ - --with_mem_opt=False \ - --with_inplace=True \ --lr_strategy=piecewise_decay \ --lr=0.1 ``` -**parameter introduction:** - -* **model**: name model to use. Default: "SE_ResNeXt50_32x4d". -* **num_epochs**: the number of epochs. Default: 120. -* **batch_size**: the size of each mini-batch. Default: 256. -* **use_gpu**: whether to use GPU or not. Default: True. -* **total_images**: total number of images in the training set. Default: 1281167. -* **class_dim**: the class number of the classification task. Default: 1000. -* **image_shape**: input size of the network. Default: "3,224,224". -* **model_save_dir**: the directory to save trained model. Default: "output". -* **with_mem_opt**: whether to use memory optimization or not. Default: False. -* **with_inplace**: whether to use inplace memory optimization or not. Default: True. -* **lr_strategy**: learning rate changing strategy. Default: "piecewise_decay". -* **lr**: initialized learning rate. Default: 0.1. -* **pretrained_model**: model path for pretraining. Default: None. -* **checkpoint**: the checkpoint path to resume. Default: None. -* **data_dir**: the data path. Default: "./data/ILSVRC2012". -* **fp16**: whether to enable half precision training with fp16. Default: False. -* **scale_loss**: scale loss for fp16. Default: 1.0. -* **l2_decay**: L2_decay parameter. Default: 1e-4. -* **momentum_rate**: momentum_rate. Default: 0.9. -* **use_label_smoothing**: whether to use label_smoothing or not. Default:False. -* **label_smoothing_epsilon**: the label_smoothing_epsilon. Default:0.2. -* **lower_scale**: the lower scale in random crop data processing, upper is 1.0. Default:0.08. 
-* **lower_ratio**: the lower ratio in ramdom crop. Default:3./4. . -* **upper_ration**: the upper ratio in ramdom crop. Default:4./3. . -* **resize_short_size**: the resize_short_size. Default: 256. -* **use_mixup**: whether to use mixup data processing or not. Default:False. -* **mixup_alpha**: the mixup_alpha parameter. Default: 0.2. -* **is_distill**: whether to use distill or not. Default: False. - -Or can start the training step by running the ```run.sh```. - -**data reader introduction:** Data reader is defined in PIL: ```reader.py```and opencv: ```reader_cv2.py```, default reader is implemented by opencv. In [Training](#training), random crop and flipping are used, while center crop is used in [Evaluation](#evaluation) and [Inference](#inference) stages. Supported data augmentation includes: - -* rotation -* color jitter (haven't implemented in cv2_reader) -* random crop -* center crop -* resize -* flipping - -### Finetuning - -Finetuning is to finetune model weights in a specific task by loading pretrained weights. 
One can download [pretrained models](#supported-models-and-performances) and set its path to ```path_to_pretrain_model```, one can finetune a model by running following command: +注意: 当添加如step_epochs这种列表型参数,需要去掉"=",如:--step_epochs 10 20 30 + +或通过run.sh 启动训练 + +```bash +bash run.sh train 模型名 ``` + +**参数说明:** + +环境配置部分: + +* **data_dir**: 数据存储路径,默认值: "./data/ILSVRC2012/" +* **model_save_dir**: 模型存储路径,默认值: "output/" +* **save_param**: params存储路径,默认值: None +* **pretrained_model**: 加载预训练模型路径,默认值: None +* **checkpoint**: 加载用于继续训练的检查点(指定具体模型存储路径,如"output/ResNet50/100/"),默认值: None + +模型类型和超参配置: + +* **model**: 模型名称, 默认值: "ResNet50" +* **total_images**: 图片数,ImageNet2012,默认值: 1281167 +* **class_dim**: 类别数,默认值: 1000 +* **image_shape**: 图片大小,默认值: "3,224,224" +* **num_epochs**: 训练回合数,默认值: 120 +* **batch_size**: batch size大小(所有设备),默认值: 8 +* **test_batch_size**: 测试batch大小,默认值:16 +* **lr_strategy**: 学习率变化策略,默认值: "piecewise_decay" +* **lr**: 初始学习率,默认值: 0.1 +* **l2_decay**: l2_decay值,默认值: 1e-4 +* **momentum_rate**: momentum_rate值,默认值: 0.9 +* **step_epochs**: piecewise decay的decay step,默认值:[30,60,90] + +数据读取器和预处理配置: + +* **lower_scale**: 数据随机裁剪处理时的lower scale值, upper scale值固定为1.0,默认值:0.08 +* **lower_ratio**: 数据随机裁剪处理时的lower ratio值,默认值:3./4. +* **upper_ratio**: 数据随机裁剪处理时的upper ratio值,默认值:4./3. 
+* **resize_short_size**: 指定数据处理时改变图像大小的短边值,默认值: 256 +* **crop_size**: 指定裁剪的大小,默认值:224 +* **use_mixup**: 是否对数据进行mixup处理,默认值: False +* **mixup_alpha**: 指定mixup处理时的alpha值,默认值: 0.2 +* **reader_thread**: 多线程reader的线程数量,默认值: 8 +* **reader_buf_size**: 多线程reader的buf_size, 默认值: 2048 +* **interpolation**: 插值方法, 默认值:None +* **image_mean**: 图片均值,默认值:[0.485, 0.456, 0.406] +* **image_std**: 图片std,默认值:[0.229, 0.224, 0.225] + + +一些开关: + +* **use_gpu**: 是否在GPU上运行,默认值: True +* **use_inplace**: 是否开启inplace显存优化,默认值: True +* **use_label_smoothing**: 是否对数据进行label smoothing处理,默认值: False +* **label_smoothing_epsilon**: label_smoothing的epsilon, 默认值:0.2 +* **random_seed**: 随机数种子, 默认值: 1000 + +**数据读取器说明:** 数据读取器定义在```reader.py```文件中,现在默认基于cv2的数据读取器, 在[训练阶段](#模型训练),默认采用的增广方式是随机裁剪与水平翻转, 而在[模型评估](#模型评估)与[模型预测](#模型预测)阶段用的默认方式是中心裁剪。当前支持的数据增广方式有: + +* 旋转 +* 颜色抖动(暂未实现) +* 随机裁剪 +* 中心裁剪 +* 长宽调整 +* 水平翻转 + +### 参数微调 + +参数微调(Finetune)是指在特定任务上微调已训练模型的参数。可以下载[已发布模型及其性能](#已发布模型及其性能)并且设置```path_to_pretrain_model```为模型所在路径,微调一个模型可以采用如下的命令: + +```bash python train.py \ + --model=model_name \ --pretrained_model=${path_to_pretrain_model} ``` +注意:根据具体模型和任务添加并调整其他参数 -Note: Add and adjust other parameters accroding to specific models and tasks. - -### Evaluation +### 模型评估 -Evaluation is to evaluate the performance of a trained model. One can download [pretrained models](#supported-models-and-performances) and set its path to ```path_to_pretrain_model```. Then top1/top5 accuracy can be obtained by running the following command: +模型评估(Eval)是指对训练完毕的模型评估各类性能指标。可以下载[已发布模型及其性能](#已发布模型及其性能)并且设置```path_to_pretrain_model```为模型所在路径。运行如下的命令,可以获得模型top-1/top-5精度: -``` +```bash python eval.py \ + --model=model_name \ --pretrained_model=${path_to_pretrain_model} ``` +注意:根据具体模型和任务添加并调整其他参数 -Note: Add and adjust other parameters accroding to specific models and tasks. 
+### 模型预测 -### Inference +模型预测(Infer)可以获取一个模型的预测分数或者图像的特征,可以下载[已发布模型及其性能](#已发布模型及其性能)并且设置```path_to_pretrain_model```为模型所在路径。运行如下的命令获得预测结果: -Inference is used to get prediction score or image features based on trained models. One can download [pretrained models](#supported-models-and-performances) and set its path to ```path_to_pretrain_model```. Run following command then obtain prediction score. +**参数说明:** -``` +* **save_inference**: 是否保存模型,默认值:False +* **topk**: 按照置信由高到低排序标签结果,返回的结果数量,默认值:1 +* **label_path**: 可读标签文件路径,默认值:"./utils/tools/readable_label.txt" + +```bash python infer.py \ + --model=model_name \ --pretrained_model=${path_to_pretrain_model} ``` +注意:根据具体模型和任务添加并调整其他参数 + +模型预测默认ImageNet1000类类别,标签文件存储在/utils/tools/readable_label.txt中,如果使用自定义数据,请指定--label_path参数 -Note: Add and adjust other parameters accroding to specific models and tasks. -## Advanced Usage +## 进阶使用 -### Using Mixed-Precision Training +### Mixup训练 -You may add `--fp16=1` to start train using mixed precisioin training, which the training process will use float16 and the output model ("master" parameters) is saved as float32. You also may need to pass `--scale_loss` to overcome accuracy issues, usually `--scale_loss=8.0` will do. +训练中指定 --use_mixup=True 开启Mixup训练,本模型库中所有后缀为_vd的模型即代表开启Mixup训练 -Note that currently `--fp16` can not use together with `--with_mem_opt`, so pass `--with_mem_opt=0` to disable memory optimization pass. +Mixup相关介绍参考[mixup: Beyond Empirical Risk Minimization](https://arxiv.org/abs/1710.09412) -### CE +### 混合精度训练 -CE is only for internal testing, don't have to set it. +FP16相关内容已经迁移至PaddlePaddle/Fleet 中 -## Supported Models and Performances +### 自定义数据集 -The image classification models currently supported by PaddlePaddle are listed in the table. It shows the top-1/top-5 accuracy on the ImageNet-2012 validation set of these models, the inference time of Paddle Fluid and Paddle TensorRT based on dynamic link library(test GPU model: Tesla P4). 
-As the activation function ```swish``` and ```relu6``` which separately used in ShuffleNetV2_swish and MobileNetV2 net are not supported by Paddle TensorRT, inference acceleration performance of them doesn't significient improve. Pretrained models can be downloaded by clicking related model names. +PaddlePaddle/Models ImageClassification 支持自定义数据 + +1. 组织自定义数据,调整数据读取器以正确的传入数据 +2. 注意更改训练脚本中 --data_dim --total_image 等参数 + + +## 已发布模型及其性能 +表格中列出了在models目录下目前支持的图像分类模型,并且给出了已完成训练的模型在ImageNet-2012验证集合上的top-1和top-5精度,以及Paddle Fluid和Paddle TensorRT基于动态链接库的预测时间(测试GPU型号为NVIDIA® Tesla® P4)。 +可以通过点击相应模型的名称下载对应的预训练模型。 + +- 注意 + - 1:ResNet50_vd_v2是ResNet50_vd蒸馏版本。 + - 2:InceptionV4和Xception采用的输入图像的分辨率为299x299,DarkNet53为256x256,Fix_ResNeXt101_32x48d_wsl为320x320,其余模型使用的分辨率均为224x224。在预测时,DarkNet53与Fix_ResNeXt101_32x48d_wsl系列网络resize_short_size与输入的图像分辨率的宽或高相同,InceptionV4和Xception网络resize_short_size为320,其余网络resize_short_size均为256。 + - 3:调用动态链接库预测时需要将训练模型转换为二进制模型 + + ```bash + python infer.py \ + --model=model_name \ + --pretrained_model=${path_to_pretrain_model} \ + --save_inference=True + ``` + + - 4: ResNeXt101_wsl系列的预训练模型转自pytorch模型,详情见[ResNeXt wsl](https://pytorch.org/hub/facebookresearch_WSL-Images_resnext/)。 -- Note1: ResNet50_vd_v2 is the distilled version of ResNet50_vd. -- Note2: The image resolution feeded in InceptionV4 and Xception net is ```299x299```, Fix_ResNeXt101_32x48d_wsl is ```320x320```, DarkNet is ```256x256```, others are ```224x224```.In test time, the resize_short_size of the DarkNet53 and Fix_ResNeXt101_32x48d_wsl series networks is the same as the width or height of the input image resolution, the InceptionV4 and Xception network resize_short_size is 320, and the other networks resize_short_size are 256. 
-- Note3: It's necessary to convert the train model to a binary model when appling dynamic link library to infer, One can do it by running following command: - ```python infer.py --save_inference=True``` -- Note4: The pretrained model of the ResNeXt101_wsl series network is converted from the pytorch model. Please go to [RESNEXT WSL](https://pytorch.org/hub/facebookresearch_WSL-Images_resnext/) for details. - ### AlexNet -|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | -|- |:-: |:-: |:-: | -|[AlexNet](http://paddle-imagenet-models-name.bj.bcebos.com/AlexNet_pretrained.tar) | 56.72%/79.17% | 3.083 | 2.728 | +|Model | Top-1 | Top-5 | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: |:-: | +|[AlexNet](http://paddle-imagenet-models-name.bj.bcebos.com/AlexNet_pretrained.tar) | 56.72% | 79.17% | 3.083 | 2.728 | ### SqueezeNet -|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | -|- |:-: |:-: |:-: | -|[SqueezeNet1_0](https://paddle-imagenet-models-name.bj.bcebos.com/SqueezeNet1_0_pretrained.tar) | 59.60%/81.66% | 2.740 | 1.688 | -|[SqueezeNet1_1](https://paddle-imagenet-models-name.bj.bcebos.com/SqueezeNet1_1_pretrained.tar) | 60.08%/81.85% | 2.751 | 1.270 | - -### VGG -|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | -|- |:-: |:-: |:-: | -|[VGG11](https://paddle-imagenet-models-name.bj.bcebos.com/VGG11_pretrained.tar) | 69.28%/89.09% | 8.223 | 6.821 | -|[VGG13](https://paddle-imagenet-models-name.bj.bcebos.com/VGG13_pretrained.tar) | 70.02%/89.42% | 9.512 | 7.783 | -|[VGG16](https://paddle-imagenet-models-name.bj.bcebos.com/VGG16_pretrained.tar) | 72.00%/90.69% | 11.315 | 9.067 | -|[VGG19](https://paddle-imagenet-models-name.bj.bcebos.com/VGG19_pretrained.tar) | 72.56%/90.93% | 13.096 | 10.388 | - -### MobileNet -|model | top-1/top-5 accuracy(CV2) | Paddle 
Fluid inference time(ms) | Paddle TensorRT inference time(ms) | -|- |:-: |:-: |:-: | -|[MobileNetV1](http://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_pretrained.tar) | 70.99%/89.68% | 2.609 |1.615 | -|[MobileNetV2_x0_25](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x0_25_pretrained.tar) | 53.21%/76.52% | 4.267 | 3.777 | -|[MobileNetV2_x0_5](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x0_5_pretrained.tar) | 65.03%/85.72% | 4.514 | 4.150 | -|[MobileNetV2_x1_0](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_pretrained.tar) | 72.15%/90.65% | 4.546 | 5.278 | -|[MobileNetV2_x1_5](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x1_5_pretrained.tar) | 74.12%/91.67% | 5.235 | 6.909 | -|[MobileNetV2_x2_0](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x2_0_pretrained.tar) | 75.23%/92.58% | 6.680 | 7.658 | - -### ShuffleNet -|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | -|- |:-: |:-: |:-: | -|[ShuffleNetV2_x0_25](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_x0_25_pretrained.tar) | 49.90%/73.79% | 5.956 | 2.961 | -|[ShuffleNetV2_x0_33](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_x0_33_pretrained.tar) | 53.73%/77.05% | 5.896 | 2.941 | -|[ShuffleNetV2_x0_5](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_x0_5_pretrained.tar) | 60.32%/82.26% | 6.048 | 3.088 | -|[ShuffleNetV2_x1_0](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_x1_0_pretrained.tar) | 68.80%/88.45% | 6.101 | 3.616 | -|[ShuffleNetV2_x1_5](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_x1_5_pretrained.tar) | 71.63%/90.15% | 6.113 | 3.699 | -|[ShuffleNetV2_x2_0](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_x2_0_pretrained.tar) | 73.15%/91.20% | 6.430 | 4.553 | -|[ShuffleNetV2_x1_0_swish](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_pretrained.tar) | 
70.03%/89.17% | 6.078 | 6.282 | - -### ResNet -|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | -|- |:-: |:-: |:-: | -|[ResNet18](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet18_pretrained.tar) | 70.98%/89.92% | 3.456 | 2.484 | -|[ResNet34](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet34_pretrained.tar) | 74.57%/92.14% | 5.668 | 3.767 | -|[ResNet50](http://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar) | 76.50%/93.00% | 8.787 | 5.434 | -|[ResNet50_vc](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vc_pretrained.tar) |78.35%/94.03% | 9.013 | 5.463 | -|[ResNet50_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar) | 79.12%/94.44% | 9.058 | 5.510 | -|[ResNet50_vd_v2](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_v2_pretrained.tar) | 79.84%/94.93% | 9.058 | 5.510 | -|[ResNet101](http://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.tar) | 77.56%/93.64% | 15.447 | 8.779 | -|[ResNet101_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar) | 80.17%/94.97% | 15.685 | 8.878 | -|[ResNet152](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet152_pretrained.tar) | 78.26%/93.96% | 21.816 | 12.148 | -|[ResNet152_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet152_vd_pretrained.tar) | 80.59%/95.30% | 22.041 | 12.259 | -|[ResNet200_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet200_vd_pretrained.tar) | 80.93%/95.33% | 28.015 | 15.278 | - -### ResNeXt -|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | -|- |:-: |:-: |:-: | -|[ResNeXt50_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt50_32x4d_pretrained.tar) | 77.75%/93.82% | 12.863 | 9.837 | -|[ResNeXt50_vd_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt50_vd_32x4d_pretrained.tar) | 79.56%/94.62% | 13.673 | 9.991 | 
-|[ResNeXt50_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt50_64x4d_pretrained.tar) | 78.43%/94.13% | 28.162 | 18.271 | -|[ResNeXt50_vd_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt50_vd_64x4d_pretrained.tar) | 80.12%/94.86% | 20.888 | 17.687 | -|[ResNeXt101_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_32x4d_pretrained.tar) | 78.65%/94.19% | 24.154 | 21.387 | -|[ResNeXt101_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt50_64x4d_pretrained.tar) | 78.43%/94.13% | 41.073 | 38.736 | -|[ResNeXt101_vd_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_vd_64x4d_pretrained.tar) | 80.78%/95.20% | 42.277 | 40.929 | -|[ResNeXt152_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt152_32x4d_pretrained.tar) | 78.98%/94.33% | 37.007 | 31.301 | -|[ResNeXt152_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt152_64x4d_pretrained.tar) | 79.51%/94.71% | 58.966 | 57.267 | - -### DenseNet -|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | -|- |:-: |:-: |:-: | -|[DenseNet121](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet121_pretrained.tar) | 75.66%/92.58% | 12.437 | 5.813 | -|[DenseNet161](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet161_pretrained.tar) | 78.57%/94.14% | 27.717 | 12.861 | -|[DenseNet169](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet169_pretrained.tar) | 76.81%/93.31% | 18.941 | 8.146 | -|[DenseNet201](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet201_pretrained.tar) | 77.63%/93.66% | 26.583 | 10.549 | -|[DenseNet264](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet264_pretrained.tar) | 77.96%/93.85% | 41.495 | 15.574 | - -### SENet -|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | -|- |:-: |:-: |:-: | 
-|[SE_ResNeXt50_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/SE_ResNeXt50_32x4d_pretrained.tar) | 78.44%/93.96% | 14.916 | 12.126 | -|[SE_ResNeXt101_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/SE_ResNeXt101_32x4d_pretrained.tar) | 79.12%/94.20% | 30.085 | 24.110 | -|[SENet154_vd](https://paddle-imagenet-models-name.bj.bcebos.com/SENet154_vd_pretrained.tar) | 81.40%/95.48% | 71.892 | 64.855 | - -### Inception -|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | -|- |:-: |:-: |:-: | -|[GoogLeNet](https://paddle-imagenet-models-name.bj.bcebos.com/GoogleNet_pretrained.tar) | 70.70%/89.66% | 6.528 | 3.076 | -|[Xception_41](https://paddle-imagenet-models-name.bj.bcebos.com/Xception41_pretrained.tar) | 79.30%/94.53% | 13.757 | 10.831 | -|[InceptionV4](https://paddle-imagenet-models-name.bj.bcebos.com/InceptionV4_pretrained.tar) | 80.77%/95.26% | 32.413 | 18.154 | +|Model | Top-1 | Top-5 | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: |:-: | +|[SqueezeNet1_0](https://paddle-imagenet-models-name.bj.bcebos.com/SqueezeNet1_0_pretrained.tar) | 59.60% | 81.66% | 2.740 | 1.688 | +|[SqueezeNet1_1](https://paddle-imagenet-models-name.bj.bcebos.com/SqueezeNet1_1_pretrained.tar) | 60.08% | 81.85% | 2.751 | 1.270 | + +### VGG Series +|Model | Top-1 | Top-5 | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: |:-: | +|[VGG11](https://paddle-imagenet-models-name.bj.bcebos.com/VGG11_pretrained.tar) | 69.28% | 89.09% | 8.223 | 6.821 | +|[VGG13](https://paddle-imagenet-models-name.bj.bcebos.com/VGG13_pretrained.tar) | 70.02% | 89.42% | 9.512 | 7.783 | +|[VGG16](https://paddle-imagenet-models-name.bj.bcebos.com/VGG16_pretrained.tar) | 72.00% | 90.69% | 11.315 | 9.067 | +|[VGG19](https://paddle-imagenet-models-name.bj.bcebos.com/VGG19_pretrained.tar) | 72.56% | 90.93% | 13.096 | 10.388 | + +### MobileNet Series +|Model | Top-1 | 
Top-5 | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: |:-: | +|[MobileNetV1](http://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_pretrained.tar) | 70.99% | 89.68% | 2.609 |1.615 | +|[MobileNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_pretrained.tar) | 72.15% | 90.65% | 4.546 | 5.278 | +|[MobileNetV2_x0_25](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x0_25_pretrained.tar) | 53.21% | 76.52% | 4.267 | 3.777 | +|[MobileNetV2_x0_5](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x0_5_pretrained.tar) | 65.03% | 85.72% | 4.514 | 4.150 | +|[MobileNetV2_x1_5](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x1_5_pretrained.tar) | 74.12% | 91.67% | 5.235 | 6.909 | +|[MobileNetV2_x2_0](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x2_0_pretrained.tar) | 75.23% | 92.58% | 6.680 | 7.658 | + +### ShuffleNet Series +|Model | Top-1 | Top-5 | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: |:-: | +|[ShuffleNetV2](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_pretrained.tar) | 68.80% | 88.45% | 6.101 | 3.616 | +|[ShuffleNetV2_x0_25](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_x0_25_pretrained.tar) | 49.90% | 73.79% | 5.956 | 2.961 | +|[ShuffleNetV2_x0_33](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_x0_33_pretrained.tar) | 53.73% | 77.05% | 5.896 | 2.941 | +|[ShuffleNetV2_x0_5](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_x0_5_pretrained.tar) | 60.32% | 82.26% | 6.048 | 3.088 | +|[ShuffleNetV2_x1_5](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_x1_5_pretrained.tar) | 71.63% | 90.15% | 6.113 | 3.699 | +|[ShuffleNetV2_x2_0](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_x2_0_pretrained.tar) | 73.15% | 91.20% | 6.430 | 4.553 | 
+|[ShuffleNetV2_swish](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_swish_pretrained.tar) | 70.03% | 89.17% | 6.078 | 6.282 | + +### ResNet Series +|Model | Top-1 | Top-5 | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: |:-: | +|[ResNet18](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet18_pretrained.tar) | 70.98% | 89.92% | 3.456 | 2.484 | +|[ResNet34](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet34_pretrained.tar) | 74.57% | 92.14% | 5.668 | 3.767 | +|[ResNet50](http://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_pretrained.tar) | 76.50% | 93.00% | 8.787 | 5.434 | +|[ResNet50_vc](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vc_pretrained.tar) |78.35% | 94.03% | 9.013 | 5.463 | +|[ResNet50_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_pretrained.tar) | 79.12% | 94.44% | 9.058 | 5.510 | +|[ResNet50_vd_v2](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet50_vd_v2_pretrained.tar) | 79.84% | 94.93% | 9.058 | 5.510 | +|[ResNet101](http://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_pretrained.tar) | 77.56% | 93.64% | 15.447 | 8.779 | +|[ResNet101_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet101_vd_pretrained.tar) | 80.17% | 94.97% | 15.685 | 8.878 | +|[ResNet152](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet152_pretrained.tar) | 78.26% | 93.96% | 21.816 | 12.148 | +|[ResNet152_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet152_vd_pretrained.tar) | 80.59% | 95.30% | 22.041 | 12.259 | +|[ResNet200_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet200_vd_pretrained.tar) | 80.93% | 95.33% | 28.015 | 15.278 | + +### ResNeXt Series +|Model | Top-1 | Top-5 | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: |:-: | +|[ResNeXt50_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt50_32x4d_pretrained.tar) | 77.75% | 93.82% | 12.863 | 9.837 | 
+|[ResNeXt50_vd_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt50_vd_32x4d_pretrained.tar) | 79.56% | 94.62% | 13.673 | 9.991 | +|[ResNeXt50_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt50_64x4d_pretrained.tar) | 78.43% | 94.13% | 28.162 | 18.271 | +|[ResNeXt50_vd_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt50_vd_64x4d_pretrained.tar) | 80.12% | 94.86% | 20.888 | 17.687 | +|[ResNeXt101_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_32x4d_pretrained.tar) | 78.65% | 94.19% | 24.154 | 21.387 | +|[ResNeXt101_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt50_64x4d_pretrained.tar) | 78.43% | 94.13% | 41.073 | 38.736 | +|[ResNeXt101_vd_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_vd_64x4d_pretrained.tar) | 80.78% | 95.20% | 42.277 | 40.929 | +|[ResNeXt152_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt152_32x4d_pretrained.tar) | 78.98% | 94.33% | 37.007 | 31.301 | +|[ResNeXt152_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt152_64x4d_pretrained.tar) | 79.51% | 94.71% | 58.966 | 57.267 | + +### DenseNet Series +|Model | Top-1 | Top-5 | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: |:-: | +|[DenseNet121](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet121_pretrained.tar) | 75.66% | 92.58% | 12.437 | 5.813 | +|[DenseNet161](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet161_pretrained.tar) | 78.57% | 94.14% | 27.717 | 12.861 | +|[DenseNet169](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet169_pretrained.tar) | 76.81% | 93.31% | 18.941 | 8.146 | +|[DenseNet201](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet201_pretrained.tar) | 77.63% | 93.66% | 26.583 | 10.549 | +|[DenseNet264](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet264_pretrained.tar) | 77.96% | 93.85% | 41.495 | 15.574 | + +### SENet Series +|Model | Top-1 | Top-5 | 
Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: |:-: | +|[SE_ResNeXt50_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/SE_ResNeXt50_32x4d_pretrained.tar) | 78.44% | 93.96% | 14.916 | 12.126 | +|[SE_ResNeXt101_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/SE_ResNeXt101_32x4d_pretrained.tar) | 79.12% | 94.20% | 30.085 | 24.110 | +|[SENet_154_vd](https://paddle-imagenet-models-name.bj.bcebos.com/SENet_154_vd_pretrained.tar) | 81.40% | 95.48% | 71.892 | 64.855 | + +### Inception Series +| Model | Top-1 | Top-5 | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: |:-: | +|[GoogLeNet](https://paddle-imagenet-models-name.bj.bcebos.com/GoogLeNet_pretrained.tar) | 70.70% | 89.66% | 6.528 | 3.076 | +|[Xception_41](https://paddle-imagenet-models-name.bj.bcebos.com/Xception_41_pretrained.tar) | 79.30% | 94.53% | 13.757 | 10.831 | +|[InceptionV4](https://paddle-imagenet-models-name.bj.bcebos.com/InceptionV4_pretrained.tar) | 80.77% | 95.26% | 32.413 | 18.154 | ### DarkNet -|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | -|- |:-: |:-: |:-: | -|[DarkNet53](https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_ImageNet1k_pretrained.tar) | 78.04%/94.05% | 11.969 | 7.153 | - -### ResNeXt101_wsl -|model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | -|- |:-: |:-: |:-: | -|[ResNeXt101_32x8d_wsl](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_32x8d_wsl_pretrained.tar) | 82.55%/96.74% | 33.310 | 27.648 | -|[ResNeXt101_32x16d_wsl](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_32x16d_wsl_pretrained.tar) | 84.24%/97.26% | 54.320 | 46.064 | -|[ResNeXt101_32x32d_wsl](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_32x32d_wsl_pretrained.tar) | 84.97%/97.59% | 97.734 | 87.961 | 
-|[ResNeXt101_32x48d_wsl](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_32x48d_wsl_pretrained.tar) | 85.37%/97.69% | 161.722 | | -|[Fix_ResNeXt101_32x48d_wsl](https://paddle-imagenet-models-name.bj.bcebos.com/Fix_ResNeXt101_32x48d_wsl_pretrained.tar) | 86.26%/97.97% | 236.091 | | +|Model | Top-1 | Top-5 | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: |:-: | +|[DarkNet53](https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_ImageNet1k_pretrained.tar) | 78.04% | 94.05% | 11.969 | 7.153 | + +### ResNeXt101_wsl Series +|Model | Top-1 | Top-5 | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | +|- |:-: |:-: |:-: |:-: | +|[ResNeXt101_32x8d_wsl](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_32x8d_wsl_pretrained.tar) | 82.55% | 96.74% | 33.310 | 27.648 | +|[ResNeXt101_32x16d_wsl](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_32x16d_wsl_pretrained.tar) | 84.24% | 97.26% | 54.320 | 46.064 | +|[ResNeXt101_32x32d_wsl](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_32x32d_wsl_pretrained.tar) | 84.97% | 97.59% | 97.734 | 87.961 | +|[ResNeXt101_32x48d_wsl](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_32x48d_wsl_pretrained.tar) | 85.37% | 97.69% | 161.722 | | +|[Fix_ResNeXt101_32x48d_wsl](https://paddle-imagenet-models-name.bj.bcebos.com/Fix_ResNeXt101_32x48d_wsl_pretrained.tar) | 86.26% | 97.97% | 236.091 | | + ## FAQ -**Q:** How to solve this problem when I try to train a 6-classes dataset with indicating pretrained_model parameter ? -``` -Enforce failed. Expected x_dims[1] == labels_dims[1], but received x_dims[1]:1000 != labels_dims[1]:6. -``` +**Q:** 加载预训练模型报错,Enforce failed. Expected x_dims[1] == labels_dims[1], but received x_dims[1]:1000 != labels_dims[1]:6. + +**A:** 类别数匹配不上,删掉最后一层分类层FC -**A:** It may be caused by dismatch dimensions. 
Please remove fc parameter in pretrained models, It usually named with a prefix ```fc_``` +**Q:** reader中报错AttributeError: 'NoneType' object has no attribute 'shape' -## Reference +**A:** 文件路径load错误 +**Q:** 出现cudaStreamSynchronize an illegal memory access was encountered errno:77 错误 +**A:** 可能是因为显存问题导致,添加如下环境变量: + + export FLAGS_fast_eager_deletion_mode=1 + export FLAGS_eager_delete_tensor_gb=0.0 + export FLAGS_fraction_of_gpu_memory_to_use=0.98 + +## 参考文献 - AlexNet: [imagenet-classification-with-deep-convolutional-neural-networks](https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf), Alex Krizhevsky, Ilya Sutskever, Geoffrey E. Hinton - ResNet: [Deep Residual Learning for Image Recognitio](https://arxiv.org/abs/1512.03385), Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun - ResNeXt: [Aggregated Residual Transformations for Deep Neural Networks](https://arxiv.org/abs/1611.05431), Saining Xie, Ross Girshick, Piotr Dollár, Zhuowen Tu, Kaiming He @@ -307,18 +372,17 @@ Enforce failed. 
Expected x_dims[1] == labels_dims[1], but received x_dims[1]:100 - ResNeXt101_wsl: [Exploring the Limits of Weakly Supervised Pretraining](https://arxiv.org/abs/1805.00932), Dhruv Mahajan, Ross Girshick, Vignesh Ramanathan, Kaiming He, Manohar Paluri, Yixuan Li, Ashwin Bharambe, Laurens van der Maaten - Fix_ResNeXt101_wsl: [Fixing the train-test resolution discrepancy](https://arxiv.org/abs/1906.06423), Hugo Touvron, Andrea Vedaldi, Matthijs Douze, Herve ́ Je ́gou -## Update - -- 2018/12/03 **Stage1**: Update AlexNet, ResNet50, ResNet101, MobileNetV1 -- 2018/12/23 **Stage2**: Update VGG Series, SeResNeXt50_32x4d, SeResNeXt101_32x4d, ResNet152 -- 2019/01/31 Update MobileNetV2_x1_0 -- 2019/04/01 **Stage3**: Update ResNet18, ResNet34, GoogLeNet, ShuffleNetV2 -- 2019/06/12 **Stage4**:Update ResNet50_vc, ResNet50_vd, ResNet101_vd, ResNet152_vd, ResNet200_vd, SE154_vd InceptionV4, ResNeXt101_64x4d, ResNeXt101_vd_64x4d -- 2019/06/22 Update ResNet50_vd_v2 -- 2019/07/02 **Stage5**: Update MobileNetV2_x0_5, ResNeXt50_32x4d, ResNeXt50_64x4d, Xception_41, ResNet101_vd -- 2019/07/19 **Stage6**: Update ShuffleNetV2_x0_25, ShuffleNetV2_x0_33, ShuffleNetV2_x0_5, ShuffleNetV2_x1_0, ShuffleNetV2_x1_5, ShuffleNetV2_x2_0, MobileNetV2_x0_25, MobileNetV2_x1_5, MobileNetV2_x2_0, ResNeXt50_vd_64x4d, ResNeXt101_32x4d, ResNeXt152_32x4d -- 2019/08/01 **Stage7**: Update DarkNet53, DenseNet121. 
Densenet161, DenseNet169, DenseNet201, DenseNet264, SqueezeNet1_0, SqueezeNet1_1, ResNeXt50_vd_32x4d, ResNeXt152_64x4d, ResNeXt101_32x8d_wsl, ResNeXt101_32x16d_wsl, ResNeXt101_32x32d_wsl, ResNeXt101_32x48d_wsl, Fix_ResNeXt101_32x48d_wsl +## 版本更新 +- 2018/12/03 **Stage1**: 更新AlexNet,ResNet50,ResNet101,MobileNetV1 +- 2018/12/23 **Stage2**: 更新VGG系列,SeResNeXt50_32x4d,SeResNeXt101_32x4d,ResNet152 +- 2019/01/31 更新MobileNetV2_x1_0 +- 2019/04/01 **Stage3**: 更新ResNet18,ResNet34,GoogLeNet,ShuffleNetV2 +- 2019/06/12 **Stage4**: 更新ResNet50_vc,ResNet50_vd,ResNet101_vd,ResNet152_vd,ResNet200_vd,SE154_vd,InceptionV4,ResNeXt101_64x4d,ResNeXt101_vd_64x4d +- 2019/06/22 更新ResNet50_vd_v2 +- 2019/07/02 **Stage5**: 更新MobileNetV2_x0_5,ResNeXt50_32x4d,ResNeXt50_64x4d,Xception_41,ResNet101_vd +- 2019/07/19 **Stage6**: 更新ShuffleNetV2_x0_25,ShuffleNetV2_x0_33,ShuffleNetV2_x0_5,ShuffleNetV2_x1_0,ShuffleNetV2_x1_5,ShuffleNetV2_x2_0,MobileNetV2_x0_25,MobileNetV2_x1_5,MobileNetV2_x2_0,ResNeXt50_vd_64x4d,ResNeXt101_32x4d,ResNeXt152_32x4d +- 2019/08/01 **Stage7**: 更新DarkNet53,DenseNet121,DenseNet161,DenseNet169,DenseNet201,DenseNet264,SqueezeNet1_0,SqueezeNet1_1,ResNeXt50_vd_32x4d,ResNeXt152_64x4d,ResNeXt101_32x8d_wsl,ResNeXt101_32x16d_wsl,ResNeXt101_32x32d_wsl,ResNeXt101_32x48d_wsl,Fix_ResNeXt101_32x48d_wsl -## Contribute +## 如何贡献代码 -If you can fix an issue or add a new feature, please open a PR to us. If your PR is accepted, you can get scores according to the quality and difficulty of your PR(0~5), while you got 10 scores, you can contact us for interview or recommendation letter. 
+如果你可以修复某个issue或者增加一个新功能,欢迎给我们提交PR。如果对应的PR被接受了,我们将根据贡献的质量和难度进行打分(0-5分,越高越好)。如果你累计获得了10分,可以联系我们获得面试机会或者为你写推荐信。 diff --git a/PaddleCV/image_classification/README_cn.md b/PaddleCV/image_classification/README_en.md similarity index 52% rename from PaddleCV/image_classification/README_cn.md rename to PaddleCV/image_classification/README_en.md index 27bfb3ea..3b9e7e0d 100644 --- a/PaddleCV/image_classification/README_cn.md +++ b/PaddleCV/image_classification/README_en.md @@ -1,179 +1,232 @@ -# 图像分类以及模型库 +English | [中文](README.md) + +# Image Classification and Model Zoo + +## Table of Contents + +- [Introduction](#introduction) +- [Quick Start](#quick-start) + - [Installation](#installation) + - [Data preparation](#data-preparation) + - [Training](#training) + - [Finetuning](#finetuning) + - [Evaluation](#evaluation) + - [Inference](#inference) +- [Advanced Usage](#advanced-usage) + - [Mixup Training](#mixup-training) + - [Using Mixed-Precision Training](#using-mixed-precision-training) + - [Custom Dataset](#custom-dataset) +- [Supported Models and Performances](#supported-models-and-performances) +- [Reference](#reference) +- [Update](#update) +- [Contribute](#contribute) --- -## 内容 -- [简介](#简介) -- [快速开始](#快速开始) - - [安装说明](#安装说明) - - [数据准备](#数据准备) - - [模型训练](#模型训练) - - [参数微调](#参数微调) - - [模型评估](#模型评估) - - [模型预测](#模型预测) -- [进阶使用](#进阶使用) - - [混合精度训练](#混合精度训练) - - [CE测试](#ce测试) -- [已发布模型及其性能](#已发布模型及其性能) -- [FAQ](#faq) -- [参考文献](#参考文献) -- [版本更新](#版本更新) -- [如何贡献代码](#如何贡献代码) -- [反馈](#反馈) - -## 简介 -图像分类是计算机视觉的重要领域,它的目标是将图像分类到预定义的标签。近期,许多研究者提出很多不同种类的神经网络,并且极大的提升了分类算法的性能。本页将介绍如何使用PaddlePaddle进行图像分类。 - -## 快速开始 - -### 安装说明 -在当前目录下运行样例代码需要python 2.7及以上版本,PadddlePaddle Fluid v1.5或以上的版本。如果你的运行环境中的PaddlePaddle低于此版本,请根据 [installation document](http://paddlepaddle.org/documentation/docs/zh/1.4/beginners_guide/install/index_cn.html) 中的说明来更新PaddlePaddle。 - -### 数据准备 - -下面给出了ImageNet分类任务的样例,首先,通过如下的方式进行数据的准备: -``` + +## Introduction + +Image classification, which is an important field of 
computer vision, is to classify images into pre-defined labels. Recently, many researchers have developed different kinds of neural networks and highly improved the classification performance. This page introduces how to do image classification with PaddlePaddle Fluid. + +## Quick Start + +### Installation + +Running samples in this directory requires Python 2.7 and later, CUDA 8.0 and later, CUDNN 7.0 and later, the Python packages numpy and opencv-python, and PaddlePaddle Fluid v1.5 and later (the latest release version is recommended). If the PaddlePaddle on your device is lower than v1.5, please follow the instructions in [installation document](http://paddlepaddle.org/documentation/docs/zh/1.5/beginners_guide/install/index_cn.html) and make an update. + +### Data preparation + +An example for ImageNet classification is as follows. First of all, preparation of imagenet data can be done as: + +```bash cd data/ILSVRC2012/ sh download_imagenet2012.sh ``` -在```download_imagenet2012.sh```脚本中,通过下面三步来准备数据: -**步骤一:** 首先在```image-net.org```网站上完成注册,用于获得一对```Username```和```AccessKey```。 +In the shell script ```download_imagenet2012.sh```, there are three steps to prepare data: -**步骤二:** 从ImageNet官网下载ImageNet-2012的图像数据。训练以及验证数据集会分别被下载到"train" 和 "val" 目录中。请注意,ImaegNet数据的大小超过40GB,下载非常耗时;已经自行下载ImageNet的用户可以直接将数据组织放置到```data/ILSVRC2012```。 +**step-1:** Register at ```image-net.org``` first in order to get a pair of ```Username``` and ```AccessKey```, which are used to download ImageNet data. -**步骤三:** 下载训练与验证集合对应的标签文件。下面两个文件分别包含了训练集合与验证集合中图像的标签: +**step-2:** Download ImageNet-2012 dataset from website. The training and validation data will be downloaded into folder "train" and "val" respectively. Please note that the size of data is more than 40 GB, it will take much time to download. Users who have downloaded the ImageNet data can organize it into ```data/ILSVRC2012``` directly. 
-* train_list.txt: ImageNet-2012训练集合的标签文件,每一行采用"空格"分隔图像路径与标注,例如: +**step-3:** Download training and validation label files. There are two label files which contain train and validation image labels respectively: + +* train_list.txt: label file of imagenet-2012 training set, with each line separated by ```SPACE```, like: ``` train/n02483708/n02483708_2436.jpeg 369 ``` -* val_list.txt: ImageNet-2012验证集合的标签文件,每一行采用"空格"分隔图像路径与标注,例如: +* val_list.txt: label file of imagenet-2012 validation set, with each line separated by ```SPACE```, like: ``` val/ILSVRC2012_val_00000001.jpeg 65 ``` -注意:可能需要根据本地环境调整reader.py相关路径来正确读取数据。 -### 模型训练 +Note: You may need to modify the data path in reader.py to load data correctly. + +### Training + +After data preparation, one can start the training step by: -数据准备完毕后,可以通过如下的方式启动训练: ``` python train.py \ - --model=SE_ResNeXt50_32x4d \ - --batch_size=32 \ + --model=ResNet50 \ + --batch_size=256 \ --total_images=1281167 \ --class_dim=1000 \ --image_shape=3,224,224 \ --model_save_dir=output/ \ - --with_mem_opt=False \ - --with_inplace=True \ --lr_strategy=piecewise_decay \ --lr=0.1 ``` -**参数说明:** - -* **model**: 模型名称, 默认值: "SE_ResNeXt50_32x4d" -* **num_epochs**: 训练回合数,默认值: 120 -* **batch_size**: 批大小,默认值: 256 -* **use_gpu**: 是否在GPU上运行,默认值: True -* **total_images**: 图片数,ImageNet2012默认值: 1281167. 
-* **class_dim**: 类别数,默认值: 1000 -* **image_shape**: 图片大小,默认值: "3,224,224" -* **model_save_dir**: 模型存储路径,默认值: "output/" -* **with_mem_opt**: 是否开启显存优化,默认值: False -* **with_inplace**: 是否开启inplace显存优化,默认值: True -* **lr_strategy**: 学习率变化策略,默认值: "piecewise_decay" -* **lr**: 初始学习率,默认值: 0.1 -* **pretrained_model**: 预训练模型路径,默认值: None -* **checkpoint**: 用于继续训练的检查点(指定具体模型存储路径,如"output/SE_ResNeXt50_32x4d/100/"),默认值: None -* **fp16**: 是否开启混合精度训练,默认值: False -* **scale_loss**: 调整混合训练的loss scale值,默认值: 1.0 -* **l2_decay**: l2_decay值,默认值: 1e-4 -* **momentum_rate**: momentum_rate值,默认值: 0.9 -* **use_label_smoothing**: 是否对数据进行label smoothing处理,默认值:False -* **label_smoothing_epsilon**: label_smoothing的epsilon值,默认值:0.2 -* **lower_scale**: 数据随机裁剪处理时的lower scale值, upper scale值固定为1.0,默认值:0.08 -* **lower_ratio**: 数据随机裁剪处理时的lower ratio值,默认值:3./4. -* **upper_ration**: 数据随机裁剪处理时的upper ratio值,默认值:4./3. -* **resize_short_size**: 指定数据处理时改变图像大小的短边值,默认值: 256 -* **use_mixup**: 是否对数据进行mixup处理,默认值:False -* **mixup_alpha**: 指定mixup处理时的alpha值,默认值: 0.2 -* **is_distill**: 是否进行蒸馏训练,默认值: False - -**在```run.sh```中有用于训练的脚本.** - -**数据读取器说明:** 数据读取器定义在PIL:```reader.py```和CV2:```reader_cv2.py```文件中,现在默认基于cv2的数据读取器, 在[训练阶段](#模型训练), 默认采用的增广方式是随机裁剪与水平翻转, 而在[模型评估](#模型评估)与[模型预测](#模型预测)阶段用的默认方式是中心裁剪。当前支持的数据增广方式有: - -* 旋转 -* 颜色抖动(cv2暂未实现) -* 随机裁剪 -* 中心裁剪 -* 长宽调整 -* 水平翻转 - -### 参数微调 - -参数微调是指在特定任务上微调已训练模型的参数。可以下载[已有模型及其性能](#已有模型及其性能)并且设置```path_to_pretrain_model```为模型所在路径,微调一个模型可以采用如下的命令: +or running run.sh scripts + +```bash +bash run.sh train model_name +``` + +**parameter introduction:** + +Environment settings: + +* **data_dir**: the data root directory Default: "./data/ILSVRC2012". +* **model_save_dir**: the directory to save trained model. Default: "output". +* **save_param**: the path to save params. Default: None. +* **pretrained_model**: load model path for pretraining. Default: None. +* **checkpoint**: load the checkpoint path to resume. Default: None. 
+ +Solver and hyperparameters: + +* **model**: name of the model to use. Default: "ResNet50". +* **total_images**: total number of images in the training set. Default: 1281167. +* **class_dim**: the class number of the classification task. Default: 1000. +* **image_shape**: input size of the network. Default: "3,224,224". +* **num_epochs**: the number of epochs. Default: 120. +* **batch_size**: the batch size of all devices. Default: 8. +* **test_batch_size**: the test batch size, Default: 16 +* **lr_strategy**: learning rate changing strategy. Default: "piecewise_decay". +* **lr**: initialized learning rate. Default: 0.1. +* **l2_decay**: L2_decay parameter. Default: 1e-4. +* **momentum_rate**: momentum_rate. Default: 0.9. +* **step_epochs**: the decay step epochs of piecewise decay, Default: [30,60,90] + +Reader and preprocess: + +* **lower_scale**: the lower scale in random crop data processing, upper is 1.0. Default:0.08. +* **lower_ratio**: the lower ratio in random crop. Default:3./4. +* **upper_ration**: the upper ratio in random crop. Default:4./3. +* **resize_short_size**: the resize_short_size. Default: 256. +* **crop_size**: the crop size, Default: 224. +* **use_mixup**: whether to use mixup data processing or not. Default:False. +* **mixup_alpha**: the mixup_alpha parameter. Default: 0.2. +* **reader_thread**: the number of threads in multi thread reader, Default: 8 +* **reader_buf_size**: the buffer size of multi thread reader, Default: 2048 +* **interpolation**: interpolation method, Default: None +* **image_mean**: image mean, Default: [0.485, 0.456, 0.406] +* **image_std**: image std, Default: [0.229, 0.224, 0.225] + + +Switch: + +* **use_gpu**: whether to use GPU or not. Default: True. +* **use_inplace**: whether to use inplace memory optimization or not. Default: True. +* **use_label_smoothing**: whether to use label_smoothing or not. Default:False. +* **label_smoothing_epsilon**: the label_smoothing_epsilon. Default:0.2. 
+* **random_seed**: random seed for debugging, Default: 1000 + +**data reader introduction:** Data reader is defined in ```reader.py```, default reader is implemented by opencv. In the [Training](#training) Stage, random crop and flipping are applied, while center crop is applied in the [Evaluation](#evaluation) and [Inference](#inference) stages. Supported data augmentation includes: + +* rotation +* color jitter (not yet implemented in cv2_reader) +* random crop +* center crop +* resize +* flipping + +### Finetuning + +Finetuning is to finetune model weights in a specific task by loading pretrained weights. One can download [pretrained models](#supported-models-and-performances) and set its path to ```path_to_pretrain_model```. A model can then be finetuned by running the following command: + ``` python train.py \ + --model=model_name \ --pretrained_model=${path_to_pretrain_model} ``` -注意:根据具体模型和任务添加并调整其他参数 -### 模型评估 -模型评估是指对训练完毕的模型评估各类性能指标。可以下载[已有模型及其性能](#已有模型及其性能)并且设置```path_to_pretrain_model```为模型所在路径。运行如下的命令,可以获得模型top-1/top-5精度: +Note: Add and adjust other parameters according to specific models and tasks. + +### Evaluation + +Evaluation is to evaluate the performance of a trained model. One can download [pretrained models](#supported-models-and-performances) and set its path to ```path_to_pretrain_model```. Then top1/top5 accuracy can be obtained by running the following command: + ``` python eval.py \ + --model=model_name \ --pretrained_model=${path_to_pretrain_model} ``` -注意:根据具体模型和任务添加并调整其他参数 -### 模型预测 -模型预测可以获取一个模型的预测分数或者图像的特征,可以下载[已有模型及其性能](#已有模型及其性能)并且设置```path_to_pretrain_model```为模型所在路径。运行如下的命令获得预测分数,: -``` +Note: Add and adjust other parameters according to specific models and tasks. 
+ +### Inference + +**Parameters unique to the inference stage:** + +* **save_inference**: whether to save binary model, Default: False +* **topk**: the number of sorted predicted labels to show, Default: 1 +* **label_path**: readable label filepath, Default: "/utils/tools/readable_label.txt" + +Inference is used to get prediction score or image features based on trained models. One can download [pretrained models](#supported-models-and-performances) and set its path to ```path_to_pretrain_model```. Run the following command to obtain the prediction score. + +```bash python infer.py \ + --model=model_name \ --pretrained_model=${path_to_pretrain_model} ``` -注意:根据具体模型和任务添加并调整其他参数 +Note: Add and adjust other parameters according to specific models and tasks. + +## Advanced Usage -##进阶使用 +### Mixup Training +Set --use_mixup=True to start Mixup training. All of the models with the suffix "_vd" are trained with mixup. -### 混合精度训练 +Refer to [mixup: Beyond Empirical Risk Minimization](https://arxiv.org/abs/1710.09412) -可以通过开启`--fp16=True`启动混合精度训练,这样训练过程会使用float16数据,并输出float32的模型参数("master"参数)。您可能需要同时传入`--scale_loss`来解决fp16训练的精度问题,通常传入`--scale_loss=8.0`即可。 +### Using Mixed-Precision Training -注意,目前混合精度训练不能和内存优化功能同时使用,所以需要传`--with_mem_opt=False`这个参数来禁用内存优化功能。 +Mixed-precision part is moving to PaddlePaddle/Fleet now. 
-### CE测试 -注意:CE相关代码仅用于内部测试,enable_ce默认设置False。 +### Custom Dataset -## 已发布模型及其性能 -表格中列出了在models目录下目前支持的图像分类模型,并且给出了已完成训练的模型在ImageNet-2012验证集合上的top-1/top-5精度,以及Paddle Fluid和Paddle TensorRT基于动态链接库的预测时间(测 -试GPU型号为Tesla P4)。由于Paddle TensorRT对ShuffleNetV2_swish使用的激活函数swish,MobileNetV2使用的激活函数relu6不支持,因此预测加速不明显。可以通过点击相应模型的名称下载对应的预训练模型。 -- 注意 - - 1:ResNet50_vd_v2是ResNet50_vd蒸馏版本。 - - 2:InceptionV4和Xception采用的输入图像的分辨率为299x299,DarkNet53为256x256,Fix_ResNeXt101_32x48d_wsl为320x320,其余模型使用的分辨率均为224x224。在预测时,DarkNet53与Fix_ResNeXt101_32x48d_wsl系列网络resize_short_size与输入的图像分辨率的宽或高相同,InceptionV4和Xception网络resize_short_size为320,其余网络resize_short_size均为256。 - - 3:调用动态链接库预测时需要将训练模型转换为二进制模型 +## Supported Models and Performances - ```python infer.py --save_inference=True``` - - 4: ResNeXt101_wsl系列的预训练模型转自pytorch模型,详情请移步[RESNEXT WSL](https://pytorch.org/hub/facebookresearch_WSL-Images_resnext/)。 +The image classification models currently supported by PaddlePaddle are listed in the table. It shows the top-1/top-5 accuracy on the ImageNet-2012 validation set of these models, the inference time of Paddle Fluid and Paddle TensorRT based on dynamic link library(test GPU model: Tesla P4). +Pretrained models can be downloaded by clicking related model names. +- Note + - 1: ResNet50_vd_v2 is the distilled version of ResNet50_vd. + - 2: The image resolution fed into InceptionV4 and Xception net is ```299x299```, Fix_ResNeXt101_32x48d_wsl is ```320x320```, DarkNet is ```256x256```, others are ```224x224```. At test time, the resize_short_size of the DarkNet53 and Fix_ResNeXt101_32x48d_wsl series networks is the same as the width or height of the input image resolution, the InceptionV4 and Xception network resize_short_size is 320, and the other networks resize_short_size are 256. 
+ - 3: It's necessary to convert the train model to a binary model when applying the dynamic link library for inference. This can be done by running the following command: + ```bash + python infer.py\ + --model=model_name \ + --pretrained_model=${path_to_pretrained_model} \ + --save_inference=True + ``` + - 4: The pretrained model of the ResNeXt101_wsl series network is converted from the pytorch model. Please refer to [RESNEXT WSL](https://pytorch.org/hub/facebookresearch_WSL-Images_resnext/) for details. -### AlexNet +### AlexNet Series |model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | |- |:-: |:-: |:-: | |[AlexNet](http://paddle-imagenet-models-name.bj.bcebos.com/AlexNet_pretrained.tar) | 56.72%/79.17% | 3.083 | 2.728 | -### SqueezeNet +### SqueezeNet Series |model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | |- |:-: |:-: |:-: | |[SqueezeNet1_0](https://paddle-imagenet-models-name.bj.bcebos.com/SqueezeNet1_0_pretrained.tar) | 59.60%/81.66% | 2.740 | 1.688 | |[SqueezeNet1_1](https://paddle-imagenet-models-name.bj.bcebos.com/SqueezeNet1_1_pretrained.tar) | 60.08%/81.85% | 2.751 | 1.270 | -### VGG +### VGG Series |model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | |- |:-: |:-: |:-: | |[VGG11](https://paddle-imagenet-models-name.bj.bcebos.com/VGG11_pretrained.tar) | 69.28%/89.09% | 8.223 | 6.821 | @@ -181,7 +234,7 @@ python infer.py \ |[VGG16](https://paddle-imagenet-models-name.bj.bcebos.com/VGG16_pretrained.tar) | 72.00%/90.69% | 11.315 | 9.067 | |[VGG19](https://paddle-imagenet-models-name.bj.bcebos.com/VGG19_pretrained.tar) | 72.56%/90.93% | 13.096 | 10.388 | -### MobileNet +### MobileNet Series |model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | |- |:-: |:-: |:-: | |[MobileNetV1](http://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV1_pretrained.tar) 
| 70.99%/89.68% | 2.609 |1.615 | @@ -191,7 +244,7 @@ python infer.py \ |[MobileNetV2_x1_5](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x1_5_pretrained.tar) | 74.12%/91.67% | 5.235 | 6.909 | |[MobileNetV2_x2_0](https://paddle-imagenet-models-name.bj.bcebos.com/MobileNetV2_x2_0_pretrained.tar) | 75.23%/92.58% | 6.680 | 7.658 | -### ShuffleNet +### ShuffleNet Series |model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | |- |:-: |:-: |:-: | |[ShuffleNetV2_x0_25](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_x0_25_pretrained.tar) | 49.90%/73.79% | 5.956 | 2.961 | @@ -202,7 +255,7 @@ python infer.py \ |[ShuffleNetV2_x2_0](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_x2_0_pretrained.tar) | 73.15%/91.20% | 6.430 | 4.553 | |[ShuffleNetV2_x1_0_swish](https://paddle-imagenet-models-name.bj.bcebos.com/ShuffleNetV2_pretrained.tar) | 70.03%/89.17% | 6.078 | 6.282 | -### ResNet +### ResNet Series |model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | |- |:-: |:-: |:-: | |[ResNet18](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet18_pretrained.tar) | 70.98%/89.92% | 3.456 | 2.484 | @@ -217,7 +270,7 @@ python infer.py \ |[ResNet152_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet152_vd_pretrained.tar) | 80.59%/95.30% | 22.041 | 12.259 | |[ResNet200_vd](https://paddle-imagenet-models-name.bj.bcebos.com/ResNet200_vd_pretrained.tar) | 80.93%/95.33% | 28.015 | 15.278 | -### ResNeXt +### ResNeXt Series |model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | |- |:-: |:-: |:-: | |[ResNeXt50_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt50_32x4d_pretrained.tar) | 77.75%/93.82% | 12.863 | 9.837 | @@ -230,7 +283,7 @@ python infer.py \ |[ResNeXt152_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt152_32x4d_pretrained.tar) | 
78.98%/94.33% | 37.007 | 31.301 | |[ResNeXt152_64x4d](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt152_64x4d_pretrained.tar) | 79.51%/94.71% | 58.966 | 57.267 | -### DenseNet +### DenseNet Series |model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | |- |:-: |:-: |:-: | |[DenseNet121](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet121_pretrained.tar) | 75.66%/92.58% | 12.437 | 5.813 | @@ -239,18 +292,18 @@ python infer.py \ |[DenseNet201](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet201_pretrained.tar) | 77.63%/93.66% | 26.583 | 10.549 | |[DenseNet264](https://paddle-imagenet-models-name.bj.bcebos.com/DenseNet264_pretrained.tar) | 77.96%/93.85% | 41.495 | 15.574 | -### SENet +### SENet Series |model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | |- |:-: |:-: |:-: | |[SE_ResNeXt50_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/SE_ResNeXt50_32x4d_pretrained.tar) | 78.44%/93.96% | 14.916 | 12.126 | |[SE_ResNeXt101_32x4d](https://paddle-imagenet-models-name.bj.bcebos.com/SE_ResNeXt101_32x4d_pretrained.tar) | 79.12%/94.20% | 30.085 | 24.110 | -|[SENet154_vd](https://paddle-imagenet-models-name.bj.bcebos.com/SENet154_vd_pretrained.tar) | 81.40%/95.48% | 71.892 | 64.855 | +|[SE_154_vd](https://paddle-imagenet-models-name.bj.bcebos.com/SE_154_vd_pretrained.tar) | 81.40%/95.48% | 71.892 | 64.855 | -### Inception +### Inception Series |model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | |- |:-: |:-: |:-: | -|[GoogLeNet](https://paddle-imagenet-models-name.bj.bcebos.com/GoogleNet_pretrained.tar) | 70.70%/89.66% | 6.528 | 3.076 | -|[Xception_41](https://paddle-imagenet-models-name.bj.bcebos.com/Xception41_pretrained.tar) | 79.30%/94.53% | 13.757 | 10.831 | +|[GoogLeNet](https://paddle-imagenet-models-name.bj.bcebos.com/GoogLeNet_pretrained.tar) | 70.70%/89.66% | 6.528 | 
3.076 | +|[Xception_41](https://paddle-imagenet-models-name.bj.bcebos.com/Xception_41_pretrained.tar) | 79.30%/94.53% | 13.757 | 10.831 | |[InceptionV4](https://paddle-imagenet-models-name.bj.bcebos.com/InceptionV4_pretrained.tar) | 80.77%/95.26% | 32.413 | 18.154 | ### DarkNet @@ -258,7 +311,7 @@ python infer.py \ |- |:-: |:-: |:-: | |[DarkNet53](https://paddle-imagenet-models-name.bj.bcebos.com/DarkNet53_ImageNet1k_pretrained.tar) | 78.04%/94.05% | 11.969 | 7.153 | -### ResNeXt101_wsl +### ResNeXt101_wsl Series |model | top-1/top-5 accuracy(CV2) | Paddle Fluid inference time(ms) | Paddle TensorRT inference time(ms) | |- |:-: |:-: |:-: | |[ResNeXt101_32x8d_wsl](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_32x8d_wsl_pretrained.tar) | 82.55%/96.74% | 33.310 | 27.648 | @@ -267,14 +320,18 @@ python infer.py \ |[ResNeXt101_32x48d_wsl](https://paddle-imagenet-models-name.bj.bcebos.com/ResNeXt101_32x48d_wsl_pretrained.tar) | 85.37%/97.69% | 161.722 | | |[Fix_ResNeXt101_32x48d_wsl](https://paddle-imagenet-models-name.bj.bcebos.com/Fix_ResNeXt101_32x48d_wsl_pretrained.tar) | 86.26%/97.97% | 236.091 | | - ## FAQ -**Q:** 加载预训练模型报错,Enforce failed. Expected x_dims[1] == labels_dims[1], but received x_dims[1]:1000 != labels_dims[1]:6. +**Q:** How can I solve the following error when I train on a 6-class dataset with the pretrained_model parameter set? +``` +Enforce failed. Expected x_dims[1] == labels_dims[1], but received x_dims[1]:1000 != labels_dims[1]:6. +``` + +**A:** It may be caused by mismatched dimensions. Please remove the fc parameters from the pretrained model; they are usually named with the prefix ```fc_``` + +## Reference -**A:** 维度对不上,删掉预训练参数中的FC -## 参考文献 - AlexNet: [imagenet-classification-with-deep-convolutional-neural-networks](https://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf), Alex Krizhevsky, Ilya Sutskever, Geoffrey E. 
Hinton - ResNet: [Deep Residual Learning for Image Recognitio](https://arxiv.org/abs/1512.03385), Kaiming He, Xiangyu Zhang, Shaoqing Ren, Jian Sun - ResNeXt: [Aggregated Residual Transformations for Deep Neural Networks](https://arxiv.org/abs/1611.05431), Saining Xie, Ross Girshick, Piotr Dollár, Zhuowen Tu, Kaiming He @@ -293,17 +350,18 @@ python infer.py \ - ResNeXt101_wsl: [Exploring the Limits of Weakly Supervised Pretraining](https://arxiv.org/abs/1805.00932), Dhruv Mahajan, Ross Girshick, Vignesh Ramanathan, Kaiming He, Manohar Paluri, Yixuan Li, Ashwin Bharambe, Laurens van der Maaten - Fix_ResNeXt101_wsl: [Fixing the train-test resolution discrepancy](https://arxiv.org/abs/1906.06423), Hugo Touvron, Andrea Vedaldi, Matthijs Douze, Herve ́ Je ́gou -## 版本更新 -- 2018/12/03 **Stage1**: 更新AlexNet,ResNet50,ResNet101,MobileNetV1 -- 2018/12/23 **Stage2**: 更新VGG系列 SeResNeXt50_32x4d,SeResNeXt101_32x4d,ResNet152 -- 2019/01/31 更新MobileNetV2_x1_0 -- 2019/04/01 **Stage3**: 更新ResNet18,ResNet34,GoogLeNet,ShuffleNetV2 -- 2019/06/12 **Stage4**: 更新ResNet50_vc,ResNet50_vd,ResNet101_vd,ResNet152_vd,ResNet200_vd,SE154_vd InceptionV4,ResNeXt101_64x4d,ResNeXt101_vd_64x4d -- 2019/06/22 更新ResNet50_vd_v2 -- 2019/07/02 **Stage5**: 更新MobileNetV2_x0_5, ResNeXt50_32x4d, ResNeXt50_64x4d, Xception_41, ResNet101_vd -- 2019/07/19 **Stage6**: 更新ShuffleNetV2_x0_25, ShuffleNetV2_x0_33, ShuffleNetV2_x0_5, ShuffleNetV2_x1_0, ShuffleNetV2_x1_5, ShuffleNetV2_x2_0, MobileNetV2_x0_25, MobileNetV2_x1_5, MobileNetV2_x2_0, ResNeXt50_vd_64x4d, ResNeXt101_32x4d, ResNeXt152_32x4d -- 2019/08/01 **Stage7**: 更新DarkNet53, DenseNet121. 
Densenet161, DenseNet169, DenseNet201, DenseNet264, SqueezeNet1_0, SqueezeNet1_1, ResNeXt50_vd_32x4d, ResNeXt152_64x4d, ResNeXt101_32x8d_wsl, ResNeXt101_32x16d_wsl, ResNeXt101_32x32d_wsl, ResNeXt101_32x48d_wsl, Fix_ResNeXt101_32x48d_wsl +## Update + +- 2018/12/03 **Stage1**: Update AlexNet, ResNet50, ResNet101, MobileNetV1 +- 2018/12/23 **Stage2**: Update VGG Series, SeResNeXt50_32x4d, SeResNeXt101_32x4d, ResNet152 +- 2019/01/31 Update MobileNetV2_x1_0 +- 2019/04/01 **Stage3**: Update ResNet18, ResNet34, GoogLeNet, ShuffleNetV2 +- 2019/06/12 **Stage4**: Update ResNet50_vc, ResNet50_vd, ResNet101_vd, ResNet152_vd, ResNet200_vd, SE154_vd, InceptionV4, ResNeXt101_64x4d, ResNeXt101_vd_64x4d +- 2019/06/22 Update ResNet50_vd_v2 +- 2019/07/02 **Stage5**: Update MobileNetV2_x0_5, ResNeXt50_32x4d, ResNeXt50_64x4d, Xception_41, ResNet101_vd +- 2019/07/19 **Stage6**: Update ShuffleNetV2_x0_25, ShuffleNetV2_x0_33, ShuffleNetV2_x0_5, ShuffleNetV2_x1_0, ShuffleNetV2_x1_5, ShuffleNetV2_x2_0, MobileNetV2_x0_25, MobileNetV2_x1_5, MobileNetV2_x2_0, ResNeXt50_vd_64x4d, ResNeXt101_32x4d, ResNeXt152_32x4d +- 2019/08/01 **Stage7**: Update DarkNet53, DenseNet121, DenseNet161, DenseNet169, DenseNet201, DenseNet264, SqueezeNet1_0, SqueezeNet1_1, ResNeXt50_vd_32x4d, ResNeXt152_64x4d, ResNeXt101_32x8d_wsl, ResNeXt101_32x16d_wsl, ResNeXt101_32x32d_wsl, ResNeXt101_32x48d_wsl, Fix_ResNeXt101_32x48d_wsl -## 如何贡献代码 +## Contribute -如果你可以修复某个issue或者增加一个新功能,欢迎给我们提交PR。如果对应的PR被接受了,我们将根据贡献的质量和难度进行打分(0-5分,越高越好)。如果你累计获得了10分,可以联系我们获得面试机会或者为你写推荐信。 +If you can fix an issue or add a new feature, please open a PR. If your PR is accepted, it will be scored from 0 to 5 according to its quality and difficulty (higher is better). Once you have accumulated 10 points, you can contact us for an interview opportunity or a recommendation letter. 
diff --git a/PaddleCV/image_classification/build_model.py b/PaddleCV/image_classification/build_model.py new file mode 100644 index 00000000..55ebae1e --- /dev/null +++ b/PaddleCV/image_classification/build_model.py @@ -0,0 +1,119 @@ +#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. +import paddle +import paddle.fluid as fluid +import utils.utility as utility + + +def _calc_label_smoothing_loss(softmax_out, label, class_dim, epsilon): + """Calculate label smoothing loss + + Returns: + label smoothing loss + + """ + + label_one_hot = fluid.layers.one_hot(input=label, depth=class_dim) + smooth_label = fluid.layers.label_smooth( + label=label_one_hot, epsilon=epsilon, dtype="float32") + loss = fluid.layers.cross_entropy( + input=softmax_out, label=smooth_label, soft_label=True) + return loss + + +def _basic_model(data, model, args, is_train): + image = data[0] + label = data[1] + + net_out = model.net(input=image, class_dim=args.class_dim) + softmax_out = fluid.layers.softmax(net_out, use_cudnn=False) + + if is_train and args.use_label_smoothing: + cost = _calc_label_smoothing_loss(softmax_out, label, args.class_dim, + args.epsilon) + + else: + cost = fluid.layers.cross_entropy(input=softmax_out, label=label) + + avg_cost = fluid.layers.mean(cost) + acc_top1 = fluid.layers.accuracy(input=softmax_out, label=label, k=1) + acc_top5 = fluid.layers.accuracy(input=softmax_out, label=label, k=5) + return [avg_cost, acc_top1, 
acc_top5] + + +def _googlenet_model(data, model, args, is_train): + """GoogLeNet model output, include avg_cost, acc_top1 and acc_top5 + + Returns: + GoogLeNet model output + + """ + image = data[0] + label = data[1] + + out0, out1, out2 = model.net(input=image, class_dim=args.class_dim) + cost0 = fluid.layers.cross_entropy(input=out0, label=label) + cost1 = fluid.layers.cross_entropy(input=out1, label=label) + cost2 = fluid.layers.cross_entropy(input=out2, label=label) + + avg_cost0 = fluid.layers.mean(x=cost0) + avg_cost1 = fluid.layers.mean(x=cost1) + avg_cost2 = fluid.layers.mean(x=cost2) + + avg_cost = avg_cost0 + 0.3 * avg_cost1 + 0.3 * avg_cost2 + acc_top1 = fluid.layers.accuracy(input=out0, label=label, k=1) + acc_top5 = fluid.layers.accuracy(input=out0, label=label, k=5) + + return [avg_cost, acc_top1, acc_top5] + + +def _mixup_model(data, model, args, is_train): + """output of Mixup processing network, include avg_cost + """ + image = data[0] + y_a = data[1] + y_b = data[2] + lam = data[3] + + net_out = model.net(input=image, class_dim=args.class_dim) + softmax_out = fluid.layers.softmax(net_out, use_cudnn=False) + if not args.use_label_smoothing: + loss_a = fluid.layers.cross_entropy(input=softmax_out, label=y_a) + loss_b = fluid.layers.cross_entropy(input=softmax_out, label=y_b) + else: + loss_a = _calc_label_smoothing_loss(softmax_out, y_a, args.class_dim, + args.epsilon) + loss_b = _calc_label_smoothing_loss(softmax_out, y_b, args.class_dim, + args.epsilon) + + loss_a_mean = fluid.layers.mean(x=loss_a) + loss_b_mean = fluid.layers.mean(x=loss_b) + cost = lam * loss_a_mean + (1 - lam) * loss_b_mean + avg_cost = fluid.layers.mean(x=cost) + return [avg_cost] + + +def create_model(model, args, is_train): + """Create model, include basic model, googlenet model and mixup model + """ + py_reader, data = utility.create_pyreader(is_train, args) + + if args.model == "GoogLeNet": + loss_out = _googlenet_model(data, model, args, is_train) + else: + if 
args.use_mixup and is_train: + loss_out = _mixup_model(data, model, args, is_train) + else: + loss_out = _basic_model(data, model, args, is_train) + return py_reader, loss_out diff --git a/PaddleCV/image_classification/eval.py b/PaddleCV/image_classification/eval.py index dd4605ad..7869bf52 100644 --- a/PaddleCV/image_classification/eval.py +++ b/PaddleCV/image_classification/eval.py @@ -26,43 +26,48 @@ import functools import paddle import paddle.fluid as fluid -import reader_cv2 as reader +import reader import models -from utils.learning_rate import cosine_decay -from utils.utility import add_arguments, print_arguments, check_gpu +from utils import * parser = argparse.ArgumentParser(description=__doc__) add_arg = functools.partial(add_arguments, argparser=parser) # yapf: disable -add_arg('batch_size', int, 256, "Minibatch size.") -add_arg('use_gpu', bool, True, "Whether to use GPU or not.") -add_arg('class_dim', int, 1000, "Class number.") -add_arg('image_shape', str, "3,224,224", "Input image size") -add_arg('pretrained_model', str, None, "Whether to use pretrained model.") -add_arg('model', str, "SE_ResNeXt50_32x4d", "Set the network to use.") -add_arg('resize_short_size', int, 256, "Set resize short size") +add_arg('data_dir', str, "./data/ILSVRC2012/", "The ImageNet datset") +add_arg('batch_size', int, 256, "Minibatch size.") +add_arg('use_gpu', bool, True, "Whether to use GPU or not.") +add_arg('class_dim', int, 1000, "Class number.") +add_arg('image_shape', str, "3,224,224", "Input image size") +parser.add_argument("--pretrained_model", default=None, required=True, type=str, help="The path to load pretrained model") +add_arg('model', str, "ResNet50", "Set the network to use.") +add_arg('resize_short_size', int, 256, "Set resize short size") +add_arg('reader_thread', int, 8, "The number of multi thread reader") +add_arg('reader_buf_size', int, 2048, "The buf size of multi thread reader") +parser.add_argument('--image_mean', nargs='+', type=float, 
default=[0.485, 0.456, 0.406], help="The mean of input image data") +parser.add_argument('--image_std', nargs='+', type=float, default=[0.229, 0.224, 0.225], help="The std of input image data") +add_arg('crop_size', int, 224, "The value of crop size") # yapf: enable def eval(args): - # parameters from arguments - class_dim = args.class_dim - model_name = args.model - pretrained_model = args.pretrained_model image_shape = [int(m) for m in args.image_shape.split(",")] model_list = [m for m in dir(models) if "__" not in m] - assert model_name in model_list, "{} is not in lists: {}".format(args.model, + assert args.model in model_list, "{} is not in lists: {}".format(args.model, model_list) + assert os.path.isdir( + args.pretrained_model + ), "{} doesn't exist, please load right pretrained model path for eval".format( + args.pretrained_model) image = fluid.layers.data(name='image', shape=image_shape, dtype='float32') label = fluid.layers.data(name='label', shape=[1], dtype='int64') # model definition - model = models.__dict__[model_name]() + model = models.__dict__[args.model]() - if model_name == "GoogleNet": - out0, out1, out2 = model.net(input=image, class_dim=class_dim) + if args.model == "GoogLeNet": + out0, out1, out2 = model.net(input=image, class_dim=args.class_dim) cost0 = fluid.layers.cross_entropy(input=out0, label=label) cost1 = fluid.layers.cross_entropy(input=out1, label=label) cost2 = fluid.layers.cross_entropy(input=out2, label=label) @@ -74,7 +79,8 @@ def eval(args): acc_top1 = fluid.layers.accuracy(input=out0, label=label, k=1) acc_top5 = fluid.layers.accuracy(input=out0, label=label, k=5) else: - out = model.net(input=image, class_dim=class_dim) + out = model.net(input=image, class_dim=args.class_dim) + cost, pred = fluid.layers.softmax_with_cross_entropy( out, label, return_softmax=True) avg_cost = fluid.layers.mean(x=cost) @@ -89,9 +95,10 @@ def eval(args): exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) - 
fluid.io.load_persistables(exe, pretrained_model) + fluid.io.load_persistables(exe, args.pretrained_model) - val_reader = reader.val(settings=args, batch_size=args.batch_size) + val_reader = paddle.batch( + reader.val(settings=args), batch_size=args.batch_size) feeder = fluid.DataFeeder(place=place, feed_list=[image, label]) test_info = [[], [], []] @@ -129,7 +136,7 @@ def eval(args): def main(): args = parser.parse_args() print_arguments(args) - check_gpu(args.use_gpu) + check_gpu() eval(args) diff --git a/PaddleCV/image_classification/infer.py b/PaddleCV/image_classification/infer.py index aef983c3..4d61805d 100644 --- a/PaddleCV/image_classification/infer.py +++ b/PaddleCV/image_classification/infer.py @@ -26,43 +26,44 @@ import functools import paddle import paddle.fluid as fluid -import reader_cv2 as reader +import reader import models -import utils -from utils.utility import add_arguments, print_arguments, check_gpu +from utils import * parser = argparse.ArgumentParser(description=__doc__) # yapf: disable add_arg = functools.partial(add_arguments, argparser=parser) +add_arg('data_dir', str, "./data/ILSVRC2012/", "The ImageNet data") add_arg('use_gpu', bool, True, "Whether to use GPU or not.") add_arg('class_dim', int, 1000, "Class number.") add_arg('image_shape', str, "3,224,224", "Input image size") -add_arg('pretrained_model', str, None, "Whether to use pretrained model.") -add_arg('model', str, "SE_ResNeXt50_32x4d", "Set the network to use.") -add_arg('save_inference', bool, False, "Whether to save inference model or not") -add_arg('resize_short_size', int, 256, "Set resize short size") +parser.add_argument("--pretrained_model", default=None, required=True, type=str, help="The path to load pretrained model") +add_arg('model', str, "ResNet50", "Set the network to use.") +add_arg('save_inference', bool, False, "Whether to save inference model or not") +add_arg('resize_short_size',int, 256, "Set resize short size") +add_arg('reader_thread', int, 1, "The 
number of multi thread reader") +add_arg('reader_buf_size', int, 2048, "The buf size of multi thread reader") +parser.add_argument('--image_mean', nargs='+', type=float, default=[0.485, 0.456, 0.406], help="The mean of input image data") +parser.add_argument('--image_std', nargs='+', type=float, default=[0.229, 0.224, 0.225], help="The std of input image data") +add_arg('crop_size', int, 224, "The value of crop size") +add_arg('topk', int, 1, "topk") +add_arg('label_path', str, "./utils/tools/readable_label.txt", "readable label filepath") # yapf: enable def infer(args): - # parameters from arguments - class_dim = args.class_dim - model_name = args.model - save_inference = args.save_inference - pretrained_model = args.pretrained_model image_shape = [int(m) for m in args.image_shape.split(",")] model_list = [m for m in dir(models) if "__" not in m] - assert model_name in model_list, "{} is not in lists: {}".format(args.model, + assert args.model in model_list, "{} is not in lists: {}".format(args.model, model_list) - + assert os.path.isdir(args.pretrained_model + ), "please load right pretrained model path for infer" image = fluid.layers.data(name='image', shape=image_shape, dtype='float32') - - # model definition - model = models.__dict__[model_name]() - if model_name == "GoogleNet": - out, _, _ = model.net(input=image, class_dim=class_dim) + model = models.__dict__[args.model]() + if args.model == "GoogLeNet": + out, _, _ = model.net(input=image, class_dim=args.class_dim) else: - out = model.net(input=image, class_dim=class_dim) + out = model.net(input=image, class_dim=args.class_dim) out = fluid.layers.softmax(out) test_program = fluid.default_main_program().clone(for_test=True) @@ -73,39 +74,51 @@ def infer(args): exe = fluid.Executor(place) exe.run(fluid.default_startup_program()) - fluid.io.load_persistables(exe, pretrained_model) - if save_inference: + fluid.io.load_persistables(exe, args.pretrained_model) + if args.save_inference: 
fluid.io.save_inference_model( - dirname=model_name, + dirname=args.model, feeded_var_names=['image'], main_program=test_program, target_vars=out, executor=exe, model_filename='model', params_filename='params') - print("model: ", model_name, " is already saved") + print("model: ", args.model, " is already saved") exit(0) - test_batch_size = 1 - test_reader = reader.test(settings=args, batch_size=test_batch_size) + test_batch_size = 1 + test_reader = paddle.batch( + reader.test(settings=args), batch_size=test_batch_size) feeder = fluid.DataFeeder(place=place, feed_list=[image]) - TOPK = 1 + TOPK = args.topk + assert os.path.exists(args.label_path), "Index file doesn't exist!" + f = open(args.label_path) + label_dict = {} + for item in f.readlines(): + key = item.split(" ")[0] + value = [l.replace("\n", "") for l in item.split(" ")[1:]] + label_dict[key] = value + for batch_id, data in enumerate(test_reader()): result = exe.run(test_program, fetch_list=fetch_list, feed=feeder.feed(data)) result = result[0][0] pred_label = np.argsort(result)[::-1][:TOPK] - print("Test-{0}-score: {1}, class {2}" - .format(batch_id, result[pred_label], pred_label)) + readable_pred_label = [] + for label in pred_label: + readable_pred_label.append(label_dict[str(label)]) + print("Test-{0}-score: {1}, class{2} {3}".format(batch_id, result[ + pred_label], pred_label, readable_pred_label)) sys.stdout.flush() def main(): args = parser.parse_args() print_arguments(args) - check_gpu(args.use_gpu) + check_gpu() infer(args) diff --git a/PaddleCV/image_classification/legacy/README.md b/PaddleCV/image_classification/legacy/README.md index e4603a62..ccdb2445 100644 --- a/PaddleCV/image_classification/legacy/README.md +++ b/PaddleCV/image_classification/legacy/README.md @@ -8,3 +8,9 @@ For historical reasons, We keep "no name" models here, which are different from |- |:-: |:-:| |[ResNet152](http://paddle-imagenet-models.bj.bcebos.com/ResNet152_pretrained.zip) | 78.18%/93.93% | 78.11%/94.04% | 
|[SE_ResNeXt50_32x4d](http://paddle-imagenet-models.bj.bcebos.com/se_resnext_50_model.tar) | 78.32%/93.96% | 77.58%/93.73% | + +--- + +2019/08/08 +We have moved the dist_train and fp16 parts to PaddlePaddle Fleet. +The dist_train folder is temporarily stored here. diff --git a/PaddleCV/image_classification/dist_train/README.md b/PaddleCV/image_classification/legacy/dist_train/README.md similarity index 99% rename from PaddleCV/image_classification/dist_train/README.md rename to PaddleCV/image_classification/legacy/dist_train/README.md index 4b953d79..188f529a 100644 --- a/PaddleCV/image_classification/dist_train/README.md +++ b/PaddleCV/image_classification/legacy/dist_train/README.md @@ -112,7 +112,7 @@ Speed-ups of Multiple-GPU Training of Resnet50 on Imagenet #### Environment - - GPU: NVIDIA® Tesla® V100 + - GPU: NVIDIA® Tesla® V100 - Machine number * Card number: 4 * 4 - System: Centos 6u3 - Cuda/Cudnn: 9.0/7.1 @@ -127,5 +127,3 @@ Speed-ups of Multiple-GPU Training of Resnet50 on Imagenet
Performance using DGC for resnet-fp32 under different bandwidth

- - diff --git a/PaddleCV/image_classification/dist_train/__init__.py b/PaddleCV/image_classification/legacy/dist_train/__init__.py similarity index 100% rename from PaddleCV/image_classification/dist_train/__init__.py rename to PaddleCV/image_classification/legacy/dist_train/__init__.py diff --git a/PaddleCV/image_classification/dist_train/batch_merge.py b/PaddleCV/image_classification/legacy/dist_train/batch_merge.py similarity index 89% rename from PaddleCV/image_classification/dist_train/batch_merge.py rename to PaddleCV/image_classification/legacy/dist_train/batch_merge.py index 01bb9ab0..a2be7b62 100644 --- a/PaddleCV/image_classification/dist_train/batch_merge.py +++ b/PaddleCV/image_classification/legacy/dist_train/batch_merge.py @@ -15,6 +15,7 @@ import paddle.fluid as fluid import numpy as np + def copyback_repeat_bn_params(main_prog): repeat_vars = set() for op in main_prog.global_block().ops: @@ -22,9 +23,11 @@ def copyback_repeat_bn_params(main_prog): repeat_vars.add(op.input("Mean")[0]) repeat_vars.add(op.input("Variance")[0]) for vname in repeat_vars: - real_var = fluid.global_scope().find_var("%s.repeat.0" % vname).get_tensor() + real_var = fluid.global_scope().find_var("%s.repeat.0" % + vname).get_tensor() orig_var = fluid.global_scope().find_var(vname).get_tensor() - orig_var.set(np.array(real_var), fluid.CUDAPlace(0)) # test on GPU0 + orig_var.set(np.array(real_var), fluid.CUDAPlace(0)) # test on GPU0 + def append_bn_repeat_init_op(main_prog, startup_prog, num_repeats): repeat_vars = set() @@ -32,7 +35,7 @@ def append_bn_repeat_init_op(main_prog, startup_prog, num_repeats): if op.type == "batch_norm": repeat_vars.add(op.input("Mean")[0]) repeat_vars.add(op.input("Variance")[0]) - + for i in range(num_repeats): for op in startup_prog.global_block().ops: if op.type == "fill_constant": @@ -45,13 +48,10 @@ def append_bn_repeat_init_op(main_prog, startup_prog, num_repeats): type=var.type, dtype=var.dtype, shape=var.shape, - persistable=var.persistable 
- ) + persistable=var.persistable) main_prog.global_block()._clone_variable(repeat_var) startup_prog.global_block().append_op( type="fill_constant", inputs={}, outputs={"Out": repeat_var}, - attrs=op.all_attrs() - ) - + attrs=op.all_attrs()) diff --git a/PaddleCV/image_classification/dist_train/dist_train.py b/PaddleCV/image_classification/legacy/dist_train/dist_train.py similarity index 100% rename from PaddleCV/image_classification/dist_train/dist_train.py rename to PaddleCV/image_classification/legacy/dist_train/dist_train.py diff --git a/PaddleCV/image_classification/dist_train/dist_utils.py b/PaddleCV/image_classification/legacy/dist_train/dist_utils.py similarity index 100% rename from PaddleCV/image_classification/dist_train/dist_utils.py rename to PaddleCV/image_classification/legacy/dist_train/dist_utils.py diff --git a/PaddleCV/image_classification/dist_train/env.py b/PaddleCV/image_classification/legacy/dist_train/env.py similarity index 95% rename from PaddleCV/image_classification/dist_train/env.py rename to PaddleCV/image_classification/legacy/dist_train/env.py index 08db25bd..3eacdc6b 100644 --- a/PaddleCV/image_classification/dist_train/env.py +++ b/PaddleCV/image_classification/legacy/dist_train/env.py @@ -23,7 +23,7 @@ def dist_env(): trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0")) num_trainers = 1 training_role = os.getenv("PADDLE_TRAINING_ROLE", "TRAINER") - assert(training_role == "PSERVER" or training_role == "TRAINER") + assert (training_role == "PSERVER" or training_role == "TRAINER") # - PADDLE_TRAINER_ENDPOINTS means nccl2 mode. # - PADDLE_PSERVER_ENDPOINTS means pserver mode. 
@@ -36,7 +36,7 @@ def dist_env(): num_trainers = len(trainer_endpoints) elif pserver_endpoints: num_trainers = int(os.getenv("PADDLE_TRAINERS_NUM")) - + return { "trainer_id": trainer_id, "num_trainers": num_trainers, diff --git a/PaddleCV/image_classification/dist_train/run_mp_mode.sh b/PaddleCV/image_classification/legacy/dist_train/run_mp_mode.sh similarity index 100% rename from PaddleCV/image_classification/dist_train/run_mp_mode.sh rename to PaddleCV/image_classification/legacy/dist_train/run_mp_mode.sh diff --git a/PaddleCV/image_classification/dist_train/run_nccl2_mode.sh b/PaddleCV/image_classification/legacy/dist_train/run_nccl2_mode.sh similarity index 100% rename from PaddleCV/image_classification/dist_train/run_nccl2_mode.sh rename to PaddleCV/image_classification/legacy/dist_train/run_nccl2_mode.sh diff --git a/PaddleCV/image_classification/dist_train/run_ps_mode.sh b/PaddleCV/image_classification/legacy/dist_train/run_ps_mode.sh similarity index 100% rename from PaddleCV/image_classification/dist_train/run_ps_mode.sh rename to PaddleCV/image_classification/legacy/dist_train/run_ps_mode.sh diff --git a/PaddleCV/image_classification/images/alexnet_imagenet1k_acc1.png b/PaddleCV/image_classification/legacy/images/alexnet_imagenet1k_acc1.png similarity index 100% rename from PaddleCV/image_classification/images/alexnet_imagenet1k_acc1.png rename to PaddleCV/image_classification/legacy/images/alexnet_imagenet1k_acc1.png diff --git a/PaddleCV/image_classification/images/curve.jpg b/PaddleCV/image_classification/legacy/images/curve.jpg similarity index 100% rename from PaddleCV/image_classification/images/curve.jpg rename to PaddleCV/image_classification/legacy/images/curve.jpg diff --git a/PaddleCV/image_classification/images/imagenet_dist_performance.png b/PaddleCV/image_classification/legacy/images/imagenet_dist_performance.png similarity index 100% rename from PaddleCV/image_classification/images/imagenet_dist_performance.png rename to 
PaddleCV/image_classification/legacy/images/imagenet_dist_performance.png diff --git a/PaddleCV/image_classification/images/imagenet_dist_speedup.png b/PaddleCV/image_classification/legacy/images/imagenet_dist_speedup.png similarity index 100% rename from PaddleCV/image_classification/images/imagenet_dist_speedup.png rename to PaddleCV/image_classification/legacy/images/imagenet_dist_speedup.png diff --git a/PaddleCV/image_classification/images/mobielenetv1_imagenet1k_acc1.png b/PaddleCV/image_classification/legacy/images/mobielenetv1_imagenet1k_acc1.png similarity index 100% rename from PaddleCV/image_classification/images/mobielenetv1_imagenet1k_acc1.png rename to PaddleCV/image_classification/legacy/images/mobielenetv1_imagenet1k_acc1.png diff --git a/PaddleCV/image_classification/images/resnet101_imagenet1k_acc1.png b/PaddleCV/image_classification/legacy/images/resnet101_imagenet1k_acc1.png similarity index 100% rename from PaddleCV/image_classification/images/resnet101_imagenet1k_acc1.png rename to PaddleCV/image_classification/legacy/images/resnet101_imagenet1k_acc1.png diff --git a/PaddleCV/image_classification/images/resnet50_32gpus-acc1.png b/PaddleCV/image_classification/legacy/images/resnet50_32gpus-acc1.png similarity index 100% rename from PaddleCV/image_classification/images/resnet50_32gpus-acc1.png rename to PaddleCV/image_classification/legacy/images/resnet50_32gpus-acc1.png diff --git a/PaddleCV/image_classification/images/resnet50_imagenet1k_acc1.png b/PaddleCV/image_classification/legacy/images/resnet50_imagenet1k_acc1.png similarity index 100% rename from PaddleCV/image_classification/images/resnet50_imagenet1k_acc1.png rename to PaddleCV/image_classification/legacy/images/resnet50_imagenet1k_acc1.png diff --git a/PaddleCV/image_classification/images/resnet_dgc.png b/PaddleCV/image_classification/legacy/images/resnet_dgc.png similarity index 100% rename from PaddleCV/image_classification/images/resnet_dgc.png rename to 
PaddleCV/image_classification/legacy/images/resnet_dgc.png diff --git a/PaddleCV/image_classification/images/vgg11_imagenet1k_acc1.png b/PaddleCV/image_classification/legacy/images/vgg11_imagenet1k_acc1.png similarity index 100% rename from PaddleCV/image_classification/images/vgg11_imagenet1k_acc1.png rename to PaddleCV/image_classification/legacy/images/vgg11_imagenet1k_acc1.png diff --git a/PaddleCV/image_classification/legacy/reader_pil.py b/PaddleCV/image_classification/legacy/reader_pil.py new file mode 100755 index 00000000..c445d233 --- /dev/null +++ b/PaddleCV/image_classification/legacy/reader_pil.py @@ -0,0 +1,219 @@ +#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
"""PIL-based batched data reader for image classification (legacy)."""

import os
import math
import random
import functools
import numpy as np
from PIL import Image, ImageEnhance

import paddle
# `fluid` is needed by `_reader_creator` for the distributed batch reader.
import paddle.fluid as fluid

random.seed(0)
np.random.seed(0)

# Side length (pixels) of the square image produced for the network.
DATA_DIM = 224

# Worker-thread count and buffer size handed to `xmap_readers`.
THREAD = 8
BUF_SIZE = 2048

DATA_DIR = 'data/ILSVRC2012'

# Per-channel normalization constants, shaped to broadcast over CHW arrays.
img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1))
img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1))


def resize_short(img, target_size):
    """Resize `img` so that its shorter side becomes `target_size`.

    Both sides are scaled by the same factor (rounded to whole pixels) and
    LANCZOS resampling is used.
    """
    percent = float(target_size) / min(img.size[0], img.size[1])
    resized_width = int(round(img.size[0] * percent))
    resized_height = int(round(img.size[1] * percent))
    img = img.resize((resized_width, resized_height), Image.LANCZOS)
    return img


def crop_image(img, target_size, center):
    """Crop a `target_size` x `target_size` square out of `img`.

    Args:
        img: image exposing `.size` as (width, height) and `.crop(box)`.
        target_size: side length of the square crop.
        center: if truthy, crop around the image center; otherwise pick a
            random position using `np.random`.

    Returns:
        The cropped image returned by `img.crop`.
    """
    width, height = img.size
    size = target_size
    if center:
        # Floor division keeps the crop box integral on Python 3 as well.
        w_start = (width - size) // 2
        h_start = (height - size) // 2
    else:
        w_start = np.random.randint(0, width - size + 1)
        h_start = np.random.randint(0, height - size + 1)
    w_end = w_start + size
    h_end = h_start + size
    img = img.crop((w_start, h_start, w_end, h_end))
    return img


def random_crop(img, size, scale=(0.08, 1.0), ratio=(3. / 4., 4. / 3.)):
    """Crop a random region of `img` and resize it to `size` x `size`.

    Args:
        img: input image.
        size: side length of the square output.
        scale: (min, max) fraction of the image area to keep; both ends are
            clamped so the sampled crop fits inside the image.
        ratio: (min, max) range the crop's width/height ratio is drawn from.

    Returns:
        The cropped image, resized with LANCZOS resampling.
    """
    aspect_ratio = math.sqrt(np.random.uniform(*ratio))
    w = 1. * aspect_ratio
    h = 1. / aspect_ratio

    # Largest area fraction for which a crop of this aspect ratio still fits.
    bound = min((float(img.size[0]) / img.size[1]) / (w**2),
                (float(img.size[1]) / img.size[0]) / (h**2))
    scale_max = min(scale[1], bound)
    scale_min = min(scale[0], bound)

    target_area = img.size[0] * img.size[1] * np.random.uniform(scale_min,
                                                                scale_max)
    target_size = math.sqrt(target_area)
    w = int(target_size * w)
    h = int(target_size * h)

    i = np.random.randint(0, img.size[0] - w + 1)
    j = np.random.randint(0, img.size[1] - h + 1)

    img = img.crop((i, j, i + w, j + h))
    img = img.resize((size, size), Image.LANCZOS)
    return img


def rotate_image(img):
    """Rotate `img` by a random whole-degree angle drawn from [-10, 10]."""
    angle = np.random.randint(-10, 11)
    img = img.rotate(angle)
    return img


def distort_color(img):
    """Apply brightness, contrast and color jitter in a random order.

    Each enhancement factor is drawn uniformly from [0.5, 1.5].
    """

    def random_brightness(img, lower=0.5, upper=1.5):
        e = np.random.uniform(lower, upper)
        return ImageEnhance.Brightness(img).enhance(e)

    def random_contrast(img, lower=0.5, upper=1.5):
        e = np.random.uniform(lower, upper)
        return ImageEnhance.Contrast(img).enhance(e)

    def random_color(img, lower=0.5, upper=1.5):
        e = np.random.uniform(lower, upper)
        return ImageEnhance.Color(img).enhance(e)

    ops = [random_brightness, random_contrast, random_color]
    np.random.shuffle(ops)

    img = ops[0](img)
    img = ops[1](img)
    img = ops[2](img)

    return img


def process_image(sample, mode, color_jitter, rotate):
    """Load one sample from disk and turn it into a normalized CHW array.

    Args:
        sample: sequence whose first element is the image path; for 'train'
            and 'val' the second element is the label.
        mode: 'train', 'val' or 'test'.
        color_jitter: apply `distort_color` (train mode only).
        rotate: apply `rotate_image` (train mode only).

    Returns:
        `(img, label)` for 'train'/'val', `[img]` for 'test'. Any other mode
        falls through and returns None.
    """
    img_path = sample[0]

    img = Image.open(img_path)
    if mode == 'train':
        if rotate: img = rotate_image(img)
        img = random_crop(img, DATA_DIM)
    else:
        img = resize_short(img, target_size=256)
        img = crop_image(img, target_size=DATA_DIM, center=True)
    if mode == 'train':
        if color_jitter:
            img = distort_color(img)
        # Random horizontal flip.
        if np.random.randint(0, 2) == 1:
            img = img.transpose(Image.FLIP_LEFT_RIGHT)

    if img.mode != 'RGB':
        img = img.convert('RGB')

    # HWC uint8 -> CHW float32 in [0, 1], then per-channel normalization.
    img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255
    img -= img_mean
    img /= img_std

    if mode == 'train' or mode == 'val':
        return img, sample[1]
    elif mode == 'test':
        return [img]


def process_batch_data(input_data, mode, color_jitter, rotate):
    """Run `process_image` on every sample of a batch and return the list."""
    batch_data = []
    for sample in input_data:
        batch_data.append(process_image(sample, mode, color_jitter, rotate))
    return batch_data


def _reader_creator(file_list,
                    batch_size,
                    mode,
                    shuffle=False,
                    color_jitter=False,
                    rotate=False,
                    data_dir=DATA_DIR,
                    shuffle_seed=0,
                    infinite=False):
    """Build a multi-threaded reader yielding pre-processed batches.

    Args:
        file_list: text file with one "<relative image path> <label>" entry
            per line.
        batch_size: number of samples per yielded batch. A trailing partial
            batch is not yielded.
        mode: 'train', 'val' or 'test'.
        shuffle: shuffle the file list before batching.
        color_jitter: forwarded to `process_image`.
        rotate: forwarded to `process_image`.
        data_dir: directory the image paths are relative to.
        shuffle_seed: if not None, re-seeds `np.random` with this value right
            before shuffling; must be set when training with several
            trainers.
        infinite: accepted for interface compatibility; not used here.

    Returns:
        The reader produced by `paddle.reader.xmap_readers`.
    """

    def reader():
        def read_file_list():
            with open(file_list) as flist:
                # Skip blank lines so a trailing newline does not break the
                # "path label" unpacking below.
                full_lines = [line.strip() for line in flist if line.strip()]
                if shuffle:
                    if shuffle_seed is not None:
                        np.random.seed(shuffle_seed)
                    np.random.shuffle(full_lines)
            batch_data = []
            for line in full_lines:
                img_path, label = line.split()
                img_path = os.path.join(data_dir, img_path)
                batch_data.append([img_path, int(label)])
                if len(batch_data) == batch_size:
                    if mode == 'train' or mode == 'val':
                        yield batch_data
                    elif mode == 'test':
                        # Keep each sample a sequence: `process_image` reads
                        # the path from `sample[0]`, so a bare string would
                        # be indexed down to its first character.
                        yield [[sample[0]] for sample in batch_data]
                    batch_data = []

        return read_file_list

    data_reader = reader()
    num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1))
    if mode == 'train' and num_trainers > 1:
        assert shuffle_seed is not None, \
            "If num_trainers > 1, the shuffle_seed must be set, because " \
            "the order of batch data generated by reader " \
            "must be the same in the respective processes."
        data_reader = fluid.contrib.reader.distributed_batch_reader(data_reader)

    mapper = functools.partial(
        process_batch_data, mode=mode, color_jitter=color_jitter, rotate=rotate)

    return paddle.reader.xmap_readers(mapper, data_reader, THREAD, BUF_SIZE)


def train(batch_size, data_dir=DATA_DIR, shuffle_seed=0, infinite=False):
    """Return the shuffled training reader built from `train_list.txt`."""
    file_list = os.path.join(data_dir, 'train_list.txt')
    return _reader_creator(
        file_list,
        batch_size,
        'train',
        shuffle=True,
        color_jitter=False,
        rotate=False,
        data_dir=data_dir,
        shuffle_seed=shuffle_seed,
        infinite=infinite)


def val(batch_size, data_dir=DATA_DIR):
    """Return the validation reader built from `val_list.txt`."""
    file_list = os.path.join(data_dir, 'val_list.txt')
    return _reader_creator(
        file_list, batch_size, 'val', shuffle=False, data_dir=data_dir)


def test(batch_size, data_dir=DATA_DIR):
    """Return the inference reader; it also reads `val_list.txt`."""
    file_list = os.path.join(data_dir, 'val_list.txt')
    return _reader_creator(
        file_list, batch_size, 'test', shuffle=False, data_dir=data_dir)
+ from .alexnet import AlexNet -from .mobilenet import MobileNet -from .mobilenet_v2 import MobileNetV2_x0_25, MobileNetV2_x0_5, MobileNetV2_x1_0, MobileNetV2_x1_5, MobileNetV2_x2_0, MobileNetV2_scale -from .googlenet import GoogleNet +from .mobilenet import MobileNet, MobileNetV1 +from .mobilenet_v2 import MobileNetV2, MobileNetV2_x0_25, MobileNetV2_x0_5, MobileNetV2_x1_0, MobileNetV2_x1_5, MobileNetV2_x2_0, MobileNetV2_scale +from .googlenet import GoogLeNet from .vgg import VGG11, VGG13, VGG16, VGG19 from .resnet import ResNet18, ResNet34, ResNet50, ResNet101, ResNet152 from .resnet_vc import ResNet50_vc, ResNet101_vc, ResNet152_vc @@ -11,14 +25,13 @@ from .resnext_vd import ResNeXt50_vd_64x4d, ResNeXt101_vd_64x4d, ResNeXt152_vd_6 from .resnet_dist import DistResNet from .inception_v4 import InceptionV4 from .se_resnext import SE_ResNeXt50_32x4d, SE_ResNeXt101_32x4d, SE_ResNeXt152_32x4d -from .se_resnext_vd import SE_ResNeXt50_32x4d_vd, SE_ResNeXt101_32x4d_vd, SENet154_vd +from .se_resnext_vd import SE_ResNeXt50_32x4d_vd, SE_ResNeXt101_32x4d_vd, SE_154_vd from .dpn import DPN68, DPN92, DPN98, DPN107, DPN131 -from .shufflenet_v2_swish import ShuffleNetV2, ShuffleNetV2_x0_5_swish, ShuffleNetV2_x1_0_swish, ShuffleNetV2_x1_5_swish, ShuffleNetV2_x2_0_swish -from .shufflenet_v2 import ShuffleNetV2_x0_25, ShuffleNetV2_x0_33, ShuffleNetV2_x0_5, ShuffleNetV2_x1_0, ShuffleNetV2_x1_5, ShuffleNetV2_x2_0 +from .shufflenet_v2_swish import ShuffleNetV2_swish, ShuffleNetV2_x0_5_swish, ShuffleNetV2_x1_0_swish, ShuffleNetV2_x1_5_swish, ShuffleNetV2_x2_0_swish +from .shufflenet_v2 import ShuffleNetV2_x0_25, ShuffleNetV2_x0_33, ShuffleNetV2_x0_5, ShuffleNetV2_x1_0, ShuffleNetV2_x1_5, ShuffleNetV2_x2_0, ShuffleNetV2 from .fast_imagenet import FastImageNet from .xception import Xception_41, Xception_65, Xception_71 from .densenet import DenseNet121, DenseNet161, DenseNet169, DenseNet201, DenseNet264 from .squeezenet import SqueezeNet1_0, SqueezeNet1_1 from .darknet import DarkNet53 
from .resnext101_wsl import ResNeXt101_32x8d_wsl, ResNeXt101_32x16d_wsl, ResNeXt101_32x32d_wsl, ResNeXt101_32x48d_wsl, Fix_ResNeXt101_32x48d_wsl - diff --git a/PaddleCV/image_classification/models/alexnet.py b/PaddleCV/image_classification/models/alexnet.py index dcc68ec0..3f14ed18 100644 --- a/PaddleCV/image_classification/models/alexnet.py +++ b/PaddleCV/image_classification/models/alexnet.py @@ -23,22 +23,10 @@ import paddle.fluid as fluid __all__ = ['AlexNet'] -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [40, 70, 100], - "steps": [0.01, 0.001, 0.0001, 0.00001] - } -} - class AlexNet(): def __init__(self): - self.params = train_parameters + pass def net(self, input, class_dim=1000): stdv = 1.0 / math.sqrt(input.shape[1] * 11 * 11) diff --git a/PaddleCV/image_classification/models/darknet.py b/PaddleCV/image_classification/models/darknet.py index 18630dd4..895809a6 100644 --- a/PaddleCV/image_classification/models/darknet.py +++ b/PaddleCV/image_classification/models/darknet.py @@ -1,37 +1,30 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + import paddle.fluid as fluid from paddle.fluid.param_attr import ParamAttr import math __all__ = ["DarkNet53"] -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [30, 60, 90], - "steps": [0.1, 0.01, 0.001, 0.0001] - } -} - class DarkNet53(): def __init__(self): - self.params = train_parameters + + pass def net(self, input, class_dim=1000): DarkNet_cfg = {53: ([1, 2, 8, 8, 4], self.basicblock)} @@ -45,17 +38,11 @@ class DarkNet53(): padding=1, name="yolo_input") conv = self.downsample( - conv1, - ch_out=conv1.shape[1] * 2, - name="yolo_input.downsample") - + conv1, ch_out=conv1.shape[1] * 2, name="yolo_input.downsample") + for i, stage in enumerate(stages): conv = self.layer_warp( - block_func, - conv, - 32 * (2**i), - stage, - name="stage.{}".format(i)) + block_func, conv, 32 * (2**i), stage, name="stage.{}".format(i)) if i < len(stages) - 1: # do not downsaple in the last stage conv = self.downsample( conv, @@ -64,18 +51,22 @@ class DarkNet53(): pool = fluid.layers.pool2d( input=conv, pool_type='avg', global_pooling=True) stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - out = fluid.layers.fc(input=pool, - size=class_dim, - param_attr=ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv),name='fc_weights'), - bias_attr=ParamAttr(name='fc_offset')) + out = 
fluid.layers.fc( + input=pool, + size=class_dim, + param_attr=ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name='fc_weights'), + bias_attr=ParamAttr(name='fc_offset')) return out - - - - - def conv_bn_layer(self, input, ch_out, filter_size, stride, padding, name=None): + def conv_bn_layer(self, + input, + ch_out, + filter_size, + stride, + padding, + name=None): conv = fluid.layers.conv2d( input=input, num_filters=ch_out, @@ -96,9 +87,13 @@ class DarkNet53(): moving_variance_name=bn_name + '.var') return out - - - def downsample(self, input, ch_out, filter_size=3, stride=2, padding=1, name=None): + def downsample(self, + input, + ch_out, + filter_size=3, + stride=2, + padding=1, + name=None): return self.conv_bn_layer( input, ch_out=ch_out, @@ -107,22 +102,14 @@ class DarkNet53(): padding=padding, name=name) - def basicblock(self, input, ch_out, name=None): - conv1 = self.conv_bn_layer( - input, ch_out, 1, 1, 0, name=name + ".0") - conv2 = self.conv_bn_layer( - conv1, ch_out * 2, 3, 1, 1, name=name + ".1") + conv1 = self.conv_bn_layer(input, ch_out, 1, 1, 0, name=name + ".0") + conv2 = self.conv_bn_layer(conv1, ch_out * 2, 3, 1, 1, name=name + ".1") out = fluid.layers.elementwise_add(x=input, y=conv2, act=None) return out - def layer_warp(self, block_func, input, ch_out, count, name=None): - res_out = block_func( - input, ch_out, name='{}.0'.format(name)) + res_out = block_func(input, ch_out, name='{}.0'.format(name)) for j in range(1, count): - res_out = block_func( - res_out, ch_out, name='{}.{}'.format(name, j)) + res_out = block_func(res_out, ch_out, name='{}.{}'.format(name, j)) return res_out - - diff --git a/PaddleCV/image_classification/models/densenet.py b/PaddleCV/image_classification/models/densenet.py index e7b8a00e..d3a7c1da 100644 --- a/PaddleCV/image_classification/models/densenet.py +++ b/PaddleCV/image_classification/models/densenet.py @@ -1,53 +1,48 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. 
+#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
+from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + import paddle import paddle.fluid as fluid import math from paddle.fluid.param_attr import ParamAttr -__all__ = ["DenseNet", "DenseNet121", "DenseNet161", "DenseNet169", "DenseNet201", "DenseNet264"] +__all__ = [ + "DenseNet", "DenseNet121", "DenseNet161", "DenseNet169", "DenseNet201", + "DenseNet264" +] -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [30, 60, 90], - "steps": [0.1, 0.01, 0.001, 0.0001] - } -} class DenseNet(): def __init__(self, layers=121): - self.params = train_parameters self.layers = layers - def net(self, input, bn_size=4, dropout=0, class_dim=1000): layers = self.layers supported_layers = [121, 161, 169, 201, 264] assert layers in supported_layers, \ "supported layers are {} but input layer is {}".format(supported_layers, layers) - densenet_spec = {121: (64, 32, [6, 12, 24, 16]), - 161: (96, 48, [6, 12, 36, 24]), - 169: (64, 32, [6, 12, 32, 32]), - 201: (64, 32, [6, 12, 48, 32]), - 264: (64, 32, [6, 12, 64, 48])} - - + densenet_spec = { + 121: (64, 32, [6, 12, 24, 16]), + 161: (96, 48, [6, 12, 36, 24]), + 169: (64, 32, [6, 12, 32, 32]), + 201: (64, 32, [6, 12, 48, 32]), + 264: (64, 32, [6, 12, 64, 48]) + } + num_init_features, growth_rate, block_config = densenet_spec[layers] conv = fluid.layers.conv2d( input=input, @@ -58,46 +53,61 @@ class DenseNet(): act=None, param_attr=ParamAttr(name="conv1_weights"), bias_attr=False) - conv = fluid.layers.batch_norm(input=conv, - act='relu', - param_attr=ParamAttr(name='conv1_bn_scale'), - bias_attr=ParamAttr(name='conv1_bn_offset'), - moving_mean_name='conv1_bn_mean', - moving_variance_name='conv1_bn_variance') - conv = fluid.layers.pool2d(input=conv, pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + conv = 
fluid.layers.batch_norm( + input=conv, + act='relu', + param_attr=ParamAttr(name='conv1_bn_scale'), + bias_attr=ParamAttr(name='conv1_bn_offset'), + moving_mean_name='conv1_bn_mean', + moving_variance_name='conv1_bn_variance') + conv = fluid.layers.pool2d( + input=conv, + pool_size=3, + pool_stride=2, + pool_padding=1, + pool_type='max') num_features = num_init_features for i, num_layers in enumerate(block_config): - conv = self.make_dense_block(conv, num_layers, bn_size, growth_rate, dropout, name='conv'+str(i+2)) + conv = self.make_dense_block( + conv, + num_layers, + bn_size, + growth_rate, + dropout, + name='conv' + str(i + 2)) num_features = num_features + num_layers * growth_rate if i != len(block_config) - 1: - conv = self.make_transition(conv, num_features // 2, name='conv'+str(i+2)+'_blk') + conv = self.make_transition( + conv, num_features // 2, name='conv' + str(i + 2) + '_blk') num_features = num_features // 2 - conv = fluid.layers.batch_norm(input=conv, - act='relu', - param_attr=ParamAttr(name='conv5_blk_bn_scale'), - bias_attr=ParamAttr(name='conv5_blk_bn_offset'), - moving_mean_name='conv5_blk_bn_mean', - moving_variance_name='conv5_blk_bn_variance') - conv = fluid.layers.pool2d(input=conv, pool_type='avg', global_pooling=True) + conv = fluid.layers.batch_norm( + input=conv, + act='relu', + param_attr=ParamAttr(name='conv5_blk_bn_scale'), + bias_attr=ParamAttr(name='conv5_blk_bn_offset'), + moving_mean_name='conv5_blk_bn_mean', + moving_variance_name='conv5_blk_bn_variance') + conv = fluid.layers.pool2d( + input=conv, pool_type='avg', global_pooling=True) stdv = 1.0 / math.sqrt(conv.shape[1] * 1.0) - out = fluid.layers.fc(input=conv, - size=class_dim, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv), name="fc_weights"), - bias_attr=ParamAttr(name='fc_offset')) + out = fluid.layers.fc( + input=conv, + size=class_dim, + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, 
stdv), + name="fc_weights"), + bias_attr=ParamAttr(name='fc_offset')) return out - - def make_transition(self, input, num_output_features, name=None): - bn_ac = fluid.layers.batch_norm(input, - act='relu', - param_attr=ParamAttr(name=name + '_bn_scale'), - bias_attr=ParamAttr(name + '_bn_offset'), - moving_mean_name=name + '_bn_mean', - moving_variance_name=name + '_bn_variance' - ) - + bn_ac = fluid.layers.batch_norm( + input, + act='relu', + param_attr=ParamAttr(name=name + '_bn_scale'), + bias_attr=ParamAttr(name + '_bn_offset'), + moving_mean_name=name + '_bn_mean', + moving_variance_name=name + '_bn_variance') + bn_ac_conv = fluid.layers.conv2d( input=bn_ac, num_filters=num_output_features, @@ -105,25 +115,36 @@ class DenseNet(): stride=1, act=None, bias_attr=False, - param_attr=ParamAttr(name=name + "_weights") - ) - pool = fluid.layers.pool2d(input=bn_ac_conv, pool_size=2, pool_stride=2, pool_type='avg') + param_attr=ParamAttr(name=name + "_weights")) + pool = fluid.layers.pool2d( + input=bn_ac_conv, pool_size=2, pool_stride=2, pool_type='avg') return pool - - def make_dense_block(self, input, num_layers, bn_size, growth_rate, dropout, name=None): + + def make_dense_block(self, + input, + num_layers, + bn_size, + growth_rate, + dropout, + name=None): conv = input for layer in range(num_layers): - conv = self.make_dense_layer(conv, growth_rate, bn_size, dropout, name=name + '_' + str(layer+1)) + conv = self.make_dense_layer( + conv, + growth_rate, + bn_size, + dropout, + name=name + '_' + str(layer + 1)) return conv - - + def make_dense_layer(self, input, growth_rate, bn_size, dropout, name=None): - bn_ac = fluid.layers.batch_norm(input, - act='relu', - param_attr=ParamAttr(name=name + '_x1_bn_scale'), - bias_attr=ParamAttr(name + '_x1_bn_offset'), - moving_mean_name=name + '_x1_bn_mean', - moving_variance_name=name + '_x1_bn_variance') + bn_ac = fluid.layers.batch_norm( + input, + act='relu', + param_attr=ParamAttr(name=name + '_x1_bn_scale'), + 
bias_attr=ParamAttr(name + '_x1_bn_offset'), + moving_mean_name=name + '_x1_bn_mean', + moving_variance_name=name + '_x1_bn_variance') bn_ac_conv = fluid.layers.conv2d( input=bn_ac, num_filters=bn_size * growth_rate, @@ -132,12 +153,13 @@ class DenseNet(): act=None, bias_attr=False, param_attr=ParamAttr(name=name + "_x1_weights")) - bn_ac = fluid.layers.batch_norm(bn_ac_conv, - act='relu', - param_attr=ParamAttr(name=name + '_x2_bn_scale'), - bias_attr=ParamAttr(name + '_x2_bn_offset'), - moving_mean_name=name + '_x2_bn_mean', - moving_variance_name=name + '_x2_bn_variance') + bn_ac = fluid.layers.batch_norm( + bn_ac_conv, + act='relu', + param_attr=ParamAttr(name=name + '_x2_bn_scale'), + bias_attr=ParamAttr(name + '_x2_bn_offset'), + moving_mean_name=name + '_x2_bn_mean', + moving_variance_name=name + '_x2_bn_variance') bn_ac_conv = fluid.layers.conv2d( input=bn_ac, num_filters=growth_rate, @@ -148,33 +170,32 @@ class DenseNet(): bias_attr=False, param_attr=ParamAttr(name=name + "_x2_weights")) if dropout: - bn_ac_conv = fluid.layers.dropout(x=bn_ac_conv, dropout_prob=dropout) + bn_ac_conv = fluid.layers.dropout( + x=bn_ac_conv, dropout_prob=dropout) bn_ac_conv = fluid.layers.concat([input, bn_ac_conv], axis=1) return bn_ac_conv - - + + def DenseNet121(): - model=DenseNet(layers=121) + model = DenseNet(layers=121) return model + def DenseNet161(): - model=DenseNet(layers=161) + model = DenseNet(layers=161) return model + def DenseNet169(): - model=DenseNet(layers=169) + model = DenseNet(layers=169) return model + def DenseNet201(): - model=DenseNet(layers=201) + model = DenseNet(layers=201) return model + def DenseNet264(): - model=DenseNet(layers=264) + model = DenseNet(layers=264) return model - - - - - - diff --git a/PaddleCV/image_classification/models/dpn.py b/PaddleCV/image_classification/models/dpn.py index bbc19aea..4ab6d6b0 100644 --- a/PaddleCV/image_classification/models/dpn.py +++ b/PaddleCV/image_classification/models/dpn.py @@ -27,22 +27,9 @@ from 
paddle.fluid.param_attr import ParamAttr __all__ = ["DPN", "DPN68", "DPN92", "DPN98", "DPN107", "DPN131"] -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [30, 60, 90], - "steps": [0.1, 0.01, 0.001, 0.0001] - } -} - class DPN(object): def __init__(self, layers=68): - self.params = train_parameters self.layers = layers def net(self, input, class_dim=1000): diff --git a/PaddleCV/image_classification/models/googlenet.py b/PaddleCV/image_classification/models/googlenet.py index 9bb3ad6d..5b92d577 100644 --- a/PaddleCV/image_classification/models/googlenet.py +++ b/PaddleCV/image_classification/models/googlenet.py @@ -20,24 +20,13 @@ import paddle import paddle.fluid as fluid from paddle.fluid.param_attr import ParamAttr -__all__ = ['GoogleNet'] - -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [30, 70, 100], - "steps": [0.1, 0.01, 0.001, 0.0001] - } -} - - -class GoogleNet(): +__all__ = ['GoogLeNet'] + + +class GoogLeNet(): def __init__(self): - self.params = train_parameters + + pass def conv_layer(self, input, diff --git a/PaddleCV/image_classification/models/inception_v4.py b/PaddleCV/image_classification/models/inception_v4.py index 47d406be..483e6153 100644 --- a/PaddleCV/image_classification/models/inception_v4.py +++ b/PaddleCV/image_classification/models/inception_v4.py @@ -24,22 +24,11 @@ from paddle.fluid.param_attr import ParamAttr __all__ = ['InceptionV4'] -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [30, 60, 90], - "steps": [0.1, 0.01, 0.001, 0.0001] - } -} 
- class InceptionV4(): def __init__(self): - self.params = train_parameters + + pass def net(self, input, class_dim=1000): x = self.inception_stem(input) diff --git a/PaddleCV/image_classification/models/mobilenet.py b/PaddleCV/image_classification/models/mobilenet.py index 4a1154e1..1b99dddb 100644 --- a/PaddleCV/image_classification/models/mobilenet.py +++ b/PaddleCV/image_classification/models/mobilenet.py @@ -20,24 +20,12 @@ import paddle.fluid as fluid from paddle.fluid.initializer import MSRA from paddle.fluid.param_attr import ParamAttr -__all__ = ['MobileNet'] - -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [30, 60, 90], - "steps": [0.1, 0.01, 0.001, 0.0001] - } -} +__all__ = ['MobileNet', 'MobileNetV1'] class MobileNet(): def __init__(self): - self.params = train_parameters + pass def net(self, input, class_dim=1000, scale=1.0): # conv1: 112x112 @@ -208,3 +196,8 @@ class MobileNet(): padding=0, name=name + "_sep") return pointwise_conv + + +def MobileNetV1(): + model = MobileNet() + return model diff --git a/PaddleCV/image_classification/models/mobilenet_v2.py b/PaddleCV/image_classification/models/mobilenet_v2.py index 90e2ff60..124b869b 100644 --- a/PaddleCV/image_classification/models/mobilenet_v2.py +++ b/PaddleCV/image_classification/models/mobilenet_v2.py @@ -19,28 +19,17 @@ import paddle.fluid as fluid from paddle.fluid.initializer import MSRA from paddle.fluid.param_attr import ParamAttr -__all__ = ['MobileNetV2', 'MobileNetV2_x0_25, ''MobileNetV2_x0_5', 'MobileNetV2_x1_0', 'MobileNetV2_x1_5', 'MobileNetV2_x2_0', - 'MobileNetV2_scale'] - -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [30, 60, 90], - "steps": [0.1, 
0.01, 0.001, 0.0001] - } -} +__all__ = [ + 'MobileNetV2', 'MobileNetV2_x0_25, ' + 'MobileNetV2_x0_5', 'MobileNetV2_x1_0', 'MobileNetV2_x1_5', + 'MobileNetV2_x2_0', 'MobileNetV2_scale' +] class MobileNetV2(): def __init__(self, scale=1.0, change_depth=False): - self.params = train_parameters self.scale = scale - self.change_depth=change_depth - + self.change_depth = change_depth def net(self, input, class_dim=1000): scale = self.scale @@ -55,13 +44,13 @@ class MobileNetV2(): (6, 160, 3, 2), (6, 320, 1, 1), ] if change_depth == False else [ - (1, 16, 1, 1), - (6, 24, 2, 2), - (6, 32, 5, 2), - (6, 64, 7, 2), - (6, 96, 5, 1), - (6, 160, 3, 2), - (6, 320, 1, 1), + (1, 16, 1, 1), + (6, 24, 2, 2), + (6, 32, 5, 2), + (6, 64, 7, 2), + (6, 96, 5, 1), + (6, 160, 3, 2), + (6, 320, 1, 1), ] #conv1 @@ -224,29 +213,33 @@ class MobileNetV2(): expansion_factor=t, name=name + '_' + str(i + 1)) return last_residual_block - - - + + def MobileNetV2_x0_25(): model = MobileNetV2(scale=0.25) return model + def MobileNetV2_x0_5(): model = MobileNetV2(scale=0.5) return model + def MobileNetV2_x1_0(): model = MobileNetV2(scale=1.0) return model + def MobileNetV2_x1_5(): model = MobileNetV2(scale=1.5) return model + def MobileNetV2_x2_0(): model = MobileNetV2(scale=2.0) return model + def MobileNetV2_scale(): model = MobileNetV2(scale=1.2, change_depth=True) - return model \ No newline at end of file + return model diff --git a/PaddleCV/image_classification/models/resnet.py b/PaddleCV/image_classification/models/resnet.py index 3f705d40..d7d29a9d 100644 --- a/PaddleCV/image_classification/models/resnet.py +++ b/PaddleCV/image_classification/models/resnet.py @@ -22,24 +22,13 @@ import paddle import paddle.fluid as fluid from paddle.fluid.param_attr import ParamAttr -__all__ = ["ResNet", "ResNet18", "ResNet34", "ResNet50", "ResNet101", "ResNet152"] - -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - 
"learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [30, 60, 90], - "steps": [0.1, 0.01, 0.001, 0.0001] - } -} +__all__ = [ + "ResNet", "ResNet18", "ResNet34", "ResNet50", "ResNet101", "ResNet152" +] class ResNet(): def __init__(self, layers=50): - self.params = train_parameters self.layers = layers def net(self, input, class_dim=1000): @@ -59,7 +48,12 @@ class ResNet(): num_filters = [64, 128, 256, 512] conv = self.conv_bn_layer( - input=input, num_filters=64, filter_size=7, stride=2, act='relu',name="conv1") + input=input, + num_filters=64, + filter_size=7, + stride=2, + act='relu', + name="conv1") conv = fluid.layers.pool2d( input=conv, pool_size=3, @@ -71,41 +65,44 @@ class ResNet(): for i in range(depth[block]): if layers in [101, 152] and block == 2: if i == 0: - conv_name="res"+str(block+2)+"a" + conv_name = "res" + str(block + 2) + "a" else: - conv_name="res"+str(block+2)+"b"+str(i) + conv_name = "res" + str(block + 2) + "b" + str(i) else: - conv_name="res"+str(block+2)+chr(97+i) + conv_name = "res" + str(block + 2) + chr(97 + i) conv = self.bottleneck_block( input=conv, num_filters=num_filters[block], - stride=2 if i == 0 and block != 0 else 1, name=conv_name) + stride=2 if i == 0 and block != 0 else 1, + name=conv_name) pool = fluid.layers.pool2d( input=conv, pool_size=7, pool_type='avg', global_pooling=True) stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - out = fluid.layers.fc(input=pool, - size=class_dim, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv))) + out = fluid.layers.fc( + input=pool, + size=class_dim, + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv))) else: for block in range(len(depth)): for i in range(depth[block]): - conv_name="res"+str(block+2)+chr(97+i) + conv_name = "res" + str(block + 2) + chr(97 + i) conv = self.basic_block( input=conv, num_filters=num_filters[block], stride=2 if i == 0 and block != 0 else 1, - 
is_first=block==i==0, + is_first=block == i == 0, name=conv_name) pool = fluid.layers.pool2d( input=conv, pool_size=7, pool_type='avg', global_pooling=True) stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - out = fluid.layers.fc(input=pool, - size=class_dim, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv))) + out = fluid.layers.fc( + input=pool, + size=class_dim, + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv))) return out def conv_bn_layer(self, @@ -127,18 +124,19 @@ class ResNet(): param_attr=ParamAttr(name=name + "_weights"), bias_attr=False, name=name + '.conv2d.output.1') - + if name == "conv1": bn_name = "bn_" + name else: - bn_name = "bn" + name[3:] - return fluid.layers.batch_norm(input=conv, - act=act, - name=bn_name+'.output.1', - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance',) + bn_name = "bn" + name[3:] + return fluid.layers.batch_norm( + input=conv, + act=act, + name=bn_name + '.output.1', + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance', ) def shortcut(self, input, ch_out, stride, is_first, name): ch_in = input.shape[1] @@ -149,29 +147,53 @@ class ResNet(): def bottleneck_block(self, input, num_filters, stride, name): conv0 = self.conv_bn_layer( - input=input, num_filters=num_filters, filter_size=1, act='relu',name=name+"_branch2a") + input=input, + num_filters=num_filters, + filter_size=1, + act='relu', + name=name + "_branch2a") conv1 = self.conv_bn_layer( input=conv0, num_filters=num_filters, filter_size=3, stride=stride, act='relu', - name=name+"_branch2b") + name=name + "_branch2b") conv2 = self.conv_bn_layer( - input=conv1, num_filters=num_filters * 4, filter_size=1, act=None, name=name+"_branch2c") + 
input=conv1, + num_filters=num_filters * 4, + filter_size=1, + act=None, + name=name + "_branch2c") + + short = self.shortcut( + input, + num_filters * 4, + stride, + is_first=False, + name=name + "_branch1") - short = self.shortcut(input, num_filters * 4, stride, is_first=False, name=name + "_branch1") + return fluid.layers.elementwise_add( + x=short, y=conv2, act='relu', name=name + ".add.output.5") - return fluid.layers.elementwise_add(x=short, y=conv2, act='relu',name=name+".add.output.5") - def basic_block(self, input, num_filters, stride, is_first, name): - conv0 = self.conv_bn_layer(input=input, num_filters=num_filters, filter_size=3, act='relu', stride=stride, - name=name+"_branch2a") - conv1 = self.conv_bn_layer(input=conv0, num_filters=num_filters, filter_size=3, act=None, - name=name+"_branch2b") - short = self.shortcut(input, num_filters, stride, is_first, name=name + "_branch1") + conv0 = self.conv_bn_layer( + input=input, + num_filters=num_filters, + filter_size=3, + act='relu', + stride=stride, + name=name + "_branch2a") + conv1 = self.conv_bn_layer( + input=conv0, + num_filters=num_filters, + filter_size=3, + act=None, + name=name + "_branch2b") + short = self.shortcut( + input, num_filters, stride, is_first, name=name + "_branch1") return fluid.layers.elementwise_add(x=short, y=conv1, act='relu') - + def ResNet18(): model = ResNet(layers=18) diff --git a/PaddleCV/image_classification/models/resnet_vd.py b/PaddleCV/image_classification/models/resnet_vd.py index e56a9766..bb8e3184 100644 --- a/PaddleCV/image_classification/models/resnet_vd.py +++ b/PaddleCV/image_classification/models/resnet_vd.py @@ -22,26 +22,16 @@ import paddle import paddle.fluid as fluid from paddle.fluid.param_attr import ParamAttr -__all__ = ["ResNet", "ResNet50_vd","ResNet101_vd", "ResNet152_vd", "ResNet200_vd"] - -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": 
"piecewise_decay", - "batch_size": 256, - "epochs": [30, 60, 90], - "steps": [0.1, 0.01, 0.001, 0.0001] - } -} +__all__ = [ + "ResNet", "ResNet50_vd", "ResNet101_vd", "ResNet152_vd", "ResNet200_vd" +] class ResNet(): - def __init__(self, layers=50, is_3x3 = False): - self.params = train_parameters + def __init__(self, layers=50, is_3x3=False): self.layers = layers self.is_3x3 = is_3x3 + def net(self, input, class_dim=1000): is_3x3 = self.is_3x3 layers = self.layers @@ -60,14 +50,33 @@ class ResNet(): num_filters = [64, 128, 256, 512] if is_3x3 == False: conv = self.conv_bn_layer( - input=input, num_filters=64, filter_size=7, stride=2, act='relu') + input=input, + num_filters=64, + filter_size=7, + stride=2, + act='relu') else: conv = self.conv_bn_layer( - input=input, num_filters=32, filter_size=3, stride=2, act='relu', name='conv1_1') + input=input, + num_filters=32, + filter_size=3, + stride=2, + act='relu', + name='conv1_1') conv = self.conv_bn_layer( - input=conv, num_filters=32, filter_size=3, stride=1, act='relu', name='conv1_2') + input=conv, + num_filters=32, + filter_size=3, + stride=1, + act='relu', + name='conv1_2') conv = self.conv_bn_layer( - input=conv, num_filters=64, filter_size=3, stride=1, act='relu', name='conv1_3') + input=conv, + num_filters=64, + filter_size=3, + stride=1, + act='relu', + name='conv1_3') conv = fluid.layers.pool2d( input=conv, @@ -80,32 +89,29 @@ class ResNet(): for i in range(depth[block]): if layers in [101, 152, 200] and block == 2: if i == 0: - conv_name="res"+str(block+2)+"a" + conv_name = "res" + str(block + 2) + "a" else: - conv_name="res"+str(block+2)+"b"+str(i) + conv_name = "res" + str(block + 2) + "b" + str(i) else: - conv_name="res"+str(block+2)+chr(97+i) + conv_name = "res" + str(block + 2) + chr(97 + i) conv = self.bottleneck_block( input=conv, num_filters=num_filters[block], stride=2 if i == 0 and block != 0 else 1, - if_first=block==0, + if_first=block == 0, name=conv_name) pool = fluid.layers.pool2d( 
input=conv, pool_size=7, pool_type='avg', global_pooling=True) stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - out = fluid.layers.fc(input=pool, - size=class_dim, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv))) - - + out = fluid.layers.fc( + input=pool, + size=class_dim, + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv))) + return out - - - def conv_bn_layer(self, input, @@ -128,29 +134,30 @@ class ResNet(): if name == "conv1": bn_name = "bn_" + name else: - bn_name = "bn" + name[3:] - return fluid.layers.batch_norm(input=conv, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - + bn_name = "bn" + name[3:] + return fluid.layers.batch_norm( + input=conv, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + def conv_bn_layer_new(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - pool = fluid.layers.pool2d(input=input, + input, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + name=None): + pool = fluid.layers.pool2d( + input=input, pool_size=2, pool_stride=2, pool_padding=0, pool_type='avg') - + conv = fluid.layers.conv2d( input=pool, num_filters=num_filters, @@ -165,14 +172,13 @@ class ResNet(): bn_name = "bn_" + name else: bn_name = "bn" + name[3:] - return fluid.layers.batch_norm(input=conv, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - + return fluid.layers.batch_norm( + input=conv, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + 
moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') def shortcut(self, input, ch_out, stride, name, if_first=False): ch_in = input.shape[1] @@ -180,43 +186,57 @@ class ResNet(): if if_first: return self.conv_bn_layer(input, ch_out, 1, stride, name=name) else: - return self.conv_bn_layer_new(input, ch_out, 1, stride, name=name) + return self.conv_bn_layer_new( + input, ch_out, 1, stride, name=name) else: return input def bottleneck_block(self, input, num_filters, stride, name, if_first): conv0 = self.conv_bn_layer( - input=input, num_filters=num_filters, filter_size=1, act='relu', name=name+"_branch2a") + input=input, + num_filters=num_filters, + filter_size=1, + act='relu', + name=name + "_branch2a") conv1 = self.conv_bn_layer( input=conv0, num_filters=num_filters, filter_size=3, stride=stride, act='relu', - name=name+"_branch2b") + name=name + "_branch2b") conv2 = self.conv_bn_layer( - input=conv1, num_filters=num_filters * 4, filter_size=1, act=None, name=name+"_branch2c") + input=conv1, + num_filters=num_filters * 4, + filter_size=1, + act=None, + name=name + "_branch2c") - short = self.shortcut(input, num_filters * 4, stride, if_first=if_first, name=name + "_branch1") + short = self.shortcut( + input, + num_filters * 4, + stride, + if_first=if_first, + name=name + "_branch1") return fluid.layers.elementwise_add(x=short, y=conv2, act='relu') - - def ResNet50_vd(): - model = ResNet(layers=50, is_3x3 = True) + model = ResNet(layers=50, is_3x3=True) return model + def ResNet101_vd(): - model = ResNet(layers=101, is_3x3 = True) + model = ResNet(layers=101, is_3x3=True) return model + def ResNet152_vd(): - model = ResNet(layers=152, is_3x3 = True) + model = ResNet(layers=152, is_3x3=True) return model + def ResNet200_vd(): - model = ResNet(layers=200, is_3x3 = True) + model = ResNet(layers=200, is_3x3=True) return model - diff --git a/PaddleCV/image_classification/models/resnext.py b/PaddleCV/image_classification/models/resnext.py index 
c2c94e29..43ac44ce 100644 --- a/PaddleCV/image_classification/models/resnext.py +++ b/PaddleCV/image_classification/models/resnext.py @@ -22,25 +22,14 @@ import paddle import paddle.fluid as fluid from paddle.fluid.param_attr import ParamAttr -__all__ = ["ResNeXt", "ResNeXt50_64x4d", "ResNeXt101_64x4d", "ResNeXt152_64x4d", "ResNeXt50_32x4d", "ResNeXt101_32x4d", - "ResNeXt152_32x4d"] - -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [30, 60, 90], - "steps": [0.1, 0.01, 0.001, 0.0001] - } -} +__all__ = [ + "ResNeXt", "ResNeXt50_64x4d", "ResNeXt101_64x4d", "ResNeXt152_64x4d", + "ResNeXt50_32x4d", "ResNeXt101_32x4d", "ResNeXt152_32x4d" +] class ResNeXt(): def __init__(self, layers=50, cardinality=64): - self.params = train_parameters self.layers = layers self.cardinality = cardinality @@ -57,7 +46,7 @@ class ResNeXt(): depth = [3, 4, 23, 3] elif layers == 152: depth = [3, 8, 36, 3] - + num_filters1 = [256, 512, 1024, 2048] num_filters2 = [128, 256, 512, 1024] @@ -67,7 +56,7 @@ class ResNeXt(): filter_size=7, stride=2, act='relu', - name="res_conv1") #debug + name="res_conv1") #debug conv = fluid.layers.pool2d( input=conv, pool_size=3, @@ -86,7 +75,8 @@ class ResNeXt(): conv_name = "res" + str(block + 2) + chr(97 + i) conv = self.bottleneck_block( input=conv, - num_filters=num_filters1[block] if cardinality == 64 else num_filters2[block], + num_filters=num_filters1[block] + if cardinality == 64 else num_filters2[block], stride=2 if i == 0 and block != 0 else 1, cardinality=cardinality, name=conv_name) @@ -94,11 +84,13 @@ class ResNeXt(): pool = fluid.layers.pool2d( input=conv, pool_size=7, pool_type='avg', global_pooling=True) stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - out = fluid.layers.fc(input=pool, - size=class_dim, - param_attr=fluid.param_attr.ParamAttr( - 
initializer=fluid.initializer.Uniform(-stdv, stdv),name='fc_weights'), - bias_attr=fluid.param_attr.ParamAttr(name='fc_offset')) + out = fluid.layers.fc( + input=pool, + size=class_dim, + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name='fc_weights'), + bias_attr=fluid.param_attr.ParamAttr(name='fc_offset')) return out def conv_bn_layer(self, @@ -158,13 +150,16 @@ class ResNeXt(): name=name + "_branch2b") conv2 = self.conv_bn_layer( input=conv1, - num_filters=num_filters if cardinality == 64 else num_filters*2, + num_filters=num_filters if cardinality == 64 else num_filters * 2, filter_size=1, act=None, name=name + "_branch2c") short = self.shortcut( - input, num_filters if cardinality == 64 else num_filters*2, stride, name=name + "_branch1") + input, + num_filters if cardinality == 64 else num_filters * 2, + stride, + name=name + "_branch1") return fluid.layers.elementwise_add( x=short, y=conv2, act='relu', name=name + ".add.output.5") @@ -174,6 +169,7 @@ def ResNeXt50_64x4d(): model = ResNeXt(layers=50, cardinality=64) return model + def ResNeXt50_32x4d(): model = ResNeXt(layers=50, cardinality=32) return model @@ -183,6 +179,7 @@ def ResNeXt101_64x4d(): model = ResNeXt(layers=101, cardinality=64) return model + def ResNeXt101_32x4d(): model = ResNeXt(layers=101, cardinality=32) return model @@ -192,6 +189,7 @@ def ResNeXt152_64x4d(): model = ResNeXt(layers=152, cardinality=64) return model + def ResNeXt152_32x4d(): model = ResNeXt(layers=152, cardinality=32) return model diff --git a/PaddleCV/image_classification/models/resnext101_wsl.py b/PaddleCV/image_classification/models/resnext101_wsl.py index 4f511f53..9e644b0f 100644 --- a/PaddleCV/image_classification/models/resnext101_wsl.py +++ b/PaddleCV/image_classification/models/resnext101_wsl.py @@ -1,16 +1,16 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. 
# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
from __future__ import absolute_import from __future__ import division from __future__ import print_function @@ -19,24 +19,14 @@ import paddle.fluid as fluid import math from paddle.fluid.param_attr import ParamAttr -__all__ = ["ResNeXt101_32x8d_wsl", "ResNeXt101_32x16d_wsl", "ResNeXt101_32x32d_wsl", "ResNeXt101_32x48d_wsl", "Fix_ResNeXt101_32x48d_wsl"] - -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [30, 60, 90], - "steps": [0.1, 0.01, 0.001, 0.0001] - } -} +__all__ = [ + "ResNeXt101_32x8d_wsl", "ResNeXt101_32x16d_wsl", "ResNeXt101_32x32d_wsl", + "ResNeXt101_32x48d_wsl", "Fix_ResNeXt101_32x48d_wsl" +] class ResNeXt101_wsl(): def __init__(self, layers=101, cardinality=32, width=48): - self.params = train_parameters self.layers = layers self.cardinality = cardinality self.width = width @@ -49,7 +39,6 @@ class ResNeXt101_wsl(): depth = [3, 4, 23, 3] base_width = cardinality * width num_filters = [base_width * i for i in [1, 2, 4, 8]] - conv = self.conv_bn_layer( input=input, @@ -57,7 +46,7 @@ class ResNeXt101_wsl(): filter_size=7, stride=2, act='relu', - name="conv1") #debug + name="conv1") #debug conv = fluid.layers.pool2d( input=conv, pool_size=3, @@ -67,7 +56,7 @@ class ResNeXt101_wsl(): for block in range(len(depth)): for i in range(depth[block]): - conv_name = 'layer' + str(block+1) + "." + str(i) + conv_name = 'layer' + str(block + 1) + "." 
+ str(i) conv = self.bottleneck_block( input=conv, num_filters=num_filters[block], @@ -78,11 +67,13 @@ class ResNeXt101_wsl(): pool = fluid.layers.pool2d( input=conv, pool_size=7, pool_type='avg', global_pooling=True) stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - out = fluid.layers.fc(input=pool, - size=class_dim, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv),name='fc.weight'), - bias_attr=fluid.param_attr.ParamAttr(name='fc.bias')) + out = fluid.layers.fc( + input=pool, + size=class_dim, + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name='fc.weight'), + bias_attr=fluid.param_attr.ParamAttr(name='fc.bias')) return out def conv_bn_layer(self, @@ -113,7 +104,8 @@ class ResNeXt101_wsl(): if "conv1" == name: bn_name = 'bn' + name[-1] else: - bn_name = (name[:10] if name[7:9].isdigit() else name[:9]) + 'bn' + name[-1] + bn_name = (name[:10] if name[7:9].isdigit() else name[:9] + ) + 'bn' + name[-1] return fluid.layers.batch_norm( input=conv, act=act, @@ -148,32 +140,35 @@ class ResNeXt101_wsl(): name=name + ".conv2") conv2 = self.conv_bn_layer( input=conv1, - num_filters=num_filters//(width//8), + num_filters=num_filters // (width // 8), filter_size=1, act=None, name=name + ".conv3") short = self.shortcut( - input, num_filters//(width//8), stride, name=name + ".downsample") - - return fluid.layers.elementwise_add( - x=short, y=conv2, act='relu') + input, + num_filters // (width // 8), + stride, + name=name + ".downsample") + return fluid.layers.elementwise_add(x=short, y=conv2, act='relu') - def ResNeXt101_32x8d_wsl(): model = ResNeXt101_wsl(cardinality=32, width=8) return model - + + def ResNeXt101_32x16d_wsl(): model = ResNeXt101_wsl(cardinality=32, width=16) return model + def ResNeXt101_32x32d_wsl(): model = ResNeXt101_wsl(cardinality=32, width=32) return model - + + def ResNeXt101_32x48d_wsl(): model = ResNeXt101_wsl(cardinality=32, width=48) return model diff --git 
a/PaddleCV/image_classification/models/resnext_vd.py b/PaddleCV/image_classification/models/resnext_vd.py index b61745d3..d1069c36 100644 --- a/PaddleCV/image_classification/models/resnext_vd.py +++ b/PaddleCV/image_classification/models/resnext_vd.py @@ -1,41 +1,34 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
import paddle import paddle.fluid as fluid from paddle.fluid.param_attr import ParamAttr import math -__all__ = ["ResNeXt","ResNeXt50_vd_64x4d","ResNeXt101_vd_64x4d","ResNeXt152_vd_64x4d","ResNeXt50_vd_32x4d","ResNeXt101_vd_32x4d", "ResNeXt152_vd_32x4d"] -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [30, 60, 90], - "steps": [0.1, 0.01, 0.001, 0.0001] - } -} +__all__ = [ + "ResNeXt", "ResNeXt50_vd_64x4d", "ResNeXt101_vd_64x4d", + "ResNeXt152_vd_64x4d", "ResNeXt50_vd_32x4d", "ResNeXt101_vd_32x4d", + "ResNeXt152_vd_32x4d" +] class ResNeXt(): - def __init__(self, layers=50, is_3x3 = False, cardinality=64): - self.params = train_parameters + def __init__(self, layers=50, is_3x3=False, cardinality=64): self.layers = layers self.is_3x3 = is_3x3 self.cardinality = cardinality + def net(self, input, class_dim=1000): is_3x3 = self.is_3x3 layers = self.layers @@ -52,17 +45,36 @@ class ResNeXt(): depth = [3, 8, 36, 3] num_filters1 = [256, 512, 1024, 2048] num_filters2 = [128, 256, 512, 1024] - + if is_3x3 == False: conv = self.conv_bn_layer( - input=input, num_filters=64, filter_size=7, stride=2, act='relu') + input=input, + num_filters=64, + filter_size=7, + stride=2, + act='relu') else: conv = self.conv_bn_layer( - input=input, num_filters=32, filter_size=3, stride=2, act='relu', name='conv1_1') + input=input, + num_filters=32, + filter_size=3, + stride=2, + act='relu', + name='conv1_1') conv = self.conv_bn_layer( - input=conv, num_filters=32, filter_size=3, stride=1, act='relu', name='conv1_2') + input=conv, + num_filters=32, + filter_size=3, + stride=1, + act='relu', + name='conv1_2') conv = self.conv_bn_layer( - input=conv, num_filters=64, filter_size=3, stride=1, act='relu', name='conv1_3') + input=conv, + num_filters=64, + filter_size=3, + stride=1, + act='relu', + name='conv1_3') conv = 
fluid.layers.pool2d( input=conv, @@ -75,32 +87,32 @@ class ResNeXt(): for i in range(depth[block]): if layers in [101, 152, 200] and block == 2: if i == 0: - conv_name="res"+str(block+2)+"a" + conv_name = "res" + str(block + 2) + "a" else: - conv_name="res"+str(block+2)+"b"+str(i) + conv_name = "res" + str(block + 2) + "b" + str(i) else: - conv_name="res"+str(block+2)+chr(97+i) + conv_name = "res" + str(block + 2) + chr(97 + i) conv = self.bottleneck_block( input=conv, - num_filters=num_filters1[block] if cardinality == 64 else num_filters2[block], + num_filters=num_filters1[block] + if cardinality == 64 else num_filters2[block], stride=2 if i == 0 and block != 0 else 1, cardinality=cardinality, - if_first=block==0, + if_first=block == 0, name=conv_name) pool = fluid.layers.pool2d( input=conv, pool_size=7, pool_type='avg', global_pooling=True) stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - out = fluid.layers.fc(input=pool, - size=class_dim, - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv),name='fc_weights'), - bias_attr=fluid.param_attr.ParamAttr(name='fc_offset')) - + out = fluid.layers.fc( + input=pool, + size=class_dim, + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name='fc_weights'), + bias_attr=fluid.param_attr.ParamAttr(name='fc_offset')) + return out - - - def conv_bn_layer(self, input, @@ -118,34 +130,36 @@ class ResNeXt(): padding=(filter_size - 1) // 2, groups=groups, act=None, + use_cudnn=False, param_attr=ParamAttr(name=name + "_weights"), bias_attr=False) if name == "conv1": bn_name = "bn_" + name else: - bn_name = "bn" + name[3:] - return fluid.layers.batch_norm(input=conv, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - + bn_name = "bn" + name[3:] + return fluid.layers.batch_norm( + input=conv, + act=act, + 
param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + def conv_bn_layer_new(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - pool = fluid.layers.pool2d(input=input, + input, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + name=None): + pool = fluid.layers.pool2d( + input=input, pool_size=2, pool_stride=2, pool_padding=0, pool_type='avg') - + conv = fluid.layers.conv2d( input=pool, num_filters=num_filters, @@ -154,20 +168,20 @@ class ResNeXt(): padding=(filter_size - 1) // 2, groups=groups, act=None, + use_cudnn=False, param_attr=ParamAttr(name=name + "_weights"), bias_attr=False) if name == "conv1": bn_name = "bn_" + name else: bn_name = "bn" + name[3:] - return fluid.layers.batch_norm(input=conv, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - - + return fluid.layers.batch_norm( + input=conv, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') def shortcut(self, input, ch_out, stride, name, if_first=False): ch_in = input.shape[1] @@ -175,13 +189,19 @@ class ResNeXt(): if if_first: return self.conv_bn_layer(input, ch_out, 1, stride, name=name) else: - return self.conv_bn_layer_new(input, ch_out, 1, stride, name=name) + return self.conv_bn_layer_new( + input, ch_out, 1, stride, name=name) else: return input - def bottleneck_block(self, input, num_filters, stride, cardinality, name, if_first): + def bottleneck_block(self, input, num_filters, stride, cardinality, name, + if_first): conv0 = self.conv_bn_layer( - input=input, num_filters=num_filters, filter_size=1, act='relu', name=name+"_branch2a") + 
input=input, + num_filters=num_filters, + filter_size=1, + act='relu', + name=name + "_branch2a") conv1 = self.conv_bn_layer( input=conv0, num_filters=num_filters, @@ -189,36 +209,49 @@ class ResNeXt(): stride=stride, act='relu', groups=cardinality, - name=name+"_branch2b") + name=name + "_branch2b") conv2 = self.conv_bn_layer( - input=conv1, num_filters=num_filters if cardinality == 64 else num_filters*2, filter_size=1, act=None, name=name+"_branch2c") + input=conv1, + num_filters=num_filters if cardinality == 64 else num_filters * 2, + filter_size=1, + act=None, + name=name + "_branch2c") - short = self.shortcut(input, num_filters if cardinality == 64 else num_filters*2, stride, if_first=if_first, name=name + "_branch1") + short = self.shortcut( + input, + num_filters if cardinality == 64 else num_filters * 2, + stride, + if_first=if_first, + name=name + "_branch1") return fluid.layers.elementwise_add(x=short, y=conv2, act='relu') - + def ResNeXt50_vd_64x4d(): - model = ResNeXt(layers=50, is_3x3 = True) + model = ResNeXt(layers=50, is_3x3=True) return model + def ResNeXt50_vd_32x4d(): - model = ResNeXt(layers=50, cardinality=32, is_3x3 = True) + model = ResNeXt(layers=50, cardinality=32, is_3x3=True) return model + def ResNeXt101_vd_64x4d(): - model = ResNeXt(layers=101, is_3x3 = True) + model = ResNeXt(layers=101, is_3x3=True) return model + def ResNeXt101_vd_32x4d(): - model = ResNeXt(layers=101, cardinality=32, is_3x3 = True) + model = ResNeXt(layers=101, cardinality=32, is_3x3=True) return model + def ResNeXt152_vd_64x4d(): - model = ResNeXt(layers=152, is_3x3 = True) + model = ResNeXt(layers=152, is_3x3=True) return model + def ResNeXt152_vd_32x4d(): - model = ResNeXt(layers=152, cardinality=32, is_3x3 = True) + model = ResNeXt(layers=152, cardinality=32, is_3x3=True) return model - diff --git a/PaddleCV/image_classification/models/se_resnext.py b/PaddleCV/image_classification/models/se_resnext.py index a6d0d635..fabaa661 100644 --- 
a/PaddleCV/image_classification/models/se_resnext.py +++ b/PaddleCV/image_classification/models/se_resnext.py @@ -27,23 +27,9 @@ __all__ = [ "SE_ResNeXt152_32x4d" ] -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "dropout_seed": None, - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [40, 80, 100], - "steps": [0.1, 0.01, 0.001, 0.0001] - } -} - class SE_ResNeXt(): def __init__(self, layers=50): - self.params = train_parameters self.layers = layers def net(self, input, class_dim=1000): @@ -139,8 +125,7 @@ class SE_ResNeXt(): pool_type='avg', global_pooling=True, use_cudnn=False) - drop = fluid.layers.dropout( - x=pool, dropout_prob=0.5, seed=self.params['dropout_seed']) + drop = fluid.layers.dropout(x=pool, dropout_prob=0.5) stdv = 1.0 / math.sqrt(drop.shape[1] * 1.0) out = fluid.layers.fc( input=drop, diff --git a/PaddleCV/image_classification/models/se_resnext_vd.py b/PaddleCV/image_classification/models/se_resnext_vd.py index 4d591be2..b306cf57 100644 --- a/PaddleCV/image_classification/models/se_resnext_vd.py +++ b/PaddleCV/image_classification/models/se_resnext_vd.py @@ -23,26 +23,12 @@ import paddle.fluid as fluid from paddle.fluid.param_attr import ParamAttr __all__ = [ - "SE_ResNeXt", "SE_ResNeXt50_32x4d_vd", "SE_ResNeXt101_32x4d_vd", - "SENet154_vd" + "SE_ResNeXt", "SE_ResNeXt50_32x4d_vd", "SE_ResNeXt101_32x4d_vd", "SE154_vd" ] -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [10, 16, 20], - "steps": [0.01, 0.001, 0.0001, 0.00001] - } -} - class SE_ResNeXt(): def __init__(self, layers=50): - self.params = train_parameters self.layers = layers def net(self, input, class_dim=1000): @@ -57,11 +43,26 @@ class SE_ResNeXt(): num_filters = [128, 256, 512, 1024] conv = 
self.conv_bn_layer( - input=input, num_filters=64, filter_size=3, stride=2, act='relu', name='conv1_1') + input=input, + num_filters=64, + filter_size=3, + stride=2, + act='relu', + name='conv1_1') conv = self.conv_bn_layer( - input=conv, num_filters=64, filter_size=3, stride=1, act='relu', name='conv1_2') + input=conv, + num_filters=64, + filter_size=3, + stride=1, + act='relu', + name='conv1_2') conv = self.conv_bn_layer( - input=conv, num_filters=128, filter_size=3, stride=1, act='relu', name='conv1_3') + input=conv, + num_filters=128, + filter_size=3, + stride=1, + act='relu', + name='conv1_3') conv = fluid.layers.pool2d( input=conv, pool_size=3, @@ -75,11 +76,26 @@ class SE_ResNeXt(): num_filters = [128, 256, 512, 1024] conv = self.conv_bn_layer( - input=input, num_filters=64, filter_size=3, stride=2, act='relu', name='conv1_1') + input=input, + num_filters=64, + filter_size=3, + stride=2, + act='relu', + name='conv1_1') conv = self.conv_bn_layer( - input=conv, num_filters=64, filter_size=3, stride=1, act='relu', name='conv1_2') + input=conv, + num_filters=64, + filter_size=3, + stride=1, + act='relu', + name='conv1_2') conv = self.conv_bn_layer( - input=conv, num_filters=128, filter_size=3, stride=1, act='relu', name='conv1_3') + input=conv, + num_filters=128, + filter_size=3, + stride=1, + act='relu', + name='conv1_3') conv = fluid.layers.pool2d( input=conv, pool_size=3, @@ -100,7 +116,12 @@ class SE_ResNeXt(): act='relu', name='conv1_1') conv = self.conv_bn_layer( - input=conv, num_filters=64, filter_size=3, stride=1, act='relu',name='conv1_2') + input=conv, + num_filters=64, + filter_size=3, + stride=1, + act='relu', + name='conv1_2') conv = self.conv_bn_layer( input=conv, num_filters=128, @@ -121,20 +142,22 @@ class SE_ResNeXt(): stride=2 if i == 0 and block != 0 else 1, cardinality=cardinality, reduction_ratio=reduction_ratio, - if_first=block==0, - name=str(n)+'_'+str(i+1)) + if_first=block == 0, + name=str(n) + '_' + str(i + 1)) pool = 
fluid.layers.pool2d( input=conv, pool_size=7, pool_type='avg', global_pooling=True) if layers == 152: pool = fluid.layers.dropout(x=pool, dropout_prob=0.2) stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - out = fluid.layers.fc(input=pool, - size=class_dim, - param_attr=ParamAttr( - initializer=fluid.initializer.Uniform(-stdv, stdv),name='fc6_weights'), - bias_attr=ParamAttr(name='fc6_offset')) - + out = fluid.layers.fc( + input=pool, + size=class_dim, + param_attr=ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name='fc6_weights'), + bias_attr=ParamAttr(name='fc6_offset')) + return out def shortcut(self, input, ch_out, stride, name, if_first=False): @@ -142,17 +165,36 @@ class SE_ResNeXt(): if ch_in != ch_out or stride != 1: filter_size = 1 if if_first: - return self.conv_bn_layer(input, ch_out, filter_size, stride, name='conv'+name+'_prj') + return self.conv_bn_layer( + input, + ch_out, + filter_size, + stride, + name='conv' + name + '_prj') else: - return self.conv_bn_layer_new(input, ch_out, filter_size, stride, name='conv'+name+'_prj') + return self.conv_bn_layer_new( + input, + ch_out, + filter_size, + stride, + name='conv' + name + '_prj') else: return input - - def bottleneck_block(self, input, num_filters, stride, cardinality, - reduction_ratio,if_first, name=None): + def bottleneck_block(self, + input, + num_filters, + stride, + cardinality, + reduction_ratio, + if_first, + name=None): conv0 = self.conv_bn_layer( - input=input, num_filters=num_filters, filter_size=1, act='relu',name='conv'+name+'_x1') + input=input, + num_filters=num_filters, + filter_size=1, + act='relu', + name='conv' + name + '_x1') conv1 = self.conv_bn_layer( input=conv0, num_filters=num_filters, @@ -160,18 +202,23 @@ class SE_ResNeXt(): stride=stride, groups=cardinality, act='relu', - name='conv'+name+'_x2') + name='conv' + name + '_x2') if cardinality == 64: num_filters = num_filters // 2 conv2 = self.conv_bn_layer( - input=conv1, num_filters=num_filters * 2, 
filter_size=1, act=None, name='conv'+name+'_x3') + input=conv1, + num_filters=num_filters * 2, + filter_size=1, + act=None, + name='conv' + name + '_x3') scale = self.squeeze_excitation( input=conv2, num_channels=num_filters * 2, reduction_ratio=reduction_ratio, - name='fc'+name) + name='fc' + name) - short = self.shortcut(input, num_filters * 2, stride, if_first=if_first, name=name) + short = self.shortcut( + input, num_filters * 2, stride, if_first=if_first, name=name) return fluid.layers.elementwise_add(x=short, y=scale, act='relu') @@ -192,29 +239,31 @@ class SE_ResNeXt(): groups=groups, act=None, bias_attr=False, - param_attr=ParamAttr(name=name + '_weights'), - ) + param_attr=ParamAttr(name=name + '_weights'), ) bn_name = name + "_bn" - return fluid.layers.batch_norm(input=conv, act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - + return fluid.layers.batch_norm( + input=conv, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') + def conv_bn_layer_new(self, - input, - num_filters, - filter_size, - stride=1, - groups=1, - act=None, - name=None): - pool = fluid.layers.pool2d(input=input, + input, + num_filters, + filter_size, + stride=1, + groups=1, + act=None, + name=None): + pool = fluid.layers.pool2d( + input=input, pool_size=2, pool_stride=2, pool_padding=0, pool_type='avg') - + conv = fluid.layers.conv2d( input=pool, num_filters=num_filters, @@ -226,33 +275,39 @@ class SE_ResNeXt(): param_attr=ParamAttr(name=name + "_weights"), bias_attr=False) bn_name = name + "_bn" - return fluid.layers.batch_norm(input=conv, - act=act, - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + 
'_variance') - + return fluid.layers.batch_norm( + input=conv, + act=act, + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') - def squeeze_excitation(self, input, num_channels, reduction_ratio, name=None): + def squeeze_excitation(self, + input, + num_channels, + reduction_ratio, + name=None): pool = fluid.layers.pool2d( input=input, pool_size=0, pool_type='avg', global_pooling=True) stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - squeeze = fluid.layers.fc(input=pool, - size=num_channels // reduction_ratio, - act='relu', - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform( - -stdv, stdv),name=name+'_sqz_weights'), - bias_attr=ParamAttr(name=name+'_sqz_offset')) + squeeze = fluid.layers.fc( + input=pool, + size=num_channels // reduction_ratio, + act='relu', + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=name + '_sqz_weights'), + bias_attr=ParamAttr(name=name + '_sqz_offset')) stdv = 1.0 / math.sqrt(squeeze.shape[1] * 1.0) - excitation = fluid.layers.fc(input=squeeze, - size=num_channels, - act='sigmoid', - param_attr=fluid.param_attr.ParamAttr( - initializer=fluid.initializer.Uniform( - -stdv, stdv),name=name+'_exc_weights'), - bias_attr=ParamAttr(name=name+'_exc_offset')) + excitation = fluid.layers.fc( + input=squeeze, + size=num_channels, + act='sigmoid', + param_attr=fluid.param_attr.ParamAttr( + initializer=fluid.initializer.Uniform(-stdv, stdv), + name=name + '_exc_weights'), + bias_attr=ParamAttr(name=name + '_exc_offset')) scale = fluid.layers.elementwise_mul(x=input, y=excitation, axis=0) return scale @@ -267,6 +322,6 @@ def SE_ResNeXt101_32x4d_vd(): return model -def SENet154_vd(): +def SE_154_vd(): model = SE_ResNeXt(layers=152) return model diff --git a/PaddleCV/image_classification/models/shufflenet_v2.py 
b/PaddleCV/image_classification/models/shufflenet_v2.py index bd20ee25..44a27f30 100644 --- a/PaddleCV/image_classification/models/shufflenet_v2.py +++ b/PaddleCV/image_classification/models/shufflenet_v2.py @@ -22,36 +22,27 @@ import paddle.fluid as fluid from paddle.fluid.initializer import MSRA from paddle.fluid.param_attr import ParamAttr -__all__ = ['ShuffleNetV2_x0_25', 'ShuffleNetV2_x0_33', 'ShuffleNetV2_x0_5', 'ShuffleNetV2_x1_0', 'ShuffleNetV2_x1_5', 'ShuffleNetV2_x2_0'] - -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [30, 60, 90], - "steps": [0.1, 0.01, 0.001, 0.0001] - } -} +__all__ = [ + 'ShuffleNetV2_x0_25', 'ShuffleNetV2_x0_33', 'ShuffleNetV2_x0_5', + 'ShuffleNetV2_x1_0', 'ShuffleNetV2_x1_5', 'ShuffleNetV2_x2_0', + 'ShuffleNetV2' +] class ShuffleNetV2(): def __init__(self, scale=1.0): - self.params = train_parameters self.scale = scale def net(self, input, class_dim=1000): - scale = self.scale + scale = self.scale stage_repeats = [4, 8, 4] - + if scale == 0.25: - stage_out_channels = [-1, 24, 24, 48, 96, 512] + stage_out_channels = [-1, 24, 24, 48, 96, 512] elif scale == 0.33: - stage_out_channels = [-1, 24, 32, 64, 128, 512] + stage_out_channels = [-1, 24, 32, 64, 128, 512] elif scale == 0.5: - stage_out_channels = [-1, 24, 48, 96, 192, 1024] + stage_out_channels = [-1, 24, 48, 96, 192, 1024] elif scale == 1.0: stage_out_channels = [-1, 24, 116, 232, 464, 1024] elif scale == 1.5: @@ -59,50 +50,77 @@ class ShuffleNetV2(): elif scale == 2.0: stage_out_channels = [-1, 24, 224, 488, 976, 2048] else: - raise ValueError( - """{} groups is not supported for + raise ValueError("""{} groups is not supported for 1x1 Grouped Convolutions""".format(num_groups)) #conv1 - + input_channel = stage_out_channels[1] - conv1 = self.conv_bn_layer(input=input, filter_size=3, num_filters=input_channel, 
padding=1, stride=2,name='stage1_conv') - pool1 = fluid.layers.pool2d(input=conv1, pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + conv1 = self.conv_bn_layer( + input=input, + filter_size=3, + num_filters=input_channel, + padding=1, + stride=2, + name='stage1_conv') + pool1 = fluid.layers.pool2d( + input=conv1, + pool_size=3, + pool_stride=2, + pool_padding=1, + pool_type='max') conv = pool1 # bottleneck sequences for idxstage in range(len(stage_repeats)): numrepeat = stage_repeats[idxstage] - output_channel = stage_out_channels[idxstage+2] + output_channel = stage_out_channels[idxstage + 2] for i in range(numrepeat): if i == 0: - conv = self.inverted_residual_unit(input=conv, num_filters=output_channel, stride=2, - benchmodel=2,name=str(idxstage+2)+'_'+str(i+1)) + conv = self.inverted_residual_unit( + input=conv, + num_filters=output_channel, + stride=2, + benchmodel=2, + name=str(idxstage + 2) + '_' + str(i + 1)) else: - conv = self.inverted_residual_unit(input=conv, num_filters=output_channel, stride=1, - benchmodel=1,name=str(idxstage+2)+'_'+str(i+1)) - - conv_last = self.conv_bn_layer(input=conv, filter_size=1, num_filters=stage_out_channels[-1], - padding=0, stride=1, name='conv5') - pool_last = fluid.layers.pool2d(input=conv_last, pool_size=7, pool_stride=1, pool_padding=0, pool_type='avg') + conv = self.inverted_residual_unit( + input=conv, + num_filters=output_channel, + stride=1, + benchmodel=1, + name=str(idxstage + 2) + '_' + str(i + 1)) + conv_last = self.conv_bn_layer( + input=conv, + filter_size=1, + num_filters=stage_out_channels[-1], + padding=0, + stride=1, + name='conv5') + pool_last = fluid.layers.pool2d( + input=conv_last, + pool_size=7, + pool_stride=1, + pool_padding=0, + pool_type='avg') output = fluid.layers.fc(input=pool_last, size=class_dim, - param_attr=ParamAttr(initializer=MSRA(),name='fc6_weights'), + param_attr=ParamAttr( + initializer=MSRA(), name='fc6_weights'), bias_attr=ParamAttr(name='fc6_offset')) return output - 
def conv_bn_layer(self, - input, - filter_size, - num_filters, - stride, - padding, - num_groups=1, - use_cudnn=True, - if_act=True, - name=None): + input, + filter_size, + num_filters, + stride, + padding, + num_groups=1, + use_cudnn=True, + if_act=True, + name=None): conv = fluid.layers.conv2d( input=input, num_filters=num_filters, @@ -112,162 +130,179 @@ class ShuffleNetV2(): groups=num_groups, act=None, use_cudnn=use_cudnn, - param_attr=ParamAttr(initializer=MSRA(),name=name+'_weights'), + param_attr=ParamAttr( + initializer=MSRA(), name=name + '_weights'), bias_attr=False) - out = int((input.shape[2] - 1)/float(stride) + 1) + out = int((input.shape[2] - 1) / float(stride) + 1) bn_name = name + '_bn' if if_act: - return fluid.layers.batch_norm(input=conv, act='relu', - param_attr = ParamAttr(name=bn_name+"_scale"), - bias_attr=ParamAttr(name=bn_name+"_offset"), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') + return fluid.layers.batch_norm( + input=conv, + act='relu', + param_attr=ParamAttr(name=bn_name + "_scale"), + bias_attr=ParamAttr(name=bn_name + "_offset"), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') else: - return fluid.layers.batch_norm(input=conv, - param_attr = ParamAttr(name=bn_name+"_scale"), - bias_attr=ParamAttr(name=bn_name+"_offset"), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') + return fluid.layers.batch_norm( + input=conv, + param_attr=ParamAttr(name=bn_name + "_scale"), + bias_attr=ParamAttr(name=bn_name + "_offset"), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') - def channel_shuffle(self, x, groups): - batchsize, num_channels, height, width = x.shape[0], x.shape[1], x.shape[2], x.shape[3] + batchsize, num_channels, height, width = x.shape[0], x.shape[ + 1], x.shape[2], x.shape[3] channels_per_group = num_channels // groups - + # reshape - x = fluid.layers.reshape(x=x, shape=[batchsize, 
groups, channels_per_group, height, width]) + x = fluid.layers.reshape( + x=x, shape=[batchsize, groups, channels_per_group, height, width]) - x = fluid.layers.transpose(x=x, perm=[0,2,1,3,4]) + x = fluid.layers.transpose(x=x, perm=[0, 2, 1, 3, 4]) # flatten - x = fluid.layers.reshape(x=x, shape=[batchsize, num_channels, height, width]) + x = fluid.layers.reshape( + x=x, shape=[batchsize, num_channels, height, width]) return x - - def inverted_residual_unit(self, input, num_filters, stride, benchmodel, name=None): + def inverted_residual_unit(self, + input, + num_filters, + stride, + benchmodel, + name=None): assert stride in [1, 2], \ "supported stride are {} but your stride is {}".format([1,2], stride) - - oup_inc = num_filters//2 + + oup_inc = num_filters // 2 inp = input.shape[1] - + if benchmodel == 1: x1, x2 = fluid.layers.split( - input, num_or_sections=[input.shape[1]//2, input.shape[1]//2], dim=1) - + input, + num_or_sections=[input.shape[1] // 2, input.shape[1] // 2], + dim=1) + conv_pw = self.conv_bn_layer( - input=x2, - num_filters=oup_inc, - filter_size=1, + input=x2, + num_filters=oup_inc, + filter_size=1, stride=1, padding=0, num_groups=1, if_act=True, - name='stage_'+name+'_conv1') + name='stage_' + name + '_conv1') conv_dw = self.conv_bn_layer( - input=conv_pw, - num_filters=oup_inc, - filter_size=3, - stride=stride, + input=conv_pw, + num_filters=oup_inc, + filter_size=3, + stride=stride, padding=1, - num_groups=oup_inc, + num_groups=oup_inc, if_act=False, use_cudnn=False, - name='stage_'+name+'_conv2') + name='stage_' + name + '_conv2') conv_linear = self.conv_bn_layer( - input=conv_dw, - num_filters=oup_inc, - filter_size=1, - stride=1, + input=conv_dw, + num_filters=oup_inc, + filter_size=1, + stride=1, padding=0, - num_groups=1, + num_groups=1, if_act=True, - name='stage_'+name+'_conv3') - + name='stage_' + name + '_conv3') + out = fluid.layers.concat([x1, conv_linear], axis=1) - else: #branch1 conv_dw_1 = self.conv_bn_layer( - input=input, - 
num_filters=inp, - filter_size=3, + input=input, + num_filters=inp, + filter_size=3, stride=stride, padding=1, num_groups=inp, if_act=False, use_cudnn=False, - name='stage_'+name+'_conv4') - + name='stage_' + name + '_conv4') + conv_linear_1 = self.conv_bn_layer( - input=conv_dw_1, - num_filters=oup_inc, - filter_size=1, + input=conv_dw_1, + num_filters=oup_inc, + filter_size=1, stride=1, padding=0, num_groups=1, if_act=True, - name='stage_'+name+'_conv5') - + name='stage_' + name + '_conv5') + #branch2 conv_pw_2 = self.conv_bn_layer( - input=input, - num_filters=oup_inc, - filter_size=1, + input=input, + num_filters=oup_inc, + filter_size=1, stride=1, padding=0, num_groups=1, if_act=True, - name='stage_'+name+'_conv1') + name='stage_' + name + '_conv1') conv_dw_2 = self.conv_bn_layer( - input=conv_pw_2, - num_filters=oup_inc, - filter_size=3, - stride=stride, + input=conv_pw_2, + num_filters=oup_inc, + filter_size=3, + stride=stride, padding=1, - num_groups=oup_inc, + num_groups=oup_inc, if_act=False, use_cudnn=False, - name='stage_'+name+'_conv2') + name='stage_' + name + '_conv2') conv_linear_2 = self.conv_bn_layer( - input=conv_dw_2, - num_filters=oup_inc, - filter_size=1, - stride=1, + input=conv_dw_2, + num_filters=oup_inc, + filter_size=1, + stride=1, padding=0, - num_groups=1, + num_groups=1, if_act=True, - name='stage_'+name+'_conv3') + name='stage_' + name + '_conv3') out = fluid.layers.concat([conv_linear_1, conv_linear_2], axis=1) - + return self.channel_shuffle(out, 2) + def ShuffleNetV2_x0_25(): model = ShuffleNetV2(scale=0.25) return model + def ShuffleNetV2_x0_33(): model = ShuffleNetV2(scale=0.33) return model - + + def ShuffleNetV2_x0_5(): model = ShuffleNetV2(scale=0.5) return model + def ShuffleNetV2_x1_0(): model = ShuffleNetV2(scale=1.0) return model + def ShuffleNetV2_x1_5(): model = ShuffleNetV2(scale=1.5) return model + def ShuffleNetV2_x2_0(): model = ShuffleNetV2(scale=2.0) return model diff --git 
a/PaddleCV/image_classification/models/shufflenet_v2_swish.py b/PaddleCV/image_classification/models/shufflenet_v2_swish.py index 42118735..bbd0330a 100644 --- a/PaddleCV/image_classification/models/shufflenet_v2_swish.py +++ b/PaddleCV/image_classification/models/shufflenet_v2_swish.py @@ -22,32 +22,22 @@ import paddle.fluid as fluid from paddle.fluid.initializer import MSRA from paddle.fluid.param_attr import ParamAttr -__all__ = ['ShuffleNetV2_x0_5_swish', 'ShuffleNetV2_x1_0_swish', 'ShuffleNetV2_x1_5_swish', 'ShuffleNetV2_x2_0_swish'] - -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [30, 60, 90], - "steps": [0.1, 0.01, 0.001, 0.0001] - } -} - - -class ShuffleNetV2(): +__all__ = [ + 'ShuffleNetV2_x0_5_swish', 'ShuffleNetV2_x1_0_swish', + 'ShuffleNetV2_x1_5_swish', 'ShuffleNetV2_x2_0_swish', 'ShuffleNetV2_swish' +] + + +class ShuffleNetV2_swish(): def __init__(self, scale=1.0): - self.params = train_parameters self.scale = scale def net(self, input, class_dim=1000): - scale = self.scale + scale = self.scale stage_repeats = [4, 8, 4] - + if scale == 0.5: - stage_out_channels = [-1, 24, 48, 96, 192, 1024] + stage_out_channels = [-1, 24, 48, 96, 192, 1024] elif scale == 1.0: stage_out_channels = [-1, 24, 116, 232, 464, 1024] elif scale == 1.5: @@ -55,50 +45,77 @@ class ShuffleNetV2(): elif scale == 2.0: stage_out_channels = [-1, 24, 224, 488, 976, 2048] else: - raise ValueError( - """{} groups is not supported for + raise ValueError("""{} groups is not supported for 1x1 Grouped Convolutions""".format(num_groups)) #conv1 - + input_channel = stage_out_channels[1] - conv1 = self.conv_bn_layer(input=input, filter_size=3, num_filters=input_channel, padding=1, stride=2,name='stage1_conv') - pool1 = fluid.layers.pool2d(input=conv1, pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') + conv1 = 
self.conv_bn_layer( + input=input, + filter_size=3, + num_filters=input_channel, + padding=1, + stride=2, + name='stage1_conv') + pool1 = fluid.layers.pool2d( + input=conv1, + pool_size=3, + pool_stride=2, + pool_padding=1, + pool_type='max') conv = pool1 # bottleneck sequences for idxstage in range(len(stage_repeats)): numrepeat = stage_repeats[idxstage] - output_channel = stage_out_channels[idxstage+2] + output_channel = stage_out_channels[idxstage + 2] for i in range(numrepeat): if i == 0: - conv = self.inverted_residual_unit(input=conv, num_filters=output_channel, stride=2, - benchmodel=2,name=str(idxstage+2)+'_'+str(i+1)) + conv = self.inverted_residual_unit( + input=conv, + num_filters=output_channel, + stride=2, + benchmodel=2, + name=str(idxstage + 2) + '_' + str(i + 1)) else: - conv = self.inverted_residual_unit(input=conv, num_filters=output_channel, stride=1, - benchmodel=1,name=str(idxstage+2)+'_'+str(i+1)) - - conv_last = self.conv_bn_layer(input=conv, filter_size=1, num_filters=stage_out_channels[-1], - padding=0, stride=1, name='conv5') - pool_last = fluid.layers.pool2d(input=conv_last, pool_size=7, pool_stride=1, pool_padding=0, pool_type='avg') + conv = self.inverted_residual_unit( + input=conv, + num_filters=output_channel, + stride=1, + benchmodel=1, + name=str(idxstage + 2) + '_' + str(i + 1)) + conv_last = self.conv_bn_layer( + input=conv, + filter_size=1, + num_filters=stage_out_channels[-1], + padding=0, + stride=1, + name='conv5') + pool_last = fluid.layers.pool2d( + input=conv_last, + pool_size=7, + pool_stride=1, + pool_padding=0, + pool_type='avg') output = fluid.layers.fc(input=pool_last, size=class_dim, - param_attr=ParamAttr(initializer=MSRA(),name='fc6_weights'), + param_attr=ParamAttr( + initializer=MSRA(), name='fc6_weights'), bias_attr=ParamAttr(name='fc6_offset')) return output - def conv_bn_layer(self, - input, - filter_size, - num_filters, - stride, - padding, - num_groups=1, - use_cudnn=True, - if_act=True, - name=None): + 
input, + filter_size, + num_filters, + stride, + padding, + num_groups=1, + use_cudnn=True, + if_act=True, + name=None): conv = fluid.layers.conv2d( input=input, num_filters=num_filters, @@ -108,154 +125,169 @@ class ShuffleNetV2(): groups=num_groups, act=None, use_cudnn=use_cudnn, - param_attr=ParamAttr(initializer=MSRA(),name=name+'_weights'), + param_attr=ParamAttr( + initializer=MSRA(), name=name + '_weights'), bias_attr=False) - out = int((input.shape[2] - 1)/float(stride) + 1) + out = int((input.shape[2] - 1) / float(stride) + 1) bn_name = name + '_bn' if if_act: - return fluid.layers.batch_norm(input=conv, act='swish', - param_attr = ParamAttr(name=bn_name+"_scale"), - bias_attr=ParamAttr(name=bn_name+"_offset"), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') + return fluid.layers.batch_norm( + input=conv, + act='swish', + param_attr=ParamAttr(name=bn_name + "_scale"), + bias_attr=ParamAttr(name=bn_name + "_offset"), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') else: - return fluid.layers.batch_norm(input=conv, - param_attr = ParamAttr(name=bn_name+"_scale"), - bias_attr=ParamAttr(name=bn_name+"_offset"), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') + return fluid.layers.batch_norm( + input=conv, + param_attr=ParamAttr(name=bn_name + "_scale"), + bias_attr=ParamAttr(name=bn_name + "_offset"), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') - def channel_shuffle(self, x, groups): - batchsize, num_channels, height, width = x.shape[0], x.shape[1], x.shape[2], x.shape[3] + batchsize, num_channels, height, width = x.shape[0], x.shape[ + 1], x.shape[2], x.shape[3] channels_per_group = num_channels // groups - + # reshape - x = fluid.layers.reshape(x=x, shape=[batchsize, groups, channels_per_group, height, width]) + x = fluid.layers.reshape( + x=x, shape=[batchsize, groups, channels_per_group, height, width]) - x = 
fluid.layers.transpose(x=x, perm=[0,2,1,3,4]) + x = fluid.layers.transpose(x=x, perm=[0, 2, 1, 3, 4]) # flatten - x = fluid.layers.reshape(x=x, shape=[batchsize, num_channels, height, width]) + x = fluid.layers.reshape( + x=x, shape=[batchsize, num_channels, height, width]) return x - - def inverted_residual_unit(self, input, num_filters, stride, benchmodel, name=None): + def inverted_residual_unit(self, + input, + num_filters, + stride, + benchmodel, + name=None): assert stride in [1, 2], \ "supported stride are {} but your stride is {}".format([1,2], stride) - - oup_inc = num_filters//2 + + oup_inc = num_filters // 2 inp = input.shape[1] - + if benchmodel == 1: x1, x2 = fluid.layers.split( - input, num_or_sections=[input.shape[1]//2, input.shape[1]//2], dim=1) - + input, + num_or_sections=[input.shape[1] // 2, input.shape[1] // 2], + dim=1) + conv_pw = self.conv_bn_layer( - input=x2, - num_filters=oup_inc, - filter_size=1, + input=x2, + num_filters=oup_inc, + filter_size=1, stride=1, padding=0, num_groups=1, if_act=True, - name='stage_'+name+'_conv1') + name='stage_' + name + '_conv1') conv_dw = self.conv_bn_layer( - input=conv_pw, - num_filters=oup_inc, - filter_size=3, - stride=stride, + input=conv_pw, + num_filters=oup_inc, + filter_size=3, + stride=stride, padding=1, - num_groups=oup_inc, + num_groups=oup_inc, if_act=False, use_cudnn=False, - name='stage_'+name+'_conv2') + name='stage_' + name + '_conv2') conv_linear = self.conv_bn_layer( - input=conv_dw, - num_filters=oup_inc, - filter_size=1, - stride=1, + input=conv_dw, + num_filters=oup_inc, + filter_size=1, + stride=1, padding=0, - num_groups=1, + num_groups=1, if_act=True, - name='stage_'+name+'_conv3') - + name='stage_' + name + '_conv3') + out = fluid.layers.concat([x1, conv_linear], axis=1) - else: #branch1 conv_dw_1 = self.conv_bn_layer( - input=input, - num_filters=inp, - filter_size=3, + input=input, + num_filters=inp, + filter_size=3, stride=stride, padding=1, num_groups=inp, if_act=False, 
use_cudnn=False, - name='stage_'+name+'_conv4') - + name='stage_' + name + '_conv4') + conv_linear_1 = self.conv_bn_layer( - input=conv_dw_1, - num_filters=oup_inc, - filter_size=1, + input=conv_dw_1, + num_filters=oup_inc, + filter_size=1, stride=1, padding=0, num_groups=1, if_act=True, - name='stage_'+name+'_conv5') - + name='stage_' + name + '_conv5') + #branch2 conv_pw_2 = self.conv_bn_layer( - input=input, - num_filters=oup_inc, - filter_size=1, + input=input, + num_filters=oup_inc, + filter_size=1, stride=1, padding=0, num_groups=1, if_act=True, - name='stage_'+name+'_conv1') + name='stage_' + name + '_conv1') conv_dw_2 = self.conv_bn_layer( - input=conv_pw_2, - num_filters=oup_inc, - filter_size=3, - stride=stride, + input=conv_pw_2, + num_filters=oup_inc, + filter_size=3, + stride=stride, padding=1, - num_groups=oup_inc, + num_groups=oup_inc, if_act=False, use_cudnn=False, - name='stage_'+name+'_conv2') + name='stage_' + name + '_conv2') conv_linear_2 = self.conv_bn_layer( - input=conv_dw_2, - num_filters=oup_inc, - filter_size=1, - stride=1, + input=conv_dw_2, + num_filters=oup_inc, + filter_size=1, + stride=1, padding=0, - num_groups=1, + num_groups=1, if_act=True, - name='stage_'+name+'_conv3') + name='stage_' + name + '_conv3') out = fluid.layers.concat([conv_linear_1, conv_linear_2], axis=1) - + return self.channel_shuffle(out, 2) - + + def ShuffleNetV2_x0_5_swish(): - model = ShuffleNetV2(scale=0.5) + model = ShuffleNetV2_swish(scale=0.5) return model + def ShuffleNetV2_x1_0_swish(): - model = ShuffleNetV2(scale=1.0) + model = ShuffleNetV2_swish(scale=1.0) return model + def ShuffleNetV2_x1_5_swish(): - model = ShuffleNetV2(scale=1.5) + model = ShuffleNetV2_swish(scale=1.5) return model + def ShuffleNetV2_x2_0_swish(): - model = ShuffleNetV2(scale=2.0) + model = ShuffleNetV2_swish(scale=2.0) return model diff --git a/PaddleCV/image_classification/models/squeezenet.py b/PaddleCV/image_classification/models/squeezenet.py index 9c21bb21..a6dc5b3e 100644 
--- a/PaddleCV/image_classification/models/squeezenet.py +++ b/PaddleCV/image_classification/models/squeezenet.py @@ -1,16 +1,20 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. # -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at # -# http://www.apache.org/licenses/LICENSE-2.0 +# http://www.apache.org/licenses/LICENSE-2.0 # -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. 
+from __future__ import absolute_import +from __future__ import division +from __future__ import print_function + import paddle import paddle.fluid as fluid import math @@ -18,99 +22,111 @@ from paddle.fluid.param_attr import ParamAttr __all__ = ["SqueezeNet", "SqueezeNet1_0", "SqueezeNet1_1"] -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [30, 60, 90], - "steps": [0.1, 0.01, 0.001, 0.0001] - } -} class SqueezeNet(): def __init__(self, version='1.0'): - self.params = train_parameters self.version = version - + def net(self, input, class_dim=1000): version = self.version assert version in ['1.0', '1.1'], \ "supported version are {} but input version is {}".format(['1.0', '1.1'], version) if version == '1.0': - conv = fluid.layers.conv2d(input, - num_filters=96, - filter_size=7, - stride=2, - act='relu', - param_attr=fluid.param_attr.ParamAttr(name="conv1_weights"), - bias_attr=ParamAttr(name='conv1_offset')) - conv = fluid.layers.pool2d(conv, pool_size=3, pool_stride=2,pool_type='max') + conv = fluid.layers.conv2d( + input, + num_filters=96, + filter_size=7, + stride=2, + act='relu', + param_attr=fluid.param_attr.ParamAttr(name="conv1_weights"), + bias_attr=ParamAttr(name='conv1_offset')) + conv = fluid.layers.pool2d( + conv, pool_size=3, pool_stride=2, pool_type='max') conv = self.make_fire(conv, 16, 64, 64, name='fire2') conv = self.make_fire(conv, 16, 64, 64, name='fire3') conv = self.make_fire(conv, 32, 128, 128, name='fire4') - conv = fluid.layers.pool2d(conv, pool_size=3, pool_stride=2, pool_type='max') + conv = fluid.layers.pool2d( + conv, pool_size=3, pool_stride=2, pool_type='max') conv = self.make_fire(conv, 32, 128, 128, name='fire5') conv = self.make_fire(conv, 48, 192, 192, name='fire6') conv = self.make_fire(conv, 48, 192, 192, name='fire7') conv = self.make_fire(conv, 64, 256, 256, 
name='fire8') - conv = fluid.layers.pool2d(conv, pool_size=3, pool_stride=2, pool_type='max') + conv = fluid.layers.pool2d( + conv, pool_size=3, pool_stride=2, pool_type='max') conv = self.make_fire(conv, 64, 256, 256, name='fire9') else: - conv = fluid.layers.conv2d(input, - num_filters=64, - filter_size=3, - stride=2, - padding=1, - act='relu', - param_attr=fluid.param_attr.ParamAttr(name="conv1_weights"), - bias_attr=ParamAttr(name='conv1_offset')) - conv = fluid.layers.pool2d(conv, pool_size=3, pool_stride=2, pool_type='max') + conv = fluid.layers.conv2d( + input, + num_filters=64, + filter_size=3, + stride=2, + padding=1, + act='relu', + param_attr=fluid.param_attr.ParamAttr(name="conv1_weights"), + bias_attr=ParamAttr(name='conv1_offset')) + conv = fluid.layers.pool2d( + conv, pool_size=3, pool_stride=2, pool_type='max') conv = self.make_fire(conv, 16, 64, 64, name='fire2') conv = self.make_fire(conv, 16, 64, 64, name='fire3') - conv = fluid.layers.pool2d(conv, pool_size=3, pool_stride=2, pool_type='max') + conv = fluid.layers.pool2d( + conv, pool_size=3, pool_stride=2, pool_type='max') conv = self.make_fire(conv, 32, 128, 128, name='fire4') conv = self.make_fire(conv, 32, 128, 128, name='fire5') - conv = fluid.layers.pool2d(conv, pool_size=3, pool_stride=2, pool_type='max') + conv = fluid.layers.pool2d( + conv, pool_size=3, pool_stride=2, pool_type='max') conv = self.make_fire(conv, 48, 192, 192, name='fire6') conv = self.make_fire(conv, 48, 192, 192, name='fire7') conv = self.make_fire(conv, 64, 256, 256, name='fire8') conv = self.make_fire(conv, 64, 256, 256, name='fire9') conv = fluid.layers.dropout(conv, dropout_prob=0.5) - conv = fluid.layers.conv2d(conv, - num_filters=class_dim, - filter_size=1, - act='relu', - param_attr=fluid.param_attr.ParamAttr(name="conv10_weights"), - bias_attr=ParamAttr(name='conv10_offset')) + conv = fluid.layers.conv2d( + conv, + num_filters=class_dim, + filter_size=1, + act='relu', + 
param_attr=fluid.param_attr.ParamAttr(name="conv10_weights"), + bias_attr=ParamAttr(name='conv10_offset')) conv = fluid.layers.pool2d(conv, pool_type='avg', global_pooling=True) out = fluid.layers.flatten(conv) return out - - def make_fire_conv(self, input, num_filters, filter_size, padding=0, name=None): - conv = fluid.layers.conv2d(input, - num_filters=num_filters, - filter_size=filter_size, - padding=padding, - act='relu', - param_attr=fluid.param_attr.ParamAttr(name=name + "_weights"), - bias_attr=ParamAttr(name=name + '_offset')) + def make_fire_conv(self, + input, + num_filters, + filter_size, + padding=0, + name=None): + conv = fluid.layers.conv2d( + input, + num_filters=num_filters, + filter_size=filter_size, + padding=padding, + act='relu', + param_attr=fluid.param_attr.ParamAttr(name=name + "_weights"), + bias_attr=ParamAttr(name=name + '_offset')) return conv - - def make_fire(self, input, squeeze_channels, expand1x1_channels, expand3x3_channels, name=None): - conv = self.make_fire_conv(input, squeeze_channels, 1, name=name+'_squeeze1x1') - conv_path1 = self.make_fire_conv(conv, expand1x1_channels, 1, name=name+'_expand1x1') - conv_path2 = self.make_fire_conv(conv, expand3x3_channels, 3, 1, name=name+'_expand3x3') + + def make_fire(self, + input, + squeeze_channels, + expand1x1_channels, + expand3x3_channels, + name=None): + conv = self.make_fire_conv( + input, squeeze_channels, 1, name=name + '_squeeze1x1') + conv_path1 = self.make_fire_conv( + conv, expand1x1_channels, 1, name=name + '_expand1x1') + conv_path2 = self.make_fire_conv( + conv, expand3x3_channels, 3, 1, name=name + '_expand3x3') out = fluid.layers.concat([conv_path1, conv_path2], axis=1) return out + def SqueezeNet1_0(): model = SqueezeNet(version='1.0') return model + def SqueezeNet1_1(): model = SqueezeNet(version='1.1') return model diff --git a/PaddleCV/image_classification/models/vgg.py b/PaddleCV/image_classification/models/vgg.py index a69bb39f..d58efd7f 100644 --- 
a/PaddleCV/image_classification/models/vgg.py +++ b/PaddleCV/image_classification/models/vgg.py @@ -21,22 +21,9 @@ import paddle.fluid as fluid __all__ = ["VGGNet", "VGG11", "VGG13", "VGG16", "VGG19"] -train_parameters = { - "input_size": [3, 224, 224], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [30, 60, 90], - "steps": [0.1, 0.01, 0.001, 0.0001] - } -} - class VGGNet(): def __init__(self, layers=16): - self.params = train_parameters self.layers = layers def net(self, input, class_dim=1000): @@ -93,8 +80,7 @@ class VGGNet(): act='relu', param_attr=fluid.param_attr.ParamAttr( name=name + str(i + 1) + "_weights"), - bias_attr=fluid.param_attr.ParamAttr( - name=name + str(i + 1) + "_offset")) + bias_attr=False) return fluid.layers.pool2d( input=conv, pool_size=2, pool_type='max', pool_stride=2) diff --git a/PaddleCV/image_classification/models/xception.py b/PaddleCV/image_classification/models/xception.py index 2a0874c6..89bc1818 100644 --- a/PaddleCV/image_classification/models/xception.py +++ b/PaddleCV/image_classification/models/xception.py @@ -24,64 +24,68 @@ from paddle.fluid.param_attr import ParamAttr __all__ = ['Xception', 'Xception_41', 'Xception_65', 'Xception_71'] -train_parameters = { - "input_size": [3, 299, 299], - "input_mean": [0.485, 0.456, 0.406], - "input_std": [0.229, 0.224, 0.225], - "learning_strategy": { - "name": "piecewise_decay", - "batch_size": 256, - "epochs": [30, 60, 90], - "steps": [0.1, 0.01, 0.001, 0.0001] - } -} class Xception(object): """Xception""" + def __init__(self, entry_flow_block_num=3, middle_flow_block_num=8): - self.params = train_parameters self.entry_flow_block_num = entry_flow_block_num self.middle_flow_block_num = middle_flow_block_num return - + def net(self, input, class_dim=1000): conv = self.entry_flow(input, self.entry_flow_block_num) conv = self.middle_flow(conv, self.middle_flow_block_num) 
conv = self.exit_flow(conv, class_dim) - + return conv - + def entry_flow(self, input, block_num=3): '''xception entry_flow''' name = "entry_flow" conv = self.conv_bn_layer( - input=input, num_filters=32, filter_size=3, stride=2, act='relu', name=name+"_conv1") + input=input, + num_filters=32, + filter_size=3, + stride=2, + act='relu', + name=name + "_conv1") conv = self.conv_bn_layer( - input=conv, num_filters=64, filter_size=3, stride=1, act='relu', name=name+"_conv2") - - + input=conv, + num_filters=64, + filter_size=3, + stride=1, + act='relu', + name=name + "_conv2") + if block_num == 3: relu_first = [False, True, True] num_filters = [128, 256, 728] - stride=[2,2,2] + stride = [2, 2, 2] elif block_num == 5: relu_first = [False, True, True, True, True] num_filters = [128, 256, 256, 728, 728] - stride=[2,1,2,1,2] + stride = [2, 1, 2, 1, 2] else: sys.exit(-1) - + for block in range(block_num): - curr_name = "{}_{}".format( name, block ) - conv = self.entry_flow_bottleneck_block(conv, - num_filters=num_filters[block], - name=curr_name, - stride=stride[block], - relu_first=relu_first[block]) - + curr_name = "{}_{}".format(name, block) + conv = self.entry_flow_bottleneck_block( + conv, + num_filters=num_filters[block], + name=curr_name, + stride=stride[block], + relu_first=relu_first[block]) + return conv - - def entry_flow_bottleneck_block(self, input, num_filters, name, stride=2, relu_first=False): + + def entry_flow_bottleneck_block(self, + input, + num_filters, + name, + stride=2, + relu_first=False): '''entry_flow_bottleneck_block''' short = fluid.layers.conv2d( input=input, @@ -90,80 +94,96 @@ class Xception(object): stride=stride, padding=0, act=None, - param_attr=ParamAttr(name+"_branch1_weights"), - bias_attr=False - ) - + param_attr=ParamAttr(name + "_branch1_weights"), + bias_attr=False) + conv0 = input if relu_first: - conv0 = fluid.layers.relu( conv0 ) - - conv1 = self.separable_conv( conv0, num_filters, stride=1, name=name+"_branch2a_weights" ) - - 
conv2 = fluid.layers.relu( conv1 ) - conv2 = self.separable_conv( conv2, num_filters, stride=1, name=name+"_branch2b_weights" ) - + conv0 = fluid.layers.relu(conv0) + + conv1 = self.separable_conv( + conv0, num_filters, stride=1, name=name + "_branch2a_weights") + + conv2 = fluid.layers.relu(conv1) + conv2 = self.separable_conv( + conv2, num_filters, stride=1, name=name + "_branch2b_weights") + pool = fluid.layers.pool2d( input=conv2, pool_size=3, pool_stride=stride, pool_padding=1, pool_type='max') - + return fluid.layers.elementwise_add(x=short, y=pool) - + def middle_flow(self, input, block_num=8): '''xception middle_flow''' - num_filters=728 + num_filters = 728 conv = input for block in range(block_num): - name = "middle_flow_{}".format( block ) - conv =self.middle_flow_bottleneck_block(conv, num_filters, name) - + name = "middle_flow_{}".format(block) + conv = self.middle_flow_bottleneck_block(conv, num_filters, name) + return conv - + def middle_flow_bottleneck_block(self, input, num_filters, name): '''middle_flow_bottleneck_block''' - conv0 = fluid.layers.relu( input ) - conv0 = self.separable_conv( conv0, num_filters=num_filters, stride=1, name=name+"_branch2a_weights" ) - - conv1 = fluid.layers.relu( conv0 ) - conv1 = self.separable_conv( conv1, num_filters=num_filters, stride=1, name=name+"_branch2b_weights" ) - - conv2 = fluid.layers.relu( conv1 ) - conv2 = self.separable_conv( conv2, num_filters=num_filters, stride=1, name=name+"_branch2c_weights" ) - + conv0 = fluid.layers.relu(input) + conv0 = self.separable_conv( + conv0, + num_filters=num_filters, + stride=1, + name=name + "_branch2a_weights") + + conv1 = fluid.layers.relu(conv0) + conv1 = self.separable_conv( + conv1, + num_filters=num_filters, + stride=1, + name=name + "_branch2b_weights") + + conv2 = fluid.layers.relu(conv1) + conv2 = self.separable_conv( + conv2, + num_filters=num_filters, + stride=1, + name=name + "_branch2c_weights") + return fluid.layers.elementwise_add(x=input, y=conv2) - - 
+ def exit_flow(self, input, class_dim): '''xception exit flow''' name = "exit_flow" num_filters1 = 728 num_filters2 = 1024 - conv0 = self.exit_flow_bottleneck_block( input, num_filters1, num_filters2, name=name+"_1" ) - - conv1 = self.separable_conv( conv0, num_filters=1536, stride=1, name=name+"_2" ) - conv1 = fluid.layers.relu( conv1 ) - - conv2 = self.separable_conv( conv1, num_filters=2048, stride=1, name=name+"_3" ) - conv2 = fluid.layers.relu( conv2 ) - + conv0 = self.exit_flow_bottleneck_block( + input, num_filters1, num_filters2, name=name + "_1") + + conv1 = self.separable_conv( + conv0, num_filters=1536, stride=1, name=name + "_2") + conv1 = fluid.layers.relu(conv1) + + conv2 = self.separable_conv( + conv1, num_filters=2048, stride=1, name=name + "_3") + conv2 = fluid.layers.relu(conv2) + pool = fluid.layers.pool2d( - input=conv2, pool_type='avg', global_pooling=True) - - + input=conv2, pool_type='avg', global_pooling=True) + stdv = 1.0 / math.sqrt(pool.shape[1] * 1.0) - out = fluid.layers.fc(input=pool, - size=class_dim, - param_attr=fluid.param_attr.ParamAttr( - name='fc_weights', - initializer=fluid.initializer.Uniform(-stdv, stdv)), - bias_attr=fluid.param_attr.ParamAttr(name='fc_offset')) - + out = fluid.layers.fc( + input=pool, + size=class_dim, + param_attr=fluid.param_attr.ParamAttr( + name='fc_weights', + initializer=fluid.initializer.Uniform(-stdv, stdv)), + bias_attr=fluid.param_attr.ParamAttr(name='fc_offset')) + return out - - def exit_flow_bottleneck_block(self, input, num_filters1, num_filters2, name): + + def exit_flow_bottleneck_block(self, input, num_filters1, num_filters2, + name): '''entry_flow_bottleneck_block''' short = fluid.layers.conv2d( input=input, @@ -172,26 +192,26 @@ class Xception(object): stride=2, padding=0, act=None, - param_attr=ParamAttr(name+"_branch1_weights"), - bias_attr=False - ) - - conv0 = fluid.layers.relu( input ) - conv1 = self.separable_conv( conv0, num_filters1, stride=1, name=name+"_branch2a_weights" ) - - 
conv2 = fluid.layers.relu( conv1 ) - conv2 = self.separable_conv( conv2, num_filters2, stride=1, name=name+"_branch2b_weights" ) - + param_attr=ParamAttr(name + "_branch1_weights"), + bias_attr=False) + + conv0 = fluid.layers.relu(input) + conv1 = self.separable_conv( + conv0, num_filters1, stride=1, name=name + "_branch2a_weights") + + conv2 = fluid.layers.relu(conv1) + conv2 = self.separable_conv( + conv2, num_filters2, stride=1, name=name + "_branch2b_weights") + pool = fluid.layers.pool2d( input=conv2, pool_size=3, pool_stride=2, pool_padding=1, pool_type='max') - + return fluid.layers.elementwise_add(x=short, y=pool) - - + def separable_conv(self, input, num_filters, stride=1, name=None): """separable_conv""" pointwise_conv = self.conv_bn_layer( @@ -200,7 +220,7 @@ class Xception(object): num_filters=num_filters, stride=1, name=name + "_sep") - + depthwise_conv = self.conv_bn_layer( input=pointwise_conv, filter_size=3, @@ -211,7 +231,7 @@ class Xception(object): name=name + "_dw") return depthwise_conv - + def conv_bn_layer(self, input, num_filters, @@ -234,17 +254,17 @@ class Xception(object): bias_attr=False, use_cudnn=use_cudnn, name=name + '.conv2d.output.1') - + bn_name = "bn_" + name - - return fluid.layers.batch_norm(input=conv, - act=act, - name=bn_name + '.output.1', - param_attr=ParamAttr(name=bn_name + '_scale'), - bias_attr=ParamAttr(bn_name + '_offset'), - moving_mean_name=bn_name + '_mean', - moving_variance_name=bn_name + '_variance') - + + return fluid.layers.batch_norm( + input=conv, + act=act, + name=bn_name + '.output.1', + param_attr=ParamAttr(name=bn_name + '_scale'), + bias_attr=ParamAttr(bn_name + '_offset'), + moving_mean_name=bn_name + '_mean', + moving_variance_name=bn_name + '_variance') def Xception_41(): @@ -252,11 +272,13 @@ def Xception_41(): model = Xception(entry_flow_block_num=3, middle_flow_block_num=8) return model + def Xception_65(): """Xception_65""" model = Xception(entry_flow_block_num=3, middle_flow_block_num=16) 
return model + def Xception_71(): """Xception_71""" model = Xception(entry_flow_block_num=5, middle_flow_block_num=16) diff --git a/PaddleCV/image_classification/reader.py b/PaddleCV/image_classification/reader.py old mode 100755 new mode 100644 index c445d233..9ff65bf7 --- a/PaddleCV/image_classification/reader.py +++ b/PaddleCV/image_classification/reader.py @@ -17,203 +17,327 @@ import math import random import functools import numpy as np -from PIL import Image, ImageEnhance +import cv2 +import io +import signal import paddle +import paddle.fluid as fluid random.seed(0) np.random.seed(0) -DATA_DIM = 224 -THREAD = 8 -BUF_SIZE = 2048 +def rotate_image(img): + """rotate image + + Args: + img: image data + + Returns: + rotated image data + """ + (h, w) = img.shape[:2] + center = (w / 2, h / 2) + angle = np.random.randint(-10, 11) + M = cv2.getRotationMatrix2D(center, angle, 1.0) + rotated = cv2.warpAffine(img, M, (w, h)) + return rotated + + +def random_crop(img, size, settings, scale=None, ratio=None, + interpolation=None): + """random crop image + + Args: + img: image data + size: crop size + settings: arguments + scale: scale parameter + ratio: ratio parameter + + Returns: + random cropped image data + """ + lower_scale = settings.lower_scale + lower_ratio = settings.lower_ratio + upper_ratio = settings.upper_ratio + scale = [lower_scale, 1.0] if scale is None else scale + ratio = [lower_ratio, upper_ratio] if ratio is None else ratio + + aspect_ratio = math.sqrt(np.random.uniform(*ratio)) + w = 1. * aspect_ratio + h = 1. 
/ aspect_ratio + + bound = min((float(img.shape[0]) / img.shape[1]) / (h**2), + (float(img.shape[1]) / img.shape[0]) / (w**2)) + + scale_max = min(scale[1], bound) + scale_min = min(scale[0], bound) + + target_area = img.shape[0] * img.shape[1] * np.random.uniform(scale_min, + scale_max) + target_size = math.sqrt(target_area) + w = int(target_size * w) + h = int(target_size * h) + i = np.random.randint(0, img.shape[0] - h + 1) + j = np.random.randint(0, img.shape[1] - w + 1) -DATA_DIR = 'data/ILSVRC2012' + img = img[i:i + h, j:j + w, :] -img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) -img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) + if interpolation: + resized = cv2.resize(img, (size, size), interpolation=interpolation) + else: + resized = cv2.resize(img, (size, size)) + return resized -def resize_short(img, target_size): - percent = float(target_size) / min(img.size[0], img.size[1]) - resized_width = int(round(img.size[0] * percent)) - resized_height = int(round(img.size[1] * percent)) - img = img.resize((resized_width, resized_height), Image.LANCZOS) +#NOTE:(2019/08/08) distort color func is not implemented +def distort_color(img): + """distort image color + + Args: + img: image data + + Returns: + distorted color image data + """ return img +def resize_short(img, target_size, interpolation=None): + """resize image + + Args: + img: image data + target_size: resize short target size + interpolation: interpolation mode + + Returns: + resized image data + """ + percent = float(target_size) / min(img.shape[0], img.shape[1]) + resized_width = int(round(img.shape[1] * percent)) + resized_height = int(round(img.shape[0] * percent)) + if interpolation: + resized = cv2.resize( + img, (resized_width, resized_height), interpolation=interpolation) + else: + resized = cv2.resize(img, (resized_width, resized_height)) + return resized + + def crop_image(img, target_size, center): - width, height = img.size + """crop image + + Args: + img: images 
data + target_size: crop target size + center: crop mode + + Returns: + img: cropped image data + """ + height, width = img.shape[:2] size = target_size if center == True: - w_start = (width - size) / 2 - h_start = (height - size) / 2 + w_start = (width - size) // 2 + h_start = (height - size) // 2 else: w_start = np.random.randint(0, width - size + 1) h_start = np.random.randint(0, height - size + 1) w_end = w_start + size h_end = h_start + size - img = img.crop((w_start, h_start, w_end, h_end)) + img = img[h_start:h_end, w_start:w_end, :] return img -def random_crop(img, size, scale=[0.08, 1.0], ratio=[3. / 4., 4. / 3.]): - aspect_ratio = math.sqrt(np.random.uniform(*ratio)) - w = 1. * aspect_ratio - h = 1. / aspect_ratio +def create_mixup_reader(settings, rd): + """ + """ - bound = min((float(img.size[0]) / img.size[1]) / (w**2), - (float(img.size[1]) / img.size[0]) / (h**2)) - scale_max = min(scale[1], bound) - scale_min = min(scale[0], bound) - - target_area = img.size[0] * img.size[1] * np.random.uniform(scale_min, - scale_max) - target_size = math.sqrt(target_area) - w = int(target_size * w) - h = int(target_size * h) - - i = np.random.randint(0, img.size[0] - w + 1) - j = np.random.randint(0, img.size[1] - h + 1) - - img = img.crop((i, j, i + w, j + h)) - img = img.resize((size, size), Image.LANCZOS) - return img + class context: + tmp_mix = [] + tmp_l1 = [] + tmp_l2 = [] + tmp_lam = [] + batch_size = settings.batch_size + alpha = settings.mixup_alpha -def rotate_image(img): - angle = np.random.randint(-10, 11) - img = img.rotate(angle) - return img + def fetch_data(): + data_list = [] + for i, item in enumerate(rd()): + data_list.append(item) + if i % batch_size == batch_size - 1: -def distort_color(img): - def random_brightness(img, lower=0.5, upper=1.5): - e = np.random.uniform(lower, upper) - return ImageEnhance.Brightness(img).enhance(e) + yield data_list + data_list = [] - def random_contrast(img, lower=0.5, upper=1.5): - e = np.random.uniform(lower, 
upper) - return ImageEnhance.Contrast(img).enhance(e) + def mixup_data(): + for data_list in fetch_data(): + if alpha > 0.: + lam = np.random.beta(alpha, alpha) + else: + lam = 1. + l1 = np.array(data_list) + l2 = np.random.permutation(l1) + mixed_l = [ + l1[i][0] * lam + (1 - lam) * l2[i][0] for i in range(len(l1)) + ] + yield (mixed_l, l1, l2, lam) - def random_color(img, lower=0.5, upper=1.5): - e = np.random.uniform(lower, upper) - return ImageEnhance.Color(img).enhance(e) + def mixup_reader(): + for context.tmp_mix, context.tmp_l1, context.tmp_l2, context.tmp_lam in mixup_data( + ): + for i in range(len(context.tmp_mix)): + mixed_l = context.tmp_mix[i] + l1 = context.tmp_l1[i] + l2 = context.tmp_l2[i] + lam = context.tmp_lam + yield (mixed_l, int(l1[1]), int(l2[1]), float(lam)) - ops = [random_brightness, random_contrast, random_color] - np.random.shuffle(ops) + return mixup_reader - img = ops[0](img) - img = ops[1](img) - img = ops[2](img) - return img +def process_image(sample, settings, mode, color_jitter, rotate): + """ process_image """ + mean = settings.image_mean + std = settings.image_std + crop_size = settings.crop_size -def process_image(sample, mode, color_jitter, rotate): img_path = sample[0] + img = cv2.imread(img_path) - img = Image.open(img_path) - if mode == 'train': - if rotate: img = rotate_image(img) - img = random_crop(img, DATA_DIM) - else: - img = resize_short(img, target_size=256) - img = crop_image(img, target_size=DATA_DIM, center=True) if mode == 'train': + if rotate: + img = rotate_image(img) + if crop_size > 0: + img = random_crop(img, crop_size, settings) if color_jitter: img = distort_color(img) if np.random.randint(0, 2) == 1: - img = img.transpose(Image.FLIP_LEFT_RIGHT) - - if img.mode != 'RGB': - img = img.convert('RGB') - - img = np.array(img).astype('float32').transpose((2, 0, 1)) / 255 + img = img[:, ::-1, :] + else: + if crop_size > 0: + target_size = settings.resize_short_size + img = resize_short(img, target_size) + img = 
crop_image(img, target_size=crop_size, center=True) + + img = img[:, :, ::-1].astype('float32').transpose((2, 0, 1)) / 255 + img_mean = np.array(mean).reshape((3, 1, 1)) + img_std = np.array(std).reshape((3, 1, 1)) img -= img_mean img /= img_std if mode == 'train' or mode == 'val': - return img, sample[1] + return (img, sample[1]) elif mode == 'test': - return [img] - + return (img, ) -def process_batch_data(input_data, mode, color_jitter, rotate): - batch_data = [] - for sample in input_data: - batch_data.append(process_image(sample, mode, color_jitter, rotate)) - return batch_data - -def _reader_creator(file_list, - batch_size, +def _reader_creator(settings, + file_list, mode, shuffle=False, color_jitter=False, rotate=False, - data_dir=DATA_DIR, - shuffle_seed=0, - infinite=False): + data_dir=None): def reader(): - def read_file_list(): - with open(file_list) as flist: - full_lines = [line.strip() for line in flist] - if shuffle: - if shuffle_seed is not None: - np.random.seed(shuffle_seed) - np.random.shuffle(full_lines) - batch_data = [] - for line in full_lines: - img_path, label = line.split() - img_path = os.path.join(data_dir, img_path) - batch_data.append([img_path, int(label)]) - if len(batch_data) == batch_size: - if mode == 'train' or mode == 'val': - yield batch_data - elif mode == 'test': - yield [sample[0] for sample in batch_data] - batch_data = [] - - return read_file_list - - data_reader = reader() - num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1)) - if mode == 'train' and num_trainers > 1: - assert shuffle_seed is not None, \ - "If num_trainers > 1, the shuffle_seed must be set, because " \ - "the order of batch data generated by reader " \ - "must be the same in the respective processes." 
- data_reader = fluid.contrib.reader.distributed_batch_reader(data_reader) + with open(file_list) as flist: + full_lines = [line.strip() for line in flist] + if mode != "test" and len(full_lines) < settings.batch_size: + print( + "Warning: The number of the whole data ({}) is smaller than the batch_size ({}), and drop_last is turnning on, so nothing will feed in program, Terminated now. Please reset batch_size to a smaller number or feed more data!" + .format(len(full_lines), settings.batch_size)) + os._exit(1) + + if shuffle: + np.random.shuffle(full_lines) + for line in full_lines: + img_path, label = line.split() + img_path = os.path.join(data_dir, img_path) + if not os.path.exists(img_path): + print("Warning: {} doesn't exist!".format(img_path)) + if mode == "train" or mode == "val": + yield img_path, int(label) + elif mode == "test": + yield [img_path] mapper = functools.partial( - process_batch_data, mode=mode, color_jitter=color_jitter, rotate=rotate) - - return paddle.reader.xmap_readers(mapper, data_reader, THREAD, BUF_SIZE) - - -def train(batch_size, data_dir=DATA_DIR, shuffle_seed=0, infinite=False): - file_list = os.path.join(data_dir, 'train_list.txt') - return _reader_creator( + process_image, + settings=settings, + mode=mode, + color_jitter=color_jitter, + rotate=rotate) + + return paddle.reader.xmap_readers( + mapper, + reader, + settings.reader_thread, + settings.reader_buf_size, + order=False) + + +def train(settings): + """Create a reader for trainning + + Args: + settings: arguments + + Returns: + train reader + """ + file_list = os.path.join(settings.data_dir, 'train_list.txt') + assert os.path.isfile( + file_list), "{} doesn't exist, please check data list path".format( + file_list) + reader = _reader_creator( + settings, file_list, - batch_size, 'train', shuffle=True, color_jitter=False, rotate=False, - data_dir=data_dir, - shuffle_seed=shuffle_seed, - infinite=infinite) + data_dir=settings.data_dir) + if settings.use_mixup == True: + reader 
= create_mixup_reader(settings, reader) + return reader -def val(batch_size, data_dir=DATA_DIR): - file_list = os.path.join(data_dir, 'val_list.txt') - return _reader_creator( - file_list, batch_size, 'val', shuffle=False, data_dir=data_dir) +def val(settings): + """Create a reader for eval -def test(batch_size, data_dir=DATA_DIR): - file_list = os.path.join(data_dir, 'val_list.txt') + Args: + settings: arguments + + Returns: + eval reader + """ + file_list = os.path.join(settings.data_dir, 'val_list.txt') + assert os.path.isfile( + file_list), "{} doesn't exist, please check data list path".format( + file_list) + + return _reader_creator( + settings, file_list, 'val', shuffle=False, data_dir=settings.data_dir) + + +def test(settings): + """Create a reader for testing + + Args: + settings: arguments + + Returns: + test reader + """ + file_list = os.path.join(settings.data_dir, 'val_list.txt') + assert os.path.isfile( + file_list), "{} doesn't exist, please check data list path".format( + file_list) return _reader_creator( - file_list, batch_size, 'test', shuffle=False, data_dir=data_dir) + settings, file_list, 'test', shuffle=False, data_dir=settings.data_dir) diff --git a/PaddleCV/image_classification/reader_cv2.py b/PaddleCV/image_classification/reader_cv2.py deleted file mode 100644 index 371d2b5d..00000000 --- a/PaddleCV/image_classification/reader_cv2.py +++ /dev/null @@ -1,307 +0,0 @@ -#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-#See the License for the specific language governing permissions and -#limitations under the License. - -import os -import math -import random -import functools -import numpy as np -import cv2 -import io - -import paddle -import paddle.fluid as fluid - -random.seed(0) -np.random.seed(0) - -DATA_DIM = 224 - -THREAD = 8 -BUF_SIZE = 2048 - -DATA_DIR = './data/ILSVRC2012' - -img_mean = np.array([0.485, 0.456, 0.406]).reshape((3, 1, 1)) -img_std = np.array([0.229, 0.224, 0.225]).reshape((3, 1, 1)) - - -def rotate_image(img): - """ rotate_image """ - (h, w) = img.shape[:2] - center = (w / 2, h / 2) - angle = np.random.randint(-10, 11) - M = cv2.getRotationMatrix2D(center, angle, 1.0) - rotated = cv2.warpAffine(img, M, (w, h)) - return rotated - - -def random_crop(img, size, settings, scale=None, ratio=None): - """ random_crop """ - lower_scale = settings.lower_scale - lower_ratio = settings.lower_ratio - upper_ratio = settings.upper_ratio - scale = [lower_scale, 1.0] if scale is None else scale - ratio = [lower_ratio, upper_ratio] if ratio is None else ratio - - aspect_ratio = math.sqrt(np.random.uniform(*ratio)) - w = 1. * aspect_ratio - h = 1. 
/ aspect_ratio - - bound = min((float(img.shape[0]) / img.shape[1]) / (h**2), - (float(img.shape[1]) / img.shape[0]) / (w**2)) - - scale_max = min(scale[1], bound) - scale_min = min(scale[0], bound) - - target_area = img.shape[0] * img.shape[1] * np.random.uniform(scale_min, - scale_max) - target_size = math.sqrt(target_area) - w = int(target_size * w) - h = int(target_size * h) - i = np.random.randint(0, img.shape[0] - h + 1) - j = np.random.randint(0, img.shape[1] - w + 1) - - img = img[i:i + h, j:j + w, :] - - resized = cv2.resize( - img, - (size, size) - #, interpolation=cv2.INTER_LANCZOS4 - ) - return resized - - -def distort_color(img): - return img - - -def resize_short(img, target_size): - """ resize_short """ - percent = float(target_size) / min(img.shape[0], img.shape[1]) - resized_width = int(round(img.shape[1] * percent)) - resized_height = int(round(img.shape[0] * percent)) - resized = cv2.resize( - img, - (resized_width, resized_height), - #interpolation=cv2.INTER_LANCZOS4 - ) - return resized - - -def crop_image(img, target_size, center): - """ crop_image """ - height, width = img.shape[:2] - size = target_size - if center == True: - w_start = (width - size) // 2 - h_start = (height - size) // 2 - else: - w_start = np.random.randint(0, width - size + 1) - h_start = np.random.randint(0, height - size + 1) - w_end = w_start + size - h_end = h_start + size - img = img[h_start:h_end, w_start:w_end, :] - return img - - -def create_mixup_reader(settings, rd): - class context: - tmp_mix = [] - tmp_l1 = [] - tmp_l2 = [] - tmp_lam = [] - - batch_size = settings.batch_size - alpha = settings.mixup_alpha - - def fetch_data(): - - data_list = [] - for i, item in enumerate(rd()): - data_list.append(item) - if i % batch_size == batch_size - 1: - yield data_list - data_list = [] - - def mixup_data(): - - for data_list in fetch_data(): - if alpha > 0.: - lam = np.random.beta(alpha, alpha) - else: - lam = 1. 
- l1 = np.array(data_list) - l2 = np.random.permutation(l1) - mixed_l = [ - l1[i][0] * lam + (1 - lam) * l2[i][0] for i in range(len(l1)) - ] - yield mixed_l, l1, l2, lam - - def mixup_reader(): - - for context.tmp_mix, context.tmp_l1, context.tmp_l2, context.tmp_lam in mixup_data( - ): - for i in range(len(context.tmp_mix)): - mixed_l = context.tmp_mix[i] - l1 = context.tmp_l1[i] - l2 = context.tmp_l2[i] - lam = context.tmp_lam - yield mixed_l, l1[1], l2[1], lam - - return mixup_reader - - -def process_image(sample, - settings, - mode, - color_jitter, - rotate, - crop_size=224, - mean=None, - std=None): - """ process_image """ - - mean = [0.485, 0.456, 0.406] if mean is None else mean - std = [0.229, 0.224, 0.225] if std is None else std - - img_path = sample[0] - img = cv2.imread(img_path) - - if mode == 'train': - if rotate: - img = rotate_image(img) - if crop_size > 0: - img = random_crop(img, crop_size, settings) - if color_jitter: - img = distort_color(img) - if np.random.randint(0, 2) == 1: - img = img[:, ::-1, :] - else: - if crop_size > 0: - target_size = settings.resize_short_size - img = resize_short(img, target_size) - img = crop_image(img, target_size=crop_size, center=True) - - img = img[:, :, ::-1].astype('float32').transpose((2, 0, 1)) / 255 - img_mean = np.array(mean).reshape((3, 1, 1)) - img_std = np.array(std).reshape((3, 1, 1)) - img -= img_mean - img /= img_std - - if mode == 'train' or mode == 'val': - return (img, sample[1]) - elif mode == 'test': - return (img, ) - - -def process_batch_data(input_data, settings, mode, color_jitter, rotate): - batch_data = [] - crop_size = int(settings.image_shape.split(',')[-1]) - for sample in input_data: - if os.path.isfile(sample[0]): - batch_data.append( - process_image(sample, settings, mode, color_jitter, rotate, crop_size)) - else: - print("File not exist : %s" % sample[0]) - return batch_data - - -def _reader_creator(settings, - file_list, - batch_size, - mode, - shuffle=False, - color_jitter=False, 
- rotate=False, - data_dir=DATA_DIR, - shuffle_seed=0): - def reader(): - def read_file_list(): - with open(file_list) as flist: - full_lines = [line.strip() for line in flist] - if shuffle: - if shuffle_seed is not None: - np.random.seed(shuffle_seed) - np.random.shuffle(full_lines) - batch_data = [] - for line in full_lines: - img_path, label = line.split() - img_path = os.path.join(data_dir, img_path) - batch_data.append([img_path, int(label)]) - if len(batch_data) == batch_size: - if mode == 'train' or mode == 'val' or mode == 'test': - yield batch_data - - batch_data = [] - - return read_file_list - - data_reader = reader() - num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1)) - if mode == 'train' and num_trainers > 1: - assert shuffle_seed is not None, \ - "If num_trainers > 1, the shuffle_seed must be set, because " \ - "the order of batch data generated by reader " \ - "must be the same in the respective processes." - data_reader = fluid.contrib.reader.distributed_batch_reader(data_reader) - - mapper = functools.partial( - process_batch_data, - settings=settings, - mode=mode, - color_jitter=color_jitter, - rotate=rotate) - - return paddle.reader.xmap_readers( - mapper, data_reader, THREAD, BUF_SIZE, order=False) - - -def train(settings, batch_size, data_dir=DATA_DIR, shuffle_seed=0): - file_list = os.path.join(data_dir, 'train_list.txt') - reader = _reader_creator( - settings, - file_list, - batch_size, - 'train', - shuffle=True, - color_jitter=False, - rotate=False, - data_dir=data_dir, - shuffle_seed=shuffle_seed) - if settings.use_mixup == True: - reader = create_mixup_reader(settings, reader) - return reader - - -def val(settings, batch_size, data_dir=DATA_DIR): - file_list = os.path.join(data_dir, 'val_list.txt') - return _reader_creator( - settings, - file_list, - batch_size, - 'val', - shuffle=False, - data_dir=data_dir) - - -def test(settings, batch_size, data_dir=DATA_DIR): - file_list = os.path.join(data_dir, 'val_list.txt') - return 
_reader_creator( - settings, - file_list, - batch_size, - 'test', - shuffle=False, - data_dir=data_dir) diff --git a/PaddleCV/image_classification/run.sh b/PaddleCV/image_classification/run.sh index b2466a79..59b46f7a 100644 --- a/PaddleCV/image_classification/run.sh +++ b/PaddleCV/image_classification/run.sh @@ -1,798 +1,10 @@ -#Hyperparameters config -#Example: SE_ResNext50_32x4d -python train.py \ - --model=SE_ResNeXt50_32x4d \ - --batch_size=400 \ - --total_images=1281167 \ - --class_dim=1000 \ - --image_shape=3,224,224 \ - --model_save_dir=output/ \ - --with_mem_opt=True \ - --lr_strategy=cosine_decay \ - --lr=0.1 \ - --num_epochs=200 \ - --l2_decay=1.2e-4 \ -# >log_SE_ResNeXt50_32x4d.txt 2>&1 & - -#AlexNet: -#python train.py \ -# --model=AlexNet \ -# --batch_size=256 \ -# --total_images=1281167 \ -# --class_dim=1000 \ -# --image_shape=3,224,224 \ -# --model_save_dir=output/ \ -# --with_mem_opt=True \ -# --lr_strategy=piecewise_decay \ -# --num_epochs=120 \ -# --lr=0.01 \ -# --l2_decay=1e-4 - -#SqueezeNet1_0 -#python train.py \ -# --model=SqueezeNet1_0 \ -# --batch_size=256 \ -# --total_images=1281167 \ -# --image_shape=3,224,224 \ -# --lr_strategy=cosine_decay \ -# --class_dim=1000 \ -# --model_save_dir=output/ \ -# --lr=0.02 \ -# --num_epochs=120 \ -# --with_mem_opt=True \ -# --l2_decay=1e-4 - -#SqueezeNet1_1 -#python train.py \ -# --model=SqueezeNet1_1 \ -# --batch_size=256 \ -# --total_images=1281167 \ -# --image_shape=3,224,224 \ -# --lr_strategy=cosine_decay \ -# --class_dim=1000 \ -# --model_save_dir=output/ \ -# --lr=0.02 \ -# --num_epochs=120 \ -# --with_mem_opt=True \ -# --l2_decay=1e-4 - -#VGG11: -#python train.py \ -# --model=VGG11 \ -# --batch_size=512 \ -# --total_images=1281167 \ -# --image_shape=3,224,224 \ -# --lr_strategy=cosine_decay \ -# --class_dim=1000 \ -# --model_save_dir=output/ \ -# --lr=0.1 \ -# --num_epochs=90 \ -# --with_mem_opt=True \ -# --l2_decay=2e-4 - -#VGG13: -#python train.py -# --model=VGG13 \ -# --batch_size=256 \ -# 
--total_images=1281167 \ -# --class_dim=1000 \ -# --image_shape=3,224,224 \ -# --lr_strategy=cosine_decay \ -# --lr=0.01 \ -# --num_epochs=90 \ -# --model_save_dir=output/ \ -# --with_mem_opt=True \ -# --l2_decay=3e-4 - -#VGG16: -#python train.py -# --model=VGG16 \ -# --batch_size=256 \ -# --total_images=1281167 \ -# --class_dim=1000 \ -# --lr_strategy=cosine_decay \ -# --image_shape=3,224,224 \ -# --model_save_dir=output/ \ -# --lr=0.01 \ -# --num_epochs=90 \ -# --with_mem_opt=True \ -# --l2_decay=3e-4 - -#VGG19: -#python train.py -# --model=VGG19 \ -# --batch_size=256 \ -# --total_images=1281167 \ -# --class_dim=1000 \ -# --image_shape=3,224,224 \ -# --lr_strategy=cosine_decay \ -# --lr=0.01 \ -# --num_epochs=90 \ -# --with_mem_opt=True \ -# --model_save_dir=output/ \ -# --l2_decay=3e-4 - -#MobileNetV1: -#python train.py \ -# --model=MobileNet \ -# --batch_size=256 \ -# --total_images=1281167 \ -# --class_dim=1000 \ -# --image_shape=3,224,224 \ -# --model_save_dir=output/ \ -# --with_mem_opt=True \ -# --lr_strategy=piecewise_decay \ -# --num_epochs=120 \ -# --lr=0.1 \ -# --l2_decay=3e-5 - -#MobileNetV2_x0_25 -#python train.py \ -# --model=MobileNetV2_x0_25 \ -# --batch_size=256 \ -# --total_images=1281167 \ -# --class_dim=1000 \ -# --image_shape=3,224,224 \ -# --model_save_dir=output/ \ -# --with_mem_opt=True \ -# --lr_strategy=cosine_decay \ -# --num_epochs=240 \ -# --lr=0.1 \ -# --l2_decay=3e-5 \ -# --lower_ratio=1.0 \ -# --upper_ratio=1.0 - -#MobileNetV2_x0_5 -#python train.py \ -# --model=MobileNetV2_x0_5 \ -# --batch_size=256 \ -# --total_images=1281167 \ -# --class_dim=1000 \ -# --image_shape=3,224,224 \ -# --model_save_dir=output/ \ -# --with_mem_opt=True \ -# --lr_strategy=cosine_decay \ -# --num_epochs=240 \ -# --lr=0.1 \ -# --l2_decay=3e-5 \ -# --lower_ratio=1.0 \ -# --upper_ratio=1.0 - -#MobileNetV2_x1_0: -#python train.py \ -# --model=MobileNetV2_x1_0 \ -# --batch_size=500 \ -# --total_images=1281167 \ -# --class_dim=1000 \ -# --image_shape=3,224,224 
\ -# --model_save_dir=output/ \ -# --with_mem_opt=True \ -# --lr_strategy=cosine_decay \ -# --num_epochs=240 \ -# --lr=0.1 \ -# --l2_decay=4e-5 - -#MobileNetV2_x1_5 -#python train.py \ -# --model=MobileNetV2_x1_5 \ -# --batch_size=256 \ -# --total_images=1281167 \ -# --class_dim=1000 \ -# --image_shape=3,224,224 \ -# --model_save_dir=output/ \ -# --with_mem_opt=True \ -# --lr_strategy=cosine_decay \ -# --num_epochs=240 \ -# --lr=0.1 \ -# --l2_decay=4e-5 - -#MobileNetV2_x2_0 -#python train.py \ -# --model=MobileNetV2_x2_0 \ -# --batch_size=256 \ -# --total_images=1281167 \ -# --class_dim=1000 \ -# --image_shape=3,224,224 \ -# --model_save_dir=output/ \ -# --with_mem_opt=True \ -# --lr_strategy=cosine_decay \ -# --num_epochs=240 \ -# --lr=0.1 \ -# --l2_decay=4e-5 - -#ShuffleNetV2_x0_25: -#python train.py \ -# --model=ShuffleNetV2_x0_25 \ -# --batch_size=1024 \ -# --total_images=1281167 \ -# --class_dim=1000 \ -# --image_shape=3,224,224 \ -# --model_save_dir=output/ \ -# --with_mem_opt=True \ -# --lr_strategy=cosine_warmup_decay \ -# --num_epochs=240 \ -# --lr=0.5 \ -# --l2_decay=3e-5 \ -# --lower_scale=0.64 \ -# --lower_ratio=0.8 \ -# --upper_ratio=1.2 - -#ShuffleNetV2_x0_33: -#python train.py \ -# --model=ShuffleNetV2_x0_33 \ -# --batch_size=1024 \ -# --total_images=1281167 \ -# --class_dim=1000 \ -# --image_shape=3,224,224 \ -# --model_save_dir=output/ \ -# --with_mem_opt=True \ -# --lr_strategy=cosine_warmup_decay \ -# --num_epochs=240 \ -# --lr=0.5 \ -# --l2_decay=3e-5 \ -# --lower_scale=0.64 \ -# --lower_ratio=0.8 \ -# --upper_ratio=1.2 - -#ShuffleNetV2_x0_5: -#python train.py \ -# --model=ShuffleNetV2_x0_5 \ -# --batch_size=1024 \ -# --total_images=1281167 \ -# --class_dim=1000 \ -# --image_shape=3,224,224 \ -# --model_save_dir=output/ \ -# --with_mem_opt=True \ -# --lr_strategy=cosine_warmup_decay \ -# --num_epochs=240 \ -# --lr=0.5 \ -# --l2_decay=3e-5 \ -# --lower_scale=0.64 \ -# --lower_ratio=0.8 \ -# --upper_ratio=1.2 - -#ShuffleNetV2_x1_0: -#python 
train.py \ -# --model=ShuffleNetV2_x1_0 \ -# --batch_size=1024 \ -# --total_images=1281167 \ -# --class_dim=1000 \ -# --image_shape=3,224,224 \ -# --model_save_dir=output/ \ -# --with_mem_opt=True \ -# --lr_strategy=cosine_warmup_decay \ -# --num_epochs=240 \ -# --lr=0.5 \ -# --l2_decay=4e-5 \ -# --lower_scale=0.2 - -#ShuffleNetV2_x1_5: -#python train.py \ -# --model=ShuffleNetV2_x1_5 \ -# --batch_size=512 \ -# --total_images=1281167 \ -# --class_dim=1000 \ -# --image_shape=3,224,224 \ -# --model_save_dir=output/ \ -# --with_mem_opt=True \ -# --lr_strategy=cosine_warmup_decay \ -# --num_epochs=240 \ -# --lr=0.25 \ -# --l2_decay=4e-5 \ -# --lower_ratio=1.0 \ -# --upper_ratio=1.0 - -#ShuffleNetV2_x2_0: -#python train.py \ -# --model=ShuffleNetV2_x2_0 \ -# --batch_size=512 \ -# --total_images=1281167 \ -# --class_dim=1000 \ -# --image_shape=3,224,224 \ -# --model_save_dir=output/ \ -# --with_mem_opt=True \ -# --lr_strategy=cosine_warmup_decay \ -# --num_epochs=240 \ -# --lr=0.25 \ -# --l2_decay=4e-5 - -#ShuffleNetV2_x1_0_swish: -#python train.py \ -# --model=ShuffleNetV2_x1_0_swish \ -# --batch_size=1024 \ -# --total_images=1281167 \ -# --class_dim=1000 \ -# --image_shape=3,224,224 \ -# --model_save_dir=output/ \ -# --with_mem_opt=True \ -# --lr_strategy=cosine_warmup_decay \ -# --lr=0.5 \ -# --num_epochs=240 \ -# --l2_decay=4e-5 - -#ResNet18: -#python train.py \ -# --model=ResNet18 \ -# --batch_size=256 \ -# --total_images=1281167 \ -# --class_dim=1000 \ -# --image_shape=3,224,224 \ -# --model_save_dir=output/ \ -# --with_mem_opt=True \ -# --lr_strategy=cosine_decay \ -# --lr=0.1 \ -# --num_epochs=120 \ -# --l2_decay=1e-4 - -#ResNet34: -#python train.py \ -# --model=ResNet34 \ -# --batch_size=256 \ -# --total_images=1281167 \ -# --class_dim=1000 \ -# --image_shape=3,224,224 \ -# --model_save_dir=output/ \ -# --with_mem_opt=True \ -# --lr_strategy=cosine_decay \ -# --lr=0.1 \ -# --num_epochs=120 \ -# --l2_decay=1e-4 - -#ResNet50: -#python train.py \ -# 
--model=ResNet50 \ -# --batch_size=256 \ -# --total_images=1281167 \ -# --class_dim=1000 \ -# --image_shape=3,224,224 \ -# --model_save_dir=output/ \ -# --with_mem_opt=True \ -# --lr_strategy=piecewise_decay \ -# --num_epochs=120 \ -# --lr=0.1 \ -# --l2_decay=1e-4 - -#ResNet50_vc -#python train.py -# --model=ResNet50_vc \ -# --batch_size=256 \ -# --total_images=1281167 \ -# --image_shape=3,224,224 \ -# --class_dim=1000 \ -# --lr_strategy=cosine_decay \ -# --lr=0.1 \ -# --num_epochs=200 \ -# --with_mem_opt=True \ -# --model_save_dir=output/ \ -# --l2_decay=1e-4 \ - -#ResNet50_vd -#python train.py -# --model=ResNet50_vd \ -# --batch_size=256 \ -# --total_images=1281167 \ -# --image_shape=3,224,224 \ -# --class_dim=1000 \ -# --lr_strategy=cosine_decay \ -# --lr=0.1 \ -# --num_epochs=200 \ -# --with_mem_opt=True \ -# --model_save_dir=output/ \ -# --l2_decay=7e-5 \ -# --use_mixup=True \ -# --use_label_smoothing=True \ -# --label_smoothing_epsilon=0.1 - -#ResNet101: -#python train.py \ -# --model=ResNet101 \ -# --batch_size=256 \ -# --total_images=1281167 \ -# --class_dim=1000 \ -# --image_shape=3,224,224 \ -# --model_save_dir=output/ \ -# --with_mem_opt=True \ -# --lr_strategy=piecewise_decay \ -# --num_epochs=120 \ -# --lr=0.1 \ -# --l2_decay=1e-4 - -#ResNet101_vd -#python train.py -# --model=ResNet101_vd \ -# --batch_size=256 \ -# --total_images=1281167 \ -# --image_shape=3,224,224 \ -# --class_dim=1000 \ -# --lr_strategy=cosine_decay \ -# --lr=0.1 \ -# --num_epochs=200 \ -# --with_mem_opt=True \ -# --model_save_dir=output/ \ -# --l2_decay=1e-4 \ -# --use_mixup=True \ -# --use_label_smoothing=True \ -# --label_smoothing_epsilon=0.1 - -#ResNet152: -#python train.py \ -# --model=ResNet152 \ -# --batch_size=256 \ -# --total_images=1281167 \ -# --class_dim=1000 \ -# --image_shape=3,224,224 \ -# --model_save_dir=output/ \ -# --lr_strategy=piecewise_decay \ -# --with_mem_opt=True \ -# --lr=0.1 \ -# --num_epochs=120 \ -# --l2_decay=1e-4 - -#ResNet152_vd -#python train.py -# 
--model=ResNet152_vd \ -# --batch_size=256 \ -# --total_images=1281167 \ -# --image_shape=3,224,224 \ -# --class_dim=1000 \ -# --lr_strategy=cosine_decay \ -# --lr=0.1 \ -# --num_epochs=200 \ -# --with_mem_opt=True \ -# --model_save_dir=output/ \ -# --l2_decay=1e-4 \ -# --use_mixup=True \ -# --use_label_smoothing=True \ -# --label_smoothing_epsilon=0.1 - -#ResNet200_vd -#python train.py -# --model=ResNet200_vd \ -# --batch_size=256 \ -# --total_images=1281167 \ -# --image_shape=3,224,224 \ -# --class_dim=1000 \ -# --lr_strategy=cosine_decay \ -# --lr=0.1 \ -# --num_epochs=200 \ -# --with_mem_opt=True \ -# --model_save_dir=output/ \ -# --l2_decay=1e-4 \ -# --use_mixup=True \ -# --use_label_smoothing=True \ -# --label_smoothing_epsilon=0.1 - -#ResNeXt50_32x4d -#python train.py \ -# --model=ResNeXt50_32x4d \ -# --batch_size=256 \ -# --total_images=1281167 \ -# --image_shape=3,224,224 \ -# --class_dim=1000 \ -# --lr_strategy=piecewise_decay \ -# --lr=0.1 \ -# --num_epochs=120 \ -# --with_mem_opt=True \ -# --model_save_dir=output/ \ -# --l2_decay=1e-4 - -#ResNeXt50_vd_32x4d -#python train.py \ -# --model=ResNeXt50_vd_32x4d \ -# --batch_size=256 \ -# --total_images=1281167 \ -# --image_shape=3,224,224 \ -# --class_dim=1000 \ -# --lr_strategy=cosine_decay \ -# --lr=0.1 \ -# --num_epochs=200 \ -# --with_mem_opt=True \ -# --model_save_dir=output/ \ -# --l2_decay=1e-4 \ -# --use_mixup=True \ -# --use_label_smoothing=True \ -# --label_smoothing_epsilon=0.1 \ - -#ResNeXt50_64x4d -#python train.py \ -# --model=ResNeXt50_64x4d \ -# --batch_size=256 \ -# --total_images=1281167 \ -# --image_shape=3,224,224 \ -# --class_dim=1000 \ -# --lr_strategy=piecewise_decay \ -# --lr=0.1 \ -# --num_epochs=120 \ -# --with_mem_opt=True \ -# --model_save_dir=output/ \ -# --l2_decay=1e-4 - -#ResNeXt50_vd_64x4d -#python train.py \ -# --model=ResNeXt50_vd_64x4d \ -# --batch_size=256 \ -# --total_images=1281167 \ -# --image_shape=3,224,224 \ -# --class_dim=1000 \ -# --lr_strategy=cosine_decay \ -# 
--lr=0.1 \ -# --num_epochs=200 \ -# --with_mem_opt=True \ -# --model_save_dir=output/ \ -# --l2_decay=1e-4 \ -# --use_mixup=True \ -# --use_label_smoothing=True \ -# --label_smoothing_epsilon=0.1 \ - -#ResNeXt101_32x4d -#python train.py \ -# --model=ResNeXt101_32x4d \ -# --batch_size=256 \ -# --total_images=1281167 \ -# --image_shape=3,224,224 \ -# --class_dim=1000 \ -# --lr_strategy=piecewise_decay \ -# --lr=0.1 \ -# --num_epochs=120 \ -# --with_mem_opt=True \ -# --model_save_dir=output/ \ -# --l2_decay=1e-4 - -#ResNeXt101_64x4d -#python train.py \ -# --model=ResNeXt101_64x4d \ -# --batch_size=256 \ -# --total_images=1281167 \ -# --image_shape=3,224,224 \ -# --class_dim=1000 \ -# --lr_strategy=piecewise_decay \ -# --lr=0.1 \ -# --num_epochs=120 \ -# --with_mem_opt=True \ -# --model_save_dir=output/ \ -# --l2_decay=15e-5 - -#ResNeXt101_vd_64x4d -# python train.py \ -# --model=ResNeXt101_vd_64x4d \ -# --batch_size=256 \ -# --total_images=1281167 \ -# --image_shape=3,224,224 \ -# --class_dim=1000 \ -# --lr_strategy=cosine_decay \ -# --lr=0.1 \ -# --num_epochs=200 \ -# --with_mem_opt=True \ -# --model_save_dir=output/ \ -# --l2_decay=1e-4 \ -# --use_mixup=True \ -# --use_label_smoothing=True \ -# --label_smoothing_epsilon=0.1 - -# ResNeXt152_32x4d -# python train.py \ -# --model=ResNeXt152_32x4d \ -# --batch_size=256 \ -# --total_images=1281167 \ -# --image_shape=3,224,224 \ -# --class_dim=1000 \ -# --lr_strategy=piecewise_decay \ -# --lr=0.1 \ -# --num_epochs=120 \ -# --with_mem_opt=True \ -# --model_save_dir=output/ \ -# --l2_decay=1e-4 - -#ResNeXt152_64x4d -#python train.py \ -# --model=ResNeXt152_64x4d \ -# --batch_size=256 \ -# --total_images=1281167 \ -# --image_shape=3,224,224 \ -# --class_dim=1000 \ -# --lr_strategy=piecewise_decay \ -# --lr=0.1 \ -# --num_epochs=120 \ -# --with_mem_opt=True \ -# --model_save_dir=output/ \ -# --l2_decay=18e-5 - -# DenseNet121 -# python train.py \ -# --model=DenseNet121 \ -# --batch_size=256 \ -# --total_images=1281167 \ -# 
--image_shape=3,224,224 \ -# --class_dim=1000 \ -# --lr_strategy=piecewise_decay \ -# --lr=0.1 \ -# --num_epochs=120 \ -# --with_mem_opt=True \ -# --model_save_dir=output/ \ -# --l2_decay=1e-4 - -# DenseNet161 -# python train.py \ -# --model=DenseNet161 \ -# --batch_size=256 \ -# --total_images=1281167 \ -# --image_shape=3,224,224 \ -# --class_dim=1000 \ -# --lr_strategy=piecewise_decay \ -# --lr=0.1 \ -# --num_epochs=120 \ -# --with_mem_opt=True \ -# --model_save_dir=output/ \ -# --l2_decay=1e-4 - -# DenseNet169 -# python train.py \ -# --model=DenseNet169 \ -# --batch_size=256 \ -# --total_images=1281167 \ -# --image_shape=3,224,224 \ -# --class_dim=1000 \ -# --lr_strategy=piecewise_decay \ -# --lr=0.1 \ -# --num_epochs=120 \ -# --with_mem_opt=True \ -# --model_save_dir=output/ \ -# --l2_decay=1e-4 - -# DenseNet201 -# python train.py \ -# --model=DenseNet201 \ -# --batch_size=256 \ -# --total_images=1281167 \ -# --image_shape=3,224,224 \ -# --class_dim=1000 \ -# --lr_strategy=piecewise_decay \ -# --lr=0.1 \ -# --num_epochs=120 \ -# --with_mem_opt=True \ -# --model_save_dir=output/ \ -# --l2_decay=1e-4 - -# DenseNet264 -# python train.py \ -# --model=DenseNet264 \ -# --batch_size=256 \ -# --total_images=1281167 \ -# --image_shape=3,224,224 \ -# --class_dim=1000 \ -# --lr_strategy=piecewise_decay \ -# --lr=0.1 \ -# --num_epochs=120 \ -# --with_mem_opt=True \ -# --model_save_dir=output/ \ -# --l2_decay=1e-4 - -#SE_ResNeXt50_32x4d: -#python train.py \ -# --model=SE_ResNeXt50_32x4d \ -# --batch_size=400 \ -# --total_images=1281167 \ -# --class_dim=1000 \ -# --image_shape=3,224,224 \ -# --lr_strategy=cosine_decay \ -# --model_save_dir=output/ \ -# --lr=0.1 \ -# --num_epochs=200 \ -# --with_mem_opt=True \ -# --l2_decay=1.2e-4 - -#SE_ResNeXt101_32x4d: -#python train.py \ -# --model=SE_ResNeXt101_32x4d \ -# --batch_size=400 \ -# --total_images=1281167 \ -# --class_dim=1000 \ -# --image_shape=3,224,224 \ -# --lr_strategy=cosine_decay \ -# --model_save_dir=output/ \ -# 
--lr=0.1 \ -# --num_epochs=200 \ -# --with_mem_opt=True \ -# --l2_decay=1.5e-5 - -# SE_154 -# python train.py \ -# --model=SE_154_vd \ -# --batch_size=256 \ -# --total_images=1281167 \ -# --image_shape=3,224,224 \ -# --class_dim=1000 \ -# --lr_strategy=cosine_decay \ -# --lr=0.1 \ -# --num_epochs=200 \ -# --with_mem_opt=True \ -# --model_save_dir=output/ \ -# --l2_decay=1e-4 \ -# --use_mixup=True \ -# --use_label_smoothing=True \ -# --label_smoothing_epsilon=0.1 \ - -#GoogleNet: -#python train.py \ -# --model=GoogleNet \ -# --batch_size=256 \ -# --total_images=1281167 \ -# --class_dim=1000 \ -# --image_shape=3,224,224 \ -# --model_save_dir=output/ \ -# --with_mem_opt=True \ -# --lr_strategy=cosine_decay \ -# --lr=0.01 \ -# --num_epochs=200 \ -# --l2_decay=1e-4 - -# Xception_41 -# python train.py \ -# --model=Xception_41 \ -# --batch_size=256 \ -# --total_images=1281167 \ -# --image_shape=3,224,224 \ -# --class_dim=1000 \ -# --lr_strategy=cosine_decay \ -# --lr=0.045 \ -# --num_epochs=120 \ -# --with_mem_opt=True \ -# --model_save_dir=output/ \ -# --l2_decay=1e-4 \ -# --resize_short_size=320 - -# InceptionV4 -# python train.py -# --model=InceptionV4 \ -# --batch_size=256 \ -# --total_images=1281167 \ -# --image_shape=3,299,299 \ -# --class_dim=1000 \ -# --lr_strategy=cosine_decay \ -# --lr=0.045 \ -# --num_epochs=200 \ -# --with_mem_opt=True \ -# --model_save_dir=output/ \ -# --l2_decay=1e-4 \ -# --use_mixup=True \ -# --resize_short_size=320 \ -# --use_label_smoothing=True \ -# --label_smoothing_epsilon=0.1 \ - -#DarkNet53 - python train.py -# --model=DarkNet53 \ -# --batch_size=256 \ -# --total_images=1281167 \ -# --image_shape=3,256,256 \ -# --class_dim=1000 \ -# --lr_strategy=cosine_decay \ -# --lr=0.1 \ -# --num_epochs=200 \ -# --with_mem_opt=True \ -# --model_save_dir=output/ \ -# --l2_decay=1e-4 \ -# --use_mixup=True \ -# --resize_short_size=256 \ -# --use_label_smoothing=True \ -# --label_smoothing_epsilon=0.1 \ - -#ResNet50 nGraph: -# Training: 
-#OMP_NUM_THREADS=`nproc` FLAGS_use_ngraph=true python train.py \ -# --model=ResNet50 \ -# --batch_size=128 \ -# --total_images=1281167 \ -# --class_dim=1000 \ -# --image_shape=3,224,224 \ -# --lr=0.001 \ -# --num_epochs=120 \ -# --with_mem_opt=False \ -# --model_save_dir=output/ \ -# --lr_strategy=adam \ -# --use_gpu=False -# Inference: -#OMP_NUM_THREADS=`nproc` FLAGS_use_ngraph=true python infer.py \ -# --use_gpu=false \ -# --model=ResNet50 \ -# --pretrained_model=ResNet50_pretrained +mode=$1 +model=$2 +if [ "$mode"x == "train"x ]; then + echo $mode $model + sh ./scripts/train/$model.sh +elif [ "$mode"x == "eval"x ]; then + echo "eval is not implenmented now, refer to README.md" +else + echo "Not implemented mode" $mode +fi diff --git a/PaddleCV/image_classification/scripts/train/AlexNet.sh b/PaddleCV/image_classification/scripts/train/AlexNet.sh new file mode 100644 index 00000000..6919f2b9 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/AlexNet.sh @@ -0,0 +1,19 @@ +#Training details +#GPU: NVIDIA® Tesla® P40 8cards 120epochs 55h +export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 +export FLAGS_fast_eager_deletion_mode=1 +export FLAGS_eager_delete_tensor_gb=0.0 +export FLAGS_fraction_of_gpu_memory_to_use=0.98 + +#AlexNet: +python train.py \ + --model=AlexNet \ + --batch_size=256 \ + --total_images=1281167 \ + --class_dim=1000 \ + --image_shape=3,224,224 \ + --model_save_dir=output/ \ + --lr_strategy=piecewise_decay \ + --num_epochs=120 \ + --lr=0.01 \ + --l2_decay=1e-4 diff --git a/PaddleCV/image_classification/scripts/train/DarkNet53.sh b/PaddleCV/image_classification/scripts/train/DarkNet53.sh new file mode 100644 index 00000000..a56f1ea8 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/DarkNet53.sh @@ -0,0 +1,17 @@ +#Training details +#DarkNet53 +python train.py \ + --model=DarkNet53 \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,256,256 \ + --class_dim=1000 \ + --lr_strategy=cosine_decay \ + --lr=0.1 \ + 
--num_epochs=200 \ + --model_save_dir=output/ \ + --l2_decay=1e-4 \ + --use_mixup=True \ + --resize_short_size=256 \ + --use_label_smoothing=True \ + --label_smoothing_epsilon=0.1 \ diff --git a/PaddleCV/image_classification/scripts/train/DenseNet121.sh b/PaddleCV/image_classification/scripts/train/DenseNet121.sh new file mode 100644 index 00000000..44ecaca2 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/DenseNet121.sh @@ -0,0 +1,13 @@ +#Training details +#DenseNet121 +python train.py \ + --model=DenseNet121 \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,224,224 \ + --class_dim=1000 \ + --lr_strategy=piecewise_decay \ + --lr=0.1 \ + --num_epochs=120 \ + --model_save_dir=output/ \ + --l2_decay=1e-4 diff --git a/PaddleCV/image_classification/scripts/train/DenseNet161.sh b/PaddleCV/image_classification/scripts/train/DenseNet161.sh new file mode 100644 index 00000000..4da3fb6f --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/DenseNet161.sh @@ -0,0 +1,13 @@ +#Training details +#DenseNet161 +python train.py \ + --model=DenseNet161 \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,224,224 \ + --class_dim=1000 \ + --lr_strategy=piecewise_decay \ + --lr=0.1 \ + --num_epochs=120 \ + --model_save_dir=output/ \ + --l2_decay=1e-4 diff --git a/PaddleCV/image_classification/scripts/train/DenseNet169.sh b/PaddleCV/image_classification/scripts/train/DenseNet169.sh new file mode 100644 index 00000000..2e5120e4 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/DenseNet169.sh @@ -0,0 +1,13 @@ +#Training details +#DenseNet169 +python train.py \ + --model=DenseNet169 \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,224,224 \ + --class_dim=1000 \ + --lr_strategy=piecewise_decay \ + --lr=0.1 \ + --num_epochs=120 \ + --model_save_dir=output/ \ + --l2_decay=1e-4 diff --git a/PaddleCV/image_classification/scripts/train/DenseNet201.sh 
b/PaddleCV/image_classification/scripts/train/DenseNet201.sh new file mode 100644 index 00000000..7535a86b --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/DenseNet201.sh @@ -0,0 +1,13 @@ +#DenseNet201 +#Training details +python train.py \ + --model=DenseNet201 \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,224,224 \ + --class_dim=1000 \ + --lr_strategy=piecewise_decay \ + --lr=0.1 \ + --num_epochs=120 \ + --model_save_dir=output/ \ + --l2_decay=1e-4 diff --git a/PaddleCV/image_classification/scripts/train/DenseNet264.sh b/PaddleCV/image_classification/scripts/train/DenseNet264.sh new file mode 100644 index 00000000..f10df370 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/DenseNet264.sh @@ -0,0 +1,13 @@ +#DenseNet264 +#Training details +python train.py \ + --model=DenseNet264 \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,224,224 \ + --class_dim=1000 \ + --lr_strategy=piecewise_decay \ + --lr=0.1 \ + --num_epochs=120 \ + --model_save_dir=output/ \ + --l2_decay=1e-4 diff --git a/PaddleCV/image_classification/scripts/train/GoogLeNet.sh b/PaddleCV/image_classification/scripts/train/GoogLeNet.sh new file mode 100644 index 00000000..63171b31 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/GoogLeNet.sh @@ -0,0 +1,19 @@ +#Training details +#GPU: NVIDIA® Tesla® V100 4cards 200epochs 132h +export CUDA_VISIBLE_DEVICES=0,1,2,3 +export FLAGS_fast_eager_deletion_mode=1 +export FLAGS_eager_delete_tensor_gb=0.0 +export FLAGS_fraction_of_gpu_memory_to_use=0.98 + +#GoogLeNet: +python train.py \ + --model=GoogLeNet \ + --batch_size=256 \ + --total_images=1281167 \ + --class_dim=1000 \ + --image_shape=3,224,224 \ + --model_save_dir=output/ \ + --lr_strategy=cosine_decay \ + --lr=0.01 \ + --num_epochs=200 \ + --l2_decay=1e-4 diff --git a/PaddleCV/image_classification/scripts/train/InceptionV4.sh b/PaddleCV/image_classification/scripts/train/InceptionV4.sh new file mode 100644 index 
00000000..ba3c4954 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/InceptionV4.sh @@ -0,0 +1,23 @@ +#Training details +#GPU: NVIDIA® Tesla® V100 8cards 200epochs 367h +export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 +export FLAGS_fast_eager_deletion_mode=1 +export FLAGS_eager_delete_tensor_gb=0.0 +export FLAGS_fraction_of_gpu_memory_to_use=0.98 + +#InceptionV4 +python train.py \ + --model=InceptionV4 \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,299,299 \ + --class_dim=1000 \ + --lr_strategy=cosine_decay \ + --lr=0.045 \ + --num_epochs=200 \ + --model_save_dir=output/ \ + --l2_decay=1e-4 \ + --use_mixup=True \ + --resize_short_size=320 \ + --use_label_smoothing=True \ + --label_smoothing_epsilon=0.1 \ diff --git a/PaddleCV/image_classification/scripts/train/MobileNetV1.sh b/PaddleCV/image_classification/scripts/train/MobileNetV1.sh new file mode 100644 index 00000000..8d00ce7c --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/MobileNetV1.sh @@ -0,0 +1,19 @@ +#Training details +#GPU: NVIDIA® Tesla® V100 4cards 120epochs 55h +export CUDA_VISIBLE_DEVICES=0,1,2,3 +export FLAGS_fast_eager_deletion_mode=1 +export FLAGS_eager_delete_tensor_gb=0.0 +export FLAGS_fraction_of_gpu_memory_to_use=0.98 + + +python train.py \ + --model=MobileNetV1 \ + --batch_size=256 \ + --total_images=1281167 \ + --class_dim=1000 \ + --image_shape=3,224,224 \ + --model_save_dir=output/ \ + --lr_strategy=piecewise_decay \ + --num_epochs=120 \ + --lr=0.1 \ + --l2_decay=3e-5 \ diff --git a/PaddleCV/image_classification/scripts/train/MobileNetV2.sh b/PaddleCV/image_classification/scripts/train/MobileNetV2.sh new file mode 100644 index 00000000..7a0ce41c --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/MobileNetV2.sh @@ -0,0 +1,19 @@ +#Training details +#GPU: NVIDIA® Tesla® V100 4cards 240epochs 135h +export CUDA_VISIBLE_DEVICES=0,1,2,3 +export FLAGS_fast_eager_deletion_mode=1 +export FLAGS_eager_delete_tensor_gb=0.0 +export 
FLAGS_fraction_of_gpu_memory_to_use=0.98 + + +python train.py \ + --model=MobileNetV2 \ + --batch_size=500 \ + --total_images=1281167 \ + --class_dim=1000 \ + --image_shape=3,224,224 \ + --model_save_dir=output/ \ + --lr_strategy=cosine_decay \ + --num_epochs=240 \ + --lr=0.1 \ + --l2_decay=4e-5 diff --git a/PaddleCV/image_classification/scripts/train/MobileNetV2_x0_25.sh b/PaddleCV/image_classification/scripts/train/MobileNetV2_x0_25.sh new file mode 100644 index 00000000..8bdb0de8 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/MobileNetV2_x0_25.sh @@ -0,0 +1,14 @@ +#MobileNetV2_x0_25 +python train.py \ + --model=MobileNetV2_x0_25 \ + --batch_size=256 \ + --total_images=1281167 \ + --class_dim=1000 \ + --image_shape=3,224,224 \ + --model_save_dir=output/ \ + --lr_strategy=cosine_decay \ + --num_epochs=240 \ + --lr=0.1 \ + --l2_decay=3e-5 \ + --lower_ratio=1.0 \ + --upper_ratio=1.0 diff --git a/PaddleCV/image_classification/scripts/train/MobileNetV2_x0_5.sh b/PaddleCV/image_classification/scripts/train/MobileNetV2_x0_5.sh new file mode 100644 index 00000000..f0ba07ad --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/MobileNetV2_x0_5.sh @@ -0,0 +1,14 @@ +#MobileNetV2_x0_5 +python train.py \ + --model=MobileNetV2_x0_5 \ + --batch_size=256 \ + --total_images=1281167 \ + --class_dim=1000 \ + --image_shape=3,224,224 \ + --model_save_dir=output/ \ + --lr_strategy=cosine_decay \ + --num_epochs=240 \ + --lr=0.1 \ + --l2_decay=3e-5 \ + --lower_ratio=1.0 \ + --upper_ratio=1.0 diff --git a/PaddleCV/image_classification/scripts/train/MobileNetV2_x1_5.sh b/PaddleCV/image_classification/scripts/train/MobileNetV2_x1_5.sh new file mode 100644 index 00000000..f0ed2a0b --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/MobileNetV2_x1_5.sh @@ -0,0 +1,12 @@ +#MobileNetV2_x1_5 +python train.py \ + --model=MobileNetV2_x1_5 \ + --batch_size=256 \ + --total_images=1281167 \ + --class_dim=1000 \ + --image_shape=3,224,224 \ + 
--model_save_dir=output/ \ + --lr_strategy=cosine_decay \ + --num_epochs=240 \ + --lr=0.1 \ + --l2_decay=4e-5 diff --git a/PaddleCV/image_classification/scripts/train/MobileNetV2_x2_0.sh b/PaddleCV/image_classification/scripts/train/MobileNetV2_x2_0.sh new file mode 100644 index 00000000..dcfe0b85 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/MobileNetV2_x2_0.sh @@ -0,0 +1,12 @@ +#MobileNetV2_x2_0 +python train.py \ + --model=MobileNetV2_x2_0 \ + --batch_size=256 \ + --total_images=1281167 \ + --class_dim=1000 \ + --image_shape=3,224,224 \ + --model_save_dir=output/ \ + --lr_strategy=cosine_decay \ + --num_epochs=240 \ + --lr=0.1 \ + --l2_decay=4e-5 diff --git a/PaddleCV/image_classification/scripts/train/ResNeXt101_32x4d.sh b/PaddleCV/image_classification/scripts/train/ResNeXt101_32x4d.sh new file mode 100644 index 00000000..e39008f5 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/ResNeXt101_32x4d.sh @@ -0,0 +1,14 @@ +#ResNeXt101_32x4d +#Training details + +python train.py \ + --model=ResNeXt101_32x4d \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,224,224 \ + --class_dim=1000 \ + --lr_strategy=piecewise_decay \ + --lr=0.1 \ + --num_epochs=120 \ + --model_save_dir=output/ \ + --l2_decay=1e-4 diff --git a/PaddleCV/image_classification/scripts/train/ResNeXt101_64x4d.sh b/PaddleCV/image_classification/scripts/train/ResNeXt101_64x4d.sh new file mode 100644 index 00000000..f5aeb3a3 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/ResNeXt101_64x4d.sh @@ -0,0 +1,19 @@ +#Training details +#Missed +export CUDA_VISIBLE_DEVICES=0,1,2,3 +export FLAGS_fast_eager_deletion_mode=1 +export FLAGS_eager_delete_tensor_gb=0.0 +export FLAGS_fraction_of_gpu_memory_to_use=0.98 + +#ResNeXt101_64x4d +python train.py \ + --model=ResNeXt101_64x4d \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,224,224 \ + --class_dim=1000 \ + --lr_strategy=piecewise_decay \ + --lr=0.1 \ + --num_epochs=120 \ + 
--model_save_dir=output/ \ + --l2_decay=15e-5 diff --git a/PaddleCV/image_classification/scripts/train/ResNeXt101_vd_64x4d.sh b/PaddleCV/image_classification/scripts/train/ResNeXt101_vd_64x4d.sh new file mode 100644 index 00000000..f3d11779 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/ResNeXt101_vd_64x4d.sh @@ -0,0 +1,22 @@ +#Training details +#Missed +export CUDA_VISIBLE_DEVICES=0,1,2,3 +export FLAGS_fast_eager_deletion_mode=1 +export FLAGS_eager_delete_tensor_gb=0.0 +export FLAGS_fraction_of_gpu_memory_to_use=0.98 + +#ResNeXt101_vd_64x4d +python train.py \ + --model=ResNeXt101_vd_64x4d \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,224,224 \ + --class_dim=1000 \ + --lr_strategy=cosine_decay \ + --lr=0.1 \ + --num_epochs=200 \ + --model_save_dir=output/ \ + --l2_decay=1e-4 \ + --use_mixup=True \ + --use_label_smoothing=True \ + --label_smoothing_epsilon=0.1 diff --git a/PaddleCV/image_classification/scripts/train/ResNeXt152_32x4d.sh b/PaddleCV/image_classification/scripts/train/ResNeXt152_32x4d.sh new file mode 100644 index 00000000..1b81968b --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/ResNeXt152_32x4d.sh @@ -0,0 +1,12 @@ +# ResNeXt152_32x4d + python train.py \ + --model=ResNeXt152_32x4d \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,224,224 \ + --class_dim=1000 \ + --lr_strategy=piecewise_decay \ + --lr=0.1 \ + --num_epochs=120 \ + --model_save_dir=output/ \ + --l2_decay=1e-4 diff --git a/PaddleCV/image_classification/scripts/train/ResNeXt152_64x4d.sh b/PaddleCV/image_classification/scripts/train/ResNeXt152_64x4d.sh new file mode 100644 index 00000000..0a1bd518 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/ResNeXt152_64x4d.sh @@ -0,0 +1,18 @@ +#Training details +export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 +export FLAGS_fast_eager_deletion_mode=1 +export FLAGS_eager_delete_tensor_gb=0.0 +export FLAGS_fraction_of_gpu_memory_to_use=0.98 + +#ResNeXt152_64x4d +python 
train.py \ + --model=ResNeXt152_64x4d \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,224,224 \ + --class_dim=1000 \ + --lr_strategy=piecewise_decay \ + --lr=0.1 \ + --num_epochs=120 \ + --model_save_dir=output/ \ + --l2_decay=18e-5 diff --git a/PaddleCV/image_classification/scripts/train/ResNeXt50_32x4d.sh b/PaddleCV/image_classification/scripts/train/ResNeXt50_32x4d.sh new file mode 100644 index 00000000..91e64708 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/ResNeXt50_32x4d.sh @@ -0,0 +1,11 @@ +python train.py \ + --model=ResNeXt50_32x4d \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,224,224 \ + --class_dim=1000 \ + --lr_strategy=piecewise_decay \ + --lr=0.1 \ + --num_epochs=120 \ + --model_save_dir=output/ \ + --l2_decay=1e-4 diff --git a/PaddleCV/image_classification/scripts/train/ResNeXt50_vd_64x4d.sh b/PaddleCV/image_classification/scripts/train/ResNeXt50_vd_64x4d.sh new file mode 100644 index 00000000..9728535b --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/ResNeXt50_vd_64x4d.sh @@ -0,0 +1,15 @@ +#ResNeXt50_vd_64x4d +python train.py \ + --model=ResNeXt50_vd_64x4d \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,224,224 \ + --class_dim=1000 \ + --lr_strategy=cosine_decay \ + --lr=0.1 \ + --num_epochs=200 \ + --model_save_dir=output/ \ + --l2_decay=1e-4 \ + --use_mixup=True \ + --use_label_smoothing=True \ + --label_smoothing_epsilon=0.1 \ diff --git a/PaddleCV/image_classification/scripts/train/ResNet101.sh b/PaddleCV/image_classification/scripts/train/ResNet101.sh new file mode 100644 index 00000000..a2af4385 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/ResNet101.sh @@ -0,0 +1,19 @@ +##Training details +#GPU: NVIDIA® Tesla® V100 4cards 120epochs 100h +export CUDA_VISIBLE_DEVICES=0,1,2,3 +export FLAGS_fast_eager_deletion_mode=1 +export FLAGS_eager_delete_tensor_gb=0.0 +export FLAGS_fraction_of_gpu_memory_to_use=0.98 + +#ResNet101: +python 
train.py \ + --model=ResNet101 \ + --batch_size=256 \ + --total_images=1281167 \ + --class_dim=1000 \ + --image_shape=3,224,224 \ + --model_save_dir=output/ \ + --lr_strategy=piecewise_decay \ + --num_epochs=120 \ + --lr=0.1 \ + --l2_decay=1e-4 diff --git a/PaddleCV/image_classification/scripts/train/ResNet101_vd.sh b/PaddleCV/image_classification/scripts/train/ResNet101_vd.sh new file mode 100644 index 00000000..b9bdf778 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/ResNet101_vd.sh @@ -0,0 +1,22 @@ +#Training details +#GPU: NVIDIA® Tesla® V100 4cards 200epochs 182h +export CUDA_VISIBLE_DEVICES=0,1,2,3 +export FLAGS_fast_eager_deletion_mode=1 +export FLAGS_eager_delete_tensor_gb=0.0 +export FLAGS_fraction_of_gpu_memory_to_use=0.98 + +#ResNet101_vd +python train.py \ + --model=ResNet101_vd \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,224,224 \ + --class_dim=1000 \ + --lr_strategy=cosine_decay \ + --lr=0.1 \ + --num_epochs=200 \ + --model_save_dir=output/ \ + --l2_decay=1e-4 \ + --use_mixup=True \ + --use_label_smoothing=True \ + --label_smoothing_epsilon=0.1 diff --git a/PaddleCV/image_classification/scripts/train/ResNet152.sh b/PaddleCV/image_classification/scripts/train/ResNet152.sh new file mode 100644 index 00000000..44275753 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/ResNet152.sh @@ -0,0 +1,19 @@ +##Training details +#GPU: NVIDIA® Tesla® P40 8cards 120epochs 200h +export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 +export FLAGS_fast_eager_deletion_mode=1 +export FLAGS_eager_delete_tensor_gb=0.0 +export FLAGS_fraction_of_gpu_memory_to_use=0.98 + +#ResNet152: +python train.py \ + --model=ResNet152 \ + --batch_size=256 \ + --total_images=1281167 \ + --class_dim=1000 \ + --image_shape=3,224,224 \ + --model_save_dir=output/ \ + --lr_strategy=piecewise_decay \ + --lr=0.1 \ + --num_epochs=120 \ + --l2_decay=1e-4 diff --git a/PaddleCV/image_classification/scripts/train/ResNet152_vd.sh 
b/PaddleCV/image_classification/scripts/train/ResNet152_vd.sh new file mode 100644 index 00000000..b4cb84ad --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/ResNet152_vd.sh @@ -0,0 +1,21 @@ +##Training details +#GPU: NVIDIA® Tesla® P40 8cards 200epochs 346h +export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 +export FLAGS_fast_eager_deletion_mode=1 +export FLAGS_eager_delete_tensor_gb=0.0 +export FLAGS_fraction_of_gpu_memory_to_use=0.98 + +python train.py \ + --model=ResNet152_vd \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,224,224 \ + --class_dim=1000 \ + --lr_strategy=cosine_decay \ + --lr=0.1 \ + --num_epochs=200 \ + --model_save_dir=output/ \ + --l2_decay=1e-4 \ + --use_mixup=True \ + --use_label_smoothing=True \ + --label_smoothing_epsilon=0.1 diff --git a/PaddleCV/image_classification/scripts/train/ResNet18.sh b/PaddleCV/image_classification/scripts/train/ResNet18.sh new file mode 100644 index 00000000..b3d1018c --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/ResNet18.sh @@ -0,0 +1,19 @@ +##Training details +#GPU: NVIDIA® Tesla® V100 4cards 120epochs 67h +export CUDA_VISIBLE_DEVICES=0,1,2,3 +export FLAGS_fast_eager_deletion_mode=1 +export FLAGS_eager_delete_tensor_gb=0.0 +export FLAGS_fraction_of_gpu_memory_to_use=0.98 + +#ResNet18: +python train.py \ + --model=ResNet18 \ + --batch_size=256 \ + --total_images=1281167 \ + --class_dim=1000 \ + --image_shape=3,224,224 \ + --model_save_dir=output/ \ + --lr_strategy=cosine_decay \ + --lr=0.1 \ + --num_epochs=120 \ + --l2_decay=1e-4 diff --git a/PaddleCV/image_classification/scripts/train/ResNet200_vd.sh b/PaddleCV/image_classification/scripts/train/ResNet200_vd.sh new file mode 100644 index 00000000..464db8ac --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/ResNet200_vd.sh @@ -0,0 +1,22 @@ +#Training details +#Machine: Missed +export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 +export FLAGS_fast_eager_deletion_mode=1 +export FLAGS_eager_delete_tensor_gb=0.0 
+export FLAGS_fraction_of_gpu_memory_to_use=0.98 + +#ResNet200_vd +python train.py \ + --model=ResNet200_vd \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,224,224 \ + --class_dim=1000 \ + --lr_strategy=cosine_decay \ + --lr=0.1 \ + --num_epochs=200 \ + --model_save_dir=output/ \ + --l2_decay=1e-4 \ + --use_mixup=True \ + --use_label_smoothing=True \ + --label_smoothing_epsilon=0.1 diff --git a/PaddleCV/image_classification/scripts/train/ResNet34.sh b/PaddleCV/image_classification/scripts/train/ResNet34.sh new file mode 100644 index 00000000..5ce4689b --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/ResNet34.sh @@ -0,0 +1,18 @@ +#Training details +#GPU: NVIDIA® Tesla® V100 4cards 120epochs 73h +export CUDA_VISIBLE_DEVICES=0,1,2,3 +export FLAGS_fast_eager_deletion_mode=1 +export FLAGS_eager_delete_tensor_gb=0.0 +export FLAGS_fraction_of_gpu_memory_to_use=0.98 +#ResNet34: +python train.py \ + --model=ResNet34 \ + --batch_size=256 \ + --total_images=1281167 \ + --class_dim=1000 \ + --image_shape=3,224,224 \ + --model_save_dir=output/ \ + --lr_strategy=cosine_decay \ + --lr=0.1 \ + --num_epochs=120 \ + --l2_decay=1e-4 diff --git a/PaddleCV/image_classification/scripts/train/ResNet50.sh b/PaddleCV/image_classification/scripts/train/ResNet50.sh new file mode 100644 index 00000000..47063075 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/ResNet50.sh @@ -0,0 +1,19 @@ +##Training details +#GPU: NVIDIA® Tesla® V100 4cards 120epochs 67h +export CUDA_VISIBLE_DEVICES=0,1,2,3 +export FLAGS_fast_eager_deletion_mode=1 +export FLAGS_eager_delete_tensor_gb=0.0 +export FLAGS_fraction_of_gpu_memory_to_use=0.98 + +#ResNet50: +python train.py \ + --model=ResNet50 \ + --batch_size=256 \ + --total_images=1281167 \ + --class_dim=1000 \ + --image_shape=3,224,224 \ + --model_save_dir=output/ \ + --lr_strategy=piecewise_decay \ + --num_epochs=120 \ + --lr=0.1 \ + --l2_decay=1e-4 diff --git 
a/PaddleCV/image_classification/scripts/train/ResNet50_vc.sh b/PaddleCV/image_classification/scripts/train/ResNet50_vc.sh new file mode 100644 index 00000000..d5d0cc5e --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/ResNet50_vc.sh @@ -0,0 +1,19 @@ +##Training details +#GPU: NVIDIA® Tesla® V100 4cards 200epochs 141h +export CUDA_VISIBLE_DEVICES=0,1,2,3 +export FLAGS_fast_eager_deletion_mode=1 +export FLAGS_eager_delete_tensor_gb=0.0 +export FLAGS_fraction_of_gpu_memory_to_use=0.98 + +#ResNet50_vc +python train.py \ + --model=ResNet50_vc \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,224,224 \ + --class_dim=1000 \ + --lr_strategy=cosine_decay \ + --lr=0.1 \ + --num_epochs=200 \ + --model_save_dir=output/ \ + --l2_decay=1e-4 \ diff --git a/PaddleCV/image_classification/scripts/train/ResNet50_vd.sh b/PaddleCV/image_classification/scripts/train/ResNet50_vd.sh new file mode 100644 index 00000000..968e3dd0 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/ResNet50_vd.sh @@ -0,0 +1,21 @@ +#Training details +#GPU: NVIDIA® Tesla® V100 4cards 200epochs 120h +export CUDA_VISIBLE_DEVICES=0,1,2,3 +export FLAGS_fast_eager_deletion_mode=1 +export FLAGS_eager_delete_tensor_gb=0.0 +export FLAGS_fraction_of_gpu_memory_to_use=0.98 + +python train.py \ + --model=ResNet50_vd \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,224,224 \ + --class_dim=1000 \ + --lr_strategy=cosine_decay \ + --lr=0.1 \ + --num_epochs=200 \ + --model_save_dir=output/ \ + --l2_decay=7e-5 \ + --use_mixup=True \ + --use_label_smoothing=True \ + --label_smoothing_epsilon=0.1 diff --git a/PaddleCV/image_classification/scripts/train/SE_154_vd.sh b/PaddleCV/image_classification/scripts/train/SE_154_vd.sh new file mode 100644 index 00000000..e61cc6fd --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/SE_154_vd.sh @@ -0,0 +1,22 @@ +#Training details +#GPU: NVIDIA® Tesla® P40 8cards 200epochs 916h +export 
CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 +export FLAGS_fast_eager_deletion_mode=1 +export FLAGS_eager_delete_tensor_gb=0.0 +export FLAGS_fraction_of_gpu_memory_to_use=0.98 + +#SE_154 +python train.py \ + --model=SE_154_vd \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,224,224 \ + --class_dim=1000 \ + --lr_strategy=cosine_decay \ + --lr=0.1 \ + --num_epochs=200 \ + --model_save_dir=output/ \ + --l2_decay=1e-4 \ + --use_mixup=True \ + --use_label_smoothing=True \ + --label_smoothing_epsilon=0.1 \ diff --git a/PaddleCV/image_classification/scripts/train/SE_ResNeXt101_32x4d.sh b/PaddleCV/image_classification/scripts/train/SE_ResNeXt101_32x4d.sh new file mode 100644 index 00000000..a385814a --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/SE_ResNeXt101_32x4d.sh @@ -0,0 +1,19 @@ +##Training details +#GPU: NVIDIA® Tesla® P40 8cards 120epochs 566h +export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 +export FLAGS_fast_eager_deletion_mode=1 +export FLAGS_eager_delete_tensor_gb=0.0 +export FLAGS_fraction_of_gpu_memory_to_use=0.98 + +#SE_ResNeXt101_32x4d: +python train.py \ + --model=SE_ResNeXt101_32x4d \ + --batch_size=400 \ + --total_images=1281167 \ + --class_dim=1000 \ + --image_shape=3,224,224 \ + --lr_strategy=cosine_decay \ + --model_save_dir=output/ \ + --lr=0.1 \ + --num_epochs=200 \ + --l2_decay=1.5e-5 diff --git a/PaddleCV/image_classification/scripts/train/SE_ResNeXt50_32x4d.sh b/PaddleCV/image_classification/scripts/train/SE_ResNeXt50_32x4d.sh new file mode 100644 index 00000000..acfadb80 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/SE_ResNeXt50_32x4d.sh @@ -0,0 +1,20 @@ +#Training details +#Machine:V100 4cards 200epochs 282h +export CUDA_VISIBLE_DEVICES=0,1,2,3 +export FLAGS_fast_eager_deletion_mode=1 +export FLAGS_eager_delete_tensor_gb=0.0 +export FLAGS_fraction_of_gpu_memory_to_use=0.98 + + +#SE_ResNeXt50_32x4d: +python train.py \ + --model=SE_ResNeXt50_32x4d \ + --batch_size=400 \ + --total_images=1281167 \ + 
--class_dim=1000 \ + --image_shape=3,224,224 \ + --lr_strategy=cosine_decay \ + --model_save_dir=output/ \ + --lr=0.1 \ + --num_epochs=200 \ + --l2_decay=1.2e-4 diff --git a/PaddleCV/image_classification/scripts/train/ShuffleNetV2.sh b/PaddleCV/image_classification/scripts/train/ShuffleNetV2.sh new file mode 100644 index 00000000..369e5879 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/ShuffleNetV2.sh @@ -0,0 +1,18 @@ +##Training details +#GPU: NVIDIA® Tesla® K40 4cards 240epochs 156h +export CUDA_VISIBLE_DEVICES=0,1,2,3 +export FLAGS_fast_eager_deletion_mode=1 +export FLAGS_eager_delete_tensor_gb=0.0 +export FLAGS_fraction_of_gpu_memory_to_use=0.98 + +python train.py \ + --model=ShuffleNetV2 \ + --batch_size=1024 \ + --total_images=1281167 \ + --class_dim=1000 \ + --image_shape=3,224,224 \ + --model_save_dir=output/ \ + --lr_strategy=cosine_decay_warmup \ + --lr=0.5 \ + --num_epochs=240 \ + --l2_decay=4e-5 diff --git a/PaddleCV/image_classification/scripts/train/ShuffleNetV2_swish.sh b/PaddleCV/image_classification/scripts/train/ShuffleNetV2_swish.sh new file mode 100644 index 00000000..b3e29dd3 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/ShuffleNetV2_swish.sh @@ -0,0 +1,18 @@ +##Training details +#GPU: NVIDIA® Tesla® K40 4cards 240epochs 156h +export CUDA_VISIBLE_DEVICES=0,1,2,3 +export FLAGS_fast_eager_deletion_mode=1 +export FLAGS_eager_delete_tensor_gb=0.0 +export FLAGS_fraction_of_gpu_memory_to_use=0.98 + +python train.py \ + --model=ShuffleNetV2_swish \ + --batch_size=1024 \ + --total_images=1281167 \ + --class_dim=1000 \ + --image_shape=3,224,224 \ + --model_save_dir=output/ \ + --lr_strategy=cosine_decay_warmup \ + --lr=0.5 \ + --num_epochs=240 \ + --l2_decay=4e-5 diff --git a/PaddleCV/image_classification/scripts/train/ShuffleNetV2_x0_25.sh b/PaddleCV/image_classification/scripts/train/ShuffleNetV2_x0_25.sh new file mode 100644 index 00000000..449119d6 --- /dev/null +++ 
b/PaddleCV/image_classification/scripts/train/ShuffleNetV2_x0_25.sh @@ -0,0 +1,15 @@ +#ShuffleNetV2_x0_25: +python train.py \ + --model=ShuffleNetV2_x0_25 \ + --batch_size=1024 \ + --total_images=1281167 \ + --class_dim=1000 \ + --image_shape=3,224,224 \ + --model_save_dir=output/ \ + --lr_strategy=cosine_decay_warmup \ + --num_epochs=240 \ + --lr=0.5 \ + --l2_decay=3e-5 \ + --lower_scale=0.64 \ + --lower_ratio=0.8 \ + --upper_ratio=1.2 diff --git a/PaddleCV/image_classification/scripts/train/ShuffleNetV2_x0_33.sh b/PaddleCV/image_classification/scripts/train/ShuffleNetV2_x0_33.sh new file mode 100644 index 00000000..f38655b8 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/ShuffleNetV2_x0_33.sh @@ -0,0 +1,15 @@ +#ShuffleNetV2_x0_33: +python train.py \ + --model=ShuffleNetV2_x0_33 \ + --batch_size=1024 \ + --total_images=1281167 \ + --class_dim=1000 \ + --image_shape=3,224,224 \ + --model_save_dir=output/ \ + --lr_strategy=cosine_decay_warmup \ + --num_epochs=240 \ + --lr=0.5 \ + --l2_decay=3e-5 \ + --lower_scale=0.64 \ + --lower_ratio=0.8 \ + --upper_ratio=1.2 diff --git a/PaddleCV/image_classification/scripts/train/ShuffleNetV2_x0_5.sh b/PaddleCV/image_classification/scripts/train/ShuffleNetV2_x0_5.sh new file mode 100644 index 00000000..3cb89a4b --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/ShuffleNetV2_x0_5.sh @@ -0,0 +1,15 @@ +#ShuffleNetV2_x0_5: +python train.py \ + --model=ShuffleNetV2_x0_5 \ + --batch_size=1024 \ + --total_images=1281167 \ + --class_dim=1000 \ + --image_shape=3,224,224 \ + --model_save_dir=output/ \ + --lr_strategy=cosine_decay_warmup \ + --num_epochs=240 \ + --lr=0.5 \ + --l2_decay=3e-5 \ + --lower_scale=0.64 \ + --lower_ratio=0.8 \ + --upper_ratio=1.2 diff --git a/PaddleCV/image_classification/scripts/train/ShuffleNetV2_x1_5.sh b/PaddleCV/image_classification/scripts/train/ShuffleNetV2_x1_5.sh new file mode 100644 index 00000000..459bcbc4 --- /dev/null +++ 
b/PaddleCV/image_classification/scripts/train/ShuffleNetV2_x1_5.sh @@ -0,0 +1,13 @@ +python train.py \ + --model=ShuffleNetV2_x1_5 \ + --batch_size=512 \ + --total_images=1281167 \ + --class_dim=1000 \ + --image_shape=3,224,224 \ + --model_save_dir=output/ \ + --lr_strategy=cosine_decay_warmup \ + --num_epochs=240 \ + --lr=0.25 \ + --l2_decay=4e-5 \ + --lower_ratio=1.0 \ + --upper_ratio=1.0 diff --git a/PaddleCV/image_classification/scripts/train/ShuffleNetV2_x2_0.sh b/PaddleCV/image_classification/scripts/train/ShuffleNetV2_x2_0.sh new file mode 100644 index 00000000..087e0254 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/ShuffleNetV2_x2_0.sh @@ -0,0 +1,12 @@ +#ShuffleNetV2_x2_0: +python train.py \ + --model=ShuffleNetV2_x2_0 \ + --batch_size=512 \ + --total_images=1281167 \ + --class_dim=1000 \ + --image_shape=3,224,224 \ + --model_save_dir=output/ \ + --lr_strategy=cosine_decay_warmup \ + --num_epochs=240 \ + --lr=0.25 \ + --l2_decay=4e-5 diff --git a/PaddleCV/image_classification/scripts/train/SqueezeNet1_0.sh b/PaddleCV/image_classification/scripts/train/SqueezeNet1_0.sh new file mode 100644 index 00000000..ee722bfd --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/SqueezeNet1_0.sh @@ -0,0 +1,12 @@ +#SqueezeNet1_0 +python train.py \ + --model=SqueezeNet1_0 \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,224,224 \ + --lr_strategy=cosine_decay \ + --class_dim=1000 \ + --model_save_dir=output/ \ + --lr=0.02 \ + --num_epochs=120 \ + --l2_decay=1e-4 diff --git a/PaddleCV/image_classification/scripts/train/SqueezeNet1_1.sh b/PaddleCV/image_classification/scripts/train/SqueezeNet1_1.sh new file mode 100644 index 00000000..70bd773d --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/SqueezeNet1_1.sh @@ -0,0 +1,12 @@ +#SqueezeNet1_1 +python train.py \ + --model=SqueezeNet1_1 \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,224,224 \ + --lr_strategy=cosine_decay \ + --class_dim=1000 \ + 
--model_save_dir=output/ \ + --lr=0.02 \ + --num_epochs=120 \ + --l2_decay=1e-4 diff --git a/PaddleCV/image_classification/scripts/train/VGG11.sh b/PaddleCV/image_classification/scripts/train/VGG11.sh new file mode 100644 index 00000000..ad8934e4 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/VGG11.sh @@ -0,0 +1,19 @@ +#Training details +#GPU: NVIDIA® Tesla® P40 8cards 90epochs 52h +export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 +export FLAGS_fast_eager_deletion_mode=1 +export FLAGS_eager_delete_tensor_gb=0.0 +export FLAGS_fraction_of_gpu_memory_to_use=0.98 + +#VGG11: +python train.py \ + --model=VGG11 \ + --batch_size=512 \ + --total_images=1281167 \ + --image_shape=3,224,224 \ + --lr_strategy=cosine_decay \ + --class_dim=1000 \ + --model_save_dir=output/ \ + --lr=0.1 \ + --num_epochs=90 \ + --l2_decay=2e-4 diff --git a/PaddleCV/image_classification/scripts/train/VGG13.sh b/PaddleCV/image_classification/scripts/train/VGG13.sh new file mode 100644 index 00000000..24960f88 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/VGG13.sh @@ -0,0 +1,19 @@ +#Training details +#GPU: NVIDIA® Tesla® V100 4cards 90epochs 58h +export CUDA_VISIBLE_DEVICES=0,1,2,3 +export FLAGS_fast_eager_deletion_mode=1 +export FLAGS_eager_delete_tensor_gb=0.0 +export FLAGS_fraction_of_gpu_memory_to_use=0.98 + +#VGG13: +python train.py \ + --model=VGG13 \ + --batch_size=256 \ + --total_images=1281167 \ + --class_dim=1000 \ + --image_shape=3,224,224 \ + --lr_strategy=cosine_decay \ + --lr=0.01 \ + --num_epochs=90 \ + --model_save_dir=output/ \ + --l2_decay=3e-4 diff --git a/PaddleCV/image_classification/scripts/train/VGG16.sh b/PaddleCV/image_classification/scripts/train/VGG16.sh new file mode 100644 index 00000000..ebf5a356 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/VGG16.sh @@ -0,0 +1,19 @@ +#Training details +#GPU: NVIDIA® Tesla® P40 8cards 90epochs 72h +export CUDA_VISIBLE_DEVICES=0,1,2,3,4,5,6,7 +export FLAGS_fast_eager_deletion_mode=1 +export 
FLAGS_eager_delete_tensor_gb=0.0 +export FLAGS_fraction_of_gpu_memory_to_use=0.98 + +#VGG16: +python train.py \ + --model=VGG16 \ + --batch_size=256 \ + --total_images=1281167 \ + --class_dim=1000 \ + --lr_strategy=cosine_decay \ + --image_shape=3,224,224 \ + --model_save_dir=output/ \ + --lr=0.01 \ + --num_epochs=90 \ + --l2_decay=3e-4 diff --git a/PaddleCV/image_classification/scripts/train/VGG19.sh b/PaddleCV/image_classification/scripts/train/VGG19.sh new file mode 100644 index 00000000..bca6a002 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/VGG19.sh @@ -0,0 +1,19 @@ +#Training details +#GPU: NVIDIA® Tesla® V100 4cards 150epochs 173h +export CUDA_VISIBLE_DEVICES=0,1,2,3 +export FLAGS_fast_eager_deletion_mode=1 +export FLAGS_eager_delete_tensor_gb=0.0 +export FLAGS_fraction_of_gpu_memory_to_use=0.98 + +#VGG19: +python train.py \ + --model=VGG19 \ + --batch_size=256 \ + --total_images=1281167 \ + --class_dim=1000 \ + --image_shape=3,224,224 \ + --lr_strategy=cosine_decay \ + --lr=0.01 \ + --num_epochs=150 \ + --model_save_dir=output/ \ + --l2_decay=4e-4 diff --git a/PaddleCV/image_classification/scripts/train/Xception_41.sh b/PaddleCV/image_classification/scripts/train/Xception_41.sh new file mode 100644 index 00000000..57ec3910 --- /dev/null +++ b/PaddleCV/image_classification/scripts/train/Xception_41.sh @@ -0,0 +1,12 @@ +python train.py \ + --model=Xception_41 \ + --batch_size=256 \ + --total_images=1281167 \ + --image_shape=3,224,224 \ + --class_dim=1000 \ + --lr_strategy=cosine_decay \ + --lr=0.045 \ + --num_epochs=120 \ + --model_save_dir=output/ \ + --l2_decay=1e-4 \ + --resize_short_size=320 diff --git a/PaddleCV/image_classification/train.py b/PaddleCV/image_classification/train.py index 48428bec..fa15c172 100755 --- a/PaddleCV/image_classification/train.py +++ b/PaddleCV/image_classification/train.py @@ -37,352 +37,87 @@ set_paddle_flags({ 'FLAGS_eager_delete_tensor_gb': 0, # enable gc 'FLAGS_fraction_of_gpu_memory_to_use': 0.98 }) + 
import argparse import functools import subprocess + import paddle import paddle.fluid as fluid -import paddle.dataset.flowers as flowers -import reader_cv2 as reader -import utils +import reader +from utils import * import models -from utils.fp16_utils import create_master_params_grads, master_param_to_train_param -from utils.utility import add_arguments, print_arguments, check_gpu -from utils.learning_rate import cosine_decay_with_warmup -from dist_train import dist_utils - -IMAGENET1000 = 1281167 -num_trainers = int(os.environ.get('PADDLE_TRAINERS_NUM', 1)) - -parser = argparse.ArgumentParser(description=__doc__) -add_arg = functools.partial(add_arguments, argparser=parser) - -# yapf: disable -add_arg('batch_size', int, 256, "Minibatch size.") -add_arg('use_gpu', bool, True, "Whether to use GPU or not.") -add_arg('total_images', int, 1281167, "Training image number.") -add_arg('num_epochs', int, 120, "number of epochs.") -add_arg('class_dim', int, 1000, "Class number.") -add_arg('image_shape', str, "3,224,224", "input image size") -add_arg('model_save_dir', str, "output", "model save directory") -add_arg('with_inplace', bool, True, "Whether to use inplace memory optimization.") -add_arg('pretrained_model', str, None, "Whether to use pretrained model.") -add_arg('checkpoint', str, None, "Whether to resume checkpoint.") -add_arg('lr', float, 0.1, "set learning rate.") -add_arg('lr_strategy', str, "piecewise_decay", "Set the learning rate decay strategy.") -add_arg('model', str, "SE_ResNeXt50_32x4d", "Set the network to use.") -add_arg('enable_ce', bool, False, "If set True, enable continuous evaluation job.") -add_arg('data_dir', str, "./data/ILSVRC2012/", "The ImageNet dataset root dir.") -add_arg('fp16', bool, False, "Enable half precision training with fp16." ) -add_arg('scale_loss', float, 1.0, "Scale loss for fp16." 
) -add_arg('l2_decay', float, 1e-4, "L2_decay parameter.") -add_arg('momentum_rate', float, 0.9, "momentum_rate.") -add_arg('use_label_smoothing', bool, False, "Whether to use label_smoothing or not") -add_arg('label_smoothing_epsilon', float, 0.2, "Set the label_smoothing_epsilon parameter") -add_arg('lower_scale', float, 0.08, "Set the lower_scale in ramdom_crop") -add_arg('lower_ratio', float, 3./4., "Set the lower_ratio in ramdom_crop") -add_arg('upper_ratio', float, 4./3., "Set the upper_ratio in ramdom_crop") -add_arg('resize_short_size', int, 256, "Set the resize_short_size") -add_arg('use_mixup', bool, False, "Whether to use mixup or not") -add_arg('mixup_alpha', float, 0.2, "Set the mixup_alpha parameter") -add_arg('is_distill', bool, False, "is distill or not") - -def optimizer_setting(params): - ls = params["learning_strategy"] - l2_decay = params["l2_decay"] - momentum_rate = params["momentum_rate"] - if ls["name"] == "piecewise_decay": - if "total_images" not in params: - total_images = IMAGENET1000 - else: - total_images = params["total_images"] - batch_size = ls["batch_size"] - step = int(math.ceil(float(total_images) / batch_size)) - bd = [step * e for e in ls["epochs"]] - base_lr = params["lr"] - lr = [] - lr = [base_lr * (0.1**i) for i in range(len(bd) + 1)] - optimizer = fluid.optimizer.Momentum( - learning_rate=fluid.layers.piecewise_decay( - boundaries=bd, values=lr), - momentum=momentum_rate, - regularization=fluid.regularizer.L2Decay(l2_decay)) - - elif ls["name"] == "cosine_decay": - if "total_images" not in params: - total_images = IMAGENET1000 - else: - total_images = params["total_images"] - batch_size = ls["batch_size"] - l2_decay = params["l2_decay"] - momentum_rate = params["momentum_rate"] - step = int(math.ceil(float(total_images) / batch_size)) - lr = params["lr"] - num_epochs = params["num_epochs"] - - optimizer = fluid.optimizer.Momentum( - learning_rate=fluid.layers.cosine_decay( - learning_rate=lr, step_each_epoch=step, 
epochs=num_epochs), - momentum=momentum_rate, - regularization=fluid.regularizer.L2Decay(l2_decay)) - - elif ls["name"] == "cosine_warmup_decay": - if "total_images" not in params: - total_images = IMAGENET1000 - else: - total_images = params["total_images"] - batch_size = ls["batch_size"] - l2_decay = params["l2_decay"] - momentum_rate = params["momentum_rate"] - step = int(math.ceil(float(total_images) / batch_size)) - lr = params["lr"] - num_epochs = params["num_epochs"] - - optimizer = fluid.optimizer.Momentum( - learning_rate=cosine_decay_with_warmup( - learning_rate=lr, step_each_epoch=step, epochs=num_epochs), - momentum=momentum_rate, - regularization=fluid.regularizer.L2Decay(l2_decay)) - - elif ls["name"] == "linear_decay": - if "total_images" not in params: - total_images = IMAGENET1000 - else: - total_images = params["total_images"] - batch_size = ls["batch_size"] - num_epochs = params["num_epochs"] - start_lr = params["lr"] - l2_decay = params["l2_decay"] - momentum_rate = params["momentum_rate"] - end_lr = 0 - total_step = int((total_images / batch_size) * num_epochs) - lr = fluid.layers.polynomial_decay( - start_lr, total_step, end_lr, power=1) - optimizer = fluid.optimizer.Momentum( - learning_rate=lr, - momentum=momentum_rate, - regularization=fluid.regularizer.L2Decay(l2_decay)) - elif ls["name"] == "adam": - lr = params["lr"] - optimizer = fluid.optimizer.Adam(learning_rate=lr) - elif ls["name"] == "rmsprop_cosine": - if "total_images" not in params: - total_images = IMAGENET1000 - else: - total_images = params["total_images"] - batch_size = ls["batch_size"] - l2_decay = params["l2_decay"] - momentum_rate = params["momentum_rate"] - step = int(math.ceil(float(total_images) / batch_size)) - lr = params["lr"] - num_epochs = params["num_epochs"] - optimizer = fluid.optimizer.RMSProp( - learning_rate=fluid.layers.cosine_decay( - learning_rate=lr, step_each_epoch=step, epochs=num_epochs), - momentum=momentum_rate, - 
regularization=fluid.regularizer.L2Decay(l2_decay), - # RMSProp Optimizer: Apply epsilon=1 on ImageNet. - epsilon=1 - ) - else: - lr = params["lr"] - l2_decay = params["l2_decay"] - momentum_rate = params["momentum_rate"] - optimizer = fluid.optimizer.Momentum( - learning_rate=lr, - momentum=momentum_rate, - regularization=fluid.regularizer.L2Decay(l2_decay)) - - return optimizer - -def calc_loss(epsilon,label,class_dim,softmax_out,use_label_smoothing): - if use_label_smoothing: - label_one_hot = fluid.layers.one_hot(input=label, depth=class_dim) - smooth_label = fluid.layers.label_smooth(label=label_one_hot, epsilon=epsilon, dtype="float32") - loss = fluid.layers.cross_entropy(input=softmax_out, label=smooth_label, soft_label=True) - else: - loss = fluid.layers.cross_entropy(input=softmax_out, label=label) - return loss - - -def net_config(image, model, args, is_train, label=0, y_a=0, y_b=0, lam=0.0): - model_list = [m for m in dir(models) if "__" not in m] - assert args.model in model_list, "{} is not lists: {}".format(args.model, - model_list) - class_dim = args.class_dim - model_name = args.model - use_mixup = args.use_mixup - use_label_smoothing = args.use_label_smoothing - epsilon = args.label_smoothing_epsilon - - if args.enable_ce: - assert model_name == "SE_ResNeXt50_32x4d" - model.params["dropout_seed"] = 100 - class_dim = 102 - - if model_name == "GoogleNet": - out0, out1, out2 = model.net(input=image, class_dim=class_dim) - cost0 = fluid.layers.cross_entropy(input=out0, label=label) - cost1 = fluid.layers.cross_entropy(input=out1, label=label) - cost2 = fluid.layers.cross_entropy(input=out2, label=label) - avg_cost0 = fluid.layers.mean(x=cost0) - avg_cost1 = fluid.layers.mean(x=cost1) - avg_cost2 = fluid.layers.mean(x=cost2) - - avg_cost = avg_cost0 + 0.3 * avg_cost1 + 0.3 * avg_cost2 - acc_top1 = fluid.layers.accuracy(input=out0, label=label, k=1) - acc_top5 = fluid.layers.accuracy(input=out0, label=label, k=5) - - else: - if not args.is_distill: - out 
= model.net(input=image, class_dim=class_dim) - softmax_out = fluid.layers.softmax(out, use_cudnn=False) - if is_train: - if use_mixup: - loss_a = calc_loss(epsilon,y_a,class_dim,softmax_out,use_label_smoothing) - loss_b = calc_loss(epsilon,y_b,class_dim,softmax_out,use_label_smoothing) - loss_a_mean = fluid.layers.mean(x = loss_a) - loss_b_mean = fluid.layers.mean(x = loss_b) - cost = lam * loss_a_mean + (1 - lam) * loss_b_mean - avg_cost = fluid.layers.mean(x=cost) - if args.scale_loss > 1: - avg_cost = fluid.layers.mean(x=cost) * float(args.scale_loss) - return avg_cost - else: - cost = calc_loss(epsilon,label,class_dim,softmax_out,use_label_smoothing) - - else: - cost = fluid.layers.cross_entropy(input=softmax_out, label=label) - else: - out1, out2 = model.net(input=image, class_dim=args.class_dim) - softmax_out1, softmax_out = fluid.layers.softmax(out1), fluid.layers.softmax(out2) - smooth_out1 = fluid.layers.label_smooth(label=softmax_out1, epsilon=0.0, dtype="float32") - cost = fluid.layers.cross_entropy(input=softmax_out, label=smooth_out1, soft_label=True) - - avg_cost = fluid.layers.mean(cost) - if args.scale_loss > 1: - avg_cost = fluid.layers.mean(x=cost) * float(args.scale_loss) - acc_top1 = fluid.layers.accuracy(input=softmax_out, label=label, k=1) - acc_top5 = fluid.layers.accuracy(input=softmax_out, label=label, k=5) - - return avg_cost, acc_top1, acc_top5 +from build_model import create_model + def build_program(is_train, main_prog, startup_prog, args): - image_shape = [int(m) for m in args.image_shape.split(",")] - model_name = args.model - model_list = [m for m in dir(models) if "__" not in m] - assert model_name in model_list, "{} is not in lists: {}".format(args.model, - model_list) - model = models.__dict__[model_name]() + """build program, and add grad op in program according to different mode + + Args: + is_train: mode: train or test + main_prog: main program + startup_prog: startup program + args: arguments + + Returns : + train mode:
[Loss, global_lr, py_reader] + test mode: [Loss, py_reader] + """ + model = models.__dict__[args.model]() with fluid.program_guard(main_prog, startup_prog): - use_mixup = args.use_mixup - if is_train and use_mixup: - py_reader = fluid.layers.py_reader( - capacity=16, - shapes=[[-1] + image_shape, [-1, 1], [-1, 1], [-1, 1]], - lod_levels=[0, 0, 0, 0], - dtypes=["float32", "int64", "int64", "float32"], - use_double_buffer=True) - else: - py_reader = fluid.layers.py_reader( - capacity=16, - shapes=[[-1] + image_shape, [-1, 1]], - lod_levels=[0, 0], - dtypes=["float32", "int64"], - use_double_buffer=True) - + if args.random_seed: + main_prog.random_seed = args.random_seed + startup_prog.random_seed = args.random_seed with fluid.unique_name.guard(): - if is_train and use_mixup: - image, y_a, y_b, lam = fluid.layers.read_file(py_reader) - if args.fp16: - image = fluid.layers.cast(image, "float16") - avg_cost = net_config(image=image, y_a=y_a, y_b=y_b, lam=lam, model=model, args=args, label=0, is_train=True) - avg_cost.persistable = True - build_program_out = [py_reader, avg_cost] - else: - image, label = fluid.layers.read_file(py_reader) - if args.fp16: - image = fluid.layers.cast(image, "float16") - avg_cost, acc_top1, acc_top5 = net_config(image, model, args, label=label, is_train=is_train) - avg_cost.persistable = True - acc_top1.persistable = True - acc_top5.persistable = True - build_program_out = [py_reader, avg_cost, acc_top1, acc_top5] - + py_reader, loss_out = create_model(model, args, is_train) + # add backward op in program if is_train: - params = model.params - params["total_images"] = args.total_images - params["lr"] = args.lr - params["num_epochs"] = args.num_epochs - params["learning_strategy"]["batch_size"] = args.batch_size - params["learning_strategy"]["name"] = args.lr_strategy - params["l2_decay"] = args.l2_decay - params["momentum_rate"] = args.momentum_rate - - optimizer = optimizer_setting(params) - if args.fp16: - params_grads = 
optimizer.backward(avg_cost) - master_params_grads = create_master_params_grads( - params_grads, main_prog, startup_prog, args.scale_loss) - optimizer.apply_gradients(master_params_grads) - master_param_to_train_param(master_params_grads, - params_grads, main_prog) - else: - optimizer.minimize(avg_cost) + optimizer = create_optimizer(args) + avg_cost = loss_out[0] + optimizer.minimize(avg_cost) + #XXX: fetch learning rate now, better implement is required here. global_lr = optimizer._global_learning_rate() - global_lr.persistable=True - build_program_out.append(global_lr) - - return build_program_out + global_lr.persistable = True + loss_out.append(global_lr) + loss_out.append(py_reader) + return loss_out -def get_device_num(): - # NOTE(zcd): for multi-processe training, each process use one GPU card. - if num_trainers > 1: - return 1 - return fluid.core.get_cuda_device_count() def train(args): - # parameters from arguments - model_name = args.model - checkpoint = args.checkpoint - pretrained_model = args.pretrained_model - model_save_dir = args.model_save_dir - use_mixup = args.use_mixup - + """Train model + + Args: + args: all arguments. 
+ """ startup_prog = fluid.Program() train_prog = fluid.Program() test_prog = fluid.Program() - if args.enable_ce: - startup_prog.random_seed = 1000 - train_prog.random_seed = 1000 - - b_out = build_program( - is_train=True, - main_prog=train_prog, - startup_prog=startup_prog, - args=args) - if use_mixup: - train_py_reader, train_cost, global_lr = b_out[0], b_out[1], b_out[2] - train_fetch_vars = [train_cost, global_lr] - train_fetch_list = [] - for var in train_fetch_vars: - var.persistable=True - train_fetch_list.append(var.name) - - else: - train_py_reader, train_cost, train_acc1, train_acc5, global_lr = b_out[0],b_out[1],b_out[2],b_out[3],b_out[4] - train_fetch_vars = [train_cost, train_acc1, train_acc5, global_lr] - train_fetch_list = [] - for var in train_fetch_vars: - var.persistable=True - train_fetch_list.append(var.name) - - b_out_test = build_program( - is_train=False, - main_prog=test_prog, - startup_prog=startup_prog, - args=args) - test_py_reader, test_cost, test_acc1, test_acc5 = b_out_test[0],b_out_test[1],b_out_test[2],b_out_test[3] + + train_out = build_program( + is_train=True, + main_prog=train_prog, + startup_prog=startup_prog, + args=args) + train_py_reader = train_out[-1] + train_fetch_vars = train_out[:-1] + train_fetch_list = [] + for var in train_fetch_vars: + var.persistable = True + train_fetch_list.append(var.name) + + test_out = build_program( + is_train=False, + main_prog=test_prog, + startup_prog=startup_prog, + args=args) + test_py_reader = test_out[-1] + test_fetch_vars = test_out[:-1] + test_fetch_list = [] + for var in test_fetch_vars: + var.persistable = True + test_fetch_list.append(var.name) + + #Create test_prog and set layers' is_test params to True test_prog = test_prog.clone(for_test=True) gpu_id = int(os.environ.get('FLAGS_selected_gpus', 0)) @@ -390,219 +125,96 @@ def train(args): exe = fluid.Executor(place) exe.run(startup_prog) - if checkpoint is not None: - fluid.io.load_persistables(exe, checkpoint, 
main_program=train_prog) - - if pretrained_model: - - def if_exist(var): - return os.path.exists(os.path.join(pretrained_model, var.name)) - - fluid.io.load_vars( - exe, pretrained_model, main_program=train_prog, predicate=if_exist) - - if args.use_gpu: - device_num = get_device_num() - else: - device_num = 1 - train_batch_size = args.batch_size / device_num - - test_batch_size = 16 - if not args.enable_ce: - # NOTE: the order of batch data generated by batch_reader - # must be the same in the respective processes. - shuffle_seed = 1 if num_trainers > 1 else None - train_reader = reader.train(settings=args, batch_size=train_batch_size, shuffle_seed=shuffle_seed) - test_reader = reader.val(settings=args, batch_size=test_batch_size) - else: - # use flowers dataset for CE and set use_xmap False to avoid disorder data - # but it is time consuming. For faster speed, need another dataset. - import random - random.seed(0) - np.random.seed(0) - train_reader = paddle.batch( - flowers.train(use_xmap=False), - batch_size=train_batch_size, - drop_last=True) - if num_trainers > 1: - train_reader = fluid.contrib.reader.distributed_batch_reader(train_reader) - test_reader = paddle.batch( - flowers.test(use_xmap=False), batch_size=test_batch_size) - - train_py_reader.decorate_paddle_reader(train_reader) - test_py_reader.decorate_paddle_reader(test_reader) - - - test_fetch_vars = [test_cost, test_acc1, test_acc5] - test_fetch_list = [] - for var in test_fetch_vars: - var.persistable=True - test_fetch_list.append(var.name) + #init model by checkpoint or pretrained model.
+ init_model(exe, args, train_prog) + + train_reader = reader.train(settings=args) + train_reader = paddle.batch( + train_reader, + batch_size=int(args.batch_size / fluid.core.get_cuda_device_count()), + drop_last=True) - # use_ngraph is for CPU only, please refer to README_ngraph.md for details - use_ngraph = os.getenv('FLAGS_use_ngraph') - if not use_ngraph: - build_strategy = fluid.BuildStrategy() - # memopt may affect GC results - #build_strategy.memory_optimize = args.with_mem_opt - build_strategy.enable_inplace = args.with_inplace - #build_strategy.fuse_all_reduce_ops=1 - - exec_strategy = fluid.ExecutionStrategy() - exec_strategy.num_threads = device_num - exec_strategy.num_iteration_per_drop_scope = 10 - if num_trainers > 1 and args.use_gpu: - dist_utils.prepare_for_multi_process(exe, build_strategy, train_prog) - # NOTE: the process is fast when num_threads is 1 - # for multi-process training. - exec_strategy.num_threads = 1 - - train_exe = fluid.ParallelExecutor( - main_program=train_prog, - use_cuda=bool(args.use_gpu), - loss_name=train_cost.name, - build_strategy=build_strategy, - exec_strategy=exec_strategy) - else: - train_exe = exe - - params = models.__dict__[args.model]().params - for pass_id in range(params["num_epochs"]): + test_reader = reader.val(settings=args) + test_reader = paddle.batch( + test_reader, batch_size=args.test_batch_size, drop_last=True) + + train_py_reader.decorate_sample_list_generator(train_reader, place) + test_py_reader.decorate_sample_list_generator(test_reader, place) + + compiled_train_prog = best_strategy_compiled(args, train_prog, + train_fetch_vars[0]) + + for pass_id in range(args.num_epochs): + + train_batch_id = 0 + test_batch_id = 0 + train_batch_time_record = [] + test_batch_time_record = [] + train_batch_metrics_record = [] + test_batch_metrics_record = [] train_py_reader.start() - train_info = [[], [], []] - test_info = [[], [], []] - train_time = [] - batch_id = 0 - time_record=[] + try: while True: t1 = 
time.time() - if use_mixup: - if use_ngraph: - loss, lr = train_exe.run(train_prog, fetch_list=train_fetch_list) - else: - loss, lr = train_exe.run(fetch_list=train_fetch_list) - else: - if use_ngraph: - loss, acc1, acc5, lr = train_exe.run(train_prog, fetch_list=train_fetch_list) - else: - loss, acc1, acc5, lr = train_exe.run(fetch_list=train_fetch_list) - - acc1 = np.mean(np.array(acc1)) - acc5 = np.mean(np.array(acc5)) - train_info[1].append(acc1) - train_info[2].append(acc5) - + train_batch_metrics = exe.run(compiled_train_prog, + fetch_list=train_fetch_list) t2 = time.time() - period = t2 - t1 - time_record.append(period) - - loss = np.mean(np.array(loss)) - train_info[0].append(loss) - lr = np.mean(np.array(lr)) - train_time.append(period) - - if batch_id % 10 == 0: - period = np.mean(time_record) - time_record=[] - if use_mixup: - print("Pass {0}, trainbatch {1}, loss {2}, lr {3}, time {4}" - .format(pass_id, batch_id, "%.5f"%loss, "%.5f" %lr, "%2.2f sec" % period)) - else: - print("Pass {0}, trainbatch {1}, loss {2}, \ - acc1 {3}, acc5 {4}, lr {5}, time {6}" - .format(pass_id, batch_id, "%.5f"%loss, "%.5f"%acc1, "%.5f"%acc5, "%.5f" % - lr, "%2.2f sec" % period)) - sys.stdout.flush() - batch_id += 1 + train_batch_elapse = t2 - t1 + train_batch_time_record.append(train_batch_elapse) + train_batch_metrics_avg = np.mean( + np.array(train_batch_metrics), axis=1) + train_batch_metrics_record.append(train_batch_metrics_avg) + + print_info(pass_id, train_batch_id, args.print_step, + train_batch_metrics_avg, train_batch_elapse, "batch") + sys.stdout.flush() + train_batch_id += 1 + except fluid.core.EOFException: train_py_reader.reset() - train_loss = np.array(train_info[0]).mean() - if not use_mixup: - train_acc1 = np.array(train_info[1]).mean() - train_acc5 = np.array(train_info[2]).mean() - train_speed = np.array(train_time).mean() / (train_batch_size * - device_num) - test_py_reader.start() - - test_batch_id = 0 try: while True: t1 = time.time() - loss, acc1, 
acc5 = exe.run(program=test_prog, - fetch_list=test_fetch_list) + test_batch_metrics = exe.run(program=test_prog, + fetch_list=test_fetch_list) t2 = time.time() - period = t2 - t1 - loss = np.mean(loss) - acc1 = np.mean(acc1) - acc5 = np.mean(acc5) - test_info[0].append(loss) - test_info[1].append(acc1) - test_info[2].append(acc5) - if test_batch_id % 10 == 0: - print("Pass {0},testbatch {1},loss {2}, \ - acc1 {3},acc5 {4},time {5}" - .format(pass_id, test_batch_id, "%.5f"%loss,"%.5f"%acc1, "%.5f"%acc5, - "%2.2f sec" % period)) - sys.stdout.flush() + test_batch_elapse = t2 - t1 + test_batch_time_record.append(test_batch_elapse) + + test_batch_metrics_avg = np.mean( + np.array(test_batch_metrics), axis=1) + test_batch_metrics_record.append(test_batch_metrics_avg) + + print_info(pass_id, test_batch_id, args.print_step, + test_batch_metrics_avg, test_batch_elapse, "batch") + sys.stdout.flush() test_batch_id += 1 + except fluid.core.EOFException: test_py_reader.reset() + train_epoch_time_avg = np.mean(np.array(train_batch_time_record)) + train_epoch_metrics_avg = np.mean( + np.array(train_batch_metrics_record), axis=0) + + test_epoch_time_avg = np.mean(np.array(test_batch_time_record)) + test_epoch_metrics_avg = np.mean( + np.array(test_batch_metrics_record), axis=0) - test_loss = np.array(test_info[0]).mean() - test_acc1 = np.array(test_info[1]).mean() - test_acc5 = np.array(test_info[2]).mean() - - if use_mixup: - print("End pass {0}, train_loss {1}, test_loss {2}, test_acc1 {3}, test_acc5 {4}".format( - pass_id, "%.5f"%train_loss, "%.5f"%test_loss, "%.5f"%test_acc1, "%.5f"%test_acc5)) - else: - - print("End pass {0}, train_loss {1}, train_acc1 {2}, train_acc5 {3}, " - "test_loss {4}, test_acc1 {5}, test_acc5 {6}".format( - pass_id, "%.5f"%train_loss, "%.5f"%train_acc1, "%.5f"%train_acc5, "%.5f"%test_loss, - "%.5f"%test_acc1, "%.5f"%test_acc5)) - sys.stdout.flush() - - model_path = os.path.join(model_save_dir + '/' + model_name, - str(pass_id)) - if not 
os.path.isdir(model_path): - os.makedirs(model_path) - fluid.io.save_persistables(exe, model_path, main_program=train_prog) - - # This is for continuous evaluation only - if args.enable_ce and pass_id == args.num_epochs - 1: - if device_num == 1: - # Use the mean cost/acc for training - print("kpis train_cost %s" % train_loss) - print("kpis train_acc_top1 %s" % train_acc1) - print("kpis train_acc_top5 %s" % train_acc5) - # Use the mean cost/acc for testing - print("kpis test_cost %s" % test_loss) - print("kpis test_acc_top1 %s" % test_acc1) - print("kpis test_acc_top5 %s" % test_acc5) - print("kpis train_speed %s" % train_speed) - else: - # Use the mean cost/acc for training - print("kpis train_cost_card%s %s" % (device_num, train_loss)) - print("kpis train_acc_top1_card%s %s" % - (device_num, train_acc1)) - print("kpis train_acc_top5_card%s %s" % - (device_num, train_acc5)) - # Use the mean cost/acc for testing - print("kpis test_cost_card%s %s" % (device_num, test_loss)) - print("kpis test_acc_top1_card%s %s" % (device_num, test_acc1)) - print("kpis test_acc_top5_card%s %s" % (device_num, test_acc5)) - print("kpis train_speed_card%s %s" % (device_num, train_speed)) + print_info(pass_id, 0, 0, + list(train_epoch_metrics_avg) + list(test_epoch_metrics_avg), + 0, "epoch") + #For now, save model per epoch. 
def main():
    """Command-line entry point for training.

    Obtains the run configuration from ``parse_args()``, hands it to
    ``print_arguments`` and ``check_args`` in that order, and finally
    starts the training loop with ``train``.
    """
    config = parse_args()
    # Arguments are printed before they are checked, so the configuration
    # has already been emitted if check_args() rejects it.
    print_arguments(config)
    check_args(config)
    train(config)
-#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -from __future__ import print_function - -import paddle -import paddle.fluid as fluid -import paddle.fluid.core as core - -def cast_fp16_to_fp32(i, o, prog): - prog.global_block().append_op( - type="cast", - inputs={"X": i}, - outputs={"Out": o}, - attrs={ - "in_dtype": fluid.core.VarDesc.VarType.FP16, - "out_dtype": fluid.core.VarDesc.VarType.FP32 - } - ) - -def cast_fp32_to_fp16(i, o, prog): - prog.global_block().append_op( - type="cast", - inputs={"X": i}, - outputs={"Out": o}, - attrs={ - "in_dtype": fluid.core.VarDesc.VarType.FP32, - "out_dtype": fluid.core.VarDesc.VarType.FP16 - } - ) - -def copy_to_master_param(p, block): - v = block.vars.get(p.name, None) - if v is None: - raise ValueError("no param name %s found!" 
% p.name) - new_p = fluid.framework.Parameter( - block=block, - shape=v.shape, - dtype=fluid.core.VarDesc.VarType.FP32, - type=v.type, - lod_level=v.lod_level, - stop_gradient=p.stop_gradient, - trainable=p.trainable, - optimize_attr=p.optimize_attr, - regularizer=p.regularizer, - gradient_clip_attr=p.gradient_clip_attr, - error_clip=p.error_clip, - name=v.name + ".master") - return new_p - - -def _update_role_var_grad(prog, params_grads): - BACKWARD = core.op_proto_and_checker_maker.OpRole.Backward - gradname_to_paramname = dict() - for p, g in params_grads: - gradname_to_paramname[g.name] = p.name - for op in prog.global_block().ops: - role = op.attr("op_role") - if role & int(BACKWARD) and op.has_attr("op_role_var"): - # have backward bits then remove all op_role_var - op.desc.remove_attr("op_role_var") - for op in prog.global_block().ops: - if op.type == "allreduce": - allreduce_role_var = [] - for input_varname in op.input_arg_names: - if input_varname in gradname_to_paramname: - allreduce_role_var.append(gradname_to_paramname[input_varname]) - allreduce_role_var.append(input_varname) - print("updating role var: ", allreduce_role_var) - op._set_attr("op_role_var", allreduce_role_var) - -def create_master_params_grads(params_grads, main_prog, startup_prog, scale_loss, reduce_master_grad=True): - master_params_grads = [] # master p, g on local device - params_grads_to_apply = [] # master p, g after allreduced, if reduce_master_grad is enabled - tmp_role = main_prog._current_role - OpRole = fluid.core.op_proto_and_checker_maker.OpRole - main_prog._current_role = OpRole.Backward - for p, g in params_grads: - # create master parameters - master_param = copy_to_master_param(p, main_prog.global_block()) - startup_master_param = startup_prog.global_block()._clone_variable(master_param) - startup_p = startup_prog.global_block().var(p.name) - cast_fp16_to_fp32(startup_p, startup_master_param, startup_prog) - # cast fp16 gradients to fp32 before apply gradients - if 
g.name.startswith("batch_norm"): - if scale_loss > 1: - scaled_g = g / float(scale_loss) - else: - scaled_g = g - master_params_grads.append([p, scaled_g]) - continue - - master_grad = fluid.layers.cast(g, "float32") - if scale_loss > 1: - master_grad = master_grad / float(scale_loss) - master_params_grads.append([p, master_grad]) - if reduce_master_grad: - reduced_master_grad = fluid.layers.collective._allreduce(master_grad) - else: - reduced_master_grad = master_grad - params_grads_to_apply.append([master_param, reduced_master_grad]) - - # update program op role var acording to master grads before allreduce. - _update_role_var_grad(main_prog, master_params_grads) - main_prog._current_role = tmp_role - return params_grads_to_apply - -def master_param_to_train_param(master_params_grads, params_grads, main_prog): - for idx, m_p_g in enumerate(master_params_grads): - with main_prog._optimized_guard([m_p_g[0], m_p_g[1]]): - train_p_name = m_p_g[0].name.replace(".master", "") - if train_p_name.startswith("batch_norm"): - continue - train_p = None - # find fp16 param in original params_grads list - for p, g in params_grads: - if p.name == train_p_name: - train_p = p - if not train_p: - print("can not find train param for: ", m_p_g[0].name) - continue - cast_fp32_to_fp16(m_p_g[0], train_p, main_prog) diff --git a/PaddleCV/image_classification/utils/learning_rate.py b/PaddleCV/image_classification/utils/learning_rate.py deleted file mode 100644 index 2a2c10d1..00000000 --- a/PaddleCV/image_classification/utils/learning_rate.py +++ /dev/null @@ -1,93 +0,0 @@ -#copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. 
-#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math - -import paddle -import paddle.fluid as fluid -import paddle.fluid.layers.ops as ops -from paddle.fluid.initializer import init_on_cpu -from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter - -def cosine_decay(learning_rate, step_each_epoch, epochs=120): - """Applies cosine decay to the learning rate. - lr = 0.05 * (math.cos(epoch * (math.pi / 120)) + 1) - """ - global_step = _decay_step_counter() - - with init_on_cpu(): - epoch = ops.floor(global_step / step_each_epoch) - decayed_lr = learning_rate * \ - (ops.cos(epoch * (math.pi / epochs)) + 1)/2 - return decayed_lr - -def cosine_decay_with_warmup(learning_rate, step_each_epoch, epochs=120): - """Applies cosine decay to the learning rate. - lr = 0.05 * (math.cos(epoch * (math.pi / 120)) + 1) - decrease lr for every mini-batch and start with warmup. 
- """ - global_step = _decay_step_counter() - lr = fluid.layers.tensor.create_global_var( - shape=[1], - value=0.0, - dtype='float32', - persistable=True, - name="learning_rate") - - warmup_epoch = fluid.layers.fill_constant( - shape=[1], dtype='float32', value=float(5), force_cpu=True) - - with init_on_cpu(): - epoch = ops.floor(global_step / step_each_epoch) - with control_flow.Switch() as switch: - with switch.case(epoch < warmup_epoch): - decayed_lr = learning_rate * (global_step / (step_each_epoch * warmup_epoch)) - fluid.layers.tensor.assign(input=decayed_lr, output=lr) - with switch.default(): - decayed_lr = learning_rate * \ - (ops.cos((global_step - warmup_epoch * step_each_epoch) * (math.pi / (epochs * step_each_epoch))) + 1)/2 - fluid.layers.tensor.assign(input=decayed_lr, output=lr) - return lr - -def lr_warmup(learning_rate, warmup_steps, start_lr, end_lr): - """ Applies linear learning rate warmup for distributed training - Argument learning_rate can be float or a Variable - lr = lr + (warmup_rate * step / warmup_steps) - """ - assert (isinstance(end_lr, float)) - assert (isinstance(start_lr, float)) - linear_step = end_lr - start_lr - with fluid.default_main_program()._lr_schedule_guard(): - lr = fluid.layers.tensor.create_global_var( - shape=[1], - value=0.0, - dtype='float32', - persistable=True, - name="learning_rate_warmup") - - global_step = fluid.layers.learning_rate_scheduler._decay_step_counter() - - with fluid.layers.control_flow.Switch() as switch: - with switch.case(global_step < warmup_steps): - decayed_lr = start_lr + linear_step * (global_step / - warmup_steps) - fluid.layers.tensor.assign(decayed_lr, lr) - with switch.default(): - fluid.layers.tensor.assign(learning_rate, lr) - - return lr diff --git a/PaddleCV/image_classification/utils/optimizer.py b/PaddleCV/image_classification/utils/optimizer.py new file mode 100644 index 00000000..8efa16a4 --- /dev/null +++ b/PaddleCV/image_classification/utils/optimizer.py @@ -0,0 +1,242 @@ 
def cosine_decay(learning_rate, step_each_epoch, epochs=120):
    """Build a cosine learning-rate schedule that changes once per epoch.

    With ``epoch = floor(global_step / step_each_epoch)`` the schedule is::

        lr = learning_rate * (cos(epoch * pi / epochs) + 1) / 2

    Args:
        learning_rate: base learning rate the cosine factor is applied to.
        step_each_epoch: number of steps that make up one epoch.
        epochs: number of epochs spanned by the cosine half-period.

    Returns:
        The decayed learning-rate expression.
    """
    global_step = _decay_step_counter()

    with init_on_cpu():
        epoch = ops.floor(global_step / step_each_epoch)
        decayed_lr = learning_rate * \
            (ops.cos(epoch * (math.pi / epochs)) + 1) / 2
    return decayed_lr


def cosine_decay_with_warmup(learning_rate,
                             step_each_epoch,
                             epochs=120,
                             warmup_epochs=5):
    """Build a per-step cosine schedule preceded by a linear warmup.

    While ``floor(global_step / step_each_epoch) < warmup_epochs``::

        lr = learning_rate * global_step / (step_each_epoch * warmup_epochs)

    afterwards::

        lr = learning_rate * (cos((global_step - warmup_epochs * step_each_epoch)
                                  * pi / (epochs * step_each_epoch)) + 1) / 2

    Args:
        learning_rate: base learning rate reached at the end of warmup.
        step_each_epoch: number of steps that make up one epoch.
        epochs: number of epochs used for the cosine period.
        warmup_epochs: number of warmup epochs; the default of 5 matches the
            value that used to be hard-coded here.

    Returns:
        The global variable (named "learning_rate") that the schedule
        assigns the current rate to.
    """
    global_step = _decay_step_counter()
    lr = fluid.layers.tensor.create_global_var(
        shape=[1],
        value=0.0,
        dtype='float32',
        persistable=True,
        name="learning_rate")

    warmup_epoch = fluid.layers.fill_constant(
        shape=[1], dtype='float32', value=float(warmup_epochs), force_cpu=True)

    with init_on_cpu():
        epoch = ops.floor(global_step / step_each_epoch)
        with fluid.layers.control_flow.Switch() as switch:
            with switch.case(epoch < warmup_epoch):
                # Linear ramp from 0 up to learning_rate.
                decayed_lr = learning_rate * (global_step /
                                              (step_each_epoch * warmup_epoch))
                fluid.layers.tensor.assign(input=decayed_lr, output=lr)
            with switch.default():
                # Cosine decay counted from the end of the warmup phase.
                decayed_lr = learning_rate * \
                    (ops.cos((global_step - warmup_epoch * step_each_epoch) *
                             (math.pi / (epochs * step_each_epoch))) + 1) / 2
                fluid.layers.tensor.assign(input=decayed_lr, output=lr)
    return lr


def lr_warmup(learning_rate, warmup_steps, start_lr, end_lr):
    """Wrap a learning rate with a linear warmup (used for distributed training).

    For ``global_step < warmup_steps``::

        lr = start_lr + (end_lr - start_lr) * global_step / warmup_steps

    afterwards ``lr`` is simply ``learning_rate``.

    Args:
        learning_rate: rate used once warmup is over; a float or a Variable.
        warmup_steps: number of steps the warmup lasts.
        start_lr: learning rate at step 0; must be a float.
        end_lr: learning rate the ramp heads towards; must be a float.

    Returns:
        The global variable (named "learning_rate_warmup") holding the
        current rate.
    """
    assert (isinstance(end_lr, float))
    assert (isinstance(start_lr, float))
    linear_step = end_lr - start_lr
    with fluid.default_main_program()._lr_schedule_guard():
        lr = fluid.layers.tensor.create_global_var(
            shape=[1],
            value=0.0,
            dtype='float32',
            persistable=True,
            name="learning_rate_warmup")

        global_step = fluid.layers.learning_rate_scheduler._decay_step_counter()

        with fluid.layers.control_flow.Switch() as switch:
            with switch.case(global_step < warmup_steps):
                decayed_lr = start_lr + linear_step * (global_step /
                                                       warmup_steps)
                fluid.layers.tensor.assign(decayed_lr, lr)
            with switch.default():
                fluid.layers.tensor.assign(learning_rate, lr)

        return lr


class Optimizer(object):
    """Factory for the optimizer / learning-rate strategies of this project.

    Every public method builds and returns one optimizer; the method name is
    the value expected in ``args.lr_strategy`` (see ``create_optimizer``).

    Attributes:
        batch_size: batch size on all devices.
        lr: learning rate.
        lr_strategy: learning rate decay strategy.
        l2_decay: l2_decay parameter.
        momentum_rate: momentum rate when using Momentum optimizer.
        step_epochs: piecewise decay steps (epoch indices).
        num_epochs: number of total epochs.
        total_images: total images.
        step: total steps in an epoch, ceil(total_images / batch_size).
    """

    def __init__(self, args):
        """Copy the optimizer-related settings from the parsed arguments."""
        self.batch_size = args.batch_size
        self.lr = args.lr
        self.lr_strategy = args.lr_strategy
        self.l2_decay = args.l2_decay
        self.momentum_rate = args.momentum_rate
        self.step_epochs = args.step_epochs
        self.num_epochs = args.num_epochs

        self.total_images = args.total_images
        self.step = int(math.ceil(float(self.total_images) / self.batch_size))

    def _momentum(self, learning_rate):
        """Return a Momentum optimizer with this instance's momentum and L2 decay."""
        return fluid.optimizer.Momentum(
            learning_rate=learning_rate,
            momentum=self.momentum_rate,
            regularization=fluid.regularizer.L2Decay(self.l2_decay))

    def piecewise_decay(self):
        """piecewise decay with Momentum optimizer

        The rate is multiplied by 0.1 at each epoch listed in ``step_epochs``.

        Returns:
            a piecewise_decay optimizer
        """
        bd = [self.step * e for e in self.step_epochs]
        lr = [self.lr * (0.1**i) for i in range(len(bd) + 1)]
        learning_rate = fluid.layers.piecewise_decay(boundaries=bd, values=lr)
        return self._momentum(learning_rate)

    def cosine_decay(self):
        """cosine decay with Momentum optimizer

        Returns:
            a cosine_decay optimizer
        """
        learning_rate = fluid.layers.cosine_decay(
            learning_rate=self.lr,
            step_each_epoch=self.step,
            epochs=self.num_epochs)
        return self._momentum(learning_rate)

    def cosine_decay_warmup(self):
        """cosine decay with warmup, with Momentum optimizer

        Returns:
            a cosine_decay_with_warmup optimizer
        """
        learning_rate = cosine_decay_with_warmup(
            learning_rate=self.lr,
            step_each_epoch=self.step,
            epochs=self.num_epochs)
        return self._momentum(learning_rate)

    def linear_decay(self):
        """linear decay with Momentum optimizer

        The rate falls linearly from ``lr`` to 0 over the whole training run.

        Returns:
            a linear_decay optimizer
        """
        end_lr = 0
        # decay_steps must cover the entire run. Passing only the steps of a
        # single epoch (self.step) drives the rate to end_lr after the first
        # epoch, where it then stays because polynomial_decay is not cycled.
        total_steps = self.step * self.num_epochs
        learning_rate = fluid.layers.polynomial_decay(
            self.lr, total_steps, end_lr, power=1)
        return self._momentum(learning_rate)

    def adam_decay(self):
        """Adam optimizer with a constant learning rate

        Returns:
            an adam_decay optimizer
        """
        return fluid.optimizer.Adam(learning_rate=self.lr)

    def cosine_decay_RMSProp(self):
        """cosine decay with RMSProp optimizer

        Returns:
            a cosine_decay_RMSProp optimizer
        """
        learning_rate = fluid.layers.cosine_decay(
            learning_rate=self.lr,
            step_each_epoch=self.step,
            epochs=self.num_epochs)
        optimizer = fluid.optimizer.RMSProp(
            learning_rate=learning_rate,
            momentum=self.momentum_rate,
            regularization=fluid.regularizer.L2Decay(self.l2_decay),
            # Apply epsilon=1 on ImageNet dataset.
            epsilon=1)
        return optimizer

    def default_decay(self):
        """Momentum optimizer with a constant learning rate

        Returns:
            default decay optimizer
        """
        return self._momentum(self.lr)


def create_optimizer(args):
    """Build the optimizer selected by ``args.lr_strategy``.

    Args:
        args: parsed arguments; must provide every field read by
            ``Optimizer.__init__``. ``args.lr_strategy`` names a public
            method of ``Optimizer``.

    Returns:
        The optimizer returned by the selected strategy method.

    Raises:
        AttributeError: if ``args.lr_strategy`` does not name a public,
            callable strategy of ``Optimizer``. The message lists the
            supported names.
    """
    opt = Optimizer(args)
    name = args.lr_strategy
    builder = getattr(opt, name, None)
    # Reject private helpers and plain data attributes (e.g. "step", "lr"),
    # which would otherwise fail later with an unrelated-looking error.
    if builder is None or name.startswith("_") or not callable(builder):
        supported = sorted(
            attr for attr in dir(opt)
            if not attr.startswith("_") and callable(getattr(opt, attr)))
        raise AttributeError(
            "Unsupported lr_strategy %r; expected one of: %s" %
            (name, ", ".join(supported)))

    return builder()
+10 brambling, Fringilla montifringilla +11 goldfinch, Carduelis carduelis +12 house finch, linnet, Carpodacus mexicanus +13 junco, snowbird +14 indigo bunting, indigo finch, indigo bird, Passerina cyanea +15 robin, American robin, Turdus migratorius +16 bulbul +17 jay +18 magpie +19 chickadee +20 water ouzel, dipper +21 kite +22 bald eagle, American eagle, Haliaeetus leucocephalus +23 vulture +24 great grey owl, great gray owl, Strix nebulosa +25 European fire salamander, Salamandra salamandra +26 common newt, Triturus vulgaris +27 eft +28 spotted salamander, Ambystoma maculatum +29 axolotl, mud puppy, Ambystoma mexicanum +30 bullfrog, Rana catesbeiana +31 tree frog, tree-frog +32 tailed frog, bell toad, ribbed toad, tailed toad, Ascaphus trui +33 loggerhead, loggerhead turtle, Caretta caretta +34 leatherback turtle, leatherback, leathery turtle, Dermochelys coriacea +35 mud turtle +36 terrapin +37 box turtle, box tortoise +38 banded gecko +39 common iguana, iguana, Iguana iguana +40 American chameleon, anole, Anolis carolinensis +41 whiptail, whiptail lizard +42 agama +43 frilled lizard, Chlamydosaurus kingi +44 alligator lizard +45 Gila monster, Heloderma suspectum +46 green lizard, Lacerta viridis +47 African chameleon, Chamaeleo chamaeleon +48 Komodo dragon, Komodo lizard, dragon lizard, giant lizard, Varanus komodoensis +49 African crocodile, Nile crocodile, Crocodylus niloticus +50 American alligator, Alligator mississipiensis +51 triceratops +52 thunder snake, worm snake, Carphophis amoenus +53 ringneck snake, ring-necked snake, ring snake +54 hognose snake, puff adder, sand viper +55 green snake, grass snake +56 king snake, kingsnake +57 garter snake, grass snake +58 water snake +59 vine snake +60 night snake, Hypsiglena torquata +61 boa constrictor, Constrictor constrictor +62 rock python, rock snake, Python sebae +63 Indian cobra, Naja naja +64 green mamba +65 sea snake +66 horned viper, cerastes, sand viper, horned asp, Cerastes cornutus +67 
diamondback, diamondback rattlesnake, Crotalus adamanteus +68 sidewinder, horned rattlesnake, Crotalus cerastes +69 trilobite +70 harvestman, daddy longlegs, Phalangium opilio +71 scorpion +72 black and gold garden spider, Argiope aurantia +73 barn spider, Araneus cavaticus +74 garden spider, Aranea diademata +75 black widow, Latrodectus mactans +76 tarantula +77 wolf spider, hunting spider +78 tick +79 centipede +80 black grouse +81 ptarmigan +82 ruffed grouse, partridge, Bonasa umbellus +83 prairie chicken, prairie grouse, prairie fowl +84 peacock +85 quail +86 partridge +87 African grey, African gray, Psittacus erithacus +88 macaw +89 sulphur-crested cockatoo, Kakatoe galerita, Cacatua galerita +90 lorikeet +91 coucal +92 bee eater +93 hornbill +94 hummingbird +95 jacamar +96 toucan +97 drake +98 red-breasted merganser, Mergus serrator +99 goose +100 black swan, Cygnus atratus +101 tusker +102 echidna, spiny anteater, anteater +103 platypus, duckbill, duckbilled platypus, duck-billed platypus, Ornithorhynchus anatinus +104 wallaby, brush kangaroo +105 koala, koala bear, kangaroo bear, native bear, Phascolarctos cinereus +106 wombat +107 jellyfish +108 sea anemone, anemone +109 brain coral +110 flatworm, platyhelminth +111 nematode, nematode worm, roundworm +112 conch +113 snail +114 slug +115 sea slug, nudibranch +116 chiton, coat-of-mail shell, sea cradle, polyplacophore +117 chambered nautilus, pearly nautilus, nautilus +118 Dungeness crab, Cancer magister +119 rock crab, Cancer irroratus +120 fiddler crab +121 king crab, Alaska crab, Alaskan king crab, Alaska king crab, Paralithodes camtschatica +122 American lobster, Northern lobster, Maine lobster, Homarus americanus +123 spiny lobster, langouste, rock lobster, crawfish, crayfish, sea crawfish +124 crayfish, crawfish, crawdad, crawdaddy +125 hermit crab +126 isopod +127 white stork, Ciconia ciconia +128 black stork, Ciconia nigra +129 spoonbill +130 flamingo +131 little blue heron, Egretta caerulea +132 
American egret, great white heron, Egretta albus +133 bittern +134 crane +135 limpkin, Aramus pictus +136 European gallinule, Porphyrio porphyrio +137 American coot, marsh hen, mud hen, water hen, Fulica americana +138 bustard +139 ruddy turnstone, Arenaria interpres +140 red-backed sandpiper, dunlin, Erolia alpina +141 redshank, Tringa totanus +142 dowitcher +143 oystercatcher, oyster catcher +144 pelican +145 king penguin, Aptenodytes patagonica +146 albatross, mollymawk +147 grey whale, gray whale, devilfish, Eschrichtius gibbosus, Eschrichtius robustus +148 killer whale, killer, orca, grampus, sea wolf, Orcinus orca +149 dugong, Dugong dugon +150 sea lion +151 Chihuahua +152 Japanese spaniel +153 Maltese dog, Maltese terrier, Maltese +154 Pekinese, Pekingese, Peke +155 Shih-Tzu +156 Blenheim spaniel +157 papillon +158 toy terrier +159 Rhodesian ridgeback +160 Afghan hound, Afghan +161 basset, basset hound +162 beagle +163 bloodhound, sleuthhound +164 bluetick +165 black-and-tan coonhound +166 Walker hound, Walker foxhound +167 English foxhound +168 redbone +169 borzoi, Russian wolfhound +170 Irish wolfhound +171 Italian greyhound +172 whippet +173 Ibizan hound, Ibizan Podenco +174 Norwegian elkhound, elkhound +175 otterhound, otter hound +176 Saluki, gazelle hound +177 Scottish deerhound, deerhound +178 Weimaraner +179 Staffordshire bullterrier, Staffordshire bull terrier +180 American Staffordshire terrier, Staffordshire terrier, American pit bull terrier, pit bull terrier +181 Bedlington terrier +182 Border terrier +183 Kerry blue terrier +184 Irish terrier +185 Norfolk terrier +186 Norwich terrier +187 Yorkshire terrier +188 wire-haired fox terrier +189 Lakeland terrier +190 Sealyham terrier, Sealyham +191 Airedale, Airedale terrier +192 cairn, cairn terrier +193 Australian terrier +194 Dandie Dinmont, Dandie Dinmont terrier +195 Boston bull, Boston terrier +196 miniature schnauzer +197 giant schnauzer +198 standard schnauzer +199 Scotch terrier, Scottish 
terrier, Scottie +200 Tibetan terrier, chrysanthemum dog +201 silky terrier, Sydney silky +202 soft-coated wheaten terrier +203 West Highland white terrier +204 Lhasa, Lhasa apso +205 flat-coated retriever +206 curly-coated retriever +207 golden retriever +208 Labrador retriever +209 Chesapeake Bay retriever +210 German short-haired pointer +211 vizsla, Hungarian pointer +212 English setter +213 Irish setter, red setter +214 Gordon setter +215 Brittany spaniel +216 clumber, clumber spaniel +217 English springer, English springer spaniel +218 Welsh springer spaniel +219 cocker spaniel, English cocker spaniel, cocker +220 Sussex spaniel +221 Irish water spaniel +222 kuvasz +223 schipperke +224 groenendael +225 malinois +226 briard +227 kelpie +228 komondor +229 Old English sheepdog, bobtail +230 Shetland sheepdog, Shetland sheep dog, Shetland +231 collie +232 Border collie +233 Bouvier des Flandres, Bouviers des Flandres +234 Rottweiler +235 German shepherd, German shepherd dog, German police dog, alsatian +236 Doberman, Doberman pinscher +237 miniature pinscher +238 Greater Swiss Mountain dog +239 Bernese mountain dog +240 Appenzeller +241 EntleBucher +242 boxer +243 bull mastiff +244 Tibetan mastiff +245 French bulldog +246 Great Dane +247 Saint Bernard, St Bernard +248 Eskimo dog, husky +249 malamute, malemute, Alaskan malamute +250 Siberian husky +251 dalmatian, coach dog, carriage dog +252 affenpinscher, monkey pinscher, monkey dog +253 basenji +254 pug, pug-dog +255 Leonberg +256 Newfoundland, Newfoundland dog +257 Great Pyrenees +258 Samoyed, Samoyede +259 Pomeranian +260 chow, chow chow +261 keeshond +262 Brabancon griffon +263 Pembroke, Pembroke Welsh corgi +264 Cardigan, Cardigan Welsh corgi +265 toy poodle +266 miniature poodle +267 standard poodle +268 Mexican hairless +269 timber wolf, grey wolf, gray wolf, Canis lupus +270 white wolf, Arctic wolf, Canis lupus tundrarum +271 red wolf, maned wolf, Canis rufus, Canis niger +272 coyote, prairie wolf, brush 
wolf, Canis latrans +273 dingo, warrigal, warragal, Canis dingo +274 dhole, Cuon alpinus +275 African hunting dog, hyena dog, Cape hunting dog, Lycaon pictus +276 hyena, hyaena +277 red fox, Vulpes vulpes +278 kit fox, Vulpes macrotis +279 Arctic fox, white fox, Alopex lagopus +280 grey fox, gray fox, Urocyon cinereoargenteus +281 tabby, tabby cat +282 tiger cat +283 Persian cat +284 Siamese cat, Siamese +285 Egyptian cat +286 cougar, puma, catamount, mountain lion, painter, panther, Felis concolor +287 lynx, catamount +288 leopard, Panthera pardus +289 snow leopard, ounce, Panthera uncia +290 jaguar, panther, Panthera onca, Felis onca +291 lion, king of beasts, Panthera leo +292 tiger, Panthera tigris +293 cheetah, chetah, Acinonyx jubatus +294 brown bear, bruin, Ursus arctos +295 American black bear, black bear, Ursus americanus, Euarctos americanus +296 ice bear, polar bear, Ursus Maritimus, Thalarctos maritimus +297 sloth bear, Melursus ursinus, Ursus ursinus +298 mongoose +299 meerkat, mierkat +300 tiger beetle +301 ladybug, ladybeetle, lady beetle, ladybird, ladybird beetle +302 ground beetle, carabid beetle +303 long-horned beetle, longicorn, longicorn beetle +304 leaf beetle, chrysomelid +305 dung beetle +306 rhinoceros beetle +307 weevil +308 fly +309 bee +310 ant, emmet, pismire +311 grasshopper, hopper +312 cricket +313 walking stick, walkingstick, stick insect +314 cockroach, roach +315 mantis, mantid +316 cicada, cicala +317 leafhopper +318 lacewing, lacewing fly +319 dragonfly, darning needle, devil's darning needle, sewing needle, snake feeder, snake doctor, mosquito hawk, skeeter hawk +320 damselfly +321 admiral +322 ringlet, ringlet butterfly +323 monarch, monarch butterfly, milkweed butterfly, Danaus plexippus +324 cabbage butterfly +325 sulphur butterfly, sulfur butterfly +326 lycaenid, lycaenid butterfly +327 starfish, sea star +328 sea urchin +329 sea cucumber, holothurian +330 wood rabbit, cottontail, cottontail rabbit +331 hare +332 Angora, 
Angora rabbit +333 hamster +334 porcupine, hedgehog +335 fox squirrel, eastern fox squirrel, Sciurus niger +336 marmot +337 beaver +338 guinea pig, Cavia cobaya +339 sorrel +340 zebra +341 hog, pig, grunter, squealer, Sus scrofa +342 wild boar, boar, Sus scrofa +343 warthog +344 hippopotamus, hippo, river horse, Hippopotamus amphibius +345 ox +346 water buffalo, water ox, Asiatic buffalo, Bubalus bubalis +347 bison +348 ram, tup +349 bighorn, bighorn sheep, cimarron, Rocky Mountain bighorn, Rocky Mountain sheep, Ovis canadensis +350 ibex, Capra ibex +351 hartebeest +352 impala, Aepyceros melampus +353 gazelle +354 Arabian camel, dromedary, Camelus dromedarius +355 llama +356 weasel +357 mink +358 polecat, fitch, foulmart, foumart, Mustela putorius +359 black-footed ferret, ferret, Mustela nigripes +360 otter +361 skunk, polecat, wood pussy +362 badger +363 armadillo +364 three-toed sloth, ai, Bradypus tridactylus +365 orangutan, orang, orangutang, Pongo pygmaeus +366 gorilla, Gorilla gorilla +367 chimpanzee, chimp, Pan troglodytes +368 gibbon, Hylobates lar +369 siamang, Hylobates syndactylus, Symphalangus syndactylus +370 guenon, guenon monkey +371 patas, hussar monkey, Erythrocebus patas +372 baboon +373 macaque +374 langur +375 colobus, colobus monkey +376 proboscis monkey, Nasalis larvatus +377 marmoset +378 capuchin, ringtail, Cebus capucinus +379 howler monkey, howler +380 titi, titi monkey +381 spider monkey, Ateles geoffroyi +382 squirrel monkey, Saimiri sciureus +383 Madagascar cat, ring-tailed lemur, Lemur catta +384 indri, indris, Indri indri, Indri brevicaudatus +385 Indian elephant, Elephas maximus +386 African elephant, Loxodonta africana +387 lesser panda, red panda, panda, bear cat, cat bear, Ailurus fulgens +388 giant panda, panda, panda bear, coon bear, Ailuropoda melanoleuca +389 barracouta, snoek +390 eel +391 coho, cohoe, coho salmon, blue jack, silver salmon, Oncorhynchus kisutch +392 rock beauty, Holocanthus tricolor +393 anemone fish +394 
sturgeon +395 gar, garfish, garpike, billfish, Lepisosteus osseus +396 lionfish +397 puffer, pufferfish, blowfish, globefish +398 abacus +399 abaya +400 academic gown, academic robe, judge's robe +401 accordion, piano accordion, squeeze box +402 acoustic guitar +403 aircraft carrier, carrier, flattop, attack aircraft carrier +404 airliner +405 airship, dirigible +406 altar +407 ambulance +408 amphibian, amphibious vehicle +409 analog clock +410 apiary, bee house +411 apron +412 ashcan, trash can, garbage can, wastebin, ash bin, ash-bin, ashbin, dustbin, trash barrel, trash bin +413 assault rifle, assault gun +414 backpack, back pack, knapsack, packsack, rucksack, haversack +415 bakery, bakeshop, bakehouse +416 balance beam, beam +417 balloon +418 ballpoint, ballpoint pen, ballpen, Biro +419 Band Aid +420 banjo +421 bannister, banister, balustrade, balusters, handrail +422 barbell +423 barber chair +424 barbershop +425 barn +426 barometer +427 barrel, cask +428 barrow, garden cart, lawn cart, wheelbarrow +429 baseball +430 basketball +431 bassinet +432 bassoon +433 bathing cap, swimming cap +434 bath towel +435 bathtub, bathing tub, bath, tub +436 beach wagon, station wagon, wagon, estate car, beach waggon, station waggon, waggon +437 beacon, lighthouse, beacon light, pharos +438 beaker +439 bearskin, busby, shako +440 beer bottle +441 beer glass +442 bell cote, bell cot +443 bib +444 bicycle-built-for-two, tandem bicycle, tandem +445 bikini, two-piece +446 binder, ring-binder +447 binoculars, field glasses, opera glasses +448 birdhouse +449 boathouse +450 bobsled, bobsleigh, bob +451 bolo tie, bolo, bola tie, bola +452 bonnet, poke bonnet +453 bookcase +454 bookshop, bookstore, bookstall +455 bottlecap +456 bow +457 bow tie, bow-tie, bowtie +458 brass, memorial tablet, plaque +459 brassiere, bra, bandeau +460 breakwater, groin, groyne, mole, bulwark, seawall, jetty +461 breastplate, aegis, egis +462 broom +463 bucket, pail +464 buckle +465 bulletproof vest +466 
bullet train, bullet +467 butcher shop, meat market +468 cab, hack, taxi, taxicab +469 caldron, cauldron +470 candle, taper, wax light +471 cannon +472 canoe +473 can opener, tin opener +474 cardigan +475 car mirror +476 carousel, carrousel, merry-go-round, roundabout, whirligig +477 carpenter's kit, tool kit +478 carton +479 car wheel +480 cash machine, cash dispenser, automated teller machine, automatic teller machine, automated teller, automatic teller, ATM +481 cassette +482 cassette player +483 castle +484 catamaran +485 CD player +486 cello, violoncello +487 cellular telephone, cellular phone, cellphone, cell, mobile phone +488 chain +489 chainlink fence +490 chain mail, ring mail, mail, chain armor, chain armour, ring armor, ring armour +491 chain saw, chainsaw +492 chest +493 chiffonier, commode +494 chime, bell, gong +495 china cabinet, china closet +496 Christmas stocking +497 church, church building +498 cinema, movie theater, movie theatre, movie house, picture palace +499 cleaver, meat cleaver, chopper +500 cliff dwelling +501 cloak +502 clog, geta, patten, sabot +503 cocktail shaker +504 coffee mug +505 coffeepot +506 coil, spiral, volute, whorl, helix +507 combination lock +508 computer keyboard, keypad +509 confectionery, confectionary, candy store +510 container ship, containership, container vessel +511 convertible +512 corkscrew, bottle screw +513 cornet, horn, trumpet, trump +514 cowboy boot +515 cowboy hat, ten-gallon hat +516 cradle +517 crane +518 crash helmet +519 crate +520 crib, cot +521 Crock Pot +522 croquet ball +523 crutch +524 cuirass +525 dam, dike, dyke +526 desk +527 desktop computer +528 dial telephone, dial phone +529 diaper, nappy, napkin +530 digital clock +531 digital watch +532 dining table, board +533 dishrag, dishcloth +534 dishwasher, dish washer, dishwashing machine +535 disk brake, disc brake +536 dock, dockage, docking facility +537 dogsled, dog sled, dog sleigh +538 dome +539 doormat, welcome mat +540 drilling 
platform, offshore rig +541 drum, membranophone, tympan +542 drumstick +543 dumbbell +544 Dutch oven +545 electric fan, blower +546 electric guitar +547 electric locomotive +548 entertainment center +549 envelope +550 espresso maker +551 face powder +552 feather boa, boa +553 file, file cabinet, filing cabinet +554 fireboat +555 fire engine, fire truck +556 fire screen, fireguard +557 flagpole, flagstaff +558 flute, transverse flute +559 folding chair +560 football helmet +561 forklift +562 fountain +563 fountain pen +564 four-poster +565 freight car +566 French horn, horn +567 frying pan, frypan, skillet +568 fur coat +569 garbage truck, dustcart +570 gasmask, respirator, gas helmet +571 gas pump, gasoline pump, petrol pump, island dispenser +572 goblet +573 go-kart +574 golf ball +575 golfcart, golf cart +576 gondola +577 gong, tam-tam +578 gown +579 grand piano, grand +580 greenhouse, nursery, glasshouse +581 grille, radiator grille +582 grocery store, grocery, food market, market +583 guillotine +584 hair slide +585 hair spray +586 half track +587 hammer +588 hamper +589 hand blower, blow dryer, blow drier, hair dryer, hair drier +590 hand-held computer, hand-held microcomputer +591 handkerchief, hankie, hanky, hankey +592 hard disc, hard disk, fixed disk +593 harmonica, mouth organ, harp, mouth harp +594 harp +595 harvester, reaper +596 hatchet +597 holster +598 home theater, home theatre +599 honeycomb +600 hook, claw +601 hoopskirt, crinoline +602 horizontal bar, high bar +603 horse cart, horse-cart +604 hourglass +605 iPod +606 iron, smoothing iron +607 jack-o'-lantern +608 jean, blue jean, denim +609 jeep, landrover +610 jersey, T-shirt, tee shirt +611 jigsaw puzzle +612 jinrikisha, ricksha, rickshaw +613 joystick +614 kimono +615 knee pad +616 knot +617 lab coat, laboratory coat +618 ladle +619 lampshade, lamp shade +620 laptop, laptop computer +621 lawn mower, mower +622 lens cap, lens cover +623 letter opener, paper knife, paperknife +624 library +625 
lifeboat +626 lighter, light, igniter, ignitor +627 limousine, limo +628 liner, ocean liner +629 lipstick, lip rouge +630 Loafer +631 lotion +632 loudspeaker, speaker, speaker unit, loudspeaker system, speaker system +633 loupe, jeweler's loupe +634 lumbermill, sawmill +635 magnetic compass +636 mailbag, postbag +637 mailbox, letter box +638 maillot +639 maillot, tank suit +640 manhole cover +641 maraca +642 marimba, xylophone +643 mask +644 matchstick +645 maypole +646 maze, labyrinth +647 measuring cup +648 medicine chest, medicine cabinet +649 megalith, megalithic structure +650 microphone, mike +651 microwave, microwave oven +652 military uniform +653 milk can +654 minibus +655 miniskirt, mini +656 minivan +657 missile +658 mitten +659 mixing bowl +660 mobile home, manufactured home +661 Model T +662 modem +663 monastery +664 monitor +665 moped +666 mortar +667 mortarboard +668 mosque +669 mosquito net +670 motor scooter, scooter +671 mountain bike, all-terrain bike, off-roader +672 mountain tent +673 mouse, computer mouse +674 mousetrap +675 moving van +676 muzzle +677 nail +678 neck brace +679 necklace +680 nipple +681 notebook, notebook computer +682 obelisk +683 oboe, hautboy, hautbois +684 ocarina, sweet potato +685 odometer, hodometer, mileometer, milometer +686 oil filter +687 organ, pipe organ +688 oscilloscope, scope, cathode-ray oscilloscope, CRO +689 overskirt +690 oxcart +691 oxygen mask +692 packet +693 paddle, boat paddle +694 paddlewheel, paddle wheel +695 padlock +696 paintbrush +697 pajama, pyjama, pj's, jammies +698 palace +699 panpipe, pandean pipe, syrinx +700 paper towel +701 parachute, chute +702 parallel bars, bars +703 park bench +704 parking meter +705 passenger car, coach, carriage +706 patio, terrace +707 pay-phone, pay-station +708 pedestal, plinth, footstall +709 pencil box, pencil case +710 pencil sharpener +711 perfume, essence +712 Petri dish +713 photocopier +714 pick, plectrum, plectron +715 pickelhaube +716 picket fence, 
paling +717 pickup, pickup truck +718 pier +719 piggy bank, penny bank +720 pill bottle +721 pillow +722 ping-pong ball +723 pinwheel +724 pirate, pirate ship +725 pitcher, ewer +726 plane, carpenter's plane, woodworking plane +727 planetarium +728 plastic bag +729 plate rack +730 plow, plough +731 plunger, plumber's helper +732 Polaroid camera, Polaroid Land camera +733 pole +734 police van, police wagon, paddy wagon, patrol wagon, wagon, black Maria +735 poncho +736 pool table, billiard table, snooker table +737 pop bottle, soda bottle +738 pot, flowerpot +739 potter's wheel +740 power drill +741 prayer rug, prayer mat +742 printer +743 prison, prison house +744 projectile, missile +745 projector +746 puck, hockey puck +747 punching bag, punch bag, punching ball, punchball +748 purse +749 quill, quill pen +750 quilt, comforter, comfort, puff +751 racer, race car, racing car +752 racket, racquet +753 radiator +754 radio, wireless +755 radio telescope, radio reflector +756 rain barrel +757 recreational vehicle, RV, R.V. 
+758 reel +759 reflex camera +760 refrigerator, icebox +761 remote control, remote +762 restaurant, eating house, eating place, eatery +763 revolver, six-gun, six-shooter +764 rifle +765 rocking chair, rocker +766 rotisserie +767 rubber eraser, rubber, pencil eraser +768 rugby ball +769 rule, ruler +770 running shoe +771 safe +772 safety pin +773 saltshaker, salt shaker +774 sandal +775 sarong +776 sax, saxophone +777 scabbard +778 scale, weighing machine +779 school bus +780 schooner +781 scoreboard +782 screen, CRT screen +783 screw +784 screwdriver +785 seat belt, seatbelt +786 sewing machine +787 shield, buckler +788 shoe shop, shoe-shop, shoe store +789 shoji +790 shopping basket +791 shopping cart +792 shovel +793 shower cap +794 shower curtain +795 ski +796 ski mask +797 sleeping bag +798 slide rule, slipstick +799 sliding door +800 slot, one-armed bandit +801 snorkel +802 snowmobile +803 snowplow, snowplough +804 soap dispenser +805 soccer ball +806 sock +807 solar dish, solar collector, solar furnace +808 sombrero +809 soup bowl +810 space bar +811 space heater +812 space shuttle +813 spatula +814 speedboat +815 spider web, spider's web +816 spindle +817 sports car, sport car +818 spotlight, spot +819 stage +820 steam locomotive +821 steel arch bridge +822 steel drum +823 stethoscope +824 stole +825 stone wall +826 stopwatch, stop watch +827 stove +828 strainer +829 streetcar, tram, tramcar, trolley, trolley car +830 stretcher +831 studio couch, day bed +832 stupa, tope +833 submarine, pigboat, sub, U-boat +834 suit, suit of clothes +835 sundial +836 sunglass +837 sunglasses, dark glasses, shades +838 sunscreen, sunblock, sun blocker +839 suspension bridge +840 swab, swob, mop +841 sweatshirt +842 swimming trunks, bathing trunks +843 swing +844 switch, electric switch, electrical switch +845 syringe +846 table lamp +847 tank, army tank, armored combat vehicle, armoured combat vehicle +848 tape player +849 teapot +850 teddy, teddy bear +851 television, 
television system +852 tennis ball +853 thatch, thatched roof +854 theater curtain, theatre curtain +855 thimble +856 thresher, thrasher, threshing machine +857 throne +858 tile roof +859 toaster +860 tobacco shop, tobacconist shop, tobacconist +861 toilet seat +862 torch +863 totem pole +864 tow truck, tow car, wrecker +865 toyshop +866 tractor +867 trailer truck, tractor trailer, trucking rig, rig, articulated lorry, semi +868 tray +869 trench coat +870 tricycle, trike, velocipede +871 trimaran +872 tripod +873 triumphal arch +874 trolleybus, trolley coach, trackless trolley +875 trombone +876 tub, vat +877 turnstile +878 typewriter keyboard +879 umbrella +880 unicycle, monocycle +881 upright, upright piano +882 vacuum, vacuum cleaner +883 vase +884 vault +885 velvet +886 vending machine +887 vestment +888 viaduct +889 violin, fiddle +890 volleyball +891 waffle iron +892 wall clock +893 wallet, billfold, notecase, pocketbook +894 wardrobe, closet, press +895 warplane, military plane +896 washbasin, handbasin, washbowl, lavabo, wash-hand basin +897 washer, automatic washer, washing machine +898 water bottle +899 water jug +900 water tower +901 whiskey jug +902 whistle +903 wig +904 window screen +905 window shade +906 Windsor tie +907 wine bottle +908 wing +909 wok +910 wooden spoon +911 wool, woolen, woollen +912 worm fence, snake fence, snake-rail fence, Virginia fence +913 wreck +914 yawl +915 yurt +916 web site, website, internet site, site +917 comic book +918 crossword puzzle, crossword +919 street sign +920 traffic light, traffic signal, stoplight +921 book jacket, dust cover, dust jacket, dust wrapper +922 menu +923 plate +924 guacamole +925 consomme +926 hot pot, hotpot +927 trifle +928 ice cream, icecream +929 ice lolly, lolly, lollipop, popsicle +930 French loaf +931 bagel, beigel +932 pretzel +933 cheeseburger +934 hotdog, hot dog, red hot +935 mashed potato +936 head cabbage +937 broccoli +938 cauliflower +939 zucchini, courgette +940 spaghetti squash 
+941 acorn squash +942 butternut squash +943 cucumber, cuke +944 artichoke, globe artichoke +945 bell pepper +946 cardoon +947 mushroom +948 Granny Smith +949 strawberry +950 orange +951 lemon +952 fig +953 pineapple, ananas +954 banana +955 jackfruit, jak, jack +956 custard apple +957 pomegranate +958 hay +959 carbonara +960 chocolate sauce, chocolate syrup +961 dough +962 meat loaf, meatloaf +963 pizza, pizza pie +964 potpie +965 burrito +966 red wine +967 espresso +968 cup +969 eggnog +970 alp +971 bubble +972 cliff, drop, drop-off +973 coral reef +974 geyser +975 lakeside, lakeshore +976 promontory, headland, head, foreland +977 sandbar, sand bar +978 seashore, coast, seacoast, sea-coast +979 valley, vale +980 volcano +981 ballplayer, baseball player +982 groom, bridegroom +983 scuba diver +984 rapeseed +985 daisy +986 yellow lady's slipper, yellow lady-slipper, Cypripedium calceolus, Cypripedium parviflorum +987 corn +988 acorn +989 hip, rose hip, rosehip +990 buckeye, horse chestnut, conker +991 coral fungus +992 agaric +993 gyromitra +994 stinkhorn, carrion fungus +995 earthstar +996 hen-of-the-woods, hen of the woods, Polyporus frondosus, Grifola frondosa +997 bolete +998 ear, spike, capitulum +999 toilet tissue, toilet paper, bathroom tissue diff --git a/PaddleCV/image_classification/utils/utility.py b/PaddleCV/image_classification/utils/utility.py index ad86f46b..24d203bd 100644 --- a/PaddleCV/image_classification/utils/utility.py +++ b/PaddleCV/image_classification/utils/utility.py @@ -1,5 +1,4 @@ -"""Contains common utility functions.""" -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. # #Licensed under the Apache License, Version 2.0 (the "License"); #you may not use this file except in compliance with the License. 
@@ -20,6 +19,17 @@ from __future__ import print_function import distutils.util import numpy as np import six +import argparse +import functools +import logging +import sys +import os +import warnings +import signal + +import paddle +import paddle.fluid as fluid + def print_arguments(args): """Print argparse's arguments. @@ -43,7 +53,7 @@ def print_arguments(args): def add_arguments(argname, type, default, help, argparser, **kwargs): - """Add argparse's argument. + """Add argparse's argument. Usage: @@ -61,20 +71,315 @@ def add_arguments(argname, type, default, help, argparser, **kwargs): help=help + ' Default: %(default)s.', **kwargs) -def check_gpu(use_gpu): + +def parse_args(): + """Add arguments + + Returns: + all training args """ + parser = argparse.ArgumentParser(description=__doc__) + add_arg = functools.partial(add_arguments, argparser=parser) + # yapf: disable + + # ENV + add_arg('use_gpu', bool, True, "Whether to use GPU.") + add_arg('model_save_dir', str, "./output", "The directory path to save model.") + add_arg('data_dir', str, "./data/ILSVRC2012/", "The ImageNet dataset root directory.") + add_arg('pretrained_model', str, None, "Whether to load pretrained model.") + add_arg('checkpoint', str, None, "Whether to resume checkpoint.") + add_arg('save_params', str, "./output", "Whether to save params.") + + add_arg('print_step', int, 10, "The steps interval to print logs") + add_arg('save_step', int, 100, "The steps interval to save checkpoints") + + # SOLVER AND HYPERPARAMETERS + add_arg('model', str, "ResNet50", "The name of network.") + add_arg('total_images', int, 1281167, "The number of total training images.") + add_arg('num_epochs', int, 120, "The number of total epochs.") + add_arg('class_dim', int, 1000, "The number of total classes.") + add_arg('image_shape', str, "3,224,224", "The size of Input image, order: [channels, height, weidth] ") + add_arg('batch_size', int, 8, "Minibatch size on a device.") + add_arg('test_batch_size', int, 16, "Test 
batch size on a deveice.") + add_arg('lr', float, 0.1, "The learning rate.") + add_arg('lr_strategy', str, "piecewise_decay", "The learning rate decay strategy.") + add_arg('l2_decay', float, 1e-4, "The l2_decay parameter.") + add_arg('momentum_rate', float, 0.9, "The value of momentum_rate.") + parser.add_argument('--step_epochs', nargs='+', type=int, default=[30, 60, 90], help="piecewise decay step") + # READER AND PREPROCESS + add_arg('lower_scale', float, 0.08, "The value of lower_scale in ramdom_crop") + add_arg('lower_ratio', float, 3./4., "The value of lower_ratio in ramdom_crop") + add_arg('upper_ratio', float, 4./3., "The value of upper_ratio in ramdom_crop") + add_arg('resize_short_size', int, 256, "The value of resize_short_size") + add_arg('crop_size', int, 224, "The value of crop size") + add_arg('use_mixup', bool, False, "Whether to use mixup") + add_arg('mixup_alpha', float, 0.2, "The value of mixup_alpha") + add_arg('reader_thread', int, 8, "The number of multi thread reader") + add_arg('reader_buf_size', int, 2048, "The buf size of multi thread reader") + add_arg('interpolation', int, None, "The interpolation mode") + parser.add_argument('--image_mean', nargs='+', type=float, default=[0.485, 0.456, 0.406], help="The mean of input image data") + parser.add_argument('--image_std', nargs='+', type=float, default=[0.229, 0.224, 0.225], help="The std of input image data") + + # SWITCH + add_arg('use_inplace', bool, True, "Whether to use inplace memory optimization.") + #NOTE: (2019/08/08) FP16 is moving to PaddlePaddle/Fleet now + #add_arg('use_fp16', bool, False, "Whether to enable half precision training with fp16." ) + #add_arg('scale_loss', float, 1.0, "The value of scale_loss for fp16." 
) + add_arg('use_label_smoothing', bool, False, "Whether to use label_smoothing") + add_arg('label_smoothing_epsilon', float, 0.2, "The value of label_smoothing_epsilon parameter") + #NOTE: (2019/08/08) temporary disable use_distill + #add_arg('use_distill', bool, False, "Whether to use distill") + add_arg('random_seed', int, None, "random seed") + # yapf: enable + + args = parser.parse_args() + + return args + + +def check_gpu(): + """ Log error and exit when set use_gpu=true in paddlepaddle - cpu version. + cpu ver sion. """ + logger = logging.getLogger(__name__) err = "Config use_gpu cannot be set as true while you are " \ - "using paddlepaddle cpu version ! \nPlease try: \n" \ - "\t1. Install paddlepaddle-gpu to run model on GPU \n" \ - "\t2. Set use_gpu as false in config file to run " \ - "model on CPU" + "using paddlepaddle cpu version ! \nPlease try: \n" \ + "\t1. Install paddlepaddle-gpu to run model on GPU \n" \ + "\t2. Set use_gpu as false in config file to run " \ + "model on CPU" try: - if use_gpu and not fluid.is_compiled_with_cuda(): - logger.error(err) + if args.use_gpu and not fluid.is_compiled_with_cuda(): + print(err) sys.exit(1) except Exception as e: pass + + +def check_args(args): + """check arguments before running + + Args: + all arguments + """ + + # check models name + sys.path.append("..") + import models + model_list = [m for m in dir(models) if "__" not in m] + assert args.model in model_list, "{} is not in lists: {}, please check the model name".format( + args.model, model_list) + + # check learning rate strategy + lr_strategy_list = [ + "piecewise_decay", "cosine_decay", "linear_decay", "cosine_decay_warmup" + ] + if args.lr_strategy not in lr_strategy_list: + warnings.warn( + "\n{} is not in lists: {}, \nUse default learning strategy now.". 
+ format(args.lr_strategy, lr_strategy_list)) + args.lr_strategy = "default_decay" + # check confict of GoogLeNet and mixup + if args.model == "GoogLeNet": + assert args.use_mixup == False, "Cannot use mixup processing in GoogLeNet, please set use_mixup = False." + + if args.interpolation: + assert args.interpolation in [ + 0, 1, 2, 3, 4 + ], "Wrong interpolation, please set:\n0: cv2.INTER_NEAREST\n1: cv2.INTER_LINEAR\n2: cv2.INTER_CUBIC\n3: cv2.INTER_AREA\n4: cv2.INTER_LANCZOS4" + + assert args.checkpoint is None or args.pretrained_model is None, "Do not init model by checkpoint and pretrained_model both." + + # check pretrained_model path for loading + if args.pretrained_model is not None: + assert isinstance(args.pretrained_model, str) + assert os.path.isdir( + args. + pretrained_model), "please support available pretrained_model path." + + #FIXME: check checkpoint path for saving + if args.checkpoint is not None: + assert isinstance(args.checkpoint, str) + assert os.path.isdir( + args.checkpoint + ), "please support available checkpoint path for initing model." + + # check params for loading + """ + if args.save_params: + assert isinstance(args.save_params, str) + assert os.path.isdir( + args.save_params), "please support available save_params path." 
+ """ + + # check gpu: when using gpu, the number of visible cards should divide batch size + if args.use_gpu: + assert args.batch_size % fluid.core.get_cuda_device_count( + ) == 0, "please support correct batch_size({}), which can be divided by available cards({}), you can change the number of cards by indicating: export CUDA_VISIBLE_DEVICES= ".format( + args.batch_size, fluid.core.get_cuda_device_count()) + + # check data directory + assert os.path.isdir( + args.data_dir + ), "Data doesn't exist in {}, please load right path".format(args.data_dir) + + #check gpu + + check_gpu() + + +def init_model(exe, args, program): + if args.checkpoint: + fluid.io.load_persistables(exe, args.checkpoint, main_program=program) + print("Finish initing model from %s" % (args.checkpoint)) + + if args.pretrained_model: + + def if_exist(var): + return os.path.exists(os.path.join(args.pretrained_model, var.name)) + + fluid.io.load_vars( + exe, + args.pretrained_model, + main_program=program, + predicate=if_exist) + + +def save_model(args, exe, train_prog, info): + model_path = os.path.join(args.model_save_dir, args.model, str(info)) + if not os.path.isdir(model_path): + os.makedirs(model_path) + fluid.io.save_persistables(exe, model_path, main_program=train_prog) + print("Already save model in %s" % (model_path)) + + +def create_pyreader(is_train, args): + """create PyReader + + Usage: + Using mixup process in training, it will return 5 results, include py_reader, image, y_a(label), y_b(label) and lamda, or it will return 3 results, include py_reader, image, and label. 
+ + Args: + is_train: mode + args: arguments + + Returns: + py_reader and the input data of net, + """ + image_shape = [int(m) for m in args.image_shape.split(",")] + + feed_image = fluid.layers.data( + name="feed_image", shape=image_shape, dtype="float32", lod_level=0) + + feed_label = fluid.layers.data( + name="feed_label", shape=[1], dtype="int64", lod_level=0) + feed_y_a = fluid.layers.data( + name="feed_y_a", shape=[1], dtype="int64", lod_level=0) + + if is_train and args.use_mixup: + feed_y_b = fluid.layers.data( + name="feed_y_b", shape=[1], dtype="int64", lod_level=0) + feed_lam = fluid.layers.data( + name="feed_lam", shape=[1], dtype="float32", lod_level=0) + + py_reader = fluid.io.PyReader( + feed_list=[feed_image, feed_y_a, feed_y_b, feed_lam], + capacity=64, + use_double_buffer=True, + iterable=False) + return py_reader, [feed_image, feed_y_a, feed_y_b, feed_lam] + else: + py_reader = fluid.io.PyReader( + feed_list=[feed_image, feed_label], + capacity=64, + use_double_buffer=True, + iterable=False) + + return py_reader, [feed_image, feed_label] + + +def print_info(pass_id, batch_id, print_step, metrics, time_info, info_mode): + """print function + + Args: + pass_id: epoch index + batch_id: batch index + print_step: the print_step arguments + metrics: message to print + time_info: time infomation + info_mode: mode + """ + if info_mode == "batch": + if batch_id % print_step == 0: + #if isinstance(metrics,np.ndarray): + # train and mixup output + if len(metrics) == 2: + loss, lr = metrics + print( + "[Pass {0}, train batch {1}] \tloss {2}, lr {3}, elapse {4}". + format(pass_id, batch_id, "%.5f" % loss, "%.5f" % lr, + "%2.2f sec" % time_info)) + # train and no mixup output + elif len(metrics) == 4: + loss, acc1, acc5, lr = metrics + print( + "[Pass {0}, train batch {1}] \tloss {2}, acc1 {3}, acc5 {4}, lr {5}, elapse {6}". 
+ format(pass_id, batch_id, "%.5f" % loss, "%.5f" % acc1, + "%.5f" % acc5, "%.5f" % lr, "%2.2f sec" % time_info)) + # test output + elif len(metrics) == 3: + loss, acc1, acc5 = metrics + print( + "[Pass {0}, test batch {1}] \tloss {2}, acc1 {3}, acc5 {4}, elapse {5}". + format(pass_id, batch_id, "%.5f" % loss, "%.5f" % acc1, + "%.5f" % acc5, "%2.2f sec" % time_info)) + else: + raise Exception( + "length of metrics {} is not implemented, It maybe caused by wrong format of build_program_output". + format(len(metrics))) + sys.stdout.flush() + + elif info_mode == "epoch": + ## TODO add time elapse + #if isinstance(metrics,np.ndarray): + if len(metrics) == 5: + train_loss, _, test_loss, test_acc1, test_acc5 = metrics + print( + "[End pass {0}]\ttrain_loss {1}, test_loss {2}, test_acc1 {3}, test_acc5 {4}". + format(pass_id, "%.5f" % train_loss, "%.5f" % test_loss, "%.5f" + % test_acc1, "%.5f" % test_acc5)) + elif len(metrics) == 7: + train_loss, train_acc1, train_acc5, _, test_loss, test_acc1, test_acc5 = metrics + print( + "[End pass {0}]\ttrain_loss {1}, train_acc1 {2}, train_acc5 {3},test_loss {4}, test_acc1 {5}, test_acc5 {6}". 
+ format(pass_id, "%.5f" % train_loss, "%.5f" % train_acc1, "%.5f" + % train_acc5, "%.5f" % test_loss, "%.5f" % test_acc1, + "%.5f" % test_acc5)) + sys.stdout.flush() + elif info_mode == "ce": + raise Warning("CE code is not ready") + else: + raise Exception("Illegal info_mode") + + +def best_strategy_compiled(args, program, loss): + """make a program which wrapped by a compiled program + """ + + if os.getenv('FLAGS_use_ngraph'): + return program + else: + build_strategy = fluid.compiler.BuildStrategy() + build_strategy.enable_inplace = args.use_inplace + + exec_strategy = fluid.ExecutionStrategy() + exec_strategy.num_threads = fluid.core.get_cuda_device_count() + exec_strategy.num_iteration_per_drop_scope = 10 + + compiled_program = fluid.CompiledProgram(program).with_data_parallel( + loss_name=loss.name, + build_strategy=build_strategy, + exec_strategy=exec_strategy) + + return compiled_program -- GitLab