From 693ab9b80ee57a5a516cc921590d58abc08ae0f8 Mon Sep 17 00:00:00 2001 From: jerrywgz Date: Fri, 19 Apr 2019 15:55:26 +0800 Subject: [PATCH] add more config for lrc (#7) * add more config for lrc * refine code style and add README --- .pre-commit-config.yaml | 27 +++ .style.yapf | 3 + .travis/precommit.sh | 21 +++ LRC/README.md | 64 +++++-- LRC/README_cn.md | 69 ++++--- LRC/genotypes.py | 32 +++- LRC/labels.npz | Bin 0 -> 80244 bytes LRC/learning_rate.py | 37 +++- LRC/model.py | 282 +++++++++++++++++++++++++---- LRC/operations.py | 3 +- LRC/paddle_predict/__init__.py | 0 LRC/{reader.py => reader_cifar.py} | 31 +++- LRC/run.sh | 8 - LRC/run_cifar.sh | 11 ++ LRC/run_cifar_test.sh | 8 + LRC/test_mixup.py | 140 ++++++++++++++ LRC/train_mixup.py | 173 ++++++++---------- LRC/voting.py | 18 ++ README.md | 2 +- 19 files changed, 737 insertions(+), 192 deletions(-) create mode 100644 .pre-commit-config.yaml create mode 100644 .style.yapf create mode 100644 .travis/precommit.sh create mode 100644 LRC/labels.npz create mode 100644 LRC/paddle_predict/__init__.py rename LRC/{reader.py => reader_cifar.py} (85%) delete mode 100644 LRC/run.sh create mode 100644 LRC/run_cifar.sh create mode 100644 LRC/run_cifar_test.sh create mode 100644 LRC/test_mixup.py create mode 100644 LRC/voting.py diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml new file mode 100644 index 0000000..4102b69 --- /dev/null +++ b/.pre-commit-config.yaml @@ -0,0 +1,27 @@ +- repo: https://github.com/PaddlePaddle/mirrors-yapf.git + sha: 0d79c0c469bab64f7229c9aca2b1186ef47f0e37 + hooks: + - id: yapf + files: \.py$ +- repo: https://github.com/pre-commit/pre-commit-hooks + sha: a11d9314b22d8f8c7556443875b731ef05965464 + hooks: + - id: check-merge-conflict + - id: check-symlinks + - id: detect-private-key + files: (?!.*paddle)^.*$ + - id: end-of-file-fixer + files: \.md$ + - id: trailing-whitespace + files: \.md$ +- repo: https://github.com/Lucas-C/pre-commit-hooks + sha: v1.0.1 + hooks: + - id: forbid-crlf + files: \.md$ + - id: remove-crlf + files: \.md$ + - id: forbid-tabs + files: \.md$ + - id: remove-tabs + files: \.md$ diff --git a/.style.yapf b/.style.yapf new file mode 100644 index 0000000..4741fb4 --- /dev/null +++ b/.style.yapf @@ -0,0 +1,3 @@ +[style] +based_on_style = pep8 +column_limit = 80 diff --git a/.travis/precommit.sh b/.travis/precommit.sh new file mode 100644 index 0000000..369fa51 --- /dev/null +++ b/.travis/precommit.sh @@ -0,0 +1,21 @@ +#!/bin/bash +function abort(){ + echo "Your commit does not fit PaddlePaddle code style" 1>&2 + echo "Please use pre-commit scripts to auto-format your code" 1>&2 + exit 1 +} + +trap 'abort' 0 +set -e +cd `dirname $0` +cd .. +export PATH=/usr/bin:$PATH +pre-commit install + +if ! pre-commit run -a ; then + ls -lh + git diff --exit-code + exit 1 +fi + +trap : 0 diff --git a/LRC/README.md b/LRC/README.md index df9af47..7955101 100644 --- a/LRC/README.md +++ b/LRC/README.md @@ -1,5 +1,5 @@ # LRC Local Rademachar Complexity Regularization -Regularization of Deep Neural Networks(DNNs) for the sake of improving their generalization capability is important and chllenging. This directory contains image classification model based on a novel regularizer rooted in Local Rademacher Complexity (LRC). We appreciate the contribution by [DARTS](https://arxiv.org/abs/1806.09055) for our research. The regularization by LRC and DARTS are combined in this model on CIFAR-10 dataset. 
Code accompanying the paper
+Regularization of Deep Neural Networks (DNNs) for the sake of improving their generalization capability is important and challenging. This directory contains an image classification model based on a novel regularizer rooted in Local Rademacher Complexity (LRC). We appreciate the contribution of [DARTS](https://arxiv.org/abs/1806.09055) to our research. The LRC regularization and DARTS are combined in this model to reach an accuracy of 98.01% on the CIFAR-10 dataset. Code accompanying the paper
 > [An Empirical Study on Regularization of Deep Neural Networks by Local Rademacher Complexity](https://arxiv.org/abs/1902.00873)\
 > Yingzhen Yang, Xingjian Li, Jun Huan.\
 > _arXiv:1902.00873_.
@@ -7,13 +7,21 @@ Regularization of Deep Neural Networks(DNNs) for the sake of improving their gen
 ---
 # Table of Contents
+- [Introduction of algorithm](#introduction-of-algorithm)
 - [Installation](#installation)
 - [Data preparation](#data-preparation)
 - [Training](#training)
+- [Testing](#testing)
+- [Experimental result](#experimental-result)
+- [Reference](#reference)
+
+## Introduction of algorithm
+
+Rademacher complexity is well known as a distribution-free complexity measure of a function class, while LRC focuses on a restricted function class, which leads to sharper convergence rates and potentially better generalization. Our LRC-based regularizer is developed by estimating the complexity of the function class centered at the minimizer of the empirical loss of DNNs.
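+
+A minimal NumPy sketch of the quantity computed by `lrc_loss` in `LRC/model.py` is given below. It is an illustration only: `lrc_penalty` is a hypothetical name, and the sketch assumes the random draws `rad` produced by the data reader take values +1/-1 (K = 50 draws per batch, matching the reader shapes in this directory).
+
+    import numpy as np
+
+    def lrc_penalty(logits, labels, rad):
+        """logits: [N, C] raw scores; labels: [N] int labels; rad: [K, N, C-1] random +1/-1 signs."""
+        n, c = logits.shape
+        y_true = logits[np.arange(n), labels][:, None]     # f_{y_i}(x_i), shape [N, 1]
+        mask = np.ones_like(logits, dtype=bool)
+        mask[np.arange(n), labels] = False
+        y_other = logits[mask].reshape(n, c - 1)           # f_j(x_i) for j != y_i, shape [N, C-1]
+        diff = y_other - y_true                            # margin differences, [N, C-1]
+        # For each of the K sign patterns, sum the signed differences over the
+        # batch and the C-1 non-label classes, normalize, then average the absolute values.
+        per_draw = np.abs((rad * diff).sum(axis=(1, 2))) / (n * (c - 1))
+        return per_draw.mean()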
 
 ## Installation
 
-Running sample code in this directory requires PaddelPaddle Fluid v.1.2.0 and later. If the PaddlePaddle on your device is lower than this version, please follow the instructions in [installation document](http://www.paddlepaddle.org/documentation/docs/zh/1.2/beginners_guide/install/index_cn.html#paddlepaddle) and make an update.
+Running the sample code in this directory requires PaddlePaddle Fluid v1.3.0 or later. If the PaddlePaddle version on your device is lower than this, please follow the instructions in the [installation document](http://www.paddlepaddle.org/documentation/docs/zh/1.3/beginners_guide/install/index_cn.html#paddlepaddle) to update it.
 
 ## Data preparation
 
@@ -30,13 +38,8 @@ The dataset will be downloaded to `dataset/cifar/cifar-10-batches-py` in the sam
 
 After data preparation, one can start the training step by:
 
-    python -u train_mixup.py \
-        --batch_size=80 \
-        --auxiliary \
-        --weight_decay=0.0003 \
-        --learning_rate=0.025 \
-        --lrc_loss_lambda=0.7 \
-        --cutout
+    sh run_cifar.sh
+
 - Set ```export CUDA_VISIBLE_DEVICES=0``` to specifiy one GPU to train.
 - For more help on arguments:
 
@@ -44,7 +47,7 @@ After data preparation, one can start the training step by:
 
 **data reader introduction:**
 
-* Data reader is defined in `reader.py`.
+* Data reader is defined in `reader_cifar.py`.
 * Reshape the images to 32 * 32.
 * In training stage, images are padding to 40 * 40 and cropped randomly to the original size.
 * In training stage, images are horizontally random flipped.
@@ -54,19 +57,40 @@ After data preparation, one can start the training step by:
 
 **model configuration:**
 
-* Use auxiliary loss and auxiliary\_weight=0.4.
-* Use dropout and drop\_path\_prob=0.2.
-* Set lrc\_loss\_lambda=0.7.
-
-**training strategy:**
-
 * Use momentum optimizer with momentum=0.9.
-* Weight decay is 0.0003.
-* Use cosine decay with init\_lr=0.025.
 * Total epoch is 600.
-* Use Xaiver initalizer to weight in conv2d, Constant initalizer to weight in batch norm and Normal initalizer to weight in fc.
-* Initalize bias in batch norm and fc to zero constant and do not add bias to conv2d.
+* Use global L2 norm to clip gradients.
+* Other configurations are set in `run_cifar.sh`.
+
+## Testing
+
+One can start the testing step by:
+
+    sh run_cifar_test.sh
+
+- Set ```export CUDA_VISIBLE_DEVICES=0``` to specify one GPU for testing.
+- For more help on arguments:
+
+    python test_mixup.py --help
+
+After obtaining the six models, one can get the ensembled model by:
+
+    python voting.py
+
+## Experimental result
+
+Experimental results are shown below:
+
+| Model | base lr | batch size | model id | acc-1 |
+| :--------------- | :--------: | :------------: | :------------------: |------: |
+| [model_0](https://paddlemodels.bj.bcebos.com/autodl/lrc_model_0.tar.gz) | 0.01 | 64 | 0 | 97.12% |
+| [model_1](https://paddlemodels.bj.bcebos.com/autodl/lrc_model_1.tar.gz) | 0.02 | 80 | 0 | 97.34% |
+| [model_2](https://paddlemodels.bj.bcebos.com/autodl/lrc_model_2.tar.gz) | 0.015 | 80 | 1 | 97.31% |
+| [model_3](https://paddlemodels.bj.bcebos.com/autodl/lrc_model_3.tar.gz) | 0.02 | 80 | 1 | 97.52% |
+| [model_4](https://paddlemodels.bj.bcebos.com/autodl/lrc_model_4.tar.gz) | 0.03 | 80 | 1 | 97.30% |
+| [model_5](https://paddlemodels.bj.bcebos.com/autodl/lrc_model_5.tar.gz) | 0.015 | 64 | 2 | 97.32% |
+
+The ensembled model reaches acc-1 = 98.01%.
 
 ## Reference
diff --git a/LRC/README_cn.md b/LRC/README_cn.md
index 06dc937..581560a 100644
--- a/LRC/README_cn.md
+++ b/LRC/README_cn.md
@@ -1,5 +1,5 @@
 # LRC 局部Rademachar复杂度正则化
-为了在深度神经网络中提升泛化能力,正则化的选择十分重要也具有挑战性。本目录包括了一种基于局部rademacher复杂度的新型正则(LRC)的图像分类模型。十分感谢[DARTS](https://arxiv.org/abs/1806.09055)模型对本研究提供的帮助。该模型将LRC正则和DARTS网络相结合,在CIFAR-10数据集中得到了很出色的效果。代码和文章一同发布
+为了在深度神经网络中提升泛化能力,正则化的选择十分重要也具有挑战性。本目录包括了一种基于局部rademacher复杂度的新型正则(LRC)的图像分类模型。十分感谢[DARTS](https://arxiv.org/abs/1806.09055)模型对本研究提供的帮助。该模型将LRC正则和DARTS网络相结合,在CIFAR-10数据集中得到了98.01%的准确率。代码和文章一同发布
 > [An Empirical Study on Regularization of Deep Neural Networks by Local Rademacher Complexity](https://arxiv.org/abs/1902.00873)\
 > Yingzhen Yang, Xingjian Li, Jun Huan.\
 > _arXiv:1902.00873_.
@@ -7,13 +7,21 @@ --- # 内容 +- [算法简介](#算法简介) - [安装](#安装) - [数据准备](#数据准备) - [模型训练](#模型训练) +- [模型测试](#模型测试) +- [实验结果](#实验结果) +- [引用](#引用) + +## 算法简介 + +局部拉德马赫复杂度方法借鉴了已有的局部拉德马赫复杂度方法,仅考虑在经验损失函数的极小值点附近的一个球内的拉德马赫复杂度。采用最近的拉德马赫复杂度的估计方法,对折页损失函数 (Hinge Loss) 和交叉熵(cross entropy)推得了这个固定值的表达式,并且将其称之为局部拉德马赫正则化项,并加在经验损失函数上。将正则化方法作用在混合和模型集成之后,得到了CIFAR-10上目前最好的准确率。 ## 安装 -在当前目录下运行样例代码需要PadddlePaddle Fluid的v.1.2.0或以上的版本。如果你的运行环境中的PaddlePaddle低于此版本,请根据[安装文档](http://www.paddlepaddle.org/documentation/docs/zh/1.2/beginners_guide/install/index_cn.html#paddlepaddle)中的说明来更新PaddlePaddle。 +在当前目录下运行样例代码需要PadddlePaddle Fluid的v.1.3.0或以上的版本。如果你的运行环境中的PaddlePaddle低于此版本,请根据[安装文档](http://www.paddlepaddle.org/documentation/docs/zh/1.3/beginners_guide/install/index_cn.html#paddlepaddle)中的说明来更新PaddlePaddle。 ## 数据准备 @@ -21,27 +29,22 @@ sh ./dataset/download.sh -请确保您的环境有互联网连接。数据会下载到`train.py`同目录下的`dataset/cifar/cifar-10-batches-py`。如果下载失败,您可以自行从https://www.cs.toronto.edu/~kriz/cifar.html上下载cifar-10-python.tar.gz并解压到上述位置。 +请确保您的环境有互联网连接。数据会下载到`train.py`同目录下的`dataset/cifar/cifar-10-batches-py`。如果下载失败,您可以自行从 https://www.cs.toronto.edu/~kriz/cifar.html 上下载cifar-10-python.tar.gz并解压到上述位置。 ## 模型训练 数据准备好后,可以通过如下命令开始训练: - python -u train_mixup.py \ - --batch_size=80 \ - --auxiliary \ - --weight_decay=0.0003 \ - --learning_rate=0.025 \ - --lrc_loss_lambda=0.7 \ - --cutout -- 通过设置 ```export CUDA_VISIBLE_DEVICES=0```指定单张GPU训练。 + sh run_cifar.sh + +- 在```run_cifar.sh```中通过设置 ```export CUDA_VISIBLE_DEVICES=0```指定GPU卡号进行训练。 - 可选参数见: python train_mixup.py --help **数据读取器说明:** -* 数据读取器定义在`reader.py`中 +* 数据读取器定义在`reader_cifar.py`中 * 输入图像尺寸统一变换为32 * 32 * 训练时将图像填充为40 * 40然后随机剪裁为原输入图像大小 * 训练时图像随机水平翻转 @@ -51,19 +54,41 @@ **模型配置:** -* 使用辅助损失,辅助损失权重为0.4 -* 使用dropout,随机丢弃率为0.2 -* 设置lrc\_loss\_lambda为0.7 - -**训练策略:** - * 采用momentum优化算法训练,momentum=0.9 -* 权重衰减系数为0.0001 -* 采用正弦学习率衰减,初始学习率为0.025 * 总共训练600轮 -* 对卷积权重采用Xaiver初始化,对batch norm权重采用固定初始化,对全连接层权重采用高斯初始化 -* 对batch norm和全连接层偏差采用固定初始化,不对卷积设置偏差 +* 对梯度采用全局L2范数裁剪 +* 其余模型配置在run_cifar.sh中 + +## 模型测试 + +可以通过如下命令开始测试: + + sh run_cifar_test.sh + +- 在```run_cifar_test.sh```中通过设置 ```export CUDA_VISIBLE_DEVICES=0```指定GPU卡号进行训练。 +- 可选参数见: + + python test_mixup.py --help + +得到六个模型后运行如下脚本得到融合模型: + + python voting.py + + +## 实验结果 + +下表为模型评估结果: + +| 模型 | 初始学习率 | 批量大小 | 模型编号 | acc-1 | +| :--------------- | :--------: | :------------: | :------------------: |------: | +| [model_0](https://paddlemodels.bj.bcebos.com/autodl/lrc_model_0.tar.gz) | 0.01 | 64 | 0 | 97.12% | +| [model_1](https://paddlemodels.bj.bcebos.com/autodl/lrc_model_1.tar.gz) | 0.02 | 80 | 0 | 97.34% | +| [model_2](https://paddlemodels.bj.bcebos.com/autodl/lrc_model_2.tar.gz) | 0.015 | 80 | 1 | 97.31% | +| [model_3](https://paddlemodels.bj.bcebos.com/autodl/lrc_model_3.tar.gz) | 0.02 | 80 | 1 | 97.52% | +| [model_4](https://paddlemodels.bj.bcebos.com/autodl/lrc_model_4.tar.gz) | 0.03 | 80 | 1 | 97.30% | +| [model_5](https://paddlemodels.bj.bcebos.com/autodl/lrc_model_5.tar.gz) | 0.015 | 64 | 2 | 97.32% | +融合模型acc-1=98.01% ## 引用 diff --git a/LRC/genotypes.py b/LRC/genotypes.py index 349fbd2..e2d0bc8 100644 --- a/LRC/genotypes.py +++ b/LRC/genotypes.py @@ -113,4 +113,34 @@ MY_DARTS = Genotype( ('skip_connect', 2), ('skip_connect', 3)], reduce_concat=range(2, 6)) -DARTS = MY_DARTS +MY_DARTS_list = [ + Genotype( + normal=[('sep_conv_3x3', 0), ('skip_connect', 1), ('sep_conv_3x3', 0), + ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('sep_conv_3x3', 1), + ('skip_connect', 0), ('sep_conv_3x3', 2)], + normal_concat=range(2, 6), + reduce=[('max_pool_3x3', 0), 
('max_pool_3x3', 1), ('skip_connect', 2), + ('max_pool_3x3', 0), ('skip_connect', 3), ('avg_pool_3x3', 1), + ('skip_connect', 2), ('skip_connect', 3)], + reduce_concat=range(2, 6)), + Genotype( + normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('skip_connect', 0), + ('dil_conv_3x3', 2), ('skip_connect', 0), ('sep_conv_3x3', 1), + ('skip_connect', 0), ('skip_connect', 1)], + normal_concat=range(2, 6), + reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('skip_connect', 2), + ('dil_conv_3x3', 0), ('skip_connect', 3), ('skip_connect', 2), + ('skip_connect', 3), ('skip_connect', 2)], + reduce_concat=range(2, 6)), + Genotype( + normal=[('sep_conv_3x3', 0), ('skip_connect', 1), ('skip_connect', 0), + ('dil_conv_5x5', 1), ('skip_connect', 0), ('sep_conv_3x3', 1), + ('skip_connect', 0), ('sep_conv_3x3', 1)], + normal_concat=range(2, 6), + reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('max_pool_3x3', 0), + ('skip_connect', 2), ('max_pool_3x3', 0), ('skip_connect', 2), + ('skip_connect', 2), ('skip_connect', 3)], + reduce_concat=range(2, 6)) +] + +DARTS = MY_DARTS_list[0] diff --git a/LRC/labels.npz b/LRC/labels.npz new file mode 100644 index 0000000000000000000000000000000000000000..78ea0197f5ea7dc95f168c139adce36635d07ee6 GIT binary patch literal 80244 zcmbW#&uW}?6vpv)npTbLTtK=Sg;3&3E3VxHZ8lxF3WJR(NE#+06cM}!x4p^UfivlR zHeWa-7$~Rboaf*9y>oeS^z>x;`1gD{{_yJk@^ZE8{~hm!o9*`M{Kd`fyMKRt{ln!? ztL4w-ul1YjyPvn~Z%^0X{r1iJ>~#Ih+wC9Q&CS)@?VIcU{QJ%Cch|f5-LIS5>s|lF zSLeIGvoFt1|33Ynf1e*6B7T;7XqQh8|50}*-2dtE%@6DQBlrJ*b{KqmsQcshfBd>P zr;qoMwJc8$|Dl8bEP+%`NF$TEf3z@opjwrPvr;S zo*rL5t$3Y#2lrQxuDQG_K7F~(<*p9Ax%=gy52xDe<-#|IGna=S9-Md4%ME8wpALNY z^4ohCIQnv#!`YWFkGu48sq)d$!>e>ue0lKXgtK?2N*5kZZ5}v2D!e>=>3bJ=I63$f zXV2Gr>*?^JZ%W_Kb*FbQr-w(6zde0>xyAaT%U=&)f4%Bm@a3@QFF##A^5EIi!-LoJbw9ky zmmWSI9DjIv@ci0Ocz3A$8S2*wWsesJw0>P9Io$gE~k9_%;nL$+g!EBr>Dwg@15nOV{fkKD|dT1dsQwu z_}J_5;aa^PJifc+!PC>lgNLIF*V=cW)4TNEm9Cuf)E5qqKD-_d-`+j=_Nx4J@Z2f4 zoba9F^Rb7w=c{jSdGY0;E0>FMFk<)SB#`{CV52d=#Ka^d6AF?Sa~I&wCT zeev|^>g7<~O&3mIoP2op&1Y{8CntT?9eTAo@~Loq=dI*^m;3SY^!9l2sc`mmdmn#zJ)U>RgLA)q z@$%7A-O;_=58?1sIrR9dyX7?JCpR5;;+f;Sf8x`J*W=N(_a1t>D!lvP?d8?;bC=#8 zzkcv={P6jB4?P~7+BqIw?;(eZ*PPYuUQgd0bnD+e{o?TD>fPr2^>V20<)g<_`N8S^ z4&=sH?d8X(3umw5@o9cN9XNayj*oY0<)`A~(NVphyZP(gsaNUq=c9KIJ|Dh(^zi2N z^mN?;PnQopJac+<@Kw4~El;n?;eF`xmy5nUaCGqXc=YMvyORz+p1JRZM^7F)_IRDs zp$|__kFT1$i*I$z@%Ym>hr^f4UbR>0!JDgeRd}^?y-LS-@;y{@{;Iuq>fAl5_mB&p zzd1ZTb2;SZ>-*r@^RuU;=c~tq)5Gz%=WpMN-^$PZ?vz)>hVDVqZ|`pSR(N^g&E?X=spcxa?}l$KKYs7?KIX;Kf#b`^ zefWIv;PK^zH>ank>z!NS=BvP-K79W6=J@o?;oV^_ zAG{vUyw%>F^yO6P;lsHPPL*Fi6(3G72VUpx<>8Mrz0mnbCr%gKb4*eC!bt=d@pnQ_|+|rj(qN-L!W>1z|pbC<0p?jAG&%v zdOSM(b|y7=~d@Kw3wH>ajxJyNa!qqQIJ)Nghxl;415a-6MIWBOd-Qnb@Y8+1r^=r%9dkH7Dt$P+#@J`rYHhRcGqm>$~9B$NR#& zM}=46RrBUFhodL2eJdSveEG|VSMj`~9v{x$-EjQ$s(0kC+S7HXo_>Ef=J4|Ik+1rA za4J5%`orPFHy6H22VPG{zUK9gt#X*tQ@yKPa=`P^x5}f+T^t{Jd{jC0^yI`d=cDK6 z4%J?-(rYg7EC+wJdT@Afe44X&^2bx*RC+2NeLVT>y^DP9>D#yR>zuy19Q@^h^WJdN zyP21#msihUPWtxda^ZKbms<|KTAcT!>rOa2a>)UQC$F9^936X=9=`XeAG|7`9-m(C zm8U#7zIwXy)8%6hhbMRU=H=t@_f9IFy&jK0KRJstuMQl3edXn+dQblJ;qB@1HHYI% zM-Qjk)8prRHWxoUx_DFd9xC1DHRs5tL~8(UT(Vb!rRl4cf!ll9DMQU(dTOp zFGqRsox{uNPI>I*cQhTKRh3M_qtDCyu9@7<-u|di;Jh2W{NBah98XSrdGO?f z-*{L`O0C=7q7bZa^lga z+xzL+)1{-Qua~bq9lCVj`Qyon$HyGb9duPZI2E6sIi5Up;pElJ3rClp3MY@>g-X|+ zp1Gc0ar*A%DIZRi!~N!}_mGF5yHq%SeJA>I;i>TWa@p(UfU{TSC|+-`;@Q)e(>vJ1 ztLAv-_}!bU^xfkf@Ko=|$DAJ>IQRKp{OIeeqvu195574(Ke*l_2Rxh{_H^kr2VHwQ zdVIQi?*qrr9L}6i=knvJbmf%`-X5M0oN8ZxJ$<=4_kGOuDxO?=zIu7|_;~j2u-AKk zJv@H>;Q86Rce;nJ_tndz^0k-CzJ73W!OK%Wdvm&St8(b!`N_eze%^-;9zA<`^mOr5 
z{`}~ubmVXszBzq+eEI0((N*p7RD5^H%a1=keS17rzV6NC;jhYp$Cs|TN(W94?+$t8 zub(}CI6ClpesC(k>foEpubSg`E}#4K@Ty#T?+wpKl^c(*JL$mTnbXCCGpDP{FBhKo zf#U;*Czl>x9=!JS?d61{%dh$M_Hvucqo>P9PJ4Jd{N*#}r^3;xe))XiEOv@FNe7*uUznYl|El{e&+RotB*TWI&!J_-czL~uPTSUbol7u@aXch zr(=%~*WUg7+=Z{F57!>f-o1SJz{#P)@ioV@r^`o<^5F34c^^6G(AVSXTj|jCT~vCi zd~)H-LB~DSF<0f$tLA*@@VD37%j0gn{CMWg>yF~h>GIXX%Wtper zPI&P6_9~n`ygKFOFQ;lwx9_5-&(|F)zCB)Z%V#bJo*th*e?1(YJK*@ph3EU4)3K+A z4{t6PA9MKXx|0qZo$lq3i@rHuJQdzv#pkQaBX4^=dp_oRdh*dR*W=N1ze*3zT<`nB zsqW;b@`0lxkGA-nn}Abjx>#IURoT!|CDS+|N&M-?=>Q zROz|{k1w1&bnsOC=AdH^PfxGXcZX{4z2vH{ULH6)^6;mpSK;vJdLLCT`{w6QPvxU_ zPTw7RdF1BL2hP58`SA2~`Kj)DY)(A)==sP+&)!@vy?e~%G1t>^NAI?84mdjY^0@FVLk>8Wz*>B+^9zlz5Po=U%d*?Pwk!f#q*U@mA5(I>BE(0 zukw`x&s?Pg=T7ge$A{x1uRWgXec)8OdX4ObN1j$Vb3I=@e^qXG@zLAUHP?GTJ`=z96i<)7e0>jldMaN$ zb9bxw{HFZ!(W9^8@$pW2x%txL57#~Y>UB{WT-%;}r!o0~8F-XXs^Ty^AtQ|pT-mtGD!eBDD29?qPP9*!pT+ZHE{qp## z-Rs>=-}|Y2R6Oq~Cp;ax^5f&d(ZiP)9*)kW3y<%;>C4F6r7=)2HJeICDK+y&S4JeZ4(CANq30Yj3W{rwiXXp6XpydgaqGcL%(^d+3?F$DDuj z!qJD*t9Bz+ozj(d9isyds<380Kj~;(>Jsy2?I;wlTn;h_T>A}%8hg0!YKJ>kxJLuBW!<*CB z(=nGz*4r!Z(ct=JfG^}dxz$<$Ma6+bjvG_J{|e!z{B z4!`2~$f>7;CkH>hJ)ZAj&rjuJuXhhVyc}xp!k0r|z4G;ZqSMlf+FPAyIIUhNzgD0;Z-@EEnx+-0{_3on6eCF_~9Ps9ozWnfX_4x9u z-o+j8dOGyw!iP79m(!jeoW1wa!|COt>kd^8I%?8o_~ z^oxh{PVn^2;pnL5aNb|-oW2}zsyUo0kIG*)r(;gf`>DQ*d@3DxJ@g&yo0A?LIhs!o zCzqVv%g2u{yvkobJs*`G99_K%Pe(3(a>KQU^IiDpRdf3E+^H4^-#y>r_3p#xr}hp! zK60zxQ>BBahxeZJ@c5eR9aZ)k_-FCJfec>3_FyWmuMdU*Qw`0~li-`>3{9e8ti{Q8#1*L`^O=)0p8ue#=R+^fQ? za?8sXUgZy`!Z*L3Pjk7S|J3v0?=HS79eK^+?d7VUobdeM-d;XD{_ygtaNW~Y;k_eW`T45$ldgN=>GlqMcsca)tM+u%>dQ@E zg>yGuIo+kVr^g>39!`(1Z?#w5%SYu$-@WkeD&C!Z@!{#xgR{q%&)nVgRC@5LoN#)+ zDu4PaUwZC@w}*$L3vXVW-0<||RPlQUJvu5pT|K^BdOGeh*UP2aoV0 z%-!c**8weLiiADo^(oIM|RsqkuZn#0rKM_(^*d-L+;gs%^M zIPav&iw|$^_oBL|Jzslz&CQpOd-&<`=*UmsUWF^4?!EaW|fO_{fJZkDi_! 
zdVDy#^1$ist0NaZdF8^#lg}Oxjvkx}FPGf-_H^{kZI9Qy^zq>7xx-#wJ-$jGPtV_+ zj=sL$flqOIzUt&>?j8B-`ODe+^>Xo3@$6N;aD4Udpf3-ezWV0$ypx`uyHq&7boh0y zpZ4{WlaIZ7>BytXf$#lPIn>^VPtP8|zI@D8d_6z;)oIR`j%v=gz5FVk-W~kCFPuE8 zcXtOK|KjM-Rq^$5$gkqJ$De%VqAM3(_2A*!o8x&udea^97vJ1;RPW4Br6&(OefP?v z;=$9iH&^ZH%4;v5y*WMkN;pD@I_Z{T1SLxXEqen-@lN*l@J$O3q)2s5^(=+E| z&xbx7Uww0$^QEJwhiBhj4tnx=H#+9khr_1_hsWQ0xKD3S509=KaP9H9|{t_TIC)bnv=2=dbp@sfU-x?@TY3Di0jK>RvhVyi@u5 z`q9C&r^`h4{r{yr>}Z%xy@}FDVcXU|_=d~{WAmdOrB_@S|V6ef8W;uX}TP@bxw4Psjb{syQ4!A9upbZ;q$p z>*Xt-4j(;Tz3)JWuD$$p_*VxG&wHAe&$qeZReJFJ>6ydvrK>N`z3!j`-)b+HO2@uc zPWkk3dVX;7%cqCuUmbVKg-_QW-=04ob9lcCwR`$}^{TnN)q#^sElwVIdEwmQo#cdv zql0gck0-Z0^4qI$`s$kVv8O8+eRDbGQt80alY@>5r&sCor>n=4UoFmio5OWZ&mD5; z;pKv-2k%~UczZeALBG{|nbV_>PuE^9zWnrX?x6>VUwwFdI6V6D$p6@$4!pbR@YT!H zzI?gh_|R+bZq=PrPscmL$)VEG7bl;)i<{>B)#lK92fqAN?*Om&&hpDahc6wu`Qg#i z)1}9k9-WD2j>jLLk3Bu}_U3ZwdnbK!`Stwp?afvBRdc!Y_H^`e!g+5v6^^cIPETI% z2xsqZ)tnE1RW22tulF*COk3AoAxyKE2RywAFOR+a{3o6IRR>RAIaK$<;p_Rst9;>F>ENsG^Uf+= zJv}q?{8V^7fBGt%N?)(aA+L8(<(YKx?DZ<2`pXZe_pU0w9I8E@ zx!zsv>bO@9)w`7k?_TbFTr^n|{r#b2ePY2Jva?m#~ zp04-mzCL<);hXE}xubY}bIR);dCcjXyUQL=9zJmRaQ5=rt8n)1`M84*Je}s#dmp;y zlODX>aP^}HFK6d;yl403a@9v~kFS@Hj>;F#JIP5G&bz{ytM26Axjlcr=JfIT*vkbc zpWdBtDnB}O@#yfygL4PG+<0{1?5hW_!mD)f=*TaRo(^1ldU$&Jz0>=_(NpEbgX3>M zRXlolbll~g;X9{~?;broU3zfdL$$}RzP&u&i5@>aybAAb`gr#Ea`97(H;2b}hdtkE z&)<9D)79gf%VSQ5uluWOUY=fFJheQz;N{WF}R{O{5!Kreo^0-USNAEpV zd%5X*Ke^#lKK6Q*pLgL$7p@f!-+g?|o7bE^o#u0&3NNQxeRDqT%~iUpJMHzY@G87} z^z!4&1844j@8>;Lb2z&6%)N&>oIPK2{&=dpy4TZ_i=Mmr^P>yzE_k@|_`&m$4__WQ zIx1g1T=lDGk1wZt=*dG@U!1*)uhw6$^7jrZA30Tfe)y{U>R-G)9({MV+VjVA2R%M; z=I#4^>Eqku%fpYqd-e9JJ$-uBx90<=(vwd$=cCG_nyY;Hs`hw(2YP<;o8!^N^X}#< zJYRD?T|VaCgRY*RN)L`+_i~tfS3N!)KehS#K61!!ukw>m&sPrf$+tdo;q%wq)8$X6 zK6r56!yWRP)8$(~dwO*E*yGcOlM}wx-W_^6)wh?Q55D|xa>`a@L<7o;%?5_NsTSzW2hj zmy=)h?cwF6L)SZ+(^2Jv=Lb(;et3I2`1N~eE_Zoy*sIN9E{}>YuRF}~fb#*b9yRW?`RG$pLaBOzubCNE_&_Z z?CICf9^XCY)#*L(<@1r>UGVnidicJV9Ps?yg=Y_6UH*9PF)yEP@o?teOKz1eA3POL zPL-}oM;`a-`RU=|?A!B$=LatzT@}w==JZwHi4GpTUgd8uhxf8K=im9%yOSOtI`(qA zgJ18kZ+`yt>FN2K*VkSy6<;;yR~>tEID9_x%i~^qeD}NC9M7CDUB2|_)1#w@!=q1+ zK0iEryzc4JYi%z4$Lp6t&LoH6vk3L^|dX=7cnsm(F?OuHPeC5`w<)LgKrPV zS5EH$?;YUi!Qq*=+S60%(ZMt4>$|$sUVeP{!}Fmdhib3qUmg1V@%Y%Q_^LS`9l6|N zuDVYyb9wp0(cxp>if@ibPj30-*W7&H0+s z(Zk8*F7xIvmxB&|bLqS1BafVV`t`%>o{wr@J$!e;H6I>ae`kE)^s0ApmpiJXSMlUA z$G6wZ?=C&Qx#}Lc>f`az^P_`rK2zj+7N(Y{v`|;i5J9X|I@aXI5 z!?~A^IemM1UUNz@uE{8oGKIW?YDu4O(@aFvJ;>)ArySqI3?BU_~wz?A@Uk+8? z;`Dsr+(VDAx!zt5b9(f>1AX_(r>BE&jt6fKZ?10+cyqb&R6g?J!Q1ok9_H?~SK;`$ zOQp|`o_TR}CZMp;z6(4_{7I4tP26RJril%c0uSua<@PQr9lHGG!!wr?o=z)1yd386?ziXX4nF33czpTXLtifk ze{+={96fVBs(f&A=~Xzosy+SU%-sR6dJp;K(s!??tM?vs<$%-6-O3N1j(L6D!&mNB zdfv4>`tsSUa?)uoe&%%e$z6VPWaWLvGbweB`n>r^nYEj*q<@)sqtrj}9Mvdw9O)@O)Hwd%o^9r^ipVmkY1&$Zwj{ zlb4=+_RVK+j<4ru-n??b%MXul4sVX9n&X?}$?HCMnHP^ok3PIydV28gwO8@z`@a0? 
zsC3}zx1acQPtRfoR2=*ioju6*>$x5sxMU3>Y= z;r0B?^>WiyyZ0{ebos01t?pBsm#%#H#hJ@z&tLutN8da1r6(V~_Vmr^nA73Ycazs% zkFV09FQ>ay{8qm7ig#bTWt}@6)UF<&qztzA7J_{PgI^Nk^r_ zmkypiA9um=F|Uq1_H^;+!qKH;Z|;ur;rXcY)93GgefNCmz~RXwAHF>ve?471ci_X> z({rDB{oB)%n-6_@^1<1w{P6Mg_U3#$cei`w<%6eJ>8f(->AFMDhmO4XdN_S`3=rq4xrN>8a?=HITfj8%G&tE<`d%E`CgRUHS zsy$wDd{uYA;qhG9>Y z=jXl6>EXfAb0@qxeSFnkt)97elSk!a&xejGuY4+f@1~mL@x$W_hi4A2SLMT#+k3#_ z%dfA$IX!rKsy%&o(NoRgV86(ed7N z+@YGwf#)83et7)MReRN(4xT&s>gCs$FCShjUzMJBQ0c35>!-(4-NOftjyvVUgKMoG z9vyf(e2UZeF5gpLI>qsWSNo3g%GJFb^z#@Ng<$ zJ%2uGaeV3e9{lO>r-Mh&o=$P};PK^E&FPrq`(FH}eCGV<=;fnl&tLC7nnzEEpI*hc zcZXhWF8(S#IMv>J$j`?f-o1QPIaPDM^z?M#&H1;&$w^<8vz4#+rdvNcaPEcUORw|p z-ABh>FNeJR>D%Mk^YtF)a^sup<cebam@}&o7?ml_&;PvqQ@#vWsZ%@B@-Ahl;kDk3;s_zMB zE)PGta;Wh5^3p9&uJYjV;qc|Cj~-5yzjyQD?+(6v;5vt=%O6i}e0#b0>glzoqr&l} zV=gDWyyq{cn@a8JKI^p2?xkpZWekxtPccmk@edq9W;Nken?H$|8%SR8# zM^CqUa;Wt1@$C6{Cwg$bhd*6B`1bVpcki95+qu0wbXE7*^P|&z?sf;hIUUtr4!!Cw z6^<_-`fBy*nbV^~*In*+FPuF+IC?6*=EJv#r^64A4}X;oJRSGZFV6eWwb#>=haa9? z=6Za-a;WBXRQ_~SJUQU?{P6YOL2XX=sr=~U$t#CBe{(qgozsWI=O>4paPG18|F&a|d0%`sSfiAH3?xMGp?&{dD2XRe1X5^0}veaP;Z4;;VAV ztGB1uy?OC^dph#*>JDr-wIppE`;pBwV*_l z+1D2i4_?oYPxIh=A3T2a^!%&iUFfy1KRq}V-=04|Iqmgu^25su-}kUr<Aud*Pf1dQj4dr%B#nd4_^**etI}M-pO3vsnU~24my17>F}$Mxhg-LIbZtb z#Wx3@Jan6jE}WhZT|B=Jb;9$L1K-^HO?!Ag?v=}2Z{IuQ=U0DtIq2faf$v^>czkm{ z=B;$`@#uH&K05T}x2NM>=)>dl!PBewDqVOw`tso2A*YIu2WQS#e!Sjc56>4~HHV|a zSI-~bcUR5b&EK599*({_U-y?Mhl($US{;6P@~QADe|mE0y&qjYJ$SvGaB}GDi*K&S zqol~h5adh2*?+$tF>8ZuZ zBR9Ujd%57~>iNRSXAZBomy4gfn}e=*fK%b?qo=Qz+ni44_Iy?O_3r4L9~?b+zHsK= z$=&Ak-D?g{pP#+zzV6N8?CF}z$sbSe9)0=p*~8PF@Oa)sKD|9XI`WzG^^Wd>mxDe$ zeLY_~=GB2Shm)s#z1(_v9Rx#g~J> zdEd=F{OGpgoAXog>({(`_uw~&Jv}(p9qxpeQfovHGblfGVslb@bB zo?eyDd#ZHh#kW_zH@s>N$A=#OzJvU#JLuxkp>Gb4FSorO58j>+z8+6+-~5*st0yN% yCkMIb%khU-@0XW5)b5`**f0ONCvLXetMeB(x9=`5p54FLYFRB`@7C)*W%&;;ZQ5S| literal 0 HcmV?d00001 diff --git a/LRC/learning_rate.py b/LRC/learning_rate.py index 3965171..6658b3e 100644 --- a/LRC/learning_rate.py +++ b/LRC/learning_rate.py @@ -38,6 +38,41 @@ def cosine_decay(learning_rate, num_epoch, steps_one_epoch): with init_on_cpu(): decayed_lr = learning_rate * \ - (ops.cos((global_step / steps_one_epoch) \ + (ops.cos(fluid.layers.floor(global_step / steps_one_epoch) \ * math.pi / num_epoch) + 1)/2 return decayed_lr + + +def cosine_with_warmup_decay(learning_rate, lr_min, steps_one_epoch, + warmup_epochs, total_epoch, num_gpu): + global_step = _decay_step_counter() + epoch_idx = fluid.layers.floor(global_step / steps_one_epoch) + + lr = fluid.layers.create_global_var( + shape=[1], + value=0.0, + dtype='float32', + persistable=True, + name="learning_rate") + + warmup_epoch_var = fluid.layers.fill_constant( + shape=[1], dtype='float32', value=float(warmup_epochs), force_cpu=True) + num_gpu_var = fluid.layers.fill_constant( + shape=[1], dtype='float32', value=float(num_gpu), force_cpu=True) + batch_idx = global_step - steps_one_epoch * epoch_idx + + with fluid.layers.control_flow.Switch() as switch: + with switch.case(epoch_idx < warmup_epoch_var): + epoch_ = (batch_idx + 1) / steps_one_epoch + factor = 1 / num_gpu_var * ( + epoch_ * (num_gpu_var - 1) / warmup_epoch_var + 1) + decayed_lr = learning_rate * factor * num_gpu_var + fluid.layers.assign(decayed_lr, lr) + epoch_ = (batch_idx + 1) / steps_one_epoch + m = epoch_ / total_epoch + frac = (1 + ops.cos(math.pi * m)) / 2 + cosine_lr = (lr_min + (learning_rate - lr_min) * frac) * num_gpu_var + with switch.default(): + fluid.layers.assign(cosine_lr, lr) + + return lr diff --git a/LRC/model.py 
b/LRC/model.py index 45a4034..4735439 100644 --- a/LRC/model.py +++ b/LRC/model.py @@ -176,9 +176,9 @@ def StemConv(input, C_out, kernel_size, padding): class NetworkCIFAR(object): def __init__(self, C, class_num, layers, auxiliary, genotype): - self.class_num = class_num self._layers = layers self._auxiliary = auxiliary + self.class_num = class_num stem_multiplier = 3 self.drop_path_prob = 0 @@ -201,36 +201,12 @@ class NetworkCIFAR(object): if i == 2 * layers // 3: C_to_auxiliary = C_prev - def forward(self, init_channel, is_train): - self.training = is_train - self.logits_aux = None - num_channel = init_channel * 3 - s0 = StemConv(self.image, num_channel, kernel_size=3, padding=1) - s1 = s0 - for i, cell in enumerate(self.cells): - name = 'cells.' + str(i) + '.' - s0, s1 = s1, cell.forward(s0, s1, self.drop_path_prob, is_train, - name) - if i == int(2 * self._layers // 3): - if self._auxiliary and self.training: - self.logits_aux = AuxiliaryHeadCIFAR(s1, self.class_num) - out = fluid.layers.adaptive_pool2d(s1, (1, 1), "avg") - self.logits = fluid.layers.fc(out, - size=self.class_num, - param_attr=ParamAttr( - initializer=Normal(scale=1e-3), - name='classifier.weight'), - bias_attr=ParamAttr( - initializer=Constant(0.), - name='classifier.bias')) - return self.logits, self.logits_aux - - def build_input(self, image_shape, batch_size, is_train): + def build_input(self, image_shape, is_train): if is_train: py_reader = fluid.layers.py_reader( capacity=64, shapes=[[-1] + image_shape, [-1, 1], [-1, 1], [-1, 1], [-1, 1], - [-1, 1], [-1, batch_size, self.class_num - 1]], + [-1, 1], [50, -1, self.class_num - 1]], lod_levels=[0, 0, 0, 0, 0, 0, 0], dtypes=[ "float32", "int64", "int64", "float32", "int32", "int32", @@ -248,14 +224,35 @@ class NetworkCIFAR(object): name='test_reader') return py_reader - def train_model(self, py_reader, init_channels, aux, aux_w, batch_size, - loss_lambda): + def forward(self, init_channel, is_train): + self.training = is_train + self.logits_aux = None + num_channel = init_channel * 3 + s0 = s1 = StemConv(self.image, num_channel, kernel_size=3, padding=1) + for i, cell in enumerate(self.cells): + name = 'cells.' + str(i) + '.' 
+ s0, s1 = s1, cell.forward(s0, s1, self.drop_path_prob, is_train, + name) + if i == int(2 * self._layers // 3): + if self._auxiliary and self.training: + self.logits_aux = AuxiliaryHeadCIFAR(s1, self.class_num) + out = fluid.layers.adaptive_pool2d(s1, (1, 1), "avg") + self.logits = fluid.layers.fc(out, + size=self.class_num, + param_attr=ParamAttr( + initializer=Normal(scale=1e-3), + name='classifier.weight'), + bias_attr=ParamAttr( + initializer=Constant(0, ), + name='classifier.bias')) + return self.logits, self.logits_aux + + def train_model(self, py_reader, init_channels, aux, aux_w, loss_lambda): self.image, self.ya, self.yb, self.lam, self.label_reshape,\ self.non_label_reshape, self.rad_var = fluid.layers.read_file(py_reader) self.logits, self.logits_aux = self.forward(init_channels, True) self.mixup_loss = self.mixup_loss(aux, aux_w) - self.lrc_loss = self.lrc_loss(batch_size) - return self.mixup_loss + loss_lambda * self.lrc_loss + return self.mixup_loss def test_model(self, py_reader, init_channels): self.image, self.ya = fluid.layers.read_file(py_reader) @@ -264,12 +261,13 @@ class NetworkCIFAR(object): loss = fluid.layers.cross_entropy(prob, self.ya) acc_1 = fluid.layers.accuracy(self.logits, self.ya, k=1) acc_5 = fluid.layers.accuracy(self.logits, self.ya, k=5) - return loss, acc_1, acc_5 + return prob, acc_1, acc_5 def mixup_loss(self, auxiliary, auxiliary_weight): prob = fluid.layers.softmax(self.logits, use_cudnn=False) loss_a = fluid.layers.cross_entropy(prob, self.ya) loss_b = fluid.layers.cross_entropy(prob, self.yb) + loss_a_mean = fluid.layers.reduce_mean(loss_a) loss_b_mean = fluid.layers.reduce_mean(loss_b) loss = self.lam * loss_a_mean + (1 - self.lam) * loss_b_mean @@ -283,7 +281,7 @@ class NetworkCIFAR(object): ) * loss_b_aux_mean return loss + auxiliary_weight * loss_aux - def lrc_loss(self, batch_size): + def lrc_loss(self): y_diff_reshape = fluid.layers.reshape(self.logits, shape=(-1, 1)) label_reshape = fluid.layers.squeeze(self.label_reshape, axes=[1]) non_label_reshape = fluid.layers.squeeze( @@ -296,18 +294,226 @@ class NetworkCIFAR(object): y_diff_non_label_reshape = fluid.layers.gather(y_diff_reshape, non_label_reshape) y_diff_label = fluid.layers.reshape( - y_diff_label_reshape, shape=(-1, batch_size, 1)) + y_diff_label_reshape, shape=(1, -1, 1)) y_diff_non_label = fluid.layers.reshape( - y_diff_non_label_reshape, - shape=(-1, batch_size, self.class_num - 1)) + y_diff_non_label_reshape, shape=(1, -1, self.class_num - 1)) y_diff_ = y_diff_non_label - y_diff_label y_diff_ = fluid.layers.transpose(y_diff_, perm=[1, 2, 0]) rad_var_trans = fluid.layers.transpose(self.rad_var, perm=[1, 2, 0]) rad_y_diff_trans = rad_var_trans * y_diff_ lrc_loss_sum = fluid.layers.reduce_sum(rad_y_diff_trans, dim=[0, 1]) - lrc_loss_ = fluid.layers.abs(lrc_loss_sum) / (batch_size * - (self.class_num - 1)) + shape_nbc = fluid.layers.shape(rad_y_diff_trans) + shape_nb = fluid.layers.slice(shape_nbc, axes=[0], starts=[0], ends=[2]) + num = fluid.layers.reduce_prod(shape_nb) + num.stop_gradient = True + lrc_loss_ = fluid.layers.abs(lrc_loss_sum) / num lrc_loss_mean = fluid.layers.reduce_mean(lrc_loss_) return lrc_loss_mean + + +def AuxiliaryHeadImageNet(input, num_classes, aux_name='auxiliary_head'): + relu_a = fluid.layers.relu(input) + pool_a = fluid.layers.pool2d(relu_a, 5, 'avg', pool_stride=3) + conv2d_a = fluid.layers.conv2d( + pool_a, + 128, + 1, + name=aux_name + '.features.2', + param_attr=ParamAttr( + initializer=Xavier( + uniform=False, fan_in=0), + name=aux_name + 
'.features.2.weight'), + bias_attr=False) + bn_a_name = aux_name + '.features.3' + bn_a = fluid.layers.batch_norm( + conv2d_a, + act='relu', + name=bn_a_name, + param_attr=ParamAttr( + initializer=Constant(1.), name=bn_a_name + '.weight'), + bias_attr=ParamAttr( + initializer=Constant(0.), name=bn_a_name + '.bias'), + moving_mean_name=bn_a_name + '.running_mean', + moving_variance_name=bn_a_name + '.running_var') + conv2d_b = fluid.layers.conv2d( + bn_a, + 768, + 2, + act='relu', + name=aux_name + '.features.5', + param_attr=ParamAttr( + initializer=Xavier( + uniform=False, fan_in=0), + name=aux_name + '.features.5.weight'), + bias_attr=False) + fc_name = aux_name + '.classifier' + fc = fluid.layers.fc(conv2d_b, + num_classes, + name=fc_name, + param_attr=ParamAttr( + initializer=Normal(scale=1e-3), + name=fc_name + '.weight'), + bias_attr=ParamAttr( + initializer=Constant(0.), name=fc_name + '.bias')) + return fc + + +def Stem0Conv(input, C_out): + conv_a = fluid.layers.conv2d( + input, + C_out // 2, + 3, + stride=2, + padding=1, + param_attr=ParamAttr( + initializer=Xavier( + uniform=False, fan_in=0), name='stem0.0.weight'), + bias_attr=False) + bn_a = fluid.layers.batch_norm( + conv_a, + param_attr=ParamAttr( + initializer=Constant(1.), name='stem0.1.weight'), + bias_attr=ParamAttr( + initializer=Constant(0.), name='stem0.1.bias'), + moving_mean_name='stem0.1.running_mean', + moving_variance_name='stem0.1.running_var', + act='relu') + conv_b = fluid.layers.conv2d( + relu_a, + C_out, + 3, + stride=2, + padding=1, + param_attr=ParamAttr( + initializer=Xavier( + uniform=False, fan_in=0), name='stem0.3.weight'), + bias_attr=False) + bn_b = fluid.layers.batch_norm( + conv_b, + param_attr=ParamAttr( + initializer=Constant(1.), name='stem0.4.weight'), + bias_attr=ParamAttr( + initializer=Constant(0.), name='stem0.4.bias'), + moving_mean_name='stem0.4.running_mean', + moving_variance_name='stem0.4.running_var') + + return bn_b + + +def Stem1Conv(input, C_out): + relu_a = fluid.layers.relu(input) + conv_a = fluid.layers.conv2d( + relu_a, + C_out, + 3, + stride=2, + padding=1, + param_attr=ParamAttr( + initializer=Xavier( + uniform=False, fan_in=0), name='stem1.1.weight'), + bias_attr=False) + bn_a = fluid.layers.batch_norm( + conv_a, + param_attr=ParamAttr( + initializer=Constant(1.), name='stem1.2.weight'), + bias_attr=ParamAttr( + initializer=Constant(0.), name='stem1.2.bias'), + moving_mean_name='stem1.2.running_mean', + moving_variance_name='stem1.2.running_var') + return bn_a + + +class NetworkImageNet(object): + def __init__(self, C, class_num, layers, genotype): + self.class_num = class_num + self._layers = layers + + self.drop_path_prob = 0 + + C_prev_prev, C_prev, C_curr = C, C, C + self.cells = [] + reduction_prev = True + for i in range(layers): + if i in [layers // 3, 2 * layers // 3]: + C_curr *= 2 + reduction = True + else: + reduction = False + cell = Cell(genotype, C_prev_prev, C_prev, C_curr, reduction, + reduction_prev) + reduction_prev = reduction + self.cells += [cell] + C_prev_prev, C_prev = C_prev, cell.multiplier * C_curr + if i == 2 * layers // 3: + C_to_auxiliary = C_prev + self.stem0 = functools.partial(Stem0Conv, C_out=C) + self.stem1 = functools.partial(Stem1Conv, C_out=C) + + def build_input(self, image_shape, is_train): + if is_train: + py_reader = fluid.layers.py_reader( + capacity=64, + shapes=[[-1] + image_shape, [-1, 1]], + lod_levels=[0, 0], + dtypes=["float32", "int64"], + use_double_buffer=True, + name='train_reader') + else: + py_reader = 
fluid.layers.py_reader( + capacity=64, + shapes=[[-1] + image_shape, [-1, 1]], + lod_levels=[0, 0], + dtypes=["float32", "int64"], + use_double_buffer=True, + name='test_reader') + return py_reader + + def forward(self, is_train): + self.training = is_train + self.logits_aux = None + s0 = self.stem0(self.image) + s1 = self.stem1(s0) + for i, cell in enumerate(self.cells): + name = 'cells.' + str(i) + '.' + s0, s1 = s1, cell.forward(s0, s1, self.drop_path_prob, is_train, + name) + if i == int(2 * self._layers // 3): + if self._auxiliary and self.training: + self.logits_aux = AuxiliaryHeadImageNet(s1, self.class_num) + out = fluid.layers.pool2d(s1, 7, "avg", pool_stride=7) + self.logits = fluid.layers.fc(out, + size=self.class_num, + param_attr=ParamAttr( + initializer=Normal(scale=1e-3), + name='classifier.weight'), + bias_attr=ParamAttr( + initializer=Constant(0, ), + name='classifier.bias')) + return self.logits, self.logits_aux + + def calc_loss(self, auxiliary_weight): + prob = fluid.layers.softmax(self.logits, use_cudnn=False) + loss = fluid.layers.cross_entropy(prob, self.label) + + loss_mean = fluid.layers.reduce_mean(loss) + prob_aux = fluid.layers.softmax(self.logits_aux, use_cudnn=False) + loss_aux = fluid.layers.cross_entropy(prob_aux, self.label) + loss_aux_mean = fluid.layers.reduce_mean(loss_aux) + return loss_mean + auxiliary_weight * loss_aux_mean + + def train_model(self, py_reader, aux_w): + self.image, self.label = fluid.layers.read_file(py_reader) + self.logits, self.logits_aux = self.forward(True) + self.loss = self.calc_loss(aux_w) + return self.loss + + def test_model(self, py_reader): + self.image, self.label = fluid.layers.read_file(py_reader) + self.logits, _ = self.forward(False) + prob = fluid.layers.softmax(self.logits, use_cudnn=False) + loss = fluid.layers.cross_entropy(prob, self.label) + acc_1 = fluid.layers.accuracy(self.logits, self.label, k=1) + acc_5 = fluid.layers.accuracy(self.logits, self.label, k=5) + return prob, acc_1, acc_5 diff --git a/LRC/operations.py b/LRC/operations.py index b015722..690ab60 100644 --- a/LRC/operations.py +++ b/LRC/operations.py @@ -312,7 +312,8 @@ def FactorizedReduce(input, C_out, name='', affine=True): bias_attr=False) h_end = relu_a.shape[2] w_end = relu_a.shape[3] - slice_a = fluid.layers.slice(relu_a, [2, 3], [1, 1], [h_end, w_end]) + slice_a = fluid.layers.slice( + input=relu_a, axes=[2, 3], starts=[1, 1], ends=[h_end, w_end]) conv2d_b = fluid.layers.conv2d( slice_a, C_out // 2, diff --git a/LRC/paddle_predict/__init__.py b/LRC/paddle_predict/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/LRC/reader.py b/LRC/reader_cifar.py similarity index 85% rename from LRC/reader.py rename to LRC/reader_cifar.py index 20b32b5..db7eb06 100644 --- a/LRC/reader.py +++ b/LRC/reader_cifar.py @@ -31,7 +31,10 @@ from PIL import Image from PIL import ImageOps import numpy as np -import cPickle +try: + import cPickle as pickle +except: + import pickle import random import utils import paddle.fluid as fluid @@ -46,7 +49,7 @@ image_size = 32 image_depth = 3 half_length = 8 -CIFAR_MEAN = [0.4914, 0.4822, 0.4465] +CIFAR_MEAN = [0.49139968, 0.48215827, 0.44653124] CIFAR_STD = [0.24703233, 0.24348505, 0.26158768] @@ -82,6 +85,7 @@ def generate_bernoulli_number(batch_size, CIFAR_CLASSES=10): def preprocess(sample, is_training, args): + image_array = sample.reshape(3, image_size, image_size) rgb_array = np.transpose(image_array, (1, 2, 0)) img = Image.fromarray(rgb_array, 'RGB') @@ -123,13 +127,15 @@ def 
reader_creator_filepath(filename, sub_name, is_training, args): datasets = [] for name in names: print("Reading file " + name) - batch = cPickle.load(open(filename + name, 'rb')) + batch = pickle.load(open(filename + name, 'rb')) data = batch['data'] labels = batch.get('labels', batch.get('fine_labels', None)) assert labels is not None dataset = zip(data, labels) datasets.extend(dataset) - random.shuffle(datasets) + + if is_training: + random.shuffle(datasets) def read_batch(datasets, args): for sample, label in datasets: @@ -160,6 +166,23 @@ def reader_creator_filepath(filename, sub_name, is_training, args): yield batch_out batch_data = [] batch_label = [] + if len(batch_data) != 0: + batch_data = np.array(batch_data, dtype='float32') + batch_label = np.array(batch_label, dtype='int64') + if is_training: + flatten_label, flatten_non_label = \ + generate_reshape_label(batch_label, len(batch_data)) + rad_var = generate_bernoulli_number(len(batch_data)) + mixed_x, y_a, y_b, lam = utils.mixup_data( + batch_data, batch_label, len(batch_data), args.mix_alpha) + batch_out = [[mixed_x, y_a, y_b, lam, flatten_label, \ + flatten_non_label, rad_var]] + yield batch_out + else: + batch_out = [[batch_data, batch_label]] + yield batch_out + batch_data = [] + batch_label = [] return reader diff --git a/LRC/run.sh b/LRC/run.sh deleted file mode 100644 index 9f1a045..0000000 --- a/LRC/run.sh +++ /dev/null @@ -1,8 +0,0 @@ -CUDA_VISIBLE_DEVICES=0 python -u train_mixup.py \ ---batch_size=80 \ ---auxiliary \ ---weight_decay=0.0003 \ ---learning_rate=0.025 \ ---lrc_loss_lambda=0.7 \ ---cutout - diff --git a/LRC/run_cifar.sh b/LRC/run_cifar.sh new file mode 100644 index 0000000..3c295b0 --- /dev/null +++ b/LRC/run_cifar.sh @@ -0,0 +1,11 @@ +export FLAGS_fraction_of_gpu_memory_to_use=0.9 +export FLAGS_eager_delete_tensor_gb=0.0 +export FLAGS_fast_eager_deletion_mode=1 + +nohup env CUDA_VISIBLE_DEVICES=0 python -u train_mixup.py --batch_size=64 --auxiliary --mix_alpha=0.9 --model_id=0 --cutout --lrc_loss_lambda=0.5 --weight_decay=0.0002 --learning_rate=0.01 --save_model_path=model_0 > lrc_model_0.log 2>&1 & +nohup env CUDA_VISIBLE_DEVICES=1 python -u train_mixup.py --batch_size=64 --auxiliary --mix_alpha=0.6 --model_id=0 --cutout --lrc_loss_lambda=0.5 --weight_decay=0.0002 --learning_rate=0.02 --save_model_path=model_1 > lrc_model_1.log 2>&1 & +nohup env CUDA_VISIBLE_DEVICES=2 python -u train_mixup.py --batch_size=80 --auxiliary --mix_alpha=0.5 --model_id=1 --cutout --lrc_loss_lambda=0.5 --weight_decay=0.0002 --learning_rate=0.015 --save_model_path=model_2 > lrc_model_2.log 2>&1 & +nohup env CUDA_VISIBLE_DEVICES=3 python -u train_mixup.py --batch_size=80 --auxiliary --mix_alpha=0.6 --model_id=1 --cutout --lrc_loss_lambda=0.5 --weight_decay=0.0002 --learning_rate=0.02 --save_model_path=model_3 > lrc_model_3.log 2>&1 & +nohup env CUDA_VISIBLE_DEVICES=4 python -u train_mixup.py --batch_size=80 --auxiliary --mix_alpha=0.8 --model_id=1 --cutout --lrc_loss_lambda=0.5 --weight_decay=0.0002 --learning_rate=0.03 --save_model_path=model_4 > lrc_model_4.log 2>&1 & +nohup env CUDA_VISIBLE_DEVICES=5 python -u train_mixup.py --batch_size=64 --auxiliary --mix_alpha=0.5 --model_id=2 --cutout --lrc_loss_lambda=0.5 --weight_decay=0.0002 --learning_rate=0.015 --save_model_path=model_5 > lrc_model_5.log 2>&1 & + diff --git a/LRC/run_cifar_test.sh b/LRC/run_cifar_test.sh new file mode 100644 index 0000000..b301167 --- /dev/null +++ b/LRC/run_cifar_test.sh @@ -0,0 +1,8 @@ +export FLAGS_fraction_of_gpu_memory_to_use=0.6 +nohup env 
CUDA_VISIBLE_DEVICES=0 python -u test_mixup.py --batch_size=64 --auxiliary --model_id=0 --pretrained_model=model_0/final/ --dump_path=paddle_predict/prob_test_0.pkl > lrc_test_0.log 2>&1 & +nohup env CUDA_VISIBLE_DEVICES=1 python -u test_mixup.py --batch_size=64 --auxiliary --model_id=0 --pretrained_model=model_1/final/ --dump_path=paddle_predict/prob_test_1.pkl > lrc_test_1.log 2>&1 & +nohup env CUDA_VISIBLE_DEVICES=2 python -u test_mixup.py --batch_size=80 --auxiliary --model_id=1 --pretrained_model=model_2/final/ --dump_path=paddle_predict/prob_test_2.pkl > lrc_test_2.log 2>&1 & +nohup env CUDA_VISIBLE_DEVICES=3 python -u test_mixup.py --batch_size=80 --auxiliary --model_id=1 --pretrained_model=model_3/final/ --dump_path=paddle_predict/prob_test_3.pkl > lrc_test_3.log 2>&1 & +nohup env CUDA_VISIBLE_DEVICES=4 python -u test_mixup.py --batch_size=80 --auxiliary --model_id=1 --pretrained_model=model_4/final/ --dump_path=paddle_predict/prob_test_4.pkl > lrc_test_4.log 2>&1 & +nohup env CUDA_VISIBLE_DEVICES=5 python -u test_mixup.py --batch_size=64 --auxiliary --model_id=2 --pretrained_model=model_5/final/ --dump_path=paddle_predict/prob_test_5.pkl > lrc_test_5.log 2>&1 & + diff --git a/LRC/test_mixup.py b/LRC/test_mixup.py new file mode 100644 index 0000000..f685318 --- /dev/null +++ b/LRC/test_mixup.py @@ -0,0 +1,140 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. +# +#Licensed under the Apache License, Version 2.0 (the "License"); +#you may not use this file except in compliance with the License. +#You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +#Unless required by applicable law or agreed to in writing, software +#distributed under the License is distributed on an "AS IS" BASIS, +#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +#See the License for the specific language governing permissions and +#limitations under the License. +# +# Based on: +# -------------------------------------------------------- +# DARTS +# Copyright (c) 2018, Hanxiao Liu. 
+# Licensed under the Apache License, Version 2.0; +# -------------------------------------------------------- + +from __future__ import absolute_import +from __future__ import division +from __future__ import print_function +from learning_rate import cosine_decay +import numpy as np +import argparse +from model import NetworkCIFAR as Network +import reader_cifar as reader +import sys +import os +import time +import logging +import genotypes +import paddle.fluid as fluid +import shutil +import utils + +parser = argparse.ArgumentParser("cifar") +# yapf: disable +parser.add_argument('--data', type=str, default='./dataset/cifar/cifar-10-batches-py/', help='location of the data corpus') +parser.add_argument('--batch_size', type=int, default=96, help='batch size') +parser.add_argument('--model_id', type=int, help='model id') +parser.add_argument('--report_freq', type=float, default=50, help='report frequency') +parser.add_argument( '--init_channels', type=int, default=36, help='num of init channels') +parser.add_argument( '--layers', type=int, default=20, help='total number of layers') +parser.add_argument('--auxiliary', action='store_true', default=False, help='use auxiliary tower') +parser.add_argument('--auxiliary_weight', type=float, default=0.4, help='weight for auxiliary loss') +parser.add_argument('--drop_path_prob', type=float, default=0.2, help='drop path probability') +parser.add_argument('--pretrained_model', type=str, default='/model_0/final/', help='pretrained model to load') +parser.add_argument('--arch', type=str, default='DARTS', help='which architecture to use') +parser.add_argument('--dump_path', type=str, default='prob_test_0.pkl', help='dump path') +# yapf: enable + +args = parser.parse_args() + +CIFAR_CLASSES = 10 +dataset_train_size = 50000 +image_size = 32 +genotypes.DARTS = genotypes.MY_DARTS_list[args.model_id] +print(genotypes.DARTS) + + +def main(): + image_shape = [3, image_size, image_size] + devices = os.getenv("CUDA_VISIBLE_DEVICES") or "" + devices_num = len(devices.split(",")) + logging.info("args = %s", args) + genotype = eval("genotypes.%s" % args.arch) + model = Network(args.init_channels, CIFAR_CLASSES, args.layers, + args.auxiliary, genotype) + test(model, args, image_shape) + + +def build_program(args, is_train, model, im_shape): + out = [] + py_reader = model.build_input(im_shape, is_train) + prob, acc_1, acc_5 = model.test_model(py_reader, args.init_channels) + out = [py_reader, prob, acc_1, acc_5] + return out + + +def test(model, args, im_shape): + + test_py_reader, prob, acc_1, acc_5 = build_program(args, False, model, + im_shape) + + test_prog = fluid.default_main_program().clone(for_test=True) + + place = fluid.CUDAPlace(0) + exe = fluid.Executor(place) + exe.run(fluid.default_startup_program()) + + # yapf: disable + if args.pretrained_model: + def if_exist(var): + return os.path.exists(os.path.join(args.pretrained_model, var.name)) + fluid.io.load_vars(exe, args.pretrained_model, predicate=if_exist) + + # yapf: enable + + exec_strategy = fluid.ExecutionStrategy() + exec_strategy.num_threads = 1 + compile_program = fluid.compiler.CompiledProgram( + test_prog).with_data_parallel(exec_strategy=exec_strategy) + test_reader = reader.test10(args) + test_py_reader.decorate_paddle_reader(test_reader) + + test_fetch_list = [prob, acc_1, acc_5] + prob = [] + top1 = utils.AvgrageMeter() + top5 = utils.AvgrageMeter() + test_py_reader.start() + test_start_time = time.time() + step_id = 0 + try: + while True: + prev_test_start_time = test_start_time + 
test_start_time = time.time() + prob_v, acc_1_v, acc_5_v = exe.run(compile_program, + test_prog, + fetch_list=test_fetch_list) + prob.append(list(np.array(prob_v))) + top1.update(np.array(acc_1_v), np.array(prob_v).shape[0]) + top5.update(np.array(acc_5_v), np.array(prob_v).shape[0]) + if step_id % args.report_freq == 0: + print('prob shape:', np.array(prob_v).shape) + print("Step {}, acc_1 {}, acc_5 {}, time {}".format( + step_id, + np.array(acc_1_v), + np.array(acc_5_v), test_start_time - prev_test_start_time)) + step_id += 1 + except fluid.core.EOFException: + test_py_reader.reset() + np.concatenate(prob).dump(args.dump_path) + print("top1 {0}, top5 {1}".format(top1.avg, top5.avg)) + + +if __name__ == '__main__': + main() diff --git a/LRC/train_mixup.py b/LRC/train_mixup.py index de752c8..e268b1c 100644 --- a/LRC/train_mixup.py +++ b/LRC/train_mixup.py @@ -26,7 +26,7 @@ from learning_rate import cosine_decay import numpy as np import argparse from model import NetworkCIFAR as Network -import reader +import reader_cifar as reader import sys import os import time @@ -35,73 +35,40 @@ import genotypes import paddle.fluid as fluid import shutil import utils -import cPickle as cp +import math parser = argparse.ArgumentParser("cifar") -parser.add_argument( - '--data', - type=str, - default='./dataset/cifar/cifar-10-batches-py/', - help='location of the data corpus') +# yapf: disable +parser.add_argument('--data', type=str, default='./dataset/cifar/cifar-10-batches-py/', help='location of the data corpus') parser.add_argument('--batch_size', type=int, default=96, help='batch size') -parser.add_argument( - '--learning_rate', type=float, default=0.025, help='init learning rate') +parser.add_argument('--pretrained_model', type=str, default=None, help='pretrained model to load') +parser.add_argument('--model_id', type=int, help='model id') +parser.add_argument('--learning_rate', type=float, default=0.025, help='init learning rate') parser.add_argument('--momentum', type=float, default=0.9, help='momentum') -parser.add_argument( - '--weight_decay', type=float, default=3e-4, help='weight decay') -parser.add_argument( - '--report_freq', type=float, default=50, help='report frequency') -parser.add_argument( - '--epochs', type=int, default=600, help='num of training epochs') -parser.add_argument( - '--init_channels', type=int, default=36, help='num of init channels') -parser.add_argument( - '--layers', type=int, default=20, help='total number of layers') -parser.add_argument( - '--model_path', - type=str, - default='saved_models', - help='path to save the model') -parser.add_argument( - '--auxiliary', - action='store_true', - default=False, - help='use auxiliary tower') -parser.add_argument( - '--auxiliary_weight', - type=float, - default=0.4, - help='weight for auxiliary loss') -parser.add_argument( - '--cutout', action='store_true', default=False, help='use cutout') -parser.add_argument( - '--cutout_length', type=int, default=16, help='cutout length') -parser.add_argument( - '--drop_path_prob', type=float, default=0.2, help='drop path probability') -parser.add_argument('--save', type=str, default='EXP', help='experiment name') -parser.add_argument( - '--arch', type=str, default='DARTS', help='which architecture to use') -parser.add_argument( - '--grad_clip', type=float, default=5, help='gradient clipping') -parser.add_argument( - '--lr_exp_decay', - action='store_true', - default=False, - help='use exponential_decay learning_rate') +parser.add_argument('--weight_decay', type=float, default=3e-4, 
help='weight decay') +parser.add_argument('--report_freq', type=float, default=50, help='report frequency') +parser.add_argument('--epochs', type=int, default=600, help='num of training epochs') +parser.add_argument('--init_channels', type=int, default=36, help='num of init channels') +parser.add_argument('--layers', type=int, default=20, help='total number of layers') +parser.add_argument('--save_model_path', type=str, default='saved_models', help='path to save the model') +parser.add_argument('--auxiliary', action='store_true', default=False, help='use auxiliary tower') +parser.add_argument('--auxiliary_weight', type=float, default=0.4, help='weight for auxiliary loss') +parser.add_argument('--cutout', action='store_true', default=False, help='use cutout') +parser.add_argument('--cutout_length', type=int, default=16, help='cutout length') +parser.add_argument('--drop_path_prob', type=float, default=0.2, help='drop path probability') +parser.add_argument('--arch', type=str, default='DARTS', help='which architecture to use') +parser.add_argument('--grad_clip', type=float, default=5, help='gradient clipping') +parser.add_argument('--lr_exp_decay', action='store_true', default=False, help='use exponential_decay learning_rate') parser.add_argument('--mix_alpha', type=float, default=0.5, help='mixup alpha') -parser.add_argument( - '--lrc_loss_lambda', default=0, type=float, help='lrc_loss_lambda') -parser.add_argument( - '--loss_type', - default=1, - type=float, - help='loss_type 0: cross entropy 1: multi margin loss 2: max margin loss') +parser.add_argument('--lrc_loss_lambda', default=0, type=float, help='lrc_loss_lambda') +# yapf: enable args = parser.parse_args() CIFAR_CLASSES = 10 -dataset_train_size = 50000 +dataset_train_size = 50000. image_size = 32 +genotypes.DARTS = genotypes.MY_DARTS_list[args.model_id] def main(): @@ -112,7 +79,9 @@ def main(): genotype = eval("genotypes.%s" % args.arch) model = Network(args.init_channels, CIFAR_CLASSES, args.layers, args.auxiliary, genotype) - steps_one_epoch = dataset_train_size / (devices_num * args.batch_size) + + steps_one_epoch = math.ceil(dataset_train_size / + (devices_num * args.batch_size)) train(model, args, image_shape, steps_one_epoch) @@ -120,73 +89,84 @@ def build_program(main_prog, startup_prog, args, is_train, model, im_shape, steps_one_epoch): out = [] with fluid.program_guard(main_prog, startup_prog): - py_reader = model.build_input(im_shape, args.batch_size, is_train) + py_reader = model.build_input(im_shape, is_train) if is_train: with fluid.unique_name.guard(): loss = model.train_model(py_reader, args.init_channels, args.auxiliary, args.auxiliary_weight, - args.batch_size, args.lrc_loss_lambda) + args.lrc_loss_lambda) optimizer = fluid.optimizer.Momentum( - learning_rate=cosine_decay(args.learning_rate, \ - args.epochs, steps_one_epoch), - regularization=fluid.regularizer.L2Decay(\ - args.weight_decay), - momentum=args.momentum) + learning_rate=cosine_decay(args.learning_rate, args.epochs, + steps_one_epoch), + regularization=fluid.regularizer.L2Decay(args.weight_decay), + momentum=args.momentum) optimizer.minimize(loss) out = [py_reader, loss] else: with fluid.unique_name.guard(): - loss, acc_1, acc_5 = model.test_model(py_reader, + prob, acc_1, acc_5 = model.test_model(py_reader, args.init_channels) - out = [py_reader, loss, acc_1, acc_5] + out = [py_reader, prob, acc_1, acc_5] return out def train(model, args, im_shape, steps_one_epoch): - train_startup_prog = fluid.Program() - test_startup_prog = fluid.Program() + 
startup_prog = fluid.Program() train_prog = fluid.Program() test_prog = fluid.Program() - train_py_reader, loss_train = build_program(train_prog, train_startup_prog, - args, True, model, im_shape, - steps_one_epoch) + train_py_reader, loss_train = build_program( + train_prog, startup_prog, args, True, model, im_shape, steps_one_epoch) - test_py_reader, loss_test, acc_1, acc_5 = build_program( - test_prog, test_startup_prog, args, False, model, im_shape, - steps_one_epoch) + test_py_reader, prob, acc_1, acc_5 = build_program( + test_prog, startup_prog, args, False, model, im_shape, steps_one_epoch) test_prog = test_prog.clone(for_test=True) place = fluid.CUDAPlace(0) exe = fluid.Executor(place) - exe.run(train_startup_prog) - exe.run(test_startup_prog) + exe.run(startup_prog) + + if args.pretrained_model: + + def if_exist(var): + return os.path.exists(os.path.join(args.pretrained_model, var.name)) + + fluid.io.load_vars( + exe, + args.pretrained_model, + main_program=train_prog, + predicate=if_exist) exec_strategy = fluid.ExecutionStrategy() exec_strategy.num_threads = 1 - train_exe = fluid.ParallelExecutor( - main_program=train_prog, - use_cuda=True, - loss_name=loss_train.name, - exec_strategy=exec_strategy) + build_strategy = fluid.BuildStrategy() + build_strategy.memory_optimize = False + build_strategy.enable_inplace = True + + compile_program = fluid.compiler.CompiledProgram( + train_prog).with_data_parallel( + loss_name=loss_train.name, + build_strategy=build_strategy, + exec_strategy=exec_strategy) + train_reader = reader.train10(args) test_reader = reader.test10(args) train_py_reader.decorate_paddle_reader(train_reader) test_py_reader.decorate_paddle_reader(test_reader) - fluid.clip.set_gradient_clip(fluid.clip.GradientClipByNorm(args.grad_clip)) - fluid.memory_optimize(fluid.default_main_program()) + fluid.clip.set_gradient_clip( + fluid.clip.GradientClipByGlobalNorm(args.grad_clip), program=train_prog) + train_fetch_list = [loss_train] def save_model(postfix, main_prog): - model_path = os.path.join(args.model_path, postfix) + model_path = os.path.join(args.save_model_path, postfix) if os.path.isdir(model_path): shutil.rmtree(model_path) fluid.io.save_persistables(exe, model_path, main_program=main_prog) def test(epoch_id): - test_fetch_list = [loss_test, acc_1, acc_5] - objs = utils.AvgrageMeter() + test_fetch_list = [prob, acc_1, acc_5] top1 = utils.AvgrageMeter() top5 = utils.AvgrageMeter() test_py_reader.start() @@ -196,11 +176,10 @@ def train(model, args, im_shape, steps_one_epoch): while True: prev_test_start_time = test_start_time test_start_time = time.time() - loss_test_v, acc_1_v, acc_5_v = exe.run( - test_prog, fetch_list=test_fetch_list) - objs.update(np.array(loss_test_v), args.batch_size) - top1.update(np.array(acc_1_v), args.batch_size) - top5.update(np.array(acc_5_v), args.batch_size) + prob_v, acc_1_v, acc_5_v = exe.run(test_prog, + fetch_list=test_fetch_list) + top1.update(np.array(acc_1_v), np.array(prob_v).shape[0]) + top5.update(np.array(acc_5_v), np.array(prob_v).shape[0]) if step_id % args.report_freq == 0: print("Epoch {}, Step {}, acc_1 {}, acc_5 {}, time {}". 
                       format(epoch_id, step_id,
@@ -213,7 +192,6 @@
         print("Epoch {0}, top1 {1}, top5 {2}".format(epoch_id, top1.avg, top5.avg))
 
-    train_fetch_list = [loss_train]
     epoch_start_time = time.time()
     for epoch_id in range(args.epochs):
         model.drop_path_prob = args.drop_path_prob * epoch_id / args.epochs
@@ -230,7 +208,8 @@
         while True:
             prev_start_time = start_time
             start_time = time.time()
-            loss_v, = train_exe.run(
+            loss_v, = exe.run(
+                compile_program,
                 fetch_list=[v.name for v in train_fetch_list])
             print("Epoch {}, Step {}, loss {}, time {}".format(epoch_id, step_id, \
                 np.array(loss_v).mean(), start_time-prev_start_time))
@@ -238,8 +217,10 @@
             sys.stdout.flush()
         except fluid.core.EOFException:
             train_py_reader.reset()
-        if epoch_id % 50 == 0 or epoch_id == args.epochs - 1:
+        if epoch_id % 50 == 0:
             save_model(str(epoch_id), train_prog)
+        if epoch_id == args.epochs - 1:
+            save_model('final', train_prog)
         test(epoch_id)
diff --git a/LRC/voting.py b/LRC/voting.py
new file mode 100644
index 0000000..3c92dfb
--- /dev/null
+++ b/LRC/voting.py
@@ -0,0 +1,18 @@
+import numpy as np
+try:
+    import cPickle as pickle
+except ImportError:
+    import pickle
+import os
+
+# Average the per-model probability files dumped by test_mixup.py and report
+# the running ensemble accuracy against the ground-truth labels in labels.npz.
+model_path = 'paddle_predict'
+fl = [f for f in os.listdir(model_path) if f.endswith('.pkl')]
+fl.sort()
+labels = np.load('labels.npz')['arr_0']
+pred = np.zeros((10000, 10))
+for f in fl:
+    print(f)
+    pred += pickle.load(open(os.path.join(model_path, f), 'rb'))
+    print(np.mean(np.argmax(pred, axis=1) == labels))
diff --git a/README.md b/README.md
index 6144e77..6faaf17 100644
--- a/README.md
+++ b/README.md
@@ -1 +1 @@
-# AutoDL
\ No newline at end of file
+# AutoDL
--
GitLab