From 213543160df07815cda53fa94e49343823a8dc67 Mon Sep 17 00:00:00 2001 From: Yibing Liu Date: Fri, 18 Oct 2019 17:04:28 +0800 Subject: [PATCH] Use submodule for autodl (#3669) --- .gitmodules | 3 + AutoDL | 1 + AutoDL/HiNAS_models/README.md | 78 ----- AutoDL/HiNAS_models/README_cn.md | 80 ------ AutoDL/HiNAS_models/build/__init__.py | 0 AutoDL/HiNAS_models/build/layers.py | 214 -------------- AutoDL/HiNAS_models/build/ops.py | 117 -------- AutoDL/HiNAS_models/build/resnet_base.py | 109 ------- AutoDL/HiNAS_models/build/vgg_base.py | 70 ----- AutoDL/HiNAS_models/nn_paddle.py | 139 --------- AutoDL/HiNAS_models/reader.py | 157 ---------- AutoDL/HiNAS_models/tokens/15113.pkl | Bin 328 -> 0 bytes AutoDL/HiNAS_models/tokens/15383.pkl | 36 --- AutoDL/HiNAS_models/tokens/15613.pkl | Bin 328 -> 0 bytes AutoDL/HiNAS_models/tokens/17754.pkl | Bin 328 -> 0 bytes AutoDL/HiNAS_models/tokens/17925.pkl | 36 --- AutoDL/HiNAS_models/tokens/18089.pkl | 36 --- AutoDL/HiNAS_models/train_hinas.py | 44 --- AutoDL/HiNAS_models/train_hinas_res.py | 44 --- AutoDL/LRC/README.md | 74 ----- AutoDL/LRC/README_cn.md | 71 ----- AutoDL/LRC/dataset/download.sh | 10 - AutoDL/LRC/genotypes.py | 116 -------- AutoDL/LRC/learning_rate.py | 43 --- AutoDL/LRC/model.py | 313 -------------------- AutoDL/LRC/operations.py | 349 ----------------------- AutoDL/LRC/reader.py | 187 ------------ AutoDL/LRC/run.sh | 8 - AutoDL/LRC/train_mixup.py | 246 ---------------- AutoDL/LRC/utils.py | 55 ---- 30 files changed, 4 insertions(+), 2632 deletions(-) create mode 160000 AutoDL delete mode 100755 AutoDL/HiNAS_models/README.md delete mode 100755 AutoDL/HiNAS_models/README_cn.md delete mode 100755 AutoDL/HiNAS_models/build/__init__.py delete mode 100755 AutoDL/HiNAS_models/build/layers.py delete mode 100755 AutoDL/HiNAS_models/build/ops.py delete mode 100755 AutoDL/HiNAS_models/build/resnet_base.py delete mode 100755 AutoDL/HiNAS_models/build/vgg_base.py delete mode 100755 AutoDL/HiNAS_models/nn_paddle.py delete mode 100755 AutoDL/HiNAS_models/reader.py delete mode 100755 AutoDL/HiNAS_models/tokens/15113.pkl delete mode 100755 AutoDL/HiNAS_models/tokens/15383.pkl delete mode 100755 AutoDL/HiNAS_models/tokens/15613.pkl delete mode 100755 AutoDL/HiNAS_models/tokens/17754.pkl delete mode 100755 AutoDL/HiNAS_models/tokens/17925.pkl delete mode 100755 AutoDL/HiNAS_models/tokens/18089.pkl delete mode 100755 AutoDL/HiNAS_models/train_hinas.py delete mode 100755 AutoDL/HiNAS_models/train_hinas_res.py delete mode 100644 AutoDL/LRC/README.md delete mode 100644 AutoDL/LRC/README_cn.md delete mode 100644 AutoDL/LRC/dataset/download.sh delete mode 100644 AutoDL/LRC/genotypes.py delete mode 100644 AutoDL/LRC/learning_rate.py delete mode 100644 AutoDL/LRC/model.py delete mode 100644 AutoDL/LRC/operations.py delete mode 100644 AutoDL/LRC/reader.py delete mode 100644 AutoDL/LRC/run.sh delete mode 100644 AutoDL/LRC/train_mixup.py delete mode 100644 AutoDL/LRC/utils.py diff --git a/.gitmodules b/.gitmodules index f8124bc2..fd7a90bc 100644 --- a/.gitmodules +++ b/.gitmodules @@ -7,3 +7,6 @@ [submodule "PaddleCV/PaddleSeg"] path = PaddleCV/PaddleSeg url = https://github.com/PaddlePaddle/PaddleSeg +[submodule "AutoDL"] + path = AutoDL + url = https://github.com/PaddlePaddle/AutoDL.git diff --git a/AutoDL b/AutoDL new file mode 160000 index 00000000..5447bcf7 --- /dev/null +++ b/AutoDL @@ -0,0 +1 @@ +Subproject commit 5447bcf72d92b58e9efe38e4aa0d47bab68bec31 diff --git a/AutoDL/HiNAS_models/README.md b/AutoDL/HiNAS_models/README.md deleted file mode 100755 index 
4557405d..00000000 --- a/AutoDL/HiNAS_models/README.md +++ /dev/null @@ -1,78 +0,0 @@ -# Image Classification Models -This directory contains six image classification models, which are models automatically discovered by Baidu Big Data Lab (BDL) Hierarchical Neural Architecture Search project (HiNAS), achieving 96.1% accuracy on CIFAR-10 dataset. These models are divided into two categories. The first three have no skip link, named HiNAS 0-2, and the last three networks contain skip links, which are similar to the shortcut connections in Resnet, named HiNAS 3-5. - -We also recommend users to take a look at the  [IPython Notebook demo](https://aistudio.baidu.com/aistudio/projectDetail/122279) - ---- -## Table of Contents -- [Installation](#installation) -- [Data preparation](#data-preparation) -- [Training a model](#training-a-model) -- [Model performances](#model-performances) - -## Installation -Running the trainer in current directory requires: - -- PadddlePaddle Fluid >= v0.15.0 -- CuDNN >=6.0 - -If PaddlePaddle and CuDNN in your runtime environment do not meet the requirements, please follow the instructions in [installation document](http://www.paddlepaddle.org/docs/develop/documentation/zh/build_and_install/pip_install_cn.html) and make an update. - -## Data preparation - -When you run the sample code for the first time, the trainer will automatically download the cifar-10 dataset. Please make sure your environment has an internet connection. - -The dataset will be downloaded to `dataset/cifar/cifar-10-python.tar.gz` in the same directory as the Trainer. If automatic download fails, you can go to https://www.cs.toronto.edu/~kriz/cifar.html and download cifar-10-python.tar.gz to the location mentioned above. - -## Training a model - -After the environment is ready, you can train the model. There are two entrances: `train_hinas.py` and `train_hinas_res.py`. The former is used to train Model 0-2 (without skip link), and the latter is used to train Model 3-5 (contains skip link). - -Train Model 0~2 (without skip link): -``` -python train_hinas.py --model=m_id # m_id can be 0, 1 or 2. -``` -Train Model 3~5 (with skip link): -``` -python train_hinas_res.py --model=m_id # m_id can be 0, 1 or 2. -``` - -In addition, both `train_hinas.py` and `train_hinas_res.py` support the following parameters: - -- **random_flip_left_right**: Random flip image horizontally. (Default: True) -- **random_flip_up_down**: Randomly flip image vertically. (Default: False) -- **cutout**: Add cutout action to image. (Default: True) -- **standardize_image**: Image standardize. (Default: True) -- **pad_and_cut_image**: Random padding image and then crop back to the original size. (Default: True) -- **shuffle_image**: Shuffle the order of the input images during training. (Default: True) -- **lr_max**: Learning rate at the begin of training. (Default: 0.1) -- **lr_min**: Learning rate at the end of training. 
(Default: 0.0001) -- **batch_size**: Training batch size (Default: 128) -- **num_epochs**: Total training epoch (Default: 200) -- **weight_decay**: L2 Regularization value (Default: 0.0004) -- **momentum**: The momentum parameter in momentum optimizer (Default: 0.9) -- **dropout_rate**: Dropout rate of the dropout layer (Default: 0.5) -- **bn_decay**: The decay/momentum parameter (or called moving average decay) in batch norm layer (Default: 0.9) - - -## Model performances - -Train all six models using same hyperparameters: - -- learning rate: 0.1 -> 0.0001 with cosine annealing -- total epoch: 200 -- batch size: 128 -- L2 decay: 0.000400 -- optimizer: momentum optimizer with m=0.9 and use nesterov -- preprocess: random horizontal flip + image standardization + cutout - -And below is the accuracy on CIFAR-10 dataset: - -| model | round 1 | round 2 | round 3 | max | avg | -|----------|---------|---------|---------|--------|--------| -| HiNAS-0 | 0.9548 | 0.9520 | 0.9513 | 0.9548 | 0.9527 | -| HiNAS-1 | 0.9452 | 0.9462 | 0.9420 | 0.9462 | 0.9445 | -| HiNAS-2 | 0.9508 | 0.9506 | 0.9483 | 0.9508 | 0.9499 | -| HiNAS-3 | 0.9607 | 0.9623 | 0.9601 | 0.9623 | 0.9611 | -| HiNAS-4 | 0.9611 | 0.9584 | 0.9586 | 0.9611 | 0.9594 | -| HiNAS-5 | 0.9578 | 0.9588 | 0.9594 | 0.9594 | 0.9586 | diff --git a/AutoDL/HiNAS_models/README_cn.md b/AutoDL/HiNAS_models/README_cn.md deleted file mode 100755 index 15bbf310..00000000 --- a/AutoDL/HiNAS_models/README_cn.md +++ /dev/null @@ -1,80 +0,0 @@ -# Image Classification Models -本目录下包含6个图像分类模型,都是百度大数据实验室 Hierarchical Neural Architecture Search (HiNAS) 项目通过机器自动发现的模型,在CIFAR-10数据集上达到96.1%的准确率。这6个模型分为两类,前3个没有skip link,分别命名为 HiNAS 0-2号,后三个网络带有skip link,功能类似于Resnet中的shortcut connection,分别命名 HiNAS 3-5号。 - -同时推荐用户参考[ IPython Notebook demo](https://aistudio.baidu.com/aistudio/projectDetail/122279) - ---- -## Table of Contents -- [Installation](#installation) -- [Data preparation](#data-preparation) -- [Training a model](#training-a-model) -- [Model performances](#model-performances) - -## Installation -最低环境要求: - -- PadddlePaddle Fluid >= v0.15.0 -- Cudnn >=6.0 - -如果您的运行环境无法满足要求,可以参考此文档升级PaddlePaddle:[installation document](http://www.paddlepaddle.org/docs/develop/documentation/zh/build_and_install/pip_install_cn.html) - -## Data preparation - -第一次训练模型的时候,Trainer会自动下载CIFAR-10数据集,请确保您的环境有互联网连接。 - -数据集会被下载到Trainer同目录下的`dataset/cifar/cifar-10-python.tar.gz`,如果自动下载失败,您可以自行从 https://www.cs.toronto.edu/~kriz/cifar.html 下载cifar-10-python.tar.gz,然后放到上述位置。 - - -## Training a model -准备好环境后,可以训练模型,训练有2个入口,`train_hinas.py`和`train_hinas_res.py`,前者用来训练0-2号不含skip link的模型,后者用来训练3-5号包含skip link的模型。 - -训练0~2号不含skip link的模型: -``` -python train_hinas.py --model=m_id # m_id can be 0, 1 or 2. -``` -训练3~5号包含skip link的模型: -``` -python train_hinas_res.py --model=m_id # m_id can be 0, 1 or 2. 
-``` - -此外,`train_hinas.py`和`train_hinas_res.py` 都支持以下参数: - -初始化部分: - -- random_flip_left_right:图片随机水平翻转(Default:True) -- random_flip_up_down:图片随机垂直翻转(Default:False) -- cutout:图片随机遮挡(Default:True) -- standardize_image:对图片每个像素做 standardize(Default:True) -- pad_and_cut_image:图片随机padding,并裁剪回原大小(Default:True) -- shuffle_image:训练时对输入图片的顺序做shuffle(Default:True) -- lr_max:训练开始时的learning rate(Default:0.1) -- lr_min:训练结束时的learning rate(Default:0.0001) -- batch_size:训练的batch size(Default:128) -- num_epochs:训练总的epoch(Default:200) -- weight_decay:训练时L2 Regularization大小(Default:0.0004) -- momentum:momentum优化器中的momentum系数(Default:0.9) -- dropout_rate:dropout层的dropout_rate(Default:0.5) -- bn_decay:batch norm层的decay/momentum系数(即moving average decay)大小(Default:0.9) - - - -## Model performances -6个模型使用相同的参数训练: - -- learning rate: 0.1 -> 0.0001 with cosine annealing -- total epoch: 200 -- batch size: 128 -- L2 decay: 0.000400 -- optimizer: momentum optimizer with m=0.9 and use nesterov -- preprocess: random horizontal flip + image standardization + cutout - -以下是6个模型在CIFAR-10数据集上的准确率: - -| model | round 1 | round 2 | round 3 | max | avg | -|----------|---------|---------|---------|--------|--------| -| HiNAS-0 | 0.9548 | 0.9520 | 0.9513 | 0.9548 | 0.9527 | -| HiNAS-1 | 0.9452 | 0.9462 | 0.9420 | 0.9462 | 0.9445 | -| HiNAS-2 | 0.9508 | 0.9506 | 0.9483 | 0.9508 | 0.9499 | -| HiNAS-3 | 0.9607 | 0.9623 | 0.9601 | 0.9623 | 0.9611 | -| HiNAS-4 | 0.9611 | 0.9584 | 0.9586 | 0.9611 | 0.9594 | -| HiNAS-5 | 0.9578 | 0.9588 | 0.9594 | 0.9594 | 0.9586 | diff --git a/AutoDL/HiNAS_models/build/__init__.py b/AutoDL/HiNAS_models/build/__init__.py deleted file mode 100755 index e69de29b..00000000 diff --git a/AutoDL/HiNAS_models/build/layers.py b/AutoDL/HiNAS_models/build/layers.py deleted file mode 100755 index 5bd67fb8..00000000 --- a/AutoDL/HiNAS_models/build/layers.py +++ /dev/null @@ -1,214 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import operator - -import numpy as np -import paddle.fluid as fluid -from absl import flags - -FLAGS = flags.FLAGS - -flags.DEFINE_float("bn_decay", 0.9, "batch norm decay") -flags.DEFINE_float("dropout_rate", 0.5, "dropout rate") - - -def calc_padding(img_width, stride, dilation, filter_width): - """ calculate pixels to padding in order to keep input/output size same. 
""" - - filter_width = dilation * (filter_width - 1) + 1 - if img_width % stride == 0: - pad_along_width = max(filter_width - stride, 0) - else: - pad_along_width = max(filter_width - (img_width % stride), 0) - return pad_along_width // 2, pad_along_width - pad_along_width // 2 - - -def conv(inputs, - filters, - kernel, - strides=(1, 1), - dilation=(1, 1), - num_groups=1, - conv_param=None): - """ normal conv layer """ - - if isinstance(kernel, (tuple, list)): - n = operator.mul(*kernel) * inputs.shape[1] - else: - n = kernel * kernel * inputs.shape[1] - - # pad input - padding = (0, 0, 0, 0) \ - + calc_padding(inputs.shape[2], strides[0], dilation[0], kernel[0]) \ - + calc_padding(inputs.shape[3], strides[1], dilation[1], kernel[1]) - if sum(padding) > 0: - inputs = fluid.layers.pad(inputs, padding, 0) - - param_attr = fluid.param_attr.ParamAttr( - initializer=fluid.initializer.NormalInitializer( - 0.0, scale=np.sqrt(2.0 / n)), - regularizer=fluid.regularizer.L2Decay(FLAGS.weight_decay)) - - bias_attr = fluid.param_attr.ParamAttr( - regularizer=fluid.regularizer.L2Decay(0.)) - - return fluid.layers.conv2d( - inputs, - filters, - kernel, - stride=strides, - padding=0, - dilation=dilation, - groups=num_groups, - param_attr=param_attr if conv_param is None else conv_param, - use_cudnn=False if num_groups == inputs.shape[1] == filters else True, - bias_attr=bias_attr, - act=None) - - -def sep(inputs, filters, kernel, strides=(1, 1), dilation=(1, 1)): - """ Separable convolution layer """ - - if isinstance(kernel, (tuple, list)): - n_depth = operator.mul(*kernel) - else: - n_depth = kernel * kernel - n_point = inputs.shape[1] - - if isinstance(strides, (tuple, list)): - multiplier = strides[0] - else: - multiplier = strides - - depthwise_param = fluid.param_attr.ParamAttr( - initializer=fluid.initializer.NormalInitializer( - 0.0, scale=np.sqrt(2.0 / n_depth)), - regularizer=fluid.regularizer.L2Decay(FLAGS.weight_decay)) - - pointwise_param = fluid.param_attr.ParamAttr( - initializer=fluid.initializer.NormalInitializer( - 0.0, scale=np.sqrt(2.0 / n_point)), - regularizer=fluid.regularizer.L2Decay(FLAGS.weight_decay)) - - depthwise_conv = conv( - inputs=inputs, - kernel=kernel, - filters=int(filters * multiplier), - strides=strides, - dilation=dilation, - num_groups=int(filters * multiplier), - conv_param=depthwise_param) - - return conv( - inputs=depthwise_conv, - kernel=(1, 1), - filters=int(filters * multiplier), - strides=(1, 1), - dilation=dilation, - conv_param=pointwise_param) - - -def maxpool(inputs, kernel, strides=(1, 1)): - padding = (0, 0, 0, 0) \ - + calc_padding(inputs.shape[2], strides[0], 1, kernel[0]) \ - + calc_padding(inputs.shape[3], strides[1], 1, kernel[1]) - if sum(padding) > 0: - inputs = fluid.layers.pad(inputs, padding, 0) - - return fluid.layers.pool2d( - inputs, kernel, 'max', strides, pool_padding=0, ceil_mode=False) - - -def avgpool(inputs, kernel, strides=(1, 1)): - padding_pixel = (0, 0, 0, 0) - padding_pixel += calc_padding(inputs.shape[2], strides[0], 1, kernel[0]) - padding_pixel += calc_padding(inputs.shape[3], strides[1], 1, kernel[1]) - - if padding_pixel[4] == padding_pixel[5] and padding_pixel[ - 6] == padding_pixel[7]: - # same padding pixel num on all sides. 
- return fluid.layers.pool2d( - inputs, - kernel, - 'avg', - strides, - pool_padding=(padding_pixel[4], padding_pixel[6]), - ceil_mode=False) - elif padding_pixel[4] + 1 == padding_pixel[5] and padding_pixel[6] + 1 == padding_pixel[7] \ - and strides == (1, 1): - # different padding size: first pad then crop. - x = fluid.layers.pool2d( - inputs, - kernel, - 'avg', - strides, - pool_padding=(padding_pixel[5], padding_pixel[7]), - ceil_mode=False) - x_shape = x.shape - return fluid.layers.crop( - x, - shape=(-1, x_shape[1], x_shape[2] - 1, x_shape[3] - 1), - offsets=(0, 0, 1, 1)) - else: - # not support. use padding-zero and pool2d. - print("Warning: use zero-padding in avgpool") - outputs = fluid.layers.pad(inputs, padding_pixel, 0) - return fluid.layers.pool2d( - outputs, kernel, 'avg', strides, pool_padding=0, ceil_mode=False) - - -def global_avgpool(inputs): - return fluid.layers.pool2d( - inputs, - 1, - 'avg', - 1, - pool_padding=0, - global_pooling=True, - ceil_mode=True) - - -def fully_connected(inputs, units): - n = inputs.shape[1] - param_attr = fluid.param_attr.ParamAttr( - initializer=fluid.initializer.NormalInitializer( - 0.0, scale=np.sqrt(2.0 / n)), - regularizer=fluid.regularizer.L2Decay(FLAGS.weight_decay)) - - bias_attr = fluid.param_attr.ParamAttr( - regularizer=fluid.regularizer.L2Decay(0.)) - - return fluid.layers.fc(inputs, - units, - param_attr=param_attr, - bias_attr=bias_attr) - - -def bn_relu(inputs): - """ batch norm + rely layer """ - - output = fluid.layers.batch_norm( - inputs, momentum=FLAGS.bn_decay, epsilon=0.001, data_layout="NCHW") - return fluid.layers.relu(output) - - -def dropout(inputs): - """ dropout layer """ - - return fluid.layers.dropout(inputs, dropout_prob=FLAGS.dropout_rate) diff --git a/AutoDL/HiNAS_models/build/ops.py b/AutoDL/HiNAS_models/build/ops.py deleted file mode 100755 index 359f6285..00000000 --- a/AutoDL/HiNAS_models/build/ops.py +++ /dev/null @@ -1,117 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import build.layers as layers - - -def conv_1x1(inputs, downsample=False): - return conv_base(inputs, (1, 1), downsample=downsample) - - -def conv_2x2(inputs, downsample=False): - return conv_base(inputs, (2, 2), downsample=downsample) - - -def conv_3x3(inputs, downsample=False): - return conv_base(inputs, (3, 3), downsample=downsample) - - -def dilated_2x2(inputs, downsample=False): - return conv_base(inputs, (2, 2), (2, 2), downsample) - - -def conv_1x2_2x1(inputs, downsample=False): - return pair_base(inputs, 2, downsample) - - -def conv_1x3_3x1(inputs, downsample=False): - return pair_base(inputs, 3, downsample) - - -def sep_2x2(inputs, downsample=False): - return sep_base(inputs, (2, 2), downsample=downsample) - - -def sep_3x3(inputs, downsample=False): - return sep_base(inputs, (3, 3), downsample=downsample) - - -def maxpool_2x2(inputs, downsample=False): - return maxpool_base(inputs, (2, 2), downsample) - - -def maxpool_3x3(inputs, downsample=False): - return maxpool_base(inputs, (3, 3), downsample) - - -def avgpool_2x2(inputs, downsample=False): - return avgpool_base(inputs, (2, 2), downsample) - - -def avgpool_3x3(inputs, downsample=False): - return avgpool_base(inputs, (3, 3), downsample) - - -def conv_base(inputs, kernel, dilation=(1, 1), downsample=False): - filters = inputs.shape[1] - if downsample: - output = layers.conv(inputs, filters * 2, kernel, (2, 2)) - else: - output = layers.conv(inputs, filters, kernel, dilation=dilation) - return output - - -def pair_base(inputs, kernel, downsample=False): - filters = inputs.shape[1] - if downsample: - output = layers.conv(inputs, filters, (1, kernel), (1, 2)) - output = layers.conv(output, filters, (kernel, 1), (2, 1)) - output = layers.conv(output, filters * 2, (1, 1)) - else: - output = layers.conv(inputs, filters, (1, kernel)) - output = layers.conv(output, filters, (kernel, 1)) - return output - - -def sep_base(inputs, kernel, dilation=(1, 1), downsample=False): - filters = inputs.shape[1] - if downsample: - output = layers.sep(inputs, filters * 2, kernel, (2, 2)) - else: - output = layers.sep(inputs, filters, kernel, dilation=dilation) - return output - - -def maxpool_base(inputs, kernel, downsample=False): - if downsample: - filters = inputs.shape[1] - output = layers.maxpool(inputs, kernel, (2, 2)) - output = layers.conv(output, filters * 2, (1, 1)) - else: - output = layers.maxpool(inputs, kernel) - return output - - -def avgpool_base(inputs, kernel, downsample=False): - if downsample: - filters = inputs.shape[1] - output = layers.avgpool(inputs, kernel, (2, 2)) - output = layers.conv(output, filters * 2, (1, 1)) - else: - output = layers.avgpool(inputs, kernel) - return output diff --git a/AutoDL/HiNAS_models/build/resnet_base.py b/AutoDL/HiNAS_models/build/resnet_base.py deleted file mode 100755 index 76c870de..00000000 --- a/AutoDL/HiNAS_models/build/resnet_base.py +++ /dev/null @@ -1,109 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
-# See the License for the specific language governing permissions and -# limitations under the License. - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle.fluid as fluid -from absl import flags - -import build.layers as layers -import build.ops as _ops - -FLAGS = flags.FLAGS - -flags.DEFINE_integer("num_stages", 3, "number of stages") -flags.DEFINE_integer("num_blocks", 5, "number of blocks per stage") -flags.DEFINE_integer("num_ops", 2, "number of operations per block") -flags.DEFINE_integer("width", 64, "network width") -flags.DEFINE_string("downsample", "pool", "conv or pool") - -num_classes = 10 - -ops = [ - _ops.conv_1x1, - _ops.conv_2x2, - _ops.conv_3x3, - _ops.dilated_2x2, - _ops.conv_1x2_2x1, - _ops.conv_1x3_3x1, - _ops.sep_2x2, - _ops.sep_3x3, - _ops.maxpool_2x2, - _ops.maxpool_3x3, - _ops.avgpool_2x2, - _ops.avgpool_3x3, -] - - -def net(inputs, tokens): - """ build network with skip links """ - - x = layers.conv(inputs, FLAGS.width, (3, 3)) - - num_ops = FLAGS.num_blocks * FLAGS.num_ops - x = stage(x, tokens[:num_ops], pre_activation=True) - for i in range(1, FLAGS.num_stages): - x = stage(x, tokens[i * num_ops:(i + 1) * num_ops], downsample=True) - - x = layers.bn_relu(x) - x = layers.global_avgpool(x) - x = layers.dropout(x) - logits = layers.fully_connected(x, num_classes) - - return fluid.layers.softmax(logits) - - -def stage(x, tokens, pre_activation=False, downsample=False): - """ build network's stage. Stage consists of blocks """ - - x = block(x, tokens[:FLAGS.num_ops], pre_activation, downsample) - for i in range(1, FLAGS.num_blocks): - print("-" * 12) - x = block(x, tokens[i * FLAGS.num_ops:(i + 1) * FLAGS.num_ops]) - print("=" * 12) - - return x - - -def block(x, tokens, pre_activation=False, downsample=False): - """ build block. """ - - if pre_activation: - x = layers.bn_relu(x) - res = x - else: - res = x - x = layers.bn_relu(x) - - x = ops[tokens[0]](x, downsample) - print("%s \t-> shape %s" % (ops[0].__name__, x.shape)) - for token in tokens[1:]: - x = layers.bn_relu(x) - x = ops[token](x) - print("%s \t-> shape %s" % (ops[token].__name__, x.shape)) - - if downsample: - filters = res.shape[1] - if FLAGS.downsample == "conv": - res = layers.conv(res, filters * 2, (1, 1), (2, 2)) - elif FLAGS.downsample == "pool": - res = layers.avgpool(res, (2, 2), (2, 2)) - res = fluid.layers.pad(res, (0, 0, filters // 2, filters // 2, 0, 0, - 0, 0)) - else: - raise NotImplementedError - - return x + res diff --git a/AutoDL/HiNAS_models/build/vgg_base.py b/AutoDL/HiNAS_models/build/vgg_base.py deleted file mode 100755 index d7506a7e..00000000 --- a/AutoDL/HiNAS_models/build/vgg_base.py +++ /dev/null @@ -1,70 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import paddle.fluid as fluid -from absl import flags - -import build.layers as layers -import build.ops as _ops - -FLAGS = flags.FLAGS -flags.DEFINE_integer("num_stages", 5, "number of stages") -flags.DEFINE_integer("width", 64, "network width") - -num_classes = 10 - -ops = [ - _ops.conv_1x1, #0 - _ops.conv_2x2, #1 - _ops.conv_3x3, #2 - _ops.dilated_2x2, #3 - _ops.conv_1x2_2x1, #4 - _ops.conv_1x3_3x1, #5 - _ops.sep_2x2, #6 - _ops.sep_3x3, #7 - _ops.maxpool_2x2, #8 - _ops.maxpool_3x3, - _ops.avgpool_2x2, #10 - _ops.avgpool_3x3, -] - - -def net(inputs, tokens): - depth = len(tokens) - q, r = divmod(depth + 1, FLAGS.num_stages) - downsample_steps = [ - i * q + max(0, i + r - FLAGS.num_stages + 1) - 2 - for i in range(1, FLAGS.num_stages) - ] - - x = layers.conv(inputs, FLAGS.width, (3, 3)) - x = layers.bn_relu(x) - - for i, token in enumerate(tokens): - downsample = i in downsample_steps - x = ops[token](x, downsample) - print("%s \t-> shape %s" % (ops[token].__name__, x.shape)) - if downsample: - print("=" * 12) - x = layers.bn_relu(x) - - x = layers.global_avgpool(x) - x = layers.dropout(x) - logits = layers.fully_connected(x, num_classes) - - return fluid.layers.softmax(logits) diff --git a/AutoDL/HiNAS_models/nn_paddle.py b/AutoDL/HiNAS_models/nn_paddle.py deleted file mode 100755 index d3a3ddd6..00000000 --- a/AutoDL/HiNAS_models/nn_paddle.py +++ /dev/null @@ -1,139 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function - -import math - -import numpy as np -import paddle -import paddle.fluid as fluid -from paddle.fluid.contrib.trainer import * -from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter -import reader - -from absl import flags - -# import preprocess - -FLAGS = flags.FLAGS - -flags.DEFINE_float("lr_max", 0.1, "initial learning rate") -flags.DEFINE_float("lr_min", 0.0001, "limiting learning rate") - -flags.DEFINE_integer("batch_size", 128, "batch size") -flags.DEFINE_integer("num_epochs", 200, "total epochs to train") -flags.DEFINE_float("weight_decay", 0.0004, "weight decay") - -flags.DEFINE_float("momentum", 0.9, "momentum") - -flags.DEFINE_boolean("shuffle_image", True, "shuffle input images on training") - -dataset_train_size = 50000 - - -class Model(object): - def __init__(self, build_fn, tokens): - print("learning rate: %f -> %f, cosine annealing" % - (FLAGS.lr_max, FLAGS.lr_min)) - print("epoch: %d" % FLAGS.num_epochs) - print("batch size: %d" % FLAGS.batch_size) - print("L2 decay: %f" % FLAGS.weight_decay) - - self.max_step = dataset_train_size * FLAGS.num_epochs // FLAGS.batch_size - - self.build_fn = build_fn - self.tokens = tokens - print("Token is %s" % ",".join(map(str, tokens))) - - def cosine_annealing(self): - step = _decay_step_counter() - lr = FLAGS.lr_min + (FLAGS.lr_max - FLAGS.lr_min) / 2 \ - * (1.0 + fluid.layers.ops.cos(step / self.max_step * math.pi)) - return lr - - def optimizer_program(self): - return fluid.optimizer.Momentum( - learning_rate=self.cosine_annealing(), - momentum=FLAGS.momentum, - use_nesterov=True, - regularization=fluid.regularizer.L2DecayRegularizer( - FLAGS.weight_decay)) - - def inference_network(self): - images = fluid.layers.data( - name='pixel', shape=[3, 32, 32], dtype='float32') - return self.build_fn(images, self.tokens) - - def train_network(self): - predict = self.inference_network() - label = fluid.layers.data(name='label', shape=[1], dtype='int64') - cost = fluid.layers.cross_entropy(input=predict, label=label) - avg_cost = fluid.layers.mean(cost) - accuracy = fluid.layers.accuracy(input=predict, label=label) - # self.parameters = fluid.parameters.create(avg_cost) - return [avg_cost, accuracy] - - def run(self): - train_files = reader.train10() - test_files = reader.test10() - - if FLAGS.shuffle_image: - train_reader = paddle.batch( - paddle.reader.shuffle(train_files, dataset_train_size), - batch_size=FLAGS.batch_size) - else: - train_reader = paddle.batch( - train_files, batch_size=FLAGS.batch_size) - - test_reader = paddle.batch(test_files, batch_size=FLAGS.batch_size) - - costs = [] - accs = [] - - def event_handler(event): - if isinstance(event, EndStepEvent): - costs.append(event.metrics[0]) - accs.append(event.metrics[1]) - if event.step % 20 == 0: - print("Epoch %d, Step %d, Loss %f, Acc %f" % ( - event.epoch, event.step, np.mean(costs), np.mean(accs))) - del costs[:] - del accs[:] - - if isinstance(event, EndEpochEvent): - if event.epoch % 3 == 0 or event.epoch == FLAGS.num_epochs - 1: - avg_cost, accuracy = trainer.test( - reader=test_reader, feed_order=['pixel', 'label']) - - event_handler.best_acc = max(event_handler.best_acc, - accuracy) - print("Test with epoch %d, Loss %f, Acc %f" % - (event.epoch, avg_cost, accuracy)) - print("Best acc %f" % event_handler.best_acc) - - event_handler.best_acc = 0.0 - place = fluid.CUDAPlace(0) - trainer = Trainer( - train_func=self.train_network, - 
optimizer_func=self.optimizer_program, - place=place) - - trainer.train( - reader=train_reader, - num_epochs=FLAGS.num_epochs, - event_handler=event_handler, - feed_order=['pixel', 'label']) diff --git a/AutoDL/HiNAS_models/reader.py b/AutoDL/HiNAS_models/reader.py deleted file mode 100755 index e30725b0..00000000 --- a/AutoDL/HiNAS_models/reader.py +++ /dev/null @@ -1,157 +0,0 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -""" -CIFAR-10 dataset. -This module will download dataset from -https://www.cs.toronto.edu/~kriz/cifar.html and parse train/test set into -paddle reader creators. -The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, -with 6000 images per class. There are 50000 training images and 10000 test images. -""" - -from PIL import Image -from PIL import ImageOps -import numpy as np - -import cPickle -import itertools -import paddle.dataset.common -import tarfile -from absl import flags - -FLAGS = flags.FLAGS - -flags.DEFINE_boolean("random_flip_left_right", True, - "random flip left and right") -flags.DEFINE_boolean("random_flip_up_down", False, "random flip up and down") -flags.DEFINE_boolean("cutout", True, "cutout") -flags.DEFINE_boolean("standardize_image", True, "standardize input images") -flags.DEFINE_boolean("pad_and_cut_image", True, "pad and cut input images") - -__all__ = ['train10', 'test10', 'convert'] - -URL_PREFIX = 'https://www.cs.toronto.edu/~kriz/' -CIFAR10_URL = URL_PREFIX + 'cifar-10-python.tar.gz' -CIFAR10_MD5 = 'c58f30108f718f92721af3b95e74349a' - -paddle.dataset.common.DATA_HOME = "dataset/" - -image_size = 32 -image_depth = 3 -half_length = 8 - - -def preprocess(sample, is_training): - image_array = sample.reshape(3, image_size, image_size) - rgb_array = np.transpose(image_array, (1, 2, 0)) - img = Image.fromarray(rgb_array, 'RGB') - - if is_training: - if FLAGS.pad_and_cut_image: - # pad and ramdom crop - img = ImageOps.expand( - img, (2, 2, 2, 2), fill=0) # pad to 36 * 36 * 3 - left_top = np.random.randint(5, size=2) # rand 0 - 4 - img = img.crop((left_top[0], left_top[1], left_top[0] + image_size, - left_top[1] + image_size)) - - if FLAGS.random_flip_left_right and np.random.randint(2): - img = img.transpose(Image.FLIP_LEFT_RIGHT) - if FLAGS.random_flip_up_down and np.random.randint(2): - img = img.transpose(Image.FLIP_TOP_BOTTOM) - - img = np.array(img).astype(np.float32) - - if FLAGS.standardize_image: - # per_image_standardization - img_float = img / 255.0 - mean = np.mean(img_float) - std = max(np.std(img_float), 1.0 / np.sqrt(3 * image_size * image_size)) - img = (img_float - mean) / std - - if is_training and FLAGS.cutout: - center = np.random.randint(image_size, size=2) - offset_width = max(0, center[0] - half_length) - offset_height = max(0, center[1] - half_length) - target_width = min(center[0] + half_length, image_size) - target_height = min(center[1] + half_length, image_size) - - for i in range(offset_height, target_height): - for j in 
range(offset_width, target_width): - img[i][j][:] = 0.0 - - img = np.transpose(img, (2, 0, 1)) - return img.reshape(3 * image_size * image_size) - - -def reader_creator(filename, sub_name, is_training): - def read_batch(batch): - data = batch['data'] - labels = batch.get('labels', batch.get('fine_labels', None)) - assert labels is not None - for sample, label in itertools.izip(data, labels): - yield preprocess(sample, is_training), int(label) - - def reader(): - with tarfile.open(filename, mode='r') as f: - names = [ - each_item.name for each_item in f if sub_name in each_item.name - ] - names.sort() - - for name in names: - print("Reading file " + name) - batch = cPickle.load(f.extractfile(name)) - for item in read_batch(batch): - yield item - - return reader - - -def train10(): - """ - CIFAR-10 training set creator. - It returns a reader creator, each sample in the reader is image pixels in - [0, 1] and label in [0, 9]. - :return: Training reader creator - :rtype: callable - """ - return reader_creator( - paddle.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5), - 'data_batch', True) - - -def test10(): - """ - CIFAR-10 test set creator. - It returns a reader creator, each sample in the reader is image pixels in - [0, 1] and label in [0, 9]. - :return: Test reader creator. - :rtype: callable - """ - return reader_creator( - paddle.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5), - 'test_batch', False) - - -def fetch(): - paddle.dataset.common.download(CIFAR10_URL, 'cifar', CIFAR10_MD5) - - -def convert(path): - """ - Converts dataset to recordio format - """ - paddle.dataset.common.convert(path, train10(), 1000, "cifar_train10") - paddle.dataset.common.convert(path, test10(), 1000, "cifar_test10") diff --git a/AutoDL/HiNAS_models/tokens/15113.pkl b/AutoDL/HiNAS_models/tokens/15113.pkl deleted file mode 100755 index a36c7d322311ccceff93b13ddb5bc73058bb4bb7..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 328 zcmXv}J#T|B5H*mH;I?V=F=x(%RHkIg#yLWf* zy_4?Sm#Vw8eJmSY#h31E>Rju*=SN-1MqQ2Wq~TzrimP&D;R$w!FORY*D|zLWqJ|^L zJjPgoSr%G6=cZQgnExP5u^6yS5gmjl_IL2gx|Y3nsh)FDX_BF<$$` zYmy{ie>NUPFoU<^_Y`C|>meV-+$Qyu%L(0%(CvtiCf=drXbvWhJk-0HkbWW!aaf=1 MUz-$LgIy8-1Jmb1x&QzG diff --git a/AutoDL/HiNAS_models/tokens/15383.pkl b/AutoDL/HiNAS_models/tokens/15383.pkl deleted file mode 100755 index 9f05c39b..00000000 --- a/AutoDL/HiNAS_models/tokens/15383.pkl +++ /dev/null @@ -1,36 +0,0 @@ -cnumpy.core.multiarray -_reconstruct -p0 -(cnumpy -ndarray -p1 -(I0 -tp2 -S'b' -p3 -tp4 -Rp5 -(I1 -(I21 -tp6 -cnumpy -dtype -p7 -(S'i4' -p8 -I0 -I1 -tp9 -Rp10 -(I3 -S'<' -p11 -NNNI-1 -I-1 -I0 -tp12 -bI00 -S'\x05\x00\x00\x00\x07\x00\x00\x00\x02\x00\x00\x00\x05\x00\x00\x00\x05\x00\x00\x00\x02\x00\x00\x00\x08\x00\x00\x00\x02\x00\x00\x00\x03\x00\x00\x00\x01\x00\x00\x00\n\x00\x00\x00\t\x00\x00\x00\x03\x00\x00\x00\x08\x00\x00\x00\x0b\x00\x00\x00\x03\x00\x00\x00\t\x00\x00\x00\x02\x00\x00\x00\x06\x00\x00\x00\x01\x00\x00\x00\x06\x00\x00\x00' -p13 -tp14 -b. \ No newline at end of file diff --git a/AutoDL/HiNAS_models/tokens/15613.pkl b/AutoDL/HiNAS_models/tokens/15613.pkl deleted file mode 100755 index 332564be14020af6118ad578092d7e68f1447596..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 328 zcmYjMO|OD55GZiliN9LA0tuUJcV=gH zH@&)2m8Wy%%R*=VsXUrI);g~EUT3mU7o$sQI9TK2Ds5Cag5BWCy)4p9UU;SmaCni! 
z7)vmSMz$VfldCm`cZ@=O)EI}Dtb`-}Zs6_0)Kq6XH;r8D&o`I}CY&1c5Q~*)wr#Nt zf?)Tw@qPp|_)H!LFZ#DLG)F;q2R4a&ZBmao<+z7Q?|OuXEuZu})Fa;Q>NKjP;p<@k L^&R5J;5YGK%(+6W diff --git a/AutoDL/HiNAS_models/tokens/17754.pkl b/AutoDL/HiNAS_models/tokens/17754.pkl deleted file mode 100755 index 4844119fdbee64f86e457d70ce9e7259ced7b15f..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 328 zcmXw!y^g{#5QLosI55Z0!PQq&MM9#yC%{sQROwLSL?RM6jBzB=(QxRV8g>b>^m)hL znYDcNwW*J%Ce)QJ!ef1KWom8O$ek^8WzNpl+DVAkB~9LT;VE&0uXno23w@S_VIdJD zo)98KFBwohq^>k;^#3r3@o~p6#%QHH^>c%K56oS2qTD#~$zN{psf6 [An Empirical Study on Regularization of Deep Neural Networks by Local Rademacher Complexity](https://arxiv.org/abs/1902.00873)\ -> Yingzhen Yang, Xingjian Li, Jun Huan.\ -> _arXiv:1902.00873_. - ---- -# Table of Contents - -- [Installation](#installation) -- [Data preparation](#data-preparation) -- [Training](#training) - -## Installation - -Running sample code in this directory requires PaddelPaddle Fluid v.1.2.0 and later. If the PaddlePaddle on your device is lower than this version, please follow the instructions in [installation document](http://www.paddlepaddle.org/documentation/docs/zh/1.2/beginners_guide/install/index_cn.html#paddlepaddle) and make an update. - -## Data preparation - -When you want to use the cifar-10 dataset for the first time, you can download the dataset as: - - sh ./dataset/download.sh - -Please make sure your environment has an internet connection. - -The dataset will be downloaded to `dataset/cifar/cifar-10-batches-py` in the same directory as the `train.py`. If automatic download fails, you can download cifar-10-python.tar.gz from https://www.cs.toronto.edu/~kriz/cifar.html and decompress it to the location mentioned above. - - -## Training - -After data preparation, one can start the training step by: - - python -u train_mixup.py \ - --batch_size=80 \ - --auxiliary \ - --weight_decay=0.0003 \ - --learning_rate=0.025 \ - --lrc_loss_lambda=0.7 \ - --cutout -- Set ```export CUDA_VISIBLE_DEVICES=0``` to specifiy one GPU to train. -- For more help on arguments: - - python train_mixup.py --help - -**data reader introduction:** - -* Data reader is defined in `reader.py`. -* Reshape the images to 32 * 32. -* In training stage, images are padding to 40 * 40 and cropped randomly to the original size. -* In training stage, images are horizontally random flipped. -* Images are standardized to (0, 1). -* In training stage, cutout images randomly. -* Shuffle the order of the input images during training. - -**model configuration:** - -* Use auxiliary loss and auxiliary\_weight=0.4. -* Use dropout and drop\_path\_prob=0.2. -* Set lrc\_loss\_lambda=0.7. - -**training strategy:** - -* Use momentum optimizer with momentum=0.9. -* Weight decay is 0.0003. -* Use cosine decay with init\_lr=0.025. -* Total epoch is 600. -* Use Xaiver initalizer to weight in conv2d, Constant initalizer to weight in batch norm and Normal initalizer to weight in fc. -* Initalize bias in batch norm and fc to zero constant and do not add bias to conv2d. 
- - -## Reference - - - DARTS: Differentiable Architecture Search [`paper`](https://arxiv.org/abs/1806.09055) - - Differentiable architecture search in PyTorch [`code`](https://github.com/quark0/darts) diff --git a/AutoDL/LRC/README_cn.md b/AutoDL/LRC/README_cn.md deleted file mode 100644 index 06dc9370..00000000 --- a/AutoDL/LRC/README_cn.md +++ /dev/null @@ -1,71 +0,0 @@ -# LRC 局部Rademachar复杂度正则化 -为了在深度神经网络中提升泛化能力,正则化的选择十分重要也具有挑战性。本目录包括了一种基于局部rademacher复杂度的新型正则(LRC)的图像分类模型。十分感谢[DARTS](https://arxiv.org/abs/1806.09055)模型对本研究提供的帮助。该模型将LRC正则和DARTS网络相结合,在CIFAR-10数据集中得到了很出色的效果。代码和文章一同发布 -> [An Empirical Study on Regularization of Deep Neural Networks by Local Rademacher Complexity](https://arxiv.org/abs/1902.00873)\ -> Yingzhen Yang, Xingjian Li, Jun Huan.\ -> _arXiv:1902.00873_. - ---- -# 内容 - -- [安装](#安装) -- [数据准备](#数据准备) -- [模型训练](#模型训练) - -## 安装 - -在当前目录下运行样例代码需要PadddlePaddle Fluid的v.1.2.0或以上的版本。如果你的运行环境中的PaddlePaddle低于此版本,请根据[安装文档](http://www.paddlepaddle.org/documentation/docs/zh/1.2/beginners_guide/install/index_cn.html#paddlepaddle)中的说明来更新PaddlePaddle。 - -## 数据准备 - -第一次使用CIFAR-10数据集时,您可以通过如果命令下载: - - sh ./dataset/download.sh - -请确保您的环境有互联网连接。数据会下载到`train.py`同目录下的`dataset/cifar/cifar-10-batches-py`。如果下载失败,您可以自行从https://www.cs.toronto.edu/~kriz/cifar.html上下载cifar-10-python.tar.gz并解压到上述位置。 - -## 模型训练 - -数据准备好后,可以通过如下命令开始训练: - - python -u train_mixup.py \ - --batch_size=80 \ - --auxiliary \ - --weight_decay=0.0003 \ - --learning_rate=0.025 \ - --lrc_loss_lambda=0.7 \ - --cutout -- 通过设置 ```export CUDA_VISIBLE_DEVICES=0```指定单张GPU训练。 -- 可选参数见: - - python train_mixup.py --help - -**数据读取器说明:** - -* 数据读取器定义在`reader.py`中 -* 输入图像尺寸统一变换为32 * 32 -* 训练时将图像填充为40 * 40然后随机剪裁为原输入图像大小 -* 训练时图像随机水平翻转 -* 对图像每个像素做归一化处理 -* 训练时对图像做随机遮挡 -* 训练时对输入图像做随机洗牌 - -**模型配置:** - -* 使用辅助损失,辅助损失权重为0.4 -* 使用dropout,随机丢弃率为0.2 -* 设置lrc\_loss\_lambda为0.7 - -**训练策略:** - -* 采用momentum优化算法训练,momentum=0.9 -* 权重衰减系数为0.0001 -* 采用正弦学习率衰减,初始学习率为0.025 -* 总共训练600轮 -* 对卷积权重采用Xaiver初始化,对batch norm权重采用固定初始化,对全连接层权重采用高斯初始化 -* 对batch norm和全连接层偏差采用固定初始化,不对卷积设置偏差 - - -## 引用 - - - DARTS: Differentiable Architecture Search [`论文`](https://arxiv.org/abs/1806.09055) - - Differentiable Architecture Search in PyTorch [`代码`](https://github.com/quark0/darts) diff --git a/AutoDL/LRC/dataset/download.sh b/AutoDL/LRC/dataset/download.sh deleted file mode 100644 index 0981c3b6..00000000 --- a/AutoDL/LRC/dataset/download.sh +++ /dev/null @@ -1,10 +0,0 @@ -DIR="$( cd "$(dirname "$0")" ; pwd -P )" -cd "$DIR" -mkdir cifar -cd cifar -# Download the data. -echo "Downloading..." -wget https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz -# Extract the data. -echo "Extracting..." -tar zvxf cifar-10-python.tar.gz diff --git a/AutoDL/LRC/genotypes.py b/AutoDL/LRC/genotypes.py deleted file mode 100644 index 349fbd24..00000000 --- a/AutoDL/LRC/genotypes.py +++ /dev/null @@ -1,116 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -# Based on: -# -------------------------------------------------------- -# DARTS -# Copyright (c) 2018, Hanxiao Liu. -# Licensed under the Apache License, Version 2.0; -# -------------------------------------------------------- - -from collections import namedtuple - -Genotype = namedtuple('Genotype', 'normal normal_concat reduce reduce_concat') - -PRIMITIVES = [ - 'none', 'max_pool_3x3', 'avg_pool_3x3', 'skip_connect', 'sep_conv_3x3', - 'sep_conv_5x5', 'dil_conv_3x3', 'dil_conv_5x5' -] - -NASNet = Genotype( - normal=[ - ('sep_conv_5x5', 1), - ('sep_conv_3x3', 0), - ('sep_conv_5x5', 0), - ('sep_conv_3x3', 0), - ('avg_pool_3x3', 1), - ('skip_connect', 0), - ('avg_pool_3x3', 0), - ('avg_pool_3x3', 0), - ('sep_conv_3x3', 1), - ('skip_connect', 1), - ], - normal_concat=[2, 3, 4, 5, 6], - reduce=[ - ('sep_conv_5x5', 1), - ('sep_conv_7x7', 0), - ('max_pool_3x3', 1), - ('sep_conv_7x7', 0), - ('avg_pool_3x3', 1), - ('sep_conv_5x5', 0), - ('skip_connect', 3), - ('avg_pool_3x3', 2), - ('sep_conv_3x3', 2), - ('max_pool_3x3', 1), - ], - reduce_concat=[4, 5, 6], ) - -AmoebaNet = Genotype( - normal=[ - ('avg_pool_3x3', 0), - ('max_pool_3x3', 1), - ('sep_conv_3x3', 0), - ('sep_conv_5x5', 2), - ('sep_conv_3x3', 0), - ('avg_pool_3x3', 3), - ('sep_conv_3x3', 1), - ('skip_connect', 1), - ('skip_connect', 0), - ('avg_pool_3x3', 1), - ], - normal_concat=[4, 5, 6], - reduce=[ - ('avg_pool_3x3', 0), - ('sep_conv_3x3', 1), - ('max_pool_3x3', 0), - ('sep_conv_7x7', 2), - ('sep_conv_7x7', 0), - ('avg_pool_3x3', 1), - ('max_pool_3x3', 0), - ('max_pool_3x3', 1), - ('conv_7x1_1x7', 0), - ('sep_conv_3x3', 5), - ], - reduce_concat=[3, 4, 6]) - -DARTS_V1 = Genotype( - normal=[('sep_conv_3x3', 1), ('sep_conv_3x3', 0), ('skip_connect', 0), - ('sep_conv_3x3', 1), ('skip_connect', 0), ('sep_conv_3x3', 1), - ('sep_conv_3x3', 0), ('skip_connect', 2)], - normal_concat=[2, 3, 4, 5], - reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('skip_connect', 2), - ('max_pool_3x3', 0), ('max_pool_3x3', 0), ('skip_connect', 2), - ('skip_connect', 2), ('avg_pool_3x3', 0)], - reduce_concat=[2, 3, 4, 5]) -DARTS_V2 = Genotype( - normal=[('sep_conv_3x3', 0), ('sep_conv_3x3', 1), ('sep_conv_3x3', 0), - ('sep_conv_3x3', 1), ('sep_conv_3x3', 1), ('skip_connect', 0), - ('skip_connect', 0), ('dil_conv_3x3', 2)], - normal_concat=[2, 3, 4, 5], - reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('skip_connect', 2), - ('max_pool_3x3', 1), ('max_pool_3x3', 0), ('skip_connect', 2), - ('skip_connect', 2), ('max_pool_3x3', 1)], - reduce_concat=[2, 3, 4, 5]) - -MY_DARTS = Genotype( - normal=[('sep_conv_3x3', 0), ('skip_connect', 1), ('skip_connect', 0), - ('dil_conv_5x5', 1), ('skip_connect', 0), ('sep_conv_3x3', 1), - ('skip_connect', 0), ('sep_conv_3x3', 1)], - normal_concat=range(2, 6), - reduce=[('max_pool_3x3', 0), ('max_pool_3x3', 1), ('max_pool_3x3', 0), - ('skip_connect', 2), ('max_pool_3x3', 0), ('skip_connect', 2), - ('skip_connect', 2), ('skip_connect', 3)], - reduce_concat=range(2, 6)) - -DARTS = MY_DARTS diff --git a/AutoDL/LRC/learning_rate.py b/AutoDL/LRC/learning_rate.py deleted file mode 100644 index 3965171b..00000000 --- a/AutoDL/LRC/learning_rate.py +++ /dev/null @@ -1,43 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. 
-# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Based on: -# -------------------------------------------------------- -# DARTS -# Copyright (c) 2018, Hanxiao Liu. -# Licensed under the Apache License, Version 2.0; -# -------------------------------------------------------- - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import paddle -import paddle.fluid as fluid -import paddle.fluid.layers.ops as ops -from paddle.fluid.layers.learning_rate_scheduler import _decay_step_counter -import math -from paddle.fluid.initializer import init_on_cpu - - -def cosine_decay(learning_rate, num_epoch, steps_one_epoch): - """Applies cosine decay to the learning rate. - lr = 0.5 * (math.cos(epoch * (math.pi / 120)) + 1) - """ - global_step = _decay_step_counter() - - with init_on_cpu(): - decayed_lr = learning_rate * \ - (ops.cos((global_step / steps_one_epoch) \ - * math.pi / num_epoch) + 1)/2 - return decayed_lr diff --git a/AutoDL/LRC/model.py b/AutoDL/LRC/model.py deleted file mode 100644 index 45a40349..00000000 --- a/AutoDL/LRC/model.py +++ /dev/null @@ -1,313 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserve. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -# -# Based on: -# -------------------------------------------------------- -# DARTS -# Copyright (c) 2018, Hanxiao Liu. 
-# Licensed under the Apache License, Version 2.0; -# -------------------------------------------------------- - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import os -import sys -import numpy as np -import time -import functools -import paddle -import paddle.fluid as fluid -from operations import * - - -class Cell(): - def __init__(self, genotype, C_prev_prev, C_prev, C, reduction, - reduction_prev): - print(C_prev_prev, C_prev, C) - - if reduction_prev: - self.preprocess0 = functools.partial(FactorizedReduce, C_out=C) - else: - self.preprocess0 = functools.partial( - ReLUConvBN, C_out=C, kernel_size=1, stride=1, padding=0) - self.preprocess1 = functools.partial( - ReLUConvBN, C_out=C, kernel_size=1, stride=1, padding=0) - if reduction: - op_names, indices = zip(*genotype.reduce) - concat = genotype.reduce_concat - else: - op_names, indices = zip(*genotype.normal) - concat = genotype.normal_concat - print(op_names, indices, concat, reduction) - self._compile(C, op_names, indices, concat, reduction) - - def _compile(self, C, op_names, indices, concat, reduction): - assert len(op_names) == len(indices) - self._steps = len(op_names) // 2 - self._concat = concat - self.multiplier = len(concat) - - self._ops = [] - for name, index in zip(op_names, indices): - stride = 2 if reduction and index < 2 else 1 - op = functools.partial(OPS[name], C=C, stride=stride, affine=True) - self._ops += [op] - self._indices = indices - - def forward(self, s0, s1, drop_prob, is_train, name): - self.training = is_train - preprocess0_name = name + 'preprocess0.' - preprocess1_name = name + 'preprocess1.' - s0 = self.preprocess0(s0, name=preprocess0_name) - s1 = self.preprocess1(s1, name=preprocess1_name) - out = [s0, s1] - for i in range(self._steps): - h1 = out[self._indices[2 * i]] - h2 = out[self._indices[2 * i + 1]] - op1 = self._ops[2 * i] - op2 = self._ops[2 * i + 1] - h3 = op1(h1, name=name + '_ops.' + str(2 * i) + '.') - h4 = op2(h2, name=name + '_ops.' 
+ str(2 * i + 1) + '.') - if self.training and drop_prob > 0.: - if h3 != h1: - h3 = fluid.layers.dropout( - h3, - drop_prob, - dropout_implementation='upscale_in_train') - if h4 != h2: - h4 = fluid.layers.dropout( - h4, - drop_prob, - dropout_implementation='upscale_in_train') - s = h3 + h4 - out += [s] - return fluid.layers.concat([out[i] for i in self._concat], axis=1) - - -def AuxiliaryHeadCIFAR(input, num_classes, aux_name='auxiliary_head'): - relu_a = fluid.layers.relu(input) - pool_a = fluid.layers.pool2d(relu_a, 5, 'avg', 3) - conv2d_a = fluid.layers.conv2d( - pool_a, - 128, - 1, - name=aux_name + '.features.2', - param_attr=ParamAttr( - initializer=Xavier( - uniform=False, fan_in=0), - name=aux_name + '.features.2.weight'), - bias_attr=False) - bn_a_name = aux_name + '.features.3' - bn_a = fluid.layers.batch_norm( - conv2d_a, - act='relu', - name=bn_a_name, - param_attr=ParamAttr( - initializer=Constant(1.), name=bn_a_name + '.weight'), - bias_attr=ParamAttr( - initializer=Constant(0.), name=bn_a_name + '.bias'), - moving_mean_name=bn_a_name + '.running_mean', - moving_variance_name=bn_a_name + '.running_var') - conv2d_b = fluid.layers.conv2d( - bn_a, - 768, - 2, - name=aux_name + '.features.5', - param_attr=ParamAttr( - initializer=Xavier( - uniform=False, fan_in=0), - name=aux_name + '.features.5.weight'), - bias_attr=False) - bn_b_name = aux_name + '.features.6' - bn_b = fluid.layers.batch_norm( - conv2d_b, - act='relu', - name=bn_b_name, - param_attr=ParamAttr( - initializer=Constant(1.), name=bn_b_name + '.weight'), - bias_attr=ParamAttr( - initializer=Constant(0.), name=bn_b_name + '.bias'), - moving_mean_name=bn_b_name + '.running_mean', - moving_variance_name=bn_b_name + '.running_var') - fc_name = aux_name + '.classifier' - fc = fluid.layers.fc(bn_b, - num_classes, - name=fc_name, - param_attr=ParamAttr( - initializer=Normal(scale=1e-3), - name=fc_name + '.weight'), - bias_attr=ParamAttr( - initializer=Constant(0.), name=fc_name + '.bias')) - return fc - - -def StemConv(input, C_out, kernel_size, padding): - conv_a = fluid.layers.conv2d( - input, - C_out, - kernel_size, - padding=padding, - param_attr=ParamAttr( - initializer=Xavier( - uniform=False, fan_in=0), name='stem.0.weight'), - bias_attr=False) - bn_a = fluid.layers.batch_norm( - conv_a, - param_attr=ParamAttr( - initializer=Constant(1.), name='stem.1.weight'), - bias_attr=ParamAttr( - initializer=Constant(0.), name='stem.1.bias'), - moving_mean_name='stem.1.running_mean', - moving_variance_name='stem.1.running_var') - return bn_a - - -class NetworkCIFAR(object): - def __init__(self, C, class_num, layers, auxiliary, genotype): - self.class_num = class_num - self._layers = layers - self._auxiliary = auxiliary - - stem_multiplier = 3 - self.drop_path_prob = 0 - C_curr = stem_multiplier * C - - C_prev_prev, C_prev, C_curr = C_curr, C_curr, C - self.cells = [] - reduction_prev = False - for i in range(layers): - if i in [layers // 3, 2 * layers // 3]: - C_curr *= 2 - reduction = True - else: - reduction = False - cell = Cell(genotype, C_prev_prev, C_prev, C_curr, reduction, - reduction_prev) - reduction_prev = reduction - self.cells += [cell] - C_prev_prev, C_prev = C_prev, cell.multiplier * C_curr - if i == 2 * layers // 3: - C_to_auxiliary = C_prev - - def forward(self, init_channel, is_train): - self.training = is_train - self.logits_aux = None - num_channel = init_channel * 3 - s0 = StemConv(self.image, num_channel, kernel_size=3, padding=1) - s1 = s0 - for i, cell in enumerate(self.cells): - name = 'cells.' 
+ str(i) + '.' - s0, s1 = s1, cell.forward(s0, s1, self.drop_path_prob, is_train, - name) - if i == int(2 * self._layers // 3): - if self._auxiliary and self.training: - self.logits_aux = AuxiliaryHeadCIFAR(s1, self.class_num) - out = fluid.layers.adaptive_pool2d(s1, (1, 1), "avg") - self.logits = fluid.layers.fc(out, - size=self.class_num, - param_attr=ParamAttr( - initializer=Normal(scale=1e-3), - name='classifier.weight'), - bias_attr=ParamAttr( - initializer=Constant(0.), - name='classifier.bias')) - return self.logits, self.logits_aux - - def build_input(self, image_shape, batch_size, is_train): - if is_train: - py_reader = fluid.layers.py_reader( - capacity=64, - shapes=[[-1] + image_shape, [-1, 1], [-1, 1], [-1, 1], [-1, 1], - [-1, 1], [-1, batch_size, self.class_num - 1]], - lod_levels=[0, 0, 0, 0, 0, 0, 0], - dtypes=[ - "float32", "int64", "int64", "float32", "int32", "int32", - "float32" - ], - use_double_buffer=True, - name='train_reader') - else: - py_reader = fluid.layers.py_reader( - capacity=64, - shapes=[[-1] + image_shape, [-1, 1]], - lod_levels=[0, 0], - dtypes=["float32", "int64"], - use_double_buffer=True, - name='test_reader') - return py_reader - - def train_model(self, py_reader, init_channels, aux, aux_w, batch_size, - loss_lambda): - self.image, self.ya, self.yb, self.lam, self.label_reshape,\ - self.non_label_reshape, self.rad_var = fluid.layers.read_file(py_reader) - self.logits, self.logits_aux = self.forward(init_channels, True) - self.mixup_loss = self.mixup_loss(aux, aux_w) - self.lrc_loss = self.lrc_loss(batch_size) - return self.mixup_loss + loss_lambda * self.lrc_loss - - def test_model(self, py_reader, init_channels): - self.image, self.ya = fluid.layers.read_file(py_reader) - self.logits, _ = self.forward(init_channels, False) - prob = fluid.layers.softmax(self.logits, use_cudnn=False) - loss = fluid.layers.cross_entropy(prob, self.ya) - acc_1 = fluid.layers.accuracy(self.logits, self.ya, k=1) - acc_5 = fluid.layers.accuracy(self.logits, self.ya, k=5) - return loss, acc_1, acc_5 - - def mixup_loss(self, auxiliary, auxiliary_weight): - prob = fluid.layers.softmax(self.logits, use_cudnn=False) - loss_a = fluid.layers.cross_entropy(prob, self.ya) - loss_b = fluid.layers.cross_entropy(prob, self.yb) - loss_a_mean = fluid.layers.reduce_mean(loss_a) - loss_b_mean = fluid.layers.reduce_mean(loss_b) - loss = self.lam * loss_a_mean + (1 - self.lam) * loss_b_mean - if auxiliary: - prob_aux = fluid.layers.softmax(self.logits_aux, use_cudnn=False) - loss_a_aux = fluid.layers.cross_entropy(prob_aux, self.ya) - loss_b_aux = fluid.layers.cross_entropy(prob_aux, self.yb) - loss_a_aux_mean = fluid.layers.reduce_mean(loss_a_aux) - loss_b_aux_mean = fluid.layers.reduce_mean(loss_b_aux) - loss_aux = self.lam * loss_a_aux_mean + (1 - self.lam - ) * loss_b_aux_mean - return loss + auxiliary_weight * loss_aux - - def lrc_loss(self, batch_size): - y_diff_reshape = fluid.layers.reshape(self.logits, shape=(-1, 1)) - label_reshape = fluid.layers.squeeze(self.label_reshape, axes=[1]) - non_label_reshape = fluid.layers.squeeze( - self.non_label_reshape, axes=[1]) - label_reshape.stop_gradient = True - non_label_reshape.stop_graident = True - - y_diff_label_reshape = fluid.layers.gather(y_diff_reshape, - label_reshape) - y_diff_non_label_reshape = fluid.layers.gather(y_diff_reshape, - non_label_reshape) - y_diff_label = fluid.layers.reshape( - y_diff_label_reshape, shape=(-1, batch_size, 1)) - y_diff_non_label = fluid.layers.reshape( - y_diff_non_label_reshape, - shape=(-1, 
batch_size, self.class_num - 1)) - y_diff_ = y_diff_non_label - y_diff_label - - y_diff_ = fluid.layers.transpose(y_diff_, perm=[1, 2, 0]) - rad_var_trans = fluid.layers.transpose(self.rad_var, perm=[1, 2, 0]) - rad_y_diff_trans = rad_var_trans * y_diff_ - lrc_loss_sum = fluid.layers.reduce_sum(rad_y_diff_trans, dim=[0, 1]) - lrc_loss_ = fluid.layers.abs(lrc_loss_sum) / (batch_size * - (self.class_num - 1)) - lrc_loss_mean = fluid.layers.reduce_mean(lrc_loss_) - - return lrc_loss_mean diff --git a/AutoDL/LRC/operations.py b/AutoDL/LRC/operations.py deleted file mode 100644 index b015722a..00000000 --- a/AutoDL/LRC/operations.py +++ /dev/null @@ -1,349 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. -# -# Based on: -# -------------------------------------------------------- -# DARTS -# Copyright (c) 2018, Hanxiao Liu. -# Licensed under the Apache License, Version 2.0; -# -------------------------------------------------------- -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -import os -import sys -import numpy as np -import time -import paddle -import paddle.fluid as fluid -from paddle.fluid.param_attr import ParamAttr -from paddle.fluid.initializer import Xavier -from paddle.fluid.initializer import Normal -from paddle.fluid.initializer import Constant - -OPS = { - 'none' : lambda input, C, stride, name, affine: Zero(input, stride, name), - 'avg_pool_3x3' : lambda input, C, stride, name, affine: fluid.layers.pool2d(input, 3, 'avg', pool_stride=stride, pool_padding=1, name=name), - 'max_pool_3x3' : lambda input, C, stride, name, affine: fluid.layers.pool2d(input, 3, 'max', pool_stride=stride, pool_padding=1, name=name), - 'skip_connect' : lambda input, C, stride, name, affine: Identity(input, name) if stride == 1 else FactorizedReduce(input, C, name=name, affine=affine), - 'sep_conv_3x3' : lambda input, C, stride, name, affine: SepConv(input, C, C, 3, stride, 1, name=name, affine=affine), - 'sep_conv_5x5' : lambda input, C, stride, name, affine: SepConv(input, C, C, 5, stride, 2, name=name, affine=affine), - 'sep_conv_7x7' : lambda input, C, stride, name, affine: SepConv(input, C, C, 7, stride, 3, name=name, affine=affine), - 'dil_conv_3x3' : lambda input, C, stride, name, affine: DilConv(input, C, C, 3, stride, 2, 2, name=name, affine=affine), - 'dil_conv_5x5' : lambda input, C, stride, name, affine: DilConv(input, C, C, 5, stride, 4, 2, name=name, affine=affine), - 'conv_7x1_1x7' : lambda input, C, stride, name, affine: SevenConv(input, C, stride, name=name, affine=affine) } - - def ReLUConvBN(input, C_out, kernel_size, stride, padding, name='', - affine=True): - relu_a = fluid.layers.relu(input) - conv2d_a = fluid.layers.conv2d( - relu_a, - C_out, - kernel_size, - stride, - padding, - param_attr=ParamAttr( - initializer=Xavier( - uniform=False, fan_in=0), - name=name + 'op.1.weight'), - bias_attr=False) - if affine: - reluconvbn_out = fluid.layers.batch_norm( - conv2d_a, -
param_attr=ParamAttr( - initializer=Constant(1.), name=name + 'op.2.weight'), - bias_attr=ParamAttr( - initializer=Constant(0.), name=name + 'op.2.bias'), - moving_mean_name=name + 'op.2.running_mean', - moving_variance_name=name + 'op.2.running_var') - else: - reluconvbn_out = fluid.layers.batch_norm( - conv2d_a, - param_attr=ParamAttr( - initializer=Constant(1.), - learning_rate=0., - name=name + 'op.2.weight'), - bias_attr=ParamAttr( - initializer=Constant(0.), - learning_rate=0., - name=name + 'op.2.bias'), - moving_mean_name=name + 'op.2.running_mean', - moving_variance_name=name + 'op.2.running_var') - return reluconvbn_out - - -def DilConv(input, - C_in, - C_out, - kernel_size, - stride, - padding, - dilation, - name='', - affine=True): - relu_a = fluid.layers.relu(input) - conv2d_a = fluid.layers.conv2d( - relu_a, - C_in, - kernel_size, - stride, - padding, - dilation, - groups=C_in, - param_attr=ParamAttr( - initializer=Xavier( - uniform=False, fan_in=0), - name=name + 'op.1.weight'), - bias_attr=False, - use_cudnn=False) - conv2d_b = fluid.layers.conv2d( - conv2d_a, - C_out, - 1, - param_attr=ParamAttr( - initializer=Xavier( - uniform=False, fan_in=0), - name=name + 'op.2.weight'), - bias_attr=False) - if affine: - dilconv_out = fluid.layers.batch_norm( - conv2d_b, - param_attr=ParamAttr( - initializer=Constant(1.), name=name + 'op.3.weight'), - bias_attr=ParamAttr( - initializer=Constant(0.), name=name + 'op.3.bias'), - moving_mean_name=name + 'op.3.running_mean', - moving_variance_name=name + 'op.3.running_var') - else: - dilconv_out = fluid.layers.batch_norm( - conv2d_b, - param_attr=ParamAttr( - initializer=Constant(1.), - learning_rate=0., - name=name + 'op.3.weight'), - bias_attr=ParamAttr( - initializer=Constant(0.), - learning_rate=0., - name=name + 'op.3.bias'), - moving_mean_name=name + 'op.3.running_mean', - moving_variance_name=name + 'op.3.running_var') - return dilconv_out - - -def SepConv(input, - C_in, - C_out, - kernel_size, - stride, - padding, - name='', - affine=True): - relu_a = fluid.layers.relu(input) - conv2d_a = fluid.layers.conv2d( - relu_a, - C_in, - kernel_size, - stride, - padding, - groups=C_in, - param_attr=ParamAttr( - initializer=Xavier( - uniform=False, fan_in=0), - name=name + 'op.1.weight'), - bias_attr=False, - use_cudnn=False) - conv2d_b = fluid.layers.conv2d( - conv2d_a, - C_in, - 1, - param_attr=ParamAttr( - initializer=Xavier( - uniform=False, fan_in=0), - name=name + 'op.2.weight'), - bias_attr=False) - if affine: - bn_a = fluid.layers.batch_norm( - conv2d_b, - param_attr=ParamAttr( - initializer=Constant(1.), name=name + 'op.3.weight'), - bias_attr=ParamAttr( - initializer=Constant(0.), name=name + 'op.3.bias'), - moving_mean_name=name + 'op.3.running_mean', - moving_variance_name=name + 'op.3.running_var') - else: - bn_a = fluid.layers.batch_norm( - conv2d_b, - param_attr=ParamAttr( - initializer=Constant(1.), - learning_rate=0., - name=name + 'op.3.weight'), - bias_attr=ParamAttr( - initializer=Constant(0.), - learning_rate=0., - name=name + 'op.3.bias'), - moving_mean_name=name + 'op.3.running_mean', - moving_variance_name=name + 'op.3.running_var') - - relu_b = fluid.layers.relu(bn_a) - conv2d_d = fluid.layers.conv2d( - relu_b, - C_in, - kernel_size, - 1, - padding, - groups=C_in, - param_attr=ParamAttr( - initializer=Xavier( - uniform=False, fan_in=0), - name=name + 'op.5.weight'), - bias_attr=False, - use_cudnn=False) - conv2d_e = fluid.layers.conv2d( - conv2d_d, - C_out, - 1, - param_attr=ParamAttr( - initializer=Xavier( - 
uniform=False, fan_in=0), - name=name + 'op.6.weight'), - bias_attr=False) - if affine: - sepconv_out = fluid.layers.batch_norm( - conv2d_e, - param_attr=ParamAttr( - initializer=Constant(1.), name=name + 'op.7.weight'), - bias_attr=ParamAttr( - initializer=Constant(0.), name=name + 'op.7.bias'), - moving_mean_name=name + 'op.7.running_mean', - moving_variance_name=name + 'op.7.running_var') - else: - sepconv_out = fluid.layers.batch_norm( - conv2d_e, - param_attr=ParamAttr( - initializer=Constant(1.), - learning_rate=0., - name=name + 'op.7.weight'), - bias_attr=ParamAttr( - initializer=Constant(0.), - learning_rate=0., - name=name + 'op.7.bias'), - moving_mean_name=name + 'op.7.running_mean', - moving_variance_name=name + 'op.7.running_var') - return sepconv_out - - -def SevenConv(input, C_out, stride, name='', affine=True): - relu_a = fluid.layers.relu(input) - conv2d_a = fluid.layers.conv2d( - relu_a, - C_out, (1, 7), (1, stride), (0, 3), - param_attr=ParamAttr( - initializer=Xavier( - uniform=False, fan_in=0), - name=name + 'op.1.weight'), - bias_attr=False) - conv2d_b = fluid.layers.conv2d( - conv2d_a, - C_out, (7, 1), (stride, 1), (3, 0), - param_attr=ParamAttr( - initializer=Xavier( - uniform=False, fan_in=0), - name=name + 'op.2.weight'), - bias_attr=False) - if affine: - out = fluid.layers.batch_norm( - conv2d_b, - param_attr=ParamAttr( - initializer=Constant(1.), name=name + 'op.3.weight'), - bias_attr=ParamAttr( - initializer=Constant(0.), name=name + 'op.3.bias'), - moving_mean_name=name + 'op.3.running_mean', - moving_variance_name=name + 'op.3.running_var') - else: - out = fluid.layers.batch_norm( - conv2d_b, - param_attr=ParamAttr( - initializer=Constant(1.), - learning_rate=0., - name=name + 'op.3.weight'), - bias_attr=ParamAttr( - initializer=Constant(0.), - learning_rate=0., - name=name + 'op.3.bias'), - moving_mean_name=name + 'op.3.running_mean', - moving_variance_name=name + 'op.3.running_var') - return out - - -def Identity(input, name=''): - return input - - -def Zero(input, stride, name=''): - ones = np.ones(input.shape[-2:]) - ones[::stride, ::stride] = 0 - ones = fluid.layers.assign(ones) - return input * ones - - -def FactorizedReduce(input, C_out, name='', affine=True): - relu_a = fluid.layers.relu(input) - conv2d_a = fluid.layers.conv2d( - relu_a, - C_out // 2, - 1, - 2, - param_attr=ParamAttr( - initializer=Xavier( - uniform=False, fan_in=0), - name=name + 'conv_1.weight'), - bias_attr=False) - h_end = relu_a.shape[2] - w_end = relu_a.shape[3] - slice_a = fluid.layers.slice(relu_a, [2, 3], [1, 1], [h_end, w_end]) - conv2d_b = fluid.layers.conv2d( - slice_a, - C_out // 2, - 1, - 2, - param_attr=ParamAttr( - initializer=Xavier( - uniform=False, fan_in=0), - name=name + 'conv_2.weight'), - bias_attr=False) - out = fluid.layers.concat([conv2d_a, conv2d_b], axis=1) - if affine: - out = fluid.layers.batch_norm( - out, - param_attr=ParamAttr( - initializer=Constant(1.), name=name + 'bn.weight'), - bias_attr=ParamAttr( - initializer=Constant(0.), name=name + 'bn.bias'), - moving_mean_name=name + 'bn.running_mean', - moving_variance_name=name + 'bn.running_var') - else: - out = fluid.layers.batch_norm( - out, - param_attr=ParamAttr( - initializer=Constant(1.), - learning_rate=0., - name=name + 'bn.weight'), - bias_attr=ParamAttr( - initializer=Constant(0.), - learning_rate=0., - name=name + 'bn.bias'), - moving_mean_name=name + 'bn.running_mean', - moving_variance_name=name + 'bn.running_var') - return out diff --git a/AutoDL/LRC/reader.py b/AutoDL/LRC/reader.py deleted file mode 
100644 index 20b32b50..00000000 --- a/AutoDL/LRC/reader.py +++ /dev/null @@ -1,187 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Based on: -# -------------------------------------------------------- -# DARTS -# Copyright (c) 2018, Hanxiao Liu. -# Licensed under the Apache License, Version 2.0; -# -------------------------------------------------------- -""" -CIFAR-10 dataset. -This module will download dataset from -https://www.cs.toronto.edu/~kriz/cifar.html and parse train/test set into -paddle reader creators. -The CIFAR-10 dataset consists of 60000 32x32 colour images in 10 classes, -with 6000 images per class. There are 50000 training images and 10000 test images. -""" - -from PIL import Image -from PIL import ImageOps -import numpy as np - -import cPickle -import random -import utils -import paddle.fluid as fluid -import time -import os -import functools -import paddle.reader - -__all__ = ['train10', 'test10'] - -image_size = 32 -image_depth = 3 -half_length = 8 - -CIFAR_MEAN = [0.4914, 0.4822, 0.4465] -CIFAR_STD = [0.24703233, 0.24348505, 0.26158768] - - -def generate_reshape_label(label, batch_size, CIFAR_CLASSES=10): - reshape_label = np.zeros((batch_size, 1), dtype='int32') - reshape_non_label = np.zeros( - (batch_size * (CIFAR_CLASSES - 1), 1), dtype='int32') - num = 0 - for i in range(batch_size): - label_i = label[i] - reshape_label[i] = label_i + i * CIFAR_CLASSES - for j in range(CIFAR_CLASSES): - if label_i != j: - reshape_non_label[num] = \ - j + i * CIFAR_CLASSES - num += 1 - return reshape_label, reshape_non_label - - -def generate_bernoulli_number(batch_size, CIFAR_CLASSES=10): - rcc_iters = 50 - rad_var = np.zeros((rcc_iters, batch_size, CIFAR_CLASSES - 1)) - for i in range(rcc_iters): - bernoulli_num = np.random.binomial(size=batch_size, n=1, p=0.5) - bernoulli_map = np.array([]) - ones = np.ones((CIFAR_CLASSES - 1, 1)) - for batch_id in range(batch_size): - num = bernoulli_num[batch_id] - var_id = 2 * ones * num - 1 - bernoulli_map = np.append(bernoulli_map, var_id) - rad_var[i] = bernoulli_map.reshape((batch_size, CIFAR_CLASSES - 1)) - return rad_var.astype('float32') - - -def preprocess(sample, is_training, args): - image_array = sample.reshape(3, image_size, image_size) - rgb_array = np.transpose(image_array, (1, 2, 0)) - img = Image.fromarray(rgb_array, 'RGB') - - if is_training: - # pad and random crop - img = ImageOps.expand(img, (4, 4, 4, 4), fill=0) # pad to 40 * 40 * 3 - left_top = np.random.randint(9, size=2) # rand 0 - 8 - img = img.crop((left_top[0], left_top[1], left_top[0] + image_size, - left_top[1] + image_size)) - if np.random.randint(2): - img = img.transpose(Image.FLIP_LEFT_RIGHT) - - img = np.array(img).astype(np.float32) - - # per_image_standardization - img_float = img / 255.0 - img = (img_float - CIFAR_MEAN) / CIFAR_STD - - if is_training and args.cutout: - center = np.random.randint(image_size, size=2) - offset_width = max(0, center[0] - half_length) - offset_height = 
max(0, center[1] - half_length) - target_width = min(center[0] + half_length, image_size) - target_height = min(center[1] + half_length, image_size) - - for i in range(offset_height, target_height): - for j in range(offset_width, target_width): - img[i][j][:] = 0.0 - - img = np.transpose(img, (2, 0, 1)) - return img - - -def reader_creator_filepath(filename, sub_name, is_training, args): - files = os.listdir(filename) - names = [each_item for each_item in files if sub_name in each_item] - names.sort() - datasets = [] - for name in names: - print("Reading file " + name) - batch = cPickle.load(open(filename + name, 'rb')) - data = batch['data'] - labels = batch.get('labels', batch.get('fine_labels', None)) - assert labels is not None - dataset = zip(data, labels) - datasets.extend(dataset) - random.shuffle(datasets) - - def read_batch(datasets, args): - for sample, label in datasets: - im = preprocess(sample, is_training, args) - yield im, [int(label)] - - def reader(): - batch_data = [] - batch_label = [] - for data, label in read_batch(datasets, args): - batch_data.append(data) - batch_label.append(label) - if len(batch_data) == args.batch_size: - batch_data = np.array(batch_data, dtype='float32') - batch_label = np.array(batch_label, dtype='int64') - if is_training: - flatten_label, flatten_non_label = \ - generate_reshape_label(batch_label, args.batch_size) - rad_var = generate_bernoulli_number(args.batch_size) - mixed_x, y_a, y_b, lam = utils.mixup_data( - batch_data, batch_label, args.batch_size, - args.mix_alpha) - batch_out = [[mixed_x, y_a, y_b, lam, flatten_label, \ - flatten_non_label, rad_var]] - yield batch_out - else: - batch_out = [[batch_data, batch_label]] - yield batch_out - batch_data = [] - batch_label = [] - - return reader - - -def train10(args): - """ - CIFAR-10 training set creator. - It returns a reader creator, each sample in the reader is image pixels in - [0, 1] and label in [0, 9]. - :return: Training reader creator - :rtype: callable - """ - - return reader_creator_filepath(args.data, 'data_batch', True, args) - - -def test10(args): - """ - CIFAR-10 test set creator. - It returns a reader creator, each sample in the reader is image pixels in - [0, 1] and label in [0, 9]. - :return: Test reader creator. - :rtype: callable - """ - return reader_creator_filepath(args.data, 'test_batch', False, args) diff --git a/AutoDL/LRC/run.sh b/AutoDL/LRC/run.sh deleted file mode 100644 index 9f1a045d..00000000 --- a/AutoDL/LRC/run.sh +++ /dev/null @@ -1,8 +0,0 @@ -CUDA_VISIBLE_DEVICES=0 python -u train_mixup.py \ ---batch_size=80 \ ---auxiliary \ ---weight_decay=0.0003 \ ---learning_rate=0.025 \ ---lrc_loss_lambda=0.7 \ ---cutout - diff --git a/AutoDL/LRC/train_mixup.py b/AutoDL/LRC/train_mixup.py deleted file mode 100644 index 4727e089..00000000 --- a/AutoDL/LRC/train_mixup.py +++ /dev/null @@ -1,246 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. -# -#Licensed under the Apache License, Version 2.0 (the "License"); -#you may not use this file except in compliance with the License. -#You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -#Unless required by applicable law or agreed to in writing, software -#distributed under the License is distributed on an "AS IS" BASIS, -#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -#See the License for the specific language governing permissions and -#limitations under the License. 
-# -# Based on: -# -------------------------------------------------------- -# DARTS -# Copyright (c) 2018, Hanxiao Liu. -# Licensed under the Apache License, Version 2.0; -# -------------------------------------------------------- - -from __future__ import absolute_import -from __future__ import division -from __future__ import print_function -from learning_rate import cosine_decay -import numpy as np -import argparse -from model import NetworkCIFAR as Network -import reader -import sys -import os -import time -import logging -import genotypes -import paddle.fluid as fluid -import shutil -import utils -import cPickle as cp - -parser = argparse.ArgumentParser("cifar") -parser.add_argument( - '--data', - type=str, - default='./dataset/cifar/cifar-10-batches-py/', - help='location of the data corpus') -parser.add_argument('--batch_size', type=int, default=96, help='batch size') -parser.add_argument( - '--learning_rate', type=float, default=0.025, help='init learning rate') -parser.add_argument('--momentum', type=float, default=0.9, help='momentum') -parser.add_argument( - '--weight_decay', type=float, default=3e-4, help='weight decay') -parser.add_argument( - '--report_freq', type=float, default=50, help='report frequency') -parser.add_argument( - '--epochs', type=int, default=600, help='num of training epochs') -parser.add_argument( - '--init_channels', type=int, default=36, help='num of init channels') -parser.add_argument( - '--layers', type=int, default=20, help='total number of layers') -parser.add_argument( - '--model_path', - type=str, - default='saved_models', - help='path to save the model') -parser.add_argument( - '--auxiliary', - action='store_true', - default=False, - help='use auxiliary tower') -parser.add_argument( - '--auxiliary_weight', - type=float, - default=0.4, - help='weight for auxiliary loss') -parser.add_argument( - '--cutout', action='store_true', default=False, help='use cutout') -parser.add_argument( - '--cutout_length', type=int, default=16, help='cutout length') -parser.add_argument( - '--drop_path_prob', type=float, default=0.2, help='drop path probability') -parser.add_argument('--save', type=str, default='EXP', help='experiment name') -parser.add_argument( - '--arch', type=str, default='DARTS', help='which architecture to use') -parser.add_argument( - '--grad_clip', type=float, default=5, help='gradient clipping') -parser.add_argument( - '--lr_exp_decay', - action='store_true', - default=False, - help='use exponential_decay learning_rate') -parser.add_argument('--mix_alpha', type=float, default=0.5, help='mixup alpha') -parser.add_argument( - '--lrc_loss_lambda', default=0, type=float, help='lrc_loss_lambda') -parser.add_argument( - '--loss_type', - default=1, - type=float, - help='loss_type 0: cross entropy 1: multi margin loss 2: max margin loss') - -args = parser.parse_args() - -CIFAR_CLASSES = 10 -dataset_train_size = 50000 -image_size = 32 - - -def main(): - image_shape = [3, image_size, image_size] - devices = os.getenv("CUDA_VISIBLE_DEVICES") or "" - devices_num = len(devices.split(",")) - logging.info("args = %s", args) - genotype = eval("genotypes.%s" % args.arch) - model = Network(args.init_channels, CIFAR_CLASSES, args.layers, - args.auxiliary, genotype) - steps_one_epoch = dataset_train_size / (devices_num * args.batch_size) - train(model, args, image_shape, steps_one_epoch) - - -def build_program(main_prog, startup_prog, args, is_train, model, im_shape, - steps_one_epoch): - out = [] - with fluid.program_guard(main_prog, startup_prog): - py_reader 
= model.build_input(im_shape, args.batch_size, is_train) - if is_train: - with fluid.unique_name.guard(): - loss = model.train_model(py_reader, args.init_channels, - args.auxiliary, args.auxiliary_weight, - args.batch_size, args.lrc_loss_lambda) - optimizer = fluid.optimizer.Momentum( - learning_rate=cosine_decay(args.learning_rate, \ - args.epochs, steps_one_epoch), - regularization=fluid.regularizer.L2Decay(\ - args.weight_decay), - momentum=args.momentum) - optimizer.minimize(loss) - out = [py_reader, loss] - else: - with fluid.unique_name.guard(): - loss, acc_1, acc_5 = model.test_model(py_reader, - args.init_channels) - out = [py_reader, loss, acc_1, acc_5] - return out - - -def train(model, args, im_shape, steps_one_epoch): - train_startup_prog = fluid.Program() - test_startup_prog = fluid.Program() - train_prog = fluid.Program() - test_prog = fluid.Program() - - train_py_reader, loss_train = build_program(train_prog, train_startup_prog, - args, True, model, im_shape, - steps_one_epoch) - - test_py_reader, loss_test, acc_1, acc_5 = build_program( - test_prog, test_startup_prog, args, False, model, im_shape, - steps_one_epoch) - - test_prog = test_prog.clone(for_test=True) - - place = fluid.CUDAPlace(0) - exe = fluid.Executor(place) - exe.run(train_startup_prog) - exe.run(test_startup_prog) - - exec_strategy = fluid.ExecutionStrategy() - exec_strategy.num_threads = 1 - train_exe = fluid.ParallelExecutor( - main_program=train_prog, - use_cuda=True, - loss_name=loss_train.name, - exec_strategy=exec_strategy) - train_reader = reader.train10(args) - test_reader = reader.test10(args) - train_py_reader.decorate_paddle_reader(train_reader) - test_py_reader.decorate_paddle_reader(test_reader) - - fluid.clip.set_gradient_clip(fluid.clip.GradientClipByNorm(args.grad_clip)) - - def save_model(postfix, main_prog): - model_path = os.path.join(args.model_path, postfix) - if os.path.isdir(model_path): - shutil.rmtree(model_path) - fluid.io.save_persistables(exe, model_path, main_program=main_prog) - - def test(epoch_id): - test_fetch_list = [loss_test, acc_1, acc_5] - objs = utils.AvgrageMeter() - top1 = utils.AvgrageMeter() - top5 = utils.AvgrageMeter() - test_py_reader.start() - test_start_time = time.time() - step_id = 0 - try: - while True: - prev_test_start_time = test_start_time - test_start_time = time.time() - loss_test_v, acc_1_v, acc_5_v = exe.run( - test_prog, fetch_list=test_fetch_list) - objs.update(np.array(loss_test_v), args.batch_size) - top1.update(np.array(acc_1_v), args.batch_size) - top5.update(np.array(acc_5_v), args.batch_size) - if step_id % args.report_freq == 0: - print("Epoch {}, Step {}, acc_1 {}, acc_5 {}, time {}". 
- format(epoch_id, step_id, - np.array(acc_1_v), - np.array(acc_5_v), test_start_time - - prev_test_start_time)) - step_id += 1 - except fluid.core.EOFException: - test_py_reader.reset() - print("Epoch {0}, top1 {1}, top5 {2}".format(epoch_id, top1.avg, - top5.avg)) - - train_fetch_list = [loss_train] - epoch_start_time = time.time() - for epoch_id in range(args.epochs): - model.drop_path_prob = args.drop_path_prob * epoch_id / args.epochs - train_py_reader.start() - epoch_end_time = time.time() - if epoch_id > 0: - print("Epoch {}, total time {}".format(epoch_id - 1, epoch_end_time - - epoch_start_time)) - epoch_start_time = epoch_end_time - epoch_end_time - start_time = time.time() - step_id = 0 - try: - while True: - prev_start_time = start_time - start_time = time.time() - loss_v, = train_exe.run( - fetch_list=[v.name for v in train_fetch_list]) - print("Epoch {}, Step {}, loss {}, time {}".format(epoch_id, step_id, \ - np.array(loss_v).mean(), start_time-prev_start_time)) - step_id += 1 - sys.stdout.flush() - except fluid.core.EOFException: - train_py_reader.reset() - if epoch_id % 50 == 0 or epoch_id == args.epochs - 1: - save_model(str(epoch_id), train_prog) - test(epoch_id) - - -if __name__ == '__main__': - main() diff --git a/AutoDL/LRC/utils.py b/AutoDL/LRC/utils.py deleted file mode 100644 index 4002b57c..00000000 --- a/AutoDL/LRC/utils.py +++ /dev/null @@ -1,55 +0,0 @@ -# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. -# -# Based on: -# -------------------------------------------------------- -# DARTS -# Copyright (c) 2018, Hanxiao Liu. -# Licensed under the Apache License, Version 2.0; -# -------------------------------------------------------- - -import os -import sys -import time -import math -import numpy as np - - -def mixup_data(x, y, batch_size, alpha=1.0): - '''Compute the mixup data. Return mixed inputs, pairs of targets, and lambda''' - if alpha > 0.: - lam = np.random.beta(alpha, alpha) - else: - lam = 1. - index = np.random.permutation(batch_size) - - mixed_x = lam * x + (1 - lam) * x[index, :] - y_a, y_b = y, y[index] - return mixed_x.astype('float32'), y_a.astype('int64'),\ - y_b.astype('int64'), np.array(lam, dtype='float32') - - -class AvgrageMeter(object): - def __init__(self): - self.reset() - - def reset(self): - self.avg = 0 - self.sum = 0 - self.cnt = 0 - - def update(self, val, n=1): - self.sum += val * n - self.cnt += n - self.avg = self.sum / self.cnt -- GitLab
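
A few reference sketches for readers of the deleted LRC sources above, now maintained in the AutoDL submodule.

The deleted Cell.forward applies drop path by passing each candidate branch (h3/h4) through elementwise dropout with dropout_implementation='upscale_in_train', and train_mixup.py ramps drop_path_prob linearly from 0 to args.drop_path_prob over the epochs. A minimal NumPy sketch of that dropout behavior (the function name and rng argument are illustrative, not from the deleted code):

```
import numpy as np

def upscale_in_train_dropout(x, drop_prob, training, rng=np.random):
    # Elementwise dropout with train-time rescaling, as applied to the
    # cell branch outputs in the deleted Cell.forward.
    if not training or drop_prob <= 0.:
        return x
    keep_prob = 1.0 - drop_prob
    mask = rng.binomial(1, keep_prob, size=x.shape).astype(x.dtype)
    return x * mask / keep_prob  # inference then needs no extra scaling
```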
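The mixup pipeline is split across the deleted files: utils.mixup_data blends each batch with a shuffled copy of itself using lam ~ Beta(alpha, alpha), and NetworkCIFAR.mixup_loss interpolates the cross-entropy against both label sets with the same lam. A self-contained NumPy sketch of the two halves (function names are illustrative):

```
import numpy as np

def mixup_batch(x, y, alpha=0.5, rng=np.random):
    # Blend the batch with a shuffled copy, as in the deleted utils.mixup_data.
    lam = rng.beta(alpha, alpha) if alpha > 0. else 1.
    index = rng.permutation(len(x))
    return lam * x + (1 - lam) * x[index], y, y[index], lam

def mixup_xent(log_probs, y_a, y_b, lam):
    # Interpolated cross-entropy, mirroring NetworkCIFAR.mixup_loss.
    rows = np.arange(len(y_a))
    nll_a = -log_probs[rows, y_a].mean()
    nll_b = -log_probs[rows, y_b].mean()
    return lam * nll_a + (1 - lam) * nll_b
```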
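The LRC penalty is likewise split: reader.generate_bernoulli_number pre-samples one +/-1 Rademacher sign per image (replicated across the CIFAR_CLASSES - 1 non-label slots) for 50 iterations, and NetworkCIFAR.lrc_loss correlates those signs with the gaps between each non-label logit and the label logit, averaging the absolute normalized sums. An equivalent NumPy sketch of the whole estimate (names are illustrative):

```
import numpy as np

def lrc_penalty(logits, labels, rcc_iters=50, rng=np.random):
    # logits: (batch, classes) scores; labels: (batch,) int class ids.
    batch, classes = logits.shape
    label_logit = logits[np.arange(batch), labels][:, None]
    mask = np.ones(logits.shape, dtype=bool)
    mask[np.arange(batch), labels] = False
    # y_diff matches lrc_loss: non-label logit minus label logit.
    y_diff = logits[mask].reshape(batch, classes - 1) - label_logit
    total = 0.0
    for _ in range(rcc_iters):
        sign = 2 * rng.binomial(1, 0.5, size=(batch, 1)) - 1  # one +/-1 per image
        total += np.abs((sign * y_diff).sum()) / (batch * (classes - 1))
    return total / rcc_iters
```

train_mixup.py adds this term to the mixup loss scaled by --lrc_loss_lambda (0.7 in run.sh).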
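reader.preprocess implements cutout with two Python loops over the patch; vectorized, the same augmentation is a clipped square of zeros around a uniformly drawn center. half_length=8 reproduces the deleted module's constant, i.e. a 16x16 patch on 32x32 CIFAR images:

```
import numpy as np

def cutout(img, half_length=8, rng=np.random):
    # img is HWC, as in the deleted preprocess; the patch is clipped at borders.
    h, w = img.shape[:2]
    cx, cy = rng.randint(w), rng.randint(h)
    x0, x1 = max(0, cx - half_length), min(cx + half_length, w)
    y0, y1 = max(0, cy - half_length), min(cy + half_length, h)
    img[y0:y1, x0:x1, :] = 0.0
    return img
```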
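Finally, train_mixup.py builds its optimizer around cosine_decay(args.learning_rate, args.epochs, steps_one_epoch) from the deleted learning_rate.py, whose body is not shown in this patch. Assuming it follows the usual epoch-wise cosine annealing (an assumption, not a quote of the deleted file), the schedule is:

```
import math

def cosine_decay_value(lr0, epoch, num_epochs):
    # Assumed schedule: anneal from lr0 toward 0 over training,
    # lr(e) = 0.5 * lr0 * (1 + cos(pi * e / num_epochs)).
    return 0.5 * lr0 * (1.0 + math.cos(math.pi * epoch / num_epochs))
```

With run.sh's --learning_rate=0.025 this gives 0.025 at epoch 0 and about 0.0125 halfway through the 600-epoch run.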