Commit 59f1b733 authored by Aston Zhang

cifar10 code

Parent 43cfa799
To keep the webpage build fast, we store only 100 training examples ('train_tiny.zip') and 1 test example ('test_tiny.zip') in the git repo. Running the code below unpacks a small training set and a small test set from the git repo, into folders named 'train_tiny' and 'test_tiny' respectively. The compressed file of training data labels is unpacked into trainLabels.csv.
```{.python .input}
import sys
sys.path.append('..')
import datetime
import gluonbook as gb
from mxnet import autograd, gluon, init, nd
from mxnet.gluon import data as gdata, nn, loss as gloss
import numpy as np
import os
import pandas as pd
import shutil
```
```{.python .input n=1}
# If you are training on the full Kaggle dataset that you downloaded,
# set demo below to False.
demo = True
if demo:
    import zipfile
    # Unzip the small demo archives shipped with the repo. The file list and
    # loop below are reconstructed from context; the label archive unpacks to
    # trainLabels.csv as described above.
    for fname in ['train_tiny.zip', 'test_tiny.zip', 'trainLabels.csv.zip']:
        with zipfile.ZipFile('../data/kaggle_cifar10/' + fname, 'r') as fin:
            fin.extractall('../data/kaggle_cifar10/')
```
The parameters data_dir, train_dir and test_dir correspond to the data path above and to the folder names of the training and test image sets. The parameter label_file is the file name of the training data labels, input_dir is the folder name of the reorganized dataset, and valid_ratio is the fraction of the original training set held out for validation. Taking valid_ratio=0.1 as an example: since the original training data contains 50,000 images, 45,000 of them will be used for training while tuning hyperparameters (stored in input_dir/train), and the other 5,000 will form the validation set (stored in input_dir/valid).
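As a quick sanity check of the split arithmetic, here is a minimal sketch; the numbers follow the 50,000-image training set mentioned above:

```{.python .input}
# With valid_ratio=0.1 and 50,000 original training images:
n_train_valid = 50000
n_train = int(n_train_valid * (1 - 0.1))  # 45,000 images for tuning
n_valid = n_train_valid - n_train         # 5,000 images for validation
print(n_train, n_valid)
```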
```{.python .input n=2}
def reorg_cifar10_data(data_dir, label_file, train_dir, test_dir, input_dir,
                       valid_ratio):
    # Read the training data labels.
    with open(os.path.join(data_dir, label_file), 'r') as f:
        # Skip the header line (column names). Each remaining line is
        # "index,label"; the two lines below are reconstructed from context.
        lines = f.readlines()[1:]
        tokens = [l.rstrip().split(',') for l in lines]
    idx_label = dict(((int(idx), label) for idx, label in tokens))
    labels = set(idx_label.values())
    n_train_valid = len(os.listdir(os.path.join(data_dir, train_dir)))
    n_train = int(n_train_valid * (1 - valid_ratio))
    assert 0 < n_train < n_train_valid
    n_train_per_label = n_train // len(labels)
    label_count = {}
    def mkdir_if_not_exist(path):
        if not os.path.exists(os.path.join(*path)):
            os.makedirs(os.path.join(*path))

    # Copy every training image into input_dir/train_valid/<label>, and split
    # the same images between input_dir/train and input_dir/valid. The loop
    # header and the two label-lookup lines are reconstructed from context.
    for train_file in os.listdir(os.path.join(data_dir, train_dir)):
        idx = int(train_file.split('.')[0])
        label = idx_label[idx]
        mkdir_if_not_exist([data_dir, input_dir, 'train_valid', label])
        shutil.copy(os.path.join(data_dir, train_dir, train_file),
                    os.path.join(data_dir, input_dir, 'train_valid', label))
        if label not in label_count or label_count[label] < n_train_per_label:
            mkdir_if_not_exist([data_dir, input_dir, 'train', label])
            shutil.copy(os.path.join(data_dir, train_dir, train_file),
                        os.path.join(data_dir, input_dir, 'train', label))
            label_count[label] = label_count.get(label, 0) + 1
        else:
            mkdir_if_not_exist([data_dir, input_dir, 'valid', label])
            shutil.copy(os.path.join(data_dir, train_dir, train_file),
                        os.path.join(data_dir, input_dir, 'valid', label))

    # Copy all test images into a single input_dir/test/unknown folder
    # (reconstructed from context).
    mkdir_if_not_exist([data_dir, input_dir, 'test', 'unknown'])
    for test_file in os.listdir(os.path.join(data_dir, test_dir)):
        shutil.copy(os.path.join(data_dir, test_dir, test_file),
                    os.path.join(data_dir, input_dir, 'test', 'unknown'))
```

We can now reorganize the dataset by calling the function above. The `if demo` branch below is reconstructed from context: the folder names follow the unzipping step, and the batch sizes (1 for the tiny demo set, 128 for the full dataset) are assumptions.

```{.python .input n=3}
if demo:
    train_dir, test_dir, batch_size = 'train_tiny', 'test_tiny', 1
else:
    train_dir, test_dir, batch_size = 'train', 'test', 128

data_dir = '../data/kaggle_cifar10'
label_file = 'trainLabels.csv'
input_dir = 'train_valid_test'
valid_ratio = 0.1
reorg_cifar10_data(data_dir, label_file, train_dir, test_dir, input_dir,
                   valid_ratio)
```
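To see what the reorganization produced, it helps to list the top two levels of the output folder. A minimal sketch, assuming only the layout created by `reorg_cifar10_data` above:

```{.python .input}
# The reorganized dataset has four top-level folders -- train, valid,
# train_valid and test -- each holding one sub-folder per class label
# ('unknown' for the test set).
for folder in sorted(os.listdir(os.path.join(data_dir, input_dir))):
    class_dirs = sorted(os.listdir(os.path.join(data_dir, input_dir, folder)))
    print(folder, ':', class_dirs[:3], '...')
```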
## Reading the Reorganized Dataset with Gluon
To mitigate overfitting, we augment the dataset here with `transforms`. For example, adding `transforms.RandomFlipLeftRight()` randomly mirrors each image horizontally. We also [standardize](../chapter_supervised-learning/kaggle-gluon-kfold.md) each of the three RGB channels of the color images with `transforms.Normalize()`. Below we list all the operations that may be used; whether to call each of them can be decided as needed, and their parameters are tunable as well.
```{.python .input n=4}
transform_train = gdata.vision.transforms.Compose([
    # gdata.vision.transforms.CenterCrop(32),
    # gdata.vision.transforms.RandomFlipTopBottom(),
    # gdata.vision.transforms.RandomColorJitter(brightness=0.0, contrast=0.0,
    #                                           saturation=0.0, hue=0.0),
    # gdata.vision.transforms.RandomLighting(0.0),
    # gdata.vision.transforms.Cast('float32'),
    # gdata.vision.transforms.Resize(32),
    # Randomly crop according to scale and ratio, then resize to a
    # 32 x 32 square.
    gdata.vision.transforms.RandomResizedCrop(32, scale=(0.08, 1.0),
                                              ratio=(3.0/4.0, 4.0/3.0)),
    # Randomly flip the image horizontally.
    gdata.vision.transforms.RandomFlipLeftRight(),
    # Scale pixel values into (0, 1) and change the data layout from
    # "height x width x channel" to "channel x height x width".
    gdata.vision.transforms.ToTensor(),
    # Standardize each channel of the image.
    gdata.vision.transforms.Normalize([0.4914, 0.4822, 0.4465],
                                      [0.2023, 0.1994, 0.2010])
])

# At test time, no data augmentation is needed beyond standardization.
transform_test = gdata.vision.transforms.Compose([
    gdata.vision.transforms.ToTensor(),
    gdata.vision.transforms.Normalize([0.4914, 0.4822, 0.4465],
                                      [0.2023, 0.1994, 0.2010])
])
```
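As a quick check of what `transform_train` produces, we can feed it a dummy image. A minimal sketch; the 32 x 32 x 3 uint8 shape mimics a decoded CIFAR-10 image, and the names are illustrative:

```{.python .input}
# The transform turns a "height x width x channel" uint8 image into a
# float32 "channel x height x width" tensor, here of shape (3, 32, 32).
img = nd.random.uniform(0, 255, shape=(32, 32, 3)).astype('uint8')
out = transform_train(img)
print(out.shape, out.dtype)
```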
Next, we can read the reorganized dataset with Gluon's `ImageFolderDataset` class. Note that the image augmentation functions just defined are applied when the data is loaded. Data read through `gdata.vision.ImageFolderDataset` comes as `(image, label)` pairs, and `transform_first()` applies the image augmentation to the first element of each pair, i.e. the image.
```{.python .input n=5}
input_s = data_dir + '/' + input_dir + '/'
# Read the raw image files. flag=1 means the input images have three
# channels (color).
train_ds = gdata.vision.ImageFolderDataset(input_s + 'train', flag=1)
valid_ds = gdata.vision.ImageFolderDataset(input_s + 'valid', flag=1)
train_valid_ds = gdata.vision.ImageFolderDataset(input_s + 'train_valid',
                                                 flag=1)
test_ds = gdata.vision.ImageFolderDataset(input_s + 'test', flag=1)

train_data = gdata.DataLoader(train_ds.transform_first(transform_train),
                              batch_size, shuffle=True, last_batch='keep')
valid_data = gdata.DataLoader(valid_ds.transform_first(transform_test),
                              batch_size, shuffle=True, last_batch='keep')
train_valid_data = gdata.DataLoader(train_valid_ds.transform_first(
    transform_train), batch_size, shuffle=True, last_batch='keep')
test_data = gdata.DataLoader(test_ds.transform_first(transform_test),
                             batch_size, shuffle=False, last_batch='keep')
```
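To verify the loaders, we can pull a single batch and inspect its shapes; a minimal sketch:

```{.python .input}
# X is a batch of augmented images with shape (batch_size, 3, 32, 32);
# y holds the corresponding class indices.
for X, y in train_data:
    print(X.shape, X.dtype, y.shape)
    break
```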
## Designing the Model
Please note: the model can be redesigned, and the parameters can be retuned.
```{.python .input n=6}
class Residual(nn.HybridBlock):
    def __init__(self, channels, same_shape=True, **kwargs):
        super(Residual, self).__init__(**kwargs)
        # The attribute below and two lines of hybrid_forward are
        # reconstructed from context.
        self.same_shape = same_shape
        with self.name_scope():
            strides = 1 if same_shape else 2
            self.conv1 = nn.Conv2D(channels, kernel_size=3, padding=1,
                                   strides=strides)
            self.bn1 = nn.BatchNorm()
            self.conv2 = nn.Conv2D(channels, kernel_size=3, padding=1)
            self.bn2 = nn.BatchNorm()
            if not same_shape:
                self.conv3 = nn.Conv2D(channels, kernel_size=1,
                                       strides=strides)

    def hybrid_forward(self, F, x):
        out = F.relu(self.bn1(self.conv1(x)))
        out = self.bn2(self.conv2(out))
        if not self.same_shape:
            x = self.conv3(x)
        return F.relu(out + x)

class ResNet(nn.HybridBlock):
    def __init__(self, num_classes, verbose=False, **kwargs):
        super(ResNet, self).__init__(**kwargs)
        self.verbose = verbose
        with self.name_scope():
            net = self.net = nn.HybridSequential()
            # Block 1.
            net.add(nn.Conv2D(channels=32, kernel_size=3, strides=1,
                              padding=1))
            net.add(nn.BatchNorm())
            net.add(nn.Activation(activation='relu'))
            # Block 2.
            for _ in range(3):
                net.add(Residual(channels=32))
            # Block 3.
            net.add(Residual(channels=64, same_shape=False))
            for _ in range(2):
                net.add(Residual(channels=64))
            # Block 4.
            net.add(Residual(channels=128, same_shape=False))
            for _ in range(2):
                net.add(Residual(channels=128))
            # Block 5.
            net.add(nn.AvgPool2D(pool_size=8))
            net.add(nn.Flatten())
            net.add(nn.Dense(num_classes))

    def hybrid_forward(self, F, x):
        # Pass the input through each layer, optionally printing every
        # output shape (loop header reconstructed from context).
        out = x
        for i, b in enumerate(self.net):
            out = b(out)
            if self.verbose:
                print('Block %d output: %s' % (i + 1, out.shape))
        return out

def get_net(ctx):
    num_outputs = 10
    net = ResNet(num_outputs)
    # The two lines below are reconstructed from context; Xavier
    # initialization matches the init import at the top.
    net.initialize(ctx=ctx, init=init.Xavier())
    return net
```
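As a sanity check on the architecture, we can run one random CIFAR-10-shaped batch through the network in verbose mode and watch the feature maps shrink from 32 x 32 down to the 8 x 8 map that Block 5 averages over. A minimal sketch:

```{.python .input}
# Print the output shape after every layer added to the HybridSequential.
net = ResNet(10, verbose=True)
net.initialize(init=init.Xavier())
X = nd.random.uniform(shape=(1, 3, 32, 32))
print(net(X).shape)
```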
## Training the Model

We define the model training function. Here we record the training time of each epoch, which helps us compare the time costs of different model designs.
```{.python .input n=7}
# Cross-entropy loss function.
loss = gloss.SoftmaxCrossEntropyLoss()

def train(net, train_data, valid_data, num_epochs, lr, wd, ctx, lr_period,
          lr_decay):
    trainer = gluon.Trainer(net.collect_params(), 'sgd',
                            {'learning_rate': lr, 'momentum': 0.9, 'wd': wd})
    prev_time = datetime.datetime.now()
    for epoch in range(num_epochs):
        train_l = 0.0
        train_acc = 0.0
        # Decay the learning rate every lr_period epochs.
        if epoch > 0 and epoch % lr_period == 0:
            trainer.set_learning_rate(trainer.learning_rate * lr_decay)
        for X, y in train_data:
            y = y.astype('float32').as_in_context(ctx)
            with autograd.record():
                y_hat = net(X.as_in_context(ctx))
                l = loss(y_hat, y)
            l.backward()
            trainer.step(batch_size)
            train_l += l.mean().asscalar()
            train_acc += gb.accuracy(y_hat, y)
        cur_time = datetime.datetime.now()
        h, remainder = divmod((cur_time - prev_time).seconds, 3600)
        m, s = divmod(remainder, 60)
        time_s = "time %02d:%02d:%02d" % (h, m, s)
        if valid_data is not None:
            valid_acc = gb.evaluate_accuracy(valid_data, net, ctx)
            epoch_s = ("epoch %d, loss %f, train acc %f, valid acc %f, "
                       % (epoch, train_l / len(train_data),
                          train_acc / len(train_data), valid_acc))
        else:
            epoch_s = ("epoch %d, loss %f, train acc %f, " %
                       (epoch, train_l / len(train_data),
                        train_acc / len(train_data)))
        prev_time = cur_time
        print(epoch_s + time_s + ', lr ' + str(trainer.learning_rate))
```
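The `lr_period` and `lr_decay` arguments implement a step schedule: after every `lr_period` epochs, the learning rate is multiplied by `lr_decay`. A minimal sketch of the resulting schedule, using the settings chosen below (lr=0.1, lr_period=80, lr_decay=0.1):

```{.python .input}
# The learning rate at epoch e is lr * lr_decay ** (e // lr_period):
# 0.1 for epochs 0-79, 0.01 for epochs 80-159, and so on.
for e in [0, 79, 80, 159, 160]:
    print('epoch %d, lr %g' % (e, 0.1 * 0.1 ** (e // 80)))
```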
Below we define the training parameters and train the model. All of these parameters are tunable. To keep the webpage build fast, we deliberately set the number of epochs to 1 here; in practice the epoch count can be set much larger, e.g. 100.
```{.python .input n=8}
ctx = gb.try_gpu()
num_epochs = 1
lr = 0.1
wd = 5e-4
lr_period = 80
lr_decay = 0.1
net = get_net(ctx)
net.hybridize()
train(net, train_data, valid_data, num_epochs, lr, wd, ctx, lr_period,
      lr_decay)
```
## Classifying the Test Set
Once we have a satisfactory model design and set of parameters, we retrain the model on the full training dataset (including the validation set) and classify the test set.
```{.python .input n=9}
net = get_net(ctx)
net.hybridize()
train(net, train_valid_data, None, num_epochs, lr, wd, ctx, lr_period,
      lr_decay)

preds = []
for X, _ in test_data:
    y_hat = net(X.as_in_context(ctx))
    preds.extend(y_hat.argmax(axis=1).astype(int).asnumpy())

# ImageFolderDataset reads files in string order, so sort the numeric ids
# the same way before pairing them with the predictions.
sorted_ids = list(range(1, len(test_ds) + 1))
sorted_ids.sort(key=str)

df = pd.DataFrame({'id': sorted_ids, 'label': preds})
df['label'] = df['label'].apply(lambda x: train_valid_ds.synsets[x])
df.to_csv('submission.csv', index=False)
```
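A quick look at the first rows confirms the two-column id/label layout of the submission file:

```{.python .input}
# Peek at the generated submission file.
print(df.head())
```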