Commit dc5d09bd authored by minqiyang

Add dygraph mnist

Parent ba609980
# MNIST
When we learn to program, the first program we write usually prints "Hello World". The introductory exercise for machine learning (or deep learning) is, likewise, usually handwritten digit recognition on the MNIST dataset: handwriting recognition is a typical image classification problem that is relatively simple, and the MNIST dataset is complete and well curated. As a simple computer vision dataset, MNIST contains a series of handwritten digit images like those in Figure 1, together with their corresponding labels. Each image is a 28x28 pixel matrix, and each label is one of the ten digits 0 through 9. Every image has been size-normalized and centered.

This page describes how to implement MNIST with PaddlePaddle in DyGraph mode, covering [Installation](#installation), [Training](#training-a-model), and [Evaluation](#evaluation).
---
## Contents
- [Installation](#installation)
- [Training](#training-a-model)
- [Evaluation](#evaluation)
## Installation
Running the sample code in this directory requires PaddlePaddle Fluid v1.4.0 or later. If the version of PaddlePaddle in your environment is older than this, please update it following the instructions in the installation documentation.
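If you are unsure which version is installed, one way to check it, and to upgrade if needed, is shown below (`paddlepaddle-gpu` is the CUDA build):
```
python -c "import paddle; print(paddle.__version__)"
pip install -U paddlepaddle  # or: pip install -U paddlepaddle-gpu
```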
## Training
The tutorial uses `paddle.dataset.mnist` as the training dataset. Training can be launched as follows:
```
env CUDA_VISIBLE_DEVICES=0 python train.py
```
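Each sample produced by the reader is an `(image, label)` pair, where the image is a flattened vector of 784 floats scaled to `[-1, 1]` and the label is an integer in `[0, 9]`. A quick way to inspect one sample (an illustrative snippet, not part of `train.py`):
```python
import paddle

# Pull a single example from the training reader.
reader = paddle.dataset.mnist.train()
image, label = next(reader())
print(image.shape, image.dtype, label)  # (784,) float32, e.g. 5
```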
## Output
Once training starts, you will see output similar to the following:
```
batch_id 0,loss 2.1786134243
batch_id 10,loss 0.898496925831
batch_id 20,loss 1.32524681091
...
```
The complete `train.py` is listed below.

```python
from __future__ import print_function

import numpy as np

import paddle
import paddle.fluid as fluid
from paddle.fluid.optimizer import SGDOptimizer
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC
from paddle.fluid.dygraph.base import to_variable

class SimpleImgConvPool(fluid.dygraph.Layer):
    """
    A Conv2D layer followed by a Pool2D layer.
    """

    def __init__(self,
                 name_scope,
                 num_channels,
                 num_filters,
                 filter_size,
                 pool_size,
                 pool_stride,
                 pool_padding=0,
                 pool_type='max',
                 global_pooling=False,
                 conv_stride=1,
                 conv_padding=0,
                 conv_dilation=1,
                 conv_groups=1,
                 act=None,
                 use_cudnn=False,
                 param_attr=None,
                 bias_attr=None):
        super(SimpleImgConvPool, self).__init__(name_scope)

        # Pass the constructor arguments through to the sublayers; the act,
        # param_attr and bias_attr arguments were previously accepted but
        # silently ignored.
        self._conv2d = Conv2D(
            self.full_name(),
            num_channels=num_channels,
            num_filters=num_filters,
            filter_size=filter_size,
            stride=conv_stride,
            padding=conv_padding,
            dilation=conv_dilation,
            groups=conv_groups,
            param_attr=param_attr,
            bias_attr=bias_attr,
            act=act,
            use_cudnn=use_cudnn)

        self._pool2d = Pool2D(
            self.full_name(),
            pool_size=pool_size,
            pool_type=pool_type,
            pool_stride=pool_stride,
            pool_padding=pool_padding,
            global_pooling=global_pooling,
            use_cudnn=use_cudnn)

    def forward(self, inputs):
        x = self._conv2d(inputs)
        x = self._pool2d(x)
        return x

class MNIST(fluid.dygraph.Layer):
    """
    MNIST model: two conv-pool blocks followed by a softmax FC layer.
    """

    def __init__(self, name_scope):
        super(MNIST, self).__init__(name_scope)

        self._simple_img_conv_pool_1 = SimpleImgConvPool(
            self.full_name(), 1, 20, 5, 2, 2, act="relu")

        self._simple_img_conv_pool_2 = SimpleImgConvPool(
            self.full_name(), 20, 50, 5, 2, 2, act="relu")

        # The second pool outputs 50 channels of 4x4 feature maps; the FC
        # weights are initialized from a scaled normal distribution.
        pool_2_shape = 50 * 4 * 4
        SIZE = 10
        scale = (2.0 / (pool_2_shape**2 * SIZE))**0.5
        self._fc = FC(self.full_name(),
                      10,
                      param_attr=fluid.param_attr.ParamAttr(
                          initializer=fluid.initializer.NormalInitializer(
                              loc=0.0, scale=scale)),
                      act="softmax")

    def forward(self, inputs):
        x = self._simple_img_conv_pool_1(inputs)
        x = self._simple_img_conv_pool_2(x)
        x = self._fc(x)
        return x

def train_mnist():
    seed = 90
    epoch_num = 10
    with fluid.dygraph.guard():
        # Fix random seeds so runs are reproducible.
        fluid.default_startup_program().random_seed = seed
        fluid.default_main_program().random_seed = seed

        mnist = MNIST("mnist")
        sgd = SGDOptimizer(learning_rate=1e-3)
        train_reader = paddle.batch(
            paddle.dataset.mnist.train(), batch_size=128, drop_last=True)

        for epoch in range(epoch_num):
            for batch_id, data in enumerate(train_reader()):
                # Reshape each flattened 784-float image to NCHW (1, 28, 28).
                dy_x_data = np.array(
                    [x[0].reshape(1, 28, 28)
                     for x in data]).astype('float32')
                y_data = np.array(
                    [x[1] for x in data]).astype('int64').reshape(-1, 1)

                img = to_variable(dy_x_data)
                label = to_variable(y_data)
                label.stop_gradient = True

                cost = mnist(img)
                loss = fluid.layers.cross_entropy(cost, label)
                avg_loss = fluid.layers.mean(loss)

                avg_loss.backward()
                sgd.minimize(avg_loss)
                mnist.clear_gradients()

                dy_out = avg_loss.numpy()
                print("batch id %d, loss %f" % (batch_id, dy_out))


if __name__ == '__main__':
    train_mnist()
```
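The [Evaluation](#evaluation) step is not part of this commit. As a rough sketch of what it could look like with the same DyGraph APIs, reusing the imports from `train.py` (the `test_mnist` helper below is an illustration, not code from this repository), one might compute test-set accuracy after training:
```python
def test_mnist(mnist, batch_size=128):
    # Hypothetical helper: measure accuracy on the MNIST test split.
    # Must be called inside the same fluid.dygraph.guard() scope in which
    # `mnist` was trained.
    test_reader = paddle.batch(
        paddle.dataset.mnist.test(), batch_size=batch_size)
    correct, total = 0, 0
    for data in test_reader():
        x = np.array([d[0].reshape(1, 28, 28) for d in data]).astype('float32')
        y = np.array([d[1] for d in data]).astype('int64')
        probs = mnist(to_variable(x)).numpy()  # softmax outputs, (batch, 10)
        correct += int((probs.argmax(axis=1) == y).sum())
        total += len(y)
    return correct / float(total)
```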
The commit also adds temporary debug prints to an attention-based recognition model:

```diff
@@ -145,6 +145,9 @@ def gru_decoder_with_attention(target_embedding, encoder_vec, encoder_proj,
         decoder_inputs = fc_1 + fc_2
         h, _, _ = fluid.layers.gru_unit(
             input=decoder_inputs, hidden=hidden_mem, size=decoder_size * 3)
+        print(decoder_inputs.shape)
+        print(hidden_mem.shape)
+        print(decoder_size)
         rnn.update_memory(hidden_mem, h)
         out = fluid.layers.fc(input=h,
                               size=num_classes + 2,
@@ -156,6 +159,8 @@ def gru_decoder_with_attention(target_embedding, encoder_vec, encoder_proj,
 def attention_train_net(args, data_shape, num_classes):
+    print("xxx")
     images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32')
     label_in = fluid.layers.data(
         name='label_in', shape=[1], dtype='int32', lod_level=1)
@@ -293,6 +298,10 @@ def attention_infer(images, num_classes, use_cudnn=True):
             input=decoder_inputs,
             hidden=pre_state_expanded,
             size=decoder_size * 3)
+        print(decoder_inputs.shape)
+        print(pre_state_expanded.shape)
+        import sys
+        sys.stdout.flush()
         current_state_with_lod = fluid.layers.lod_reset(
             x=current_state, y=pre_score)
```

and to its training script:

```diff
@@ -51,6 +51,10 @@ def train(args):
         train_net = attention_train_net
         get_feeder_data = get_attention_feeder_data
+        print("train net")
+        import sys
+        sys.stdout.flush()
+        num_classes = None
     num_classes = data_reader.num_classes(
     ) if num_classes is None else num_classes
```