未验证 提交 7500b381 编写于 作者: Q Qiyang Min 提交者: GitHub

Merge pull request #2121 from PaddlePaddle/update-mnist

add dygraph models:mnist, test=develop
# MNIST
当我们学习编程的时候,编写的第一个程序一般是实现打印"Hello World"。而机器学习(或深度学习)的入门教程,一般都是 MNIST 数据库上的手写识别问题。原因是手写识别属于典型的图像分类问题,比较简单,同时MNIST数据集也很完备。
本页将介绍如何使用PaddlePaddle在DyGraph模式下实现MNIST,包括[安装](#installation)、[训练](#training-a-model)、[输出](#log)、[参数保存](#save)、[模型评估](#evaluation)。
---
## 内容
......@@ -21,8 +21,42 @@ env CUDA_VISIBLE_DEVICES=0 python mnist_dygraph.py
## 输出
执行训练开始后,将得到类似如下的输出。
```
batch_id 0,loss 2.1786134243
batch_id 10,loss 0.898496925831
batch_id 20,loss 1.32524681091
Loss at epoch 0 step 0: [2.3043773]
Loss at epoch 0 step 100: [0.20764539]
Loss at epoch 0 step 200: [0.18648806]
Loss at epoch 0 step 300: [0.10279777]
Loss at epoch 0 step 400: [0.03940877]
...
```
## 参数保存
调用`fluid.dygraph.save_persistables()`接口可以把模型的参数进行保存。
```python
fluid.dygraph.save_persistables(mnist.state_dict(), "save_dir")
```
## 测试
执行`mnist.eval()`可以切换至评估状态,即不再更新参数,只使用已训练好的参数进行前向计算,通过这种方式进行测试或者评估。
```python
mnist.eval()
```
## 模型评估
我们使用手写数据集中的一张图片来进行评估。为了与训练时的模型相区别,我们使用`with fluid.dygraph.guard()`来切换到一个新的参数空间,然后构建一个用于评估的网络`mnist_infer`,并通过`mnist_infer.load_dict()`来加载使用`fluid.dygraph.load_persistables`读取的参数。最后调用`mnist_infer.eval()`切换到评估模式。
```python
with fluid.dygraph.guard():
mnist_infer = MNIST("mnist")
# load checkpoint
mnist_infer.load_dict(
fluid.dygraph.load_persistables("save_dir"))
# start evaluate mode
mnist_infer.eval()
```
如果无意外,将可以看到预测的结果:
```text
Inference result of image/infer_3.png is: 3
```
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import print_function
import contextlib
import numpy as np
import six
from PIL import Image
import os
import paddle
import paddle.fluid as fluid
from paddle.fluid.optimizer import SGDOptimizer
from paddle.fluid.optimizer import AdamOptimizer
from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC
from paddle.fluid.dygraph.base import to_variable
class SimpleImgConvPool(fluid.dygraph.Layer):
"""
Conv Pool Layer
"""
def __init__(self,
name_scope,
num_channels,
......@@ -64,9 +74,6 @@ class SimpleImgConvPool(fluid.dygraph.Layer):
class MNIST(fluid.dygraph.Layer):
"""
MNIST model
"""
def __init__(self, name_scope):
super(MNIST, self).__init__(name_scope)
......@@ -86,49 +93,110 @@ class MNIST(fluid.dygraph.Layer):
loc=0.0, scale=scale)),
act="softmax")
def forward(self, inputs, label=None):
    """Run the network and optionally score the prediction.

    Args:
        inputs: Input passed to the first conv-pool stage.
        label: Optional ground-truth labels. When supplied, the accuracy of
            the prediction against them is computed as well.

    Returns:
        The output of ``self._fc`` when ``label`` is None; otherwise a
        ``(prediction, accuracy)`` tuple.
    """
    x = self._simple_img_conv_pool_1(inputs)
    x = self._simple_img_conv_pool_2(x)
    x = self._fc(x)
    if label is None:
        # Inference-style call: nothing to compare against.
        return x
    acc = fluid.layers.accuracy(input=x, label=label)
    return x, acc
def test_train(reader, model, batch_size):
    """Evaluate ``model`` on every batch produced by ``reader``.

    Args:
        reader: Callable returning an iterable of batches; each batch is a
            sequence of samples whose item 0 is the image data and item 1
            is the label.
        model: Callable invoked as ``model(img, label)`` that returns a
            ``(prediction, accuracy)`` pair.
        batch_size: Nominal batch size. Accepted for interface
            compatibility; the label column is shaped from the batch that
            was actually received, so a short final batch is handled.

    Returns:
        ``(mean_loss, mean_accuracy)``, each the unweighted mean of the
        per-batch values.
    """
    acc_set = []
    avg_loss_set = []
    for data in reader():
        dy_x_data = np.array(
            [sample[0].reshape(1, 28, 28) for sample in data]
        ).astype('float32')
        # reshape(-1, 1) infers the row count, so a batch shorter than
        # ``batch_size`` does not raise.
        y_data = np.array(
            [sample[1] for sample in data]
        ).astype('int64').reshape(-1, 1)

        img = to_variable(dy_x_data)
        label = to_variable(y_data)
        # Labels are constants; no gradient should flow into them.
        label.stop_gradient = True

        prediction, acc = model(img, label)
        loss = fluid.layers.cross_entropy(input=prediction, label=label)
        avg_loss = fluid.layers.mean(loss)

        acc_set.append(float(acc.numpy()))
        avg_loss_set.append(float(avg_loss.numpy()))

    # Aggregate the per-batch metrics into test-set averages.
    acc_val_mean = np.array(acc_set).mean()
    avg_loss_val_mean = np.array(avg_loss_set).mean()
    return avg_loss_val_mean, acc_val_mean
def train_mnist():
    """Train the MNIST model, checkpoint it, then run a one-image inference.

    Phase 1 trains ``MNIST`` with Adam for ``epoch_num`` epochs, printing the
    training loss every 100 steps and the test loss/accuracy after each
    epoch, and finally saves the parameters to ``"save_dir"``.

    Phase 2 opens a fresh dygraph guard, rebuilds the network, loads the
    saved parameters, and prints the predicted digit for
    ``image/infer_3.png`` located next to this script.
    """
    seed = 90
    epoch_num = 5
    BATCH_SIZE = 64

    with fluid.dygraph.guard():
        fluid.default_startup_program().random_seed = seed
        fluid.default_main_program().random_seed = seed

        mnist = MNIST("mnist")
        adam = AdamOptimizer(learning_rate=0.001)
        # drop_last=True keeps every batch at exactly BATCH_SIZE samples,
        # which the fixed-size label reshape below relies on.
        train_reader = paddle.batch(
            paddle.dataset.mnist.train(),
            batch_size=BATCH_SIZE,
            drop_last=True)
        test_reader = paddle.batch(
            paddle.dataset.mnist.test(),
            batch_size=BATCH_SIZE,
            drop_last=True)

        for epoch in range(epoch_num):
            for batch_id, data in enumerate(train_reader()):
                dy_x_data = np.array(
                    [x[0].reshape(1, 28, 28)
                     for x in data]).astype('float32')
                y_data = np.array(
                    [x[1] for x in data]).astype('int64').reshape(
                        BATCH_SIZE, 1)

                img = to_variable(dy_x_data)
                label = to_variable(y_data)
                label.stop_gradient = True

                cost, acc = mnist(img, label)

                loss = fluid.layers.cross_entropy(cost, label)
                avg_loss = fluid.layers.mean(loss)
                avg_loss.backward()

                adam.minimize(avg_loss)
                # Reset gradients before the next step.
                mnist.clear_gradients()

                if batch_id % 100 == 0:
                    print("Loss at epoch {} step {}: {:}".format(
                        epoch, batch_id, avg_loss.numpy()))

            # Evaluate on the test set in eval mode, then resume training.
            mnist.eval()
            test_cost, test_acc = test_train(test_reader, mnist, BATCH_SIZE)
            mnist.train()
            print("Loss at epoch {} , Test avg_loss is: {}, acc is: {}".format(
                epoch, test_cost, test_acc))

        # save checkpoint
        fluid.dygraph.save_persistables(mnist.state_dict(), "save_dir")
        print("checkpoint saved")

    with fluid.dygraph.guard():
        mnist_infer = MNIST("mnist")
        # load checkpoint
        mnist_infer.load_dict(
            fluid.dygraph.load_persistables("save_dir"))
        print("checkpoint loaded")

        # start evaluate mode
        mnist_infer.eval()

        def load_image(file):
            """Load an image as a (1, 1, 28, 28) float32 array in [-1, 1]."""
            im = Image.open(file).convert('L')
            im = im.resize((28, 28), Image.ANTIALIAS)
            im = np.array(im).reshape(1, 1, 28, 28).astype(np.float32)
            # Rescale pixel values from [0, 255] to [-1, 1].
            im = im / 255.0 * 2.0 - 1.0
            return im

        cur_dir = os.path.dirname(os.path.realpath(__file__))
        tensor_img = load_image(cur_dir + '/image/infer_3.png')

        results = mnist_infer(to_variable(tensor_img))
        # argsort is ascending, so the last index is the top-scoring class.
        lab = np.argsort(results.numpy())
        print("Inference result of image/infer_3.png is: %d" % lab[0][-1])
# Script entry point: only run the training routine when executed directly.
if __name__ == "__main__":
    train_mnist()
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册