diff --git a/dygraph/mnist/README_cn.md b/dygraph/mnist/README_cn.md new file mode 100644 index 0000000000000000000000000000000000000000..5e7ce9020599920261fecdf3be80d7ac70c982f1 --- /dev/null +++ b/dygraph/mnist/README_cn.md @@ -0,0 +1,28 @@ +# MNIST +当我们学习编程的时候,编写的第一个程序一般是实现打印"Hello World"。而机器学习(或深度学习)的入门教程,一般都是 MNIST 数据库上的手写识别问题。原因是手写识别属于典型的图像分类问题,比较简单,同时MNIST数据集也很完备。MNIST数据集作为一个简单的计算机视觉数据集,包含一系列如图1所示的手写数字图片和对应的标签。图片是28x28的像素矩阵,标签则对应着0~9的10个数字。每张图片都经过了大小归一化和居中处理。 +本页将介绍如何使用PaddlePaddle在DyGraph模式下实现MNIST,包括[安装](#installation)、[训练](#training-a-model)、[模型评估](#evaluation)。 + +--- +## 内容 +- [安装](#installation) +- [训练](#training-a-model) +- [模型评估](#evaluation) + +## 安装 + +在当前目录下运行样例代码需要PaddlePaddle Fluid的v1.4.0或以上的版本。如果你的运行环境中的PaddlePaddle低于此版本,请根据安装文档中的说明来更新PaddlePaddle。 + +## 训练 +教程中使用`paddle.dataset.mnist`数据集作为训练数据,可以通过如下的方式启动训练: +``` +env CUDA_VISIBLE_DEVICES=0 python mnist_dygraph.py +``` + +## 输出 +训练开始后,将得到类似如下的输出。 +``` +batch_id 0,loss 2.1786134243 +batch_id 10,loss 0.898496925831 +batch_id 20,loss 1.32524681091 +... 
+```
diff --git a/dygraph/mnist/mnist_dygraph.py b/dygraph/mnist/mnist_dygraph.py
new file mode 100644
index 0000000000000000000000000000000000000000..fb5a3d8ec76544a4b0f8e6ba2279ee3509a8209d
--- /dev/null
+++ b/dygraph/mnist/mnist_dygraph.py
@@ -0,0 +1,156 @@
+from __future__ import print_function
+
+import contextlib
+import unittest
+import numpy as np
+import six
+
+import paddle
+import paddle.fluid as fluid
+from paddle.fluid import core
+from paddle.fluid.optimizer import SGDOptimizer
+from paddle.fluid.dygraph.nn import Conv2D, Pool2D, FC
+from paddle.fluid.dygraph.base import to_variable
+import time
+
+
+class SimpleImgConvPool(fluid.dygraph.Layer):
+    """
+    A Conv2D layer followed by a Pool2D layer.
+
+    The ``conv_*``, ``act``, ``param_attr`` and ``bias_attr`` arguments
+    configure the convolution; the ``pool_*`` and ``global_pooling``
+    arguments configure the pooling. ``use_cudnn`` is passed to both.
+    """
+
+    def __init__(self,
+                 name_scope,
+                 num_channels,
+                 num_filters,
+                 filter_size,
+                 pool_size,
+                 pool_stride,
+                 pool_padding=0,
+                 pool_type='max',
+                 global_pooling=False,
+                 conv_stride=1,
+                 conv_padding=0,
+                 conv_dilation=1,
+                 conv_groups=1,
+                 act=None,
+                 use_cudnn=False,
+                 param_attr=None,
+                 bias_attr=None):
+        super(SimpleImgConvPool, self).__init__(name_scope)
+
+        # Forward act/param_attr/bias_attr to the convolution; they were
+        # previously accepted but dropped, so act="relu" had no effect.
+        self._conv2d = Conv2D(
+            self.full_name(),
+            num_channels=num_channels,
+            num_filters=num_filters,
+            filter_size=filter_size,
+            stride=conv_stride,
+            padding=conv_padding,
+            dilation=conv_dilation,
+            groups=conv_groups,
+            act=act,
+            param_attr=param_attr,
+            bias_attr=bias_attr,
+            use_cudnn=use_cudnn)
+
+        self._pool2d = Pool2D(
+            self.full_name(),
+            pool_size=pool_size,
+            pool_type=pool_type,
+            pool_stride=pool_stride,
+            pool_padding=pool_padding,
+            global_pooling=global_pooling,
+            use_cudnn=use_cudnn)
+
+    def forward(self, inputs):
+        """Apply the convolution and then the pooling to ``inputs``."""
+        x = self._conv2d(inputs)
+        x = self._pool2d(x)
+        return x
+
+
+class MNIST(fluid.dygraph.Layer):
+    """
+    MNIST classifier: two conv-pool stages and a softmax FC layer.
+    """
+
+    def __init__(self, name_scope):
+        super(MNIST, self).__init__(name_scope)
+
+        self._simple_img_conv_pool_1 = SimpleImgConvPool(
+            self.full_name(), 1, 20, 5, 2, 2, act="relu")
+
+        self._simple_img_conv_pool_2 = SimpleImgConvPool(
+            self.full_name(), 20, 50, 5, 2, 2, act="relu")
+
+        # With 1x28x28 inputs the second stage emits 50 maps of 4x4:
+        # 28 -conv5-> 24 -pool2-> 12 -conv5-> 8 -pool2-> 4.
+        pool_2_shape = 50 * 4 * 4
+        num_classes = 10
+        scale = (2.0 / (pool_2_shape**2 * num_classes))**0.5
+        self._fc = FC(self.full_name(),
+                      num_classes,
+                      param_attr=fluid.param_attr.ParamAttr(
+                          initializer=fluid.initializer.NormalInitializer(
+                              loc=0.0, scale=scale)),
+                      act="softmax")
+
+    def forward(self, inputs):
+        """Return the softmax output of the FC layer for ``inputs``."""
+        x = self._simple_img_conv_pool_1(inputs)
+        x = self._simple_img_conv_pool_2(x)
+        x = self._fc(x)
+        return x
+
+
+def train_mnist():
+    """Train MNIST in dygraph mode with SGD, printing the loss per batch."""
+    seed = 90
+    epoch_num = 10
+    batch_size = 128
+
+    with fluid.dygraph.guard():
+        fluid.default_startup_program().random_seed = seed
+        fluid.default_main_program().random_seed = seed
+
+        mnist = MNIST("mnist")
+        sgd = SGDOptimizer(learning_rate=1e-3)
+        # drop_last=True: the label reshape below needs full batches.
+        train_reader = paddle.batch(
+            paddle.dataset.mnist.train(),
+            batch_size=batch_size,
+            drop_last=True)
+
+        for epoch in range(epoch_num):
+            for batch_id, data in enumerate(train_reader()):
+                dy_x_data = np.array(
+                    [x[0].reshape(1, 28, 28)
+                     for x in data]).astype('float32')
+                y_data = np.array(
+                    [x[1] for x in data]).astype('int64').reshape(
+                        batch_size, 1)
+
+                img = to_variable(dy_x_data)
+                label = to_variable(y_data)
+                label.stop_gradient = True
+
+                cost = mnist(img)
+                loss = fluid.layers.cross_entropy(cost, label)
+                avg_loss = fluid.layers.mean(loss)
+                avg_loss.backward()
+                sgd.minimize(avg_loss)
+                mnist.clear_gradients()
+
+                dy_out = avg_loss.numpy()
+                print("batch id %d, loss %f" % (batch_id, dy_out))
+
+
+if __name__ == '__main__':
+    train_mnist()
+
diff --git a/fluid/PaddleCV/ocr_recognition/attention_model.py b/fluid/PaddleCV/ocr_recognition/attention_model.py index d3d2185bb3685e4dd048e9bbf2990f90bf1c2254..d0cbe8c66f059b3758efbeb80369946ee9076244 100755 --- a/fluid/PaddleCV/ocr_recognition/attention_model.py +++ b/fluid/PaddleCV/ocr_recognition/attention_model.py @@ -145,6 +145,9 @@ def gru_decoder_with_attention(target_embedding, encoder_vec, encoder_proj, decoder_inputs = fc_1 + fc_2 h, _, _ = fluid.layers.gru_unit( input=decoder_inputs, hidden=hidden_mem, size=decoder_size * 3) + print(decoder_inputs.shape) + 
print(hidden_mem.shape) + print(decoder_size) rnn.update_memory(hidden_mem, h) out = fluid.layers.fc(input=h, size=num_classes + 2, @@ -156,6 +159,8 @@ def gru_decoder_with_attention(target_embedding, encoder_vec, encoder_proj, def attention_train_net(args, data_shape, num_classes): + print("xxx") + images = fluid.layers.data(name='pixel', shape=data_shape, dtype='float32') label_in = fluid.layers.data( name='label_in', shape=[1], dtype='int32', lod_level=1) @@ -293,6 +298,10 @@ def attention_infer(images, num_classes, use_cudnn=True): input=decoder_inputs, hidden=pre_state_expanded, size=decoder_size * 3) + print(decoder_inputs.shape) + print(pre_state_expanded.shape) + import sys + sys.stdout.flush() current_state_with_lod = fluid.layers.lod_reset( x=current_state, y=pre_score) diff --git a/fluid/PaddleCV/ocr_recognition/train.py b/fluid/PaddleCV/ocr_recognition/train.py index 2e294907a6bbac5f311c420ad22d51eafa972da7..394116ad2ae734d6675efd55b08134372e00250a 100755 --- a/fluid/PaddleCV/ocr_recognition/train.py +++ b/fluid/PaddleCV/ocr_recognition/train.py @@ -51,6 +51,10 @@ def train(args): train_net = attention_train_net get_feeder_data = get_attention_feeder_data + print("train net") + import sys + sys.stdout.flush() + num_classes = None num_classes = data_reader.num_classes( ) if num_classes is None else num_classes