Merge pull request #528 from jetfuel/recognize_digits_new_api

Recognize digits new api first draft

Merge pull request #528 from jetfuel/recognize_digits_new_api
Recognize digits new api first draft
ebe54c6c · Jeff Wang · GitHub · 742ea426 · 9bb171ec · ebe54c6c
4 changed file
--- a/02.recognize_digits/README.cn.md
+++ b/02.recognize_digits/README.cn.md
@@ -129,17 +129,18 @@ PaddlePaddle在API中提供了自动加载[MNIST](http://yann.lecun.com/exdb/mni
 ## 配置说明
-首先，加载PaddlePaddle的V2 api包。
+首先，加载PaddlePaddle的fluid api包。
 ```python
-import paddle.v2 as paddle
+import paddle.fluid as fluid
 ```
 其次，定义三个不同的分类器：
 - Softmax回归：只通过一层简单的以softmax为激活函数的全连接层，就可以得到分类的结果。
 ```python
-def softmax_regression(img):
+def softmax_regression():
+    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
    predict = paddle.layer.fc(input=img,
                              size=10,
                              act=paddle.activation.Softmax())
@@ -148,64 +149,59 @@ def softmax_regression(img):
 - 多层感知器：下面代码实现了一个含有两个隐藏层（即全连接层）的多层感知器。其中两个隐藏层的激活函数均采用ReLU，输出层的激活函数用Softmax。
 ```python
-def multilayer_perceptron(img):
+def multilayer_perceptron():
+    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
    # 第一个全连接层，激活函数为ReLU
-    hidden1 = paddle.layer.fc(input=img, size=128, act=paddle.activation.Relu())
+    hidden = fluid.layers.fc(input=img, size=200, act='relu')
    # 第二个全连接层，激活函数为ReLU
-    hidden2 = paddle.layer.fc(input=hidden1,
+    hidden = fluid.layers.fc(input=hidden, size=200, act='relu')
-                              size=64,
-                              act=paddle.activation.Relu())
    # 以softmax为激活函数的全连接输出层，输出层的大小必须为数字的个数10
-    predict = paddle.layer.fc(input=hidden2,
+    prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
-                              size=10,
+    return prediction
-                              act=paddle.activation.Softmax())
-    return predict
 ```
 - 卷积神经网络LeNet-5: 输入的二维图像，首先经过两次卷积层到池化层，再经过全连接层，最后使用以softmax为激活函数的全连接层作为输出层。
 ```python
-def convolutional_neural_network(img):
+def convolutional_neural_network():
+    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
    # 第一个卷积-池化层
-    conv_pool_1 = paddle.networks.simple_img_conv_pool(
+    conv_pool_1 = fluid.nets.simple_img_conv_pool(
        input=img,
        filter_size=5,
        num_filters=20,
-        num_channel=1,
        pool_size=2,
        pool_stride=2,
-        act=paddle.activation.Relu())
+        act="relu")
+    conv_pool_1 = fluid.layers.batch_norm(conv_pool_1)
    # 第二个卷积-池化层
-    conv_pool_2 = paddle.networks.simple_img_conv_pool(
+    conv_pool_2 = fluid.nets.simple_img_conv_pool(
        input=conv_pool_1,
        filter_size=5,
        num_filters=50,
-        num_channel=20,
        pool_size=2,
        pool_stride=2,
-        act=paddle.activation.Relu())
+        act="relu")
    # 以softmax为激活函数的全连接输出层，输出层的大小必须为数字的个数10
-    predict = paddle.layer.fc(input=conv_pool_2,
+    prediction = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
-                              size=10,
+    return prediction
-                              act=paddle.activation.Softmax())
-    return predict
 ```
 接着，通过`layer.data`调用来获取数据，然后调用分类器（这里我们提供了三个不同的分类器）得到分类结果。训练时，对该结果计算其损失函数，分类问题常常选择交叉熵损失函数。
 ```python
-# 该模型运行在单个CPU上
+def train_program():
-paddle.init(use_gpu=False, trainer_count=1)
+    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
-images = paddle.layer.data(
+    # predict = softmax_regression() # uncomment for Softmax回归
-    name='pixel', type=paddle.data_type.dense_vector(784))
+    # predict = multilayer_perceptron() # uncomment for 多层感知器
-label = paddle.layer.data(
+    predict = convolutional_neural_network() # uncomment for LeNet5卷积神经网络
-    name='label', type=paddle.data_type.integer_value(10))
+    cost = fluid.layers.cross_entropy(input=predict, label=label)
+    avg_cost = fluid.layers.mean(cost)
+    acc = fluid.layers.accuracy(input=predict, label=label)
+    return [avg_cost, acc]
-# predict = softmax_regression(images) # Softmax回归
-# predict = multilayer_perceptron(images) #多层感知器
-predict = convolutional_neural_network(images) #LeNet5卷积神经网络
-cost = paddle.layer.classification_cost(input=predict, label=label)
+# 该模型运行在单个CPU上
 ```
 然后，指定训练相关的参数。
@@ -214,16 +210,16 @@ cost = paddle.layer.classification_cost(input=predict, label=label)
 - 正则化（regularization）： 是防止网络过拟合的一种手段，此处采用L2正则化。
 ```python
-parameters = paddle.parameters.create(cost)
+# 该模型运行在单个CPU上
+use_cuda = False # set to True if training with GPU
+place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
 optimizer = paddle.optimizer.Momentum(
    learning_rate=0.1 / 128.0,
    momentum=0.9,
    regularization=paddle.optimizer.L2Regularization(rate=0.0005 * 128))
-trainer = paddle.trainer.SGD(cost=cost,
+trainer = fluid.Trainer(
-                             parameters=parameters,
+    train_func=train_program, place=place, optimizer=optimizer)
-                             update_equation=optimizer)
 ```
 下一步，我们开始训练过程。`paddle.dataset.mnist.train()`和`paddle.dataset.mnist.test()`分别做训练和测试数据集。这两个函数各自返回一个reader——PaddlePaddle中的reader是一个Python函数，每次调用的时候返回一个Python yield generator。
@@ -232,38 +228,18 @@ trainer = paddle.trainer.SGD(cost=cost,
 `batch`是一个特殊的decorator，它的输入是一个reader，输出是一个batched reader —— 在PaddlePaddle里，一个reader每次yield一条训练数据，而一个batched reader每次yield一个minibatch。
-`event_handler_plot`可以用来在训练过程中画图如下：
-![png](./image/train_and_test.png)
 ```python
-from paddle.v2.plot import Ploter
+train_reader = paddle.batch(
+        paddle.reader.shuffle(
-train_title = "Train cost"
+            paddle.dataset.mnist.train(), buf_size=500),
-test_title = "Test cost"
+        batch_size=64)
-cost_ploter = Ploter(train_title, test_title)
-step = 0
-# event_handler to plot a figure
-def event_handler_plot(event):
-    global step
-    if isinstance(event, paddle.event.EndIteration):
-        if step % 100 == 0:
-            cost_ploter.append(train_title, step, event.cost)
-            cost_ploter.plot()
-        step += 1
-    if isinstance(event, paddle.event.EndPass):
-        # save parameters
-        with open('params_pass_%d.tar' % event.pass_id, 'w') as f:
-            trainer.save_parameter_to_tar(f)
-        result = trainer.test(reader=paddle.batch(
+test_reader = paddle.batch(
-            paddle.dataset.mnist.test(), batch_size=128))
+            paddle.dataset.mnist.test(), batch_size=64)
-        cost_ploter.append(test_title, step, result.cost)
 ```
 `event_handler` 用来在训练过程中输出训练结果
 ```python
 lists = []
@@ -285,14 +261,14 @@ def event_handler(event):
                      result.metrics['classification_error_evaluator']))
 ```
+Now that we have set up the `event_handler` and the reader, we can start training the model. `feed_order` is used to map the data dict to the `train_program`.
 ```python
 trainer.train(
-    reader=paddle.batch(
+    num_epochs=1,
-        paddle.reader.shuffle(
+    event_handler=event_handler,
-            paddle.dataset.mnist.train(), buf_size=8192),
+    reader=train_reader,
-        batch_size=128),
+    feed_order=['img', 'label'])
-    event_handler=event_handler_plot,
-    num_passes=5)
 ```
 训练过程是完全自动的，event_handler里打印的日志类似如下所示：
@@ -311,27 +287,25 @@ trainer.train(
 ## 应用模型
-可以使用训练好的模型对手写体数字图片进行分类，下面程序展示了如何使用paddle.infer接口进行推断。
+可以使用训练好的模型对手写体数字图片进行分类，下面程序展示了如何使用 `fluid.Inferencer` 接口进行推断。
 ```python
-from PIL import Image
+inferencer = fluid.Inferencer(
-import numpy as np
+    # infer_func=softmax_regression, # uncomment for softmax regression
-import os
+    # infer_func=multilayer_perceptron, # uncomment for MLP
-def load_image(file):
+    infer_func=convolutional_neural_network, # uncomment for LeNet5
-    im = Image.open(file).convert('L')
+    param_path=params_dirname,
-    im = im.resize((28, 28), Image.ANTIALIAS)
+    place=place)
-    im = np.array(im).astype(np.float32).flatten()
-    im = im / 255.0 * 2.0 - 1.0
+batch_size = 1
-    return im
+import numpy
+tensor_img = numpy.random.uniform(-1.0, 1.0,
-test_data = []
+                                  [batch_size, 1, 28, 28]).astype("float32")
-cur_dir = os.getcwd()
-test_data.append((load_image(cur_dir + '/image/infer_3.png'),))
+results = inferencer.infer({'img': tensor_img})
-probs = paddle.infer(
+print("infer results: ", results[0])
-    output_layer=predict, parameters=parameters, input=test_data)
-lab = np.argsort(-probs) # probs and lab are the results of one batch data
-print "Label of image/infer_3.png is: %d" % lab[0][0]
 ```
 ## 总结

--- a/02.recognize_digits/README.md
+++ b/02.recognize_digits/README.md
 # Recognize Digits
-The source code for this tutorial is here:  [book/recognize_digits](https://github.com/PaddlePaddle/book/tree/develop/02.recognize_digits). For instructions on getting started with Paddle, please refer to [installation instructions](https://github.com/PaddlePaddle/book/blob/develop/README.md#running-the-book).
+The source code for this tutorial is here:  [book/recognize_digits](https://github.com/PaddlePaddle/book/tree/develop/02.recognize_digits).
+For instructions on getting started with Paddle, please refer to [installation instructions](https://github.com/PaddlePaddle/book/blob/develop/README.md#running-the-book).
 ## Introduction
-When one learns to program, the first task is usually to write a program that prints "Hello World!". In Machine Learning or Deep Learning, an equivalent task is to train a model to recognize hand-written digits using the [MNIST](http://yann.lecun.com/exdb/mnist/) dataset. Handwriting recognition is a classic image classification problem. The problem is relatively easy and MNIST is a complete dataset. As a simple Computer Vision dataset, MNIST contains images of handwritten digits and their corresponding labels (Fig. 1). The input image is a $28\times28$ matrix, and the label is one of the digits from $0$ to $9$. All images are normalized, meaning that they are both rescaled and centered.
+When one learns to program, the first task is usually to write a program that prints "Hello World!".
+In Machine Learning or Deep Learning, an equivalent task is to train a model to recognize hand-written digits using the [MNIST](http://yann.lecun.com/exdb/mnist/) dataset.
+Handwriting recognition is a classic image classification problem. The problem is relatively easy and MNIST is a complete dataset.
+As a simple Computer Vision dataset, MNIST contains images of handwritten digits and their corresponding labels (Fig. 1).
+The input image is a $28\times28$ matrix, and the label is one of the digits from $0$ to $9$. All images are normalized, meaning that they are both rescaled and centered.
 <p align="center">
 <img src="image/mnist_example_image.png" width="400"><br/>
 Fig. 1. Examples of MNIST images
 </p>
-The MNIST dataset is from the [NIST](https://www.nist.gov/srd/nist-special-database-19) Special Database 3 (SD-3) and the Special Database 1 (SD-1). The SD-3 is labeled by the staff of the U.S. Census Bureau, while SD-1 is labeled by high school students. Therefore the SD-3 is cleaner and easier to recognize than the SD-1 dataset. Yann LeCun et al. used half of the samples from each of SD-1 and SD-3 to create the MNIST training set of 60,000 samples and test set of 10,000 samples. 250 annotators labeled the training set, thus guaranteed that there wasn't a complete overlap of annotators of training set and test set.
+The MNIST dataset is from the [NIST](https://www.nist.gov/srd/nist-special-database-19) Special Database 3 (SD-3) and the Special Database 1 (SD-1).
+The SD-3 is labeled by the staff of the U.S. Census Bureau, while SD-1 is labeled by high school students. Therefore the SD-3 is cleaner and easier to recognize than the SD-1 dataset.
+Yann LeCun et al. used half of the samples from each of SD-1 and SD-3 to create the MNIST training set of 60,000 samples and test set of 10,000 samples.
+250 annotators labeled the training set, thus guaranteed that there wasn't a complete overlap of annotators of training set and test set.
-The MNIST dataset has been used for evaluating many image recognition algorithms such as a single layer linear classifier, Multilayer Perceptron (MLP) and Multilayer CNN LeNet\[[1](#references)\], K-Nearest Neighbors (k-NN) \[[2](#references)\], Support Vector Machine (SVM) \[[3](#references)\], Neural Networks \[[4-7](#references)\], Boosting \[[8](#references)\] and preprocessing methods like distortion removal, noise removal, and blurring.  Among these algorithms, the *Convolutional Neural Network* (CNN) has achieved a series of impressive results in Image Classification tasks, including VGGNet, GoogLeNet, and ResNet (See [Image Classification](https://github.com/PaddlePaddle/book/tree/develop/03.image_classification) tutorial).
+The MNIST dataset has been used for evaluating many image recognition algorithms such as a single layer linear classifier,
+Multilayer Perceptron (MLP) and Multilayer CNN LeNet\[[1](#references)\], K-Nearest Neighbors (k-NN) \[[2](#references)\], Support Vector Machine (SVM) \[[3](#references)\],
+Neural Networks \[[4-7](#references)\], Boosting \[[8](#references)\] and preprocessing methods like distortion removal, noise removal, and blurring.
+Among these algorithms, the *Convolutional Neural Network* (CNN) has achieved a series of impressive results in Image Classification tasks, including VGGNet, GoogLeNet,
+and ResNet (See [Image Classification](https://github.com/PaddlePaddle/book/tree/develop/03.image_classification) tutorial).
 In this tutorial, we start with a simple **softmax** regression model and go on with MLP and CNN.  Readers will see how these methods improve the recognition accuracy step-by-step.
@@ -124,183 +136,177 @@ PaddlePaddle provides a Python module, `paddle.dataset.mnist`, which downloads a
 |t10k-labels-idx1-ubyte |  Evaluation labels | 10,000 |
+## Fluid API Overview
+The demo will be using the latest PaddlePaddle Fluid API. The Fluid API is the newest Paddle API. It simplifies model configuration without sacrificing performance.
+We recommend using the Fluid API as it is much easier to pick up.
+Here is a quick overview of the major Fluid API components.
+1. `inference_program`: A function that specifies how to get the prediction from the data input.
+This is where you specify the network flow.
+1. `train_program`: A function that specifies how to get `avg_cost` from `inference_program` and labels.
+This is where you specify the loss calculations.
+1. `optimizer`: Configures how to minimize the loss. Paddle supports most major optimization methods.
+1. `Trainer`: The Fluid trainer manages the training process specified by the `train_program` and `optimizer`. Users can monitor the training
+progress through the `event_handler` callback function.
+1. `Inferencer`: The Fluid inferencer loads the `inference_program` and the parameters trained by the Trainer.
+It can then run inference on the data and return the prediction.
+We will go through all of them and dig deeper into the configurations in this demo.
 ## Model Configuration
 A PaddlePaddle program starts from importing the API package:
 ```python
-import paddle.v2 as paddle
+import paddle
+import paddle.fluid as fluid
 ```
-We want to use this program to demonstrate three different classifiers, each defined as a Python function:
+### Program Functions Configuration
+First, we need to set up the `inference_program` function. We want to use this program to demonstrate three different classifiers, each defined as a Python function.
+We need to feed image data to the classifier. PaddlePaddle provides a special layer `layer.data` for reading data.
+Let us create a data layer for reading images and connect it to the classification network.
 - Softmax regression: the network has a fully-connection layer with softmax activation:
 ```python
-def softmax_regression(img):
+def softmax_regression():
+    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
    predict = paddle.layer.fc(input=img,
                              size=10,
                              act=paddle.activation.Softmax())
    return predict
 ```
- Multi-Layer Perceptron: this network has two hidden fully-connected layers, one with ReLU and the other with softmax activation:
+- Multi-Layer Perceptron: this network has two hidden fully-connected layers, both using ReLU as the activation function. The output layer uses softmax activation:
 ```python
-def multilayer_perceptron(img):
+def multilayer_perceptron():
-    hidden1 = paddle.layer.fc(input=img, size=128, act=paddle.activation.Relu())
+    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
-    hidden2 = paddle.layer.fc(input=hidden1,
+    # first fully-connected layer, using ReLU as its activation function
-                              size=64,
+    hidden = fluid.layers.fc(input=img, size=200, act='relu')
-                              act=paddle.activation.Relu())
+    # second fully-connected layer, using ReLU as its activation function
-    predict = paddle.layer.fc(input=hidden2,
+    hidden = fluid.layers.fc(input=hidden, size=200, act='relu')
-                              size=10,
+    prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
-                              act=paddle.activation.Softmax())
+    return prediction
-    return predict
 ```
 - Convolution network LeNet-5: the input image is fed through two convolution-pooling layers, a fully-connected layer, and the softmax output layer:
 ```python
-def convolutional_neural_network(img):
+def convolutional_neural_network():
+    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
-    conv_pool_1 = paddle.networks.simple_img_conv_pool(
+    # first conv pool
+    conv_pool_1 = fluid.nets.simple_img_conv_pool(
        input=img,
        filter_size=5,
        num_filters=20,
-        num_channel=1,
        pool_size=2,
        pool_stride=2,
-        act=paddle.activation.Relu())
+        act="relu")
+    conv_pool_1 = fluid.layers.batch_norm(conv_pool_1)
-    conv_pool_2 = paddle.networks.simple_img_conv_pool(
+    # second conv pool
+    conv_pool_2 = fluid.nets.simple_img_conv_pool(
        input=conv_pool_1,
        filter_size=5,
        num_filters=50,
-        num_channel=20,
        pool_size=2,
        pool_stride=2,
-        act=paddle.activation.Relu())
+        act="relu")
+    # output layer with softmax activation function. size = 10 since there are only 10 possible digits.
-    predict = paddle.layer.fc(input=conv_pool_2,
+    prediction = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
-                              size=10,
+    return prediction
-                              act=paddle.activation.Softmax())
-    return predict
 ```
-PaddlePaddle provides a special layer `layer.data` for reading data. Let us create a data layer for reading images and connect it to a classification network created using one of above three functions.  We also need a cost layer for training the model.
+#### Train Program Configuration
+Then we need to set up the `train_program`. It takes the prediction from the classifier first. During the training, it will calculate the `avg_cost` from the prediction.
+Please feel free to modify the code to test different results between `softmax regression`, `mlp`, and `convolutional neural network` classifier.
 ```python
-paddle.init(use_gpu=False, trainer_count=1)
+def train_program():
+    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+    # predict = softmax_regression() # uncomment for Softmax
+    # predict = multilayer_perceptron() # uncomment for MLP
+    predict = convolutional_neural_network() # uncomment for LeNet5
+    # Calculate the cost from the prediction and label.
+    cost = fluid.layers.cross_entropy(input=predict, label=label)
+    avg_cost = fluid.layers.mean(cost)
+    acc = fluid.layers.accuracy(input=predict, label=label)
+    return [avg_cost, acc]
+```
-images = paddle.layer.data(
+### Data Feeders Configuration
-    name='pixel', type=paddle.data_type.dense_vector(784))
-label = paddle.layer.data(
-    name='label', type=paddle.data_type.integer_value(10))
-# predict = softmax_regression(images)
+Then we specify the training data `paddle.dataset.mnist.train()` and testing data `paddle.dataset.mnist.test()`. These two methods are *reader creators*. Once called, a reader creator returns a *reader*.  A reader is a Python method, which, once called, returns a Python generator, which yields instances of data.
-# predict = multilayer_perceptron(images) # uncomment for MLP
-predict = convolutional_neural_network(images) # uncomment for LeNet5
-cost = paddle.layer.classification_cost(input=predict, label=label)
+`shuffle` is a reader decorator. It takes a reader A as input and returns a new reader B. Under the hood, B calls A to read data in the following fashion: it copies in `buffer_size` instances at a time into a buffer, shuffles the data, and yields the shuffled instances one at a time. A large buffer size would yield very shuffled data.
-```
-Now, it is time to specify training parameters. In the following `Momentum` optimizer, `momentum=0.9` means that 90% of the current momentum comes from that of the previous iteration. The learning rate relates to the speed at which the network training converges. Regularization is meant to prevent over-fitting; here we use the L2 regularization.
+`batch` is a special decorator, which takes a reader and outputs a *batch reader*, which doesn't yield an instance, but a minibatch at a time.
 ```python
-parameters = paddle.parameters.create(cost)
+train_reader = paddle.batch(
+        paddle.reader.shuffle(
-optimizer = paddle.optimizer.Momentum(
+            paddle.dataset.mnist.train(), buf_size=500),
-    learning_rate=0.1 / 128.0,
+        batch_size=64)
-    momentum=0.9,
-    regularization=paddle.optimizer.L2Regularization(rate=0.0005 * 128))
-trainer = paddle.trainer.SGD(cost=cost,
+test_reader = paddle.batch(
-                             parameters=parameters,
+            paddle.dataset.mnist.test(), batch_size=64)
-                             update_equation=optimizer)
 ```
-Then we specify the training data `paddle.dataset.mnist.train()` and testing data `paddle.dataset.mnist.test()`. These two methods are *reader creators*. Once called, a reader creator returns a *reader*.  A reader is a Python method, which, once called, returns a Python generator, which yields instances of data.
+### Trainer Configuration
-`shuffle` is a reader decorator. It takes a reader A as input and returns a new reader B. Under the hood, B calls A to read data in the following fashion: it copies in `buffer_size` instances at a time into a buffer, shuffles the data, and yields the shuffled instances one at a time. A large buffer size would yield very shuffled data.
+Now, we need to set up the trainer. The trainer needs to take in `train_program`, `place`, and `optimizer`.
+In the following `Adam` optimizer, `learning_rate` relates to the speed at which the network training converges.
-`batch` is a special decorator, which takes a reader and outputs a *batch reader*, which doesn't yield an instance, but a minibatch at a time.
+```python
+use_cuda = False # set to True if training with GPU
+place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+optimizer = fluid.optimizer.Adam(learning_rate=0.001)
-`event_handler_plot` is used to plot a figure like below：
+trainer = fluid.Trainer(
+    train_func=train_program, place=place, optimizer=optimizer)
+ ```
-![png](./image/train_and_test.png)
+#### Event Handler
-```python
-from paddle.v2.plot import Ploter
-train_title = "Train cost"
-test_title = "Test cost"
-cost_ploter = Ploter(train_title, test_title)
-step = 0
-# event_handler to plot a figure
-def event_handler_plot(event):
-    global step
-    if isinstance(event, paddle.event.EndIteration):
-        if step % 100 == 0:
-            cost_ploter.append(train_title, step, event.cost)
-            cost_ploter.plot()
-        step += 1
-    if isinstance(event, paddle.event.EndPass):
-        # save parameters
-        with open('params_pass_%d.tar' % event.pass_id, 'w') as f:
-            trainer.save_parameter_to_tar(f)
-        result = trainer.test(reader=paddle.batch(
-            paddle.dataset.mnist.test(), batch_size=128))
-        cost_ploter.append(test_title, step, result.cost)
-```
 `event_handler` is used to plot some text data when training.
 ```python
-lists = []
+# Save the parameters into a directory. The Inferencer can load the parameters from it to run inference
+params_dirname = "recognize_digits_network.inference.model"
-# event handler to print the progress
 def event_handler(event):
-    if isinstance(event, paddle.event.EndIteration):
+    if isinstance(event, fluid.EndEpochEvent):
-        if event.batch_id % 100 == 0:
+        avg_cost, acc = trainer.test(
-            print "Pass %d, Batch %d, Cost %f, %s" % (
+            reader=test_reader, feed_order=['img', 'label'])
-                event.pass_id, event.batch_id, event.cost, event.metrics)
+        print("avg_cost: %s, acc: %s" % (avg_cost, acc))
-    if isinstance(event, paddle.event.EndPass):
+        trainer.save_params(params_dirname)
-        # save parameters
-        with open('params_pass_%d.tar' % event.pass_id, 'w') as f:
-            trainer.save_parameter_to_tar(f)
-        result = trainer.test(reader=paddle.batch(
-            paddle.dataset.mnist.test(), batch_size=128))
-        print "Test with Pass %d, Cost %f, %s\n" % (
-            event.pass_id, result.cost, result.metrics)
-        lists.append((event.pass_id, result.cost,
-                      result.metrics['classification_error_evaluator']))
 ```
+Now that we have set up the `event_handler` and the reader, we can start training the model. `feed_order` is used to map the data dict to the `train_program`.
 ```python
 # Train the model now
 trainer.train(
-    reader=paddle.batch(
+    num_epochs=1,
-        paddle.reader.shuffle(
+    event_handler=event_handler,
-            paddle.dataset.mnist.train(), buf_size=8192),
+    reader=train_reader,
-        batch_size=128),
+    feed_order=['img', 'label'])
-    event_handler=event_handler_plot,
-    num_passes=5)
 ```
 During training, `trainer.train` invokes `event_handler` for certain events. This gives us a chance to print the training progress.
-```
+ ```
-# Pass 0, Batch 0, Cost 2.780790, {'classification_error_evaluator': 0.9453125}
+ # Pass 0, Batch 0, Cost 2.780790, {'classification_error_evaluator': 0.9453125}
-# Pass 0, Batch 100, Cost 0.635356, {'classification_error_evaluator': 0.2109375}
+ # Pass 0, Batch 100, Cost 0.635356, {'classification_error_evaluator': 0.2109375}
-# Pass 0, Batch 200, Cost 0.326094, {'classification_error_evaluator': 0.1328125}
+ # Pass 0, Batch 200, Cost 0.326094, {'classification_error_evaluator': 0.1328125}
-# Pass 0, Batch 300, Cost 0.361920, {'classification_error_evaluator': 0.1015625}
+ # Pass 0, Batch 300, Cost 0.361920, {'classification_error_evaluator': 0.1015625}
-# Pass 0, Batch 400, Cost 0.410101, {'classification_error_evaluator': 0.125}
+ # Pass 0, Batch 400, Cost 0.410101, {'classification_error_evaluator': 0.125}
-# Test with Pass 0, Cost 0.326659, {'classification_error_evaluator': 0.09470000118017197}
+ # Test with Pass 0, Cost 0.326659, {'classification_error_evaluator': 0.09470000118017197}
-```
+ ```
 After the training, we can check the model's prediction accuracy.
@@ -315,27 +321,25 @@ Usually, with MNIST data, the softmax regression model achieves an accuracy arou
 ## Application
-After training, users can use the trained model to classify images. The following code shows how to inference MNIST images through `paddle.infer` interface.
+After training, users can use the trained model to classify images. The following code shows how to run inference on MNIST images through `fluid.Inferencer`.
 ```python
-from PIL import Image
+inferencer = fluid.Inferencer(
-import numpy as np
+    # infer_func=softmax_regression, # uncomment for softmax regression
-import os
+    # infer_func=multilayer_perceptron, # uncomment for MLP
-def load_image(file):
+    infer_func=convolutional_neural_network, # uncomment for LeNet5
-    im = Image.open(file).convert('L')
+    param_path=params_dirname,
-    im = im.resize((28, 28), Image.ANTIALIAS)
+    place=place)
-    im = np.array(im).astype(np.float32).flatten()
-    im = im / 255.0 * 2.0 - 1.0
+batch_size = 1
-    return im
+import numpy
+tensor_img = numpy.random.uniform(-1.0, 1.0,
-test_data = []
+                                  [batch_size, 1, 28, 28]).astype("float32")
-cur_dir = os.getcwd()
-test_data.append((load_image(cur_dir + '/image/infer_3.png'),))
+results = inferencer.infer({'img': tensor_img})
-probs = paddle.infer(
+print("infer results: ", results[0])
-    output_layer=predict, parameters=parameters, input=test_data)
-lab = np.argsort(-probs) # probs and lab are the results of one batch data
-print "Label of image/infer_3.png is: %d" % lab[0][0]
 ```

--- a/02.recognize_digits/index.cn.html
+++ b/02.recognize_digits/index.cn.html
@@ -171,17 +171,18 @@ PaddlePaddle在API中提供了自动加载[MNIST](http://yann.lecun.com/exdb/mni
 ## 配置说明
-首先，加载PaddlePaddle的V2 api包。
+首先，加载PaddlePaddle的fluid api包。
 ```python
-import paddle.v2 as paddle
+import paddle.fluid as fluid
 ```
 其次，定义三个不同的分类器：
 - Softmax回归：只通过一层简单的以softmax为激活函数的全连接层，就可以得到分类的结果。
 ```python
-def softmax_regression(img):
+def softmax_regression():
+    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
    predict = paddle.layer.fc(input=img,
                              size=10,
                              act=paddle.activation.Softmax())
@@ -190,64 +191,59 @@ def softmax_regression(img):
 - 多层感知器：下面代码实现了一个含有两个隐藏层（即全连接层）的多层感知器。其中两个隐藏层的激活函数均采用ReLU，输出层的激活函数用Softmax。
 ```python
-def multilayer_perceptron(img):
+def multilayer_perceptron():
+    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
    # 第一个全连接层，激活函数为ReLU
-    hidden1 = paddle.layer.fc(input=img, size=128, act=paddle.activation.Relu())
+    hidden = fluid.layers.fc(input=img, size=200, act='relu')
    # 第二个全连接层，激活函数为ReLU
-    hidden2 = paddle.layer.fc(input=hidden1,
+    hidden = fluid.layers.fc(input=hidden, size=200, act='relu')
-                              size=64,
-                              act=paddle.activation.Relu())
    # 以softmax为激活函数的全连接输出层，输出层的大小必须为数字的个数10
-    predict = paddle.layer.fc(input=hidden2,
+    prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
-                              size=10,
+    return prediction
-                              act=paddle.activation.Softmax())
-    return predict
 ```
 - 卷积神经网络LeNet-5: 输入的二维图像，首先经过两次卷积层到池化层，再经过全连接层，最后使用以softmax为激活函数的全连接层作为输出层。
 ```python
-def convolutional_neural_network(img):
+def convolutional_neural_network():
+    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
    # 第一个卷积-池化层
-    conv_pool_1 = paddle.networks.simple_img_conv_pool(
+    conv_pool_1 = fluid.nets.simple_img_conv_pool(
        input=img,
        filter_size=5,
        num_filters=20,
-        num_channel=1,
        pool_size=2,
        pool_stride=2,
-        act=paddle.activation.Relu())
+        act="relu")
+    conv_pool_1 = fluid.layers.batch_norm(conv_pool_1)
    # 第二个卷积-池化层
-    conv_pool_2 = paddle.networks.simple_img_conv_pool(
+    conv_pool_2 = fluid.nets.simple_img_conv_pool(
        input=conv_pool_1,
        filter_size=5,
        num_filters=50,
-        num_channel=20,
        pool_size=2,
        pool_stride=2,
-        act=paddle.activation.Relu())
+        act="relu")
    # 以softmax为激活函数的全连接输出层，输出层的大小必须为数字的个数10
-    predict = paddle.layer.fc(input=conv_pool_2,
+    prediction = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
-                              size=10,
+    return prediction
-                              act=paddle.activation.Softmax())
-    return predict
 ```
 接着，通过`layer.data`调用来获取数据，然后调用分类器（这里我们提供了三个不同的分类器）得到分类结果。训练时，对该结果计算其损失函数，分类问题常常选择交叉熵损失函数。
 ```python
-# 该模型运行在单个CPU上
+def train_program():
-paddle.init(use_gpu=False, trainer_count=1)
+    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
-images = paddle.layer.data(
+    # predict = softmax_regression() # uncomment for Softmax回归
-    name='pixel', type=paddle.data_type.dense_vector(784))
+    # predict = multilayer_perceptron() # uncomment for 多层感知器
-label = paddle.layer.data(
+    predict = convolutional_neural_network() # uncomment for LeNet5卷积神经网络
-    name='label', type=paddle.data_type.integer_value(10))
+    cost = fluid.layers.cross_entropy(input=predict, label=label)
+    avg_cost = fluid.layers.mean(cost)
+    acc = fluid.layers.accuracy(input=predict, label=label)
+    return [avg_cost, acc]
-# predict = softmax_regression(images) # Softmax回归
-# predict = multilayer_perceptron(images) #多层感知器
-predict = convolutional_neural_network(images) #LeNet5卷积神经网络
-cost = paddle.layer.classification_cost(input=predict, label=label)
+# 该模型运行在单个CPU上
 ```
 然后，指定训练相关的参数。
@@ -256,16 +252,16 @@ cost = paddle.layer.classification_cost(input=predict, label=label)
 - 正则化（regularization）： 是防止网络过拟合的一种手段，此处采用L2正则化。
 ```python
-parameters = paddle.parameters.create(cost)
+# 该模型运行在单个CPU上
+use_cuda = False # set to True if training with GPU
+place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
 optimizer = paddle.optimizer.Momentum(
    learning_rate=0.1 / 128.0,
    momentum=0.9,
    regularization=paddle.optimizer.L2Regularization(rate=0.0005 * 128))
-trainer = paddle.trainer.SGD(cost=cost,
+trainer = fluid.Trainer(
-                             parameters=parameters,
+    train_func=train_program, place=place, optimizer=optimizer)
-                             update_equation=optimizer)
 ```
 下一步，我们开始训练过程。`paddle.dataset.mnist.train()`和`paddle.dataset.mnist.test()`分别做训练和测试数据集。这两个函数各自返回一个reader——PaddlePaddle中的reader是一个Python函数，每次调用的时候返回一个Python yield generator。
@@ -274,38 +270,18 @@ trainer = paddle.trainer.SGD(cost=cost,
 `batch`是一个特殊的decorator，它的输入是一个reader，输出是一个batched reader —— 在PaddlePaddle里，一个reader每次yield一条训练数据，而一个batched reader每次yield一个minibatch。
-`event_handler_plot`可以用来在训练过程中画图如下：
-![png](./image/train_and_test.png)
 ```python
-from paddle.v2.plot import Ploter
+train_reader = paddle.batch(
+        paddle.reader.shuffle(
-train_title = "Train cost"
+            paddle.dataset.mnist.train(), buf_size=500),
-test_title = "Test cost"
+        batch_size=64)
-cost_ploter = Ploter(train_title, test_title)
-step = 0
-# event_handler to plot a figure
-def event_handler_plot(event):
-    global step
-    if isinstance(event, paddle.event.EndIteration):
-        if step % 100 == 0:
-            cost_ploter.append(train_title, step, event.cost)
-            cost_ploter.plot()
-        step += 1
-    if isinstance(event, paddle.event.EndPass):
-        # save parameters
-        with open('params_pass_%d.tar' % event.pass_id, 'w') as f:
-            trainer.save_parameter_to_tar(f)
-        result = trainer.test(reader=paddle.batch(
+test_reader = paddle.batch(
-            paddle.dataset.mnist.test(), batch_size=128))
+            paddle.dataset.mnist.test(), batch_size=64)
-        cost_ploter.append(test_title, step, result.cost)
 ```
 `event_handler` 用来在训练过程中输出训练结果
 ```python
 lists = []
@@ -327,14 +303,14 @@ def event_handler(event):
                      result.metrics['classification_error_evaluator']))
 ```
+Now that we have set up the event_handler and the reader, we can start training the model. `feed_order` is used to map the data yielded by the reader to the data layers of the train_program.
 ```python
 trainer.train(
-    reader=paddle.batch(
+    num_epochs=1,
-        paddle.reader.shuffle(
+    event_handler=event_handler,
-            paddle.dataset.mnist.train(), buf_size=8192),
+    reader=train_reader,
-        batch_size=128),
+    feed_order=['img', 'label'])
-    event_handler=event_handler_plot,
-    num_passes=5)
 ```
 训练过程是完全自动的，event_handler里打印的日志类似如下所示：
@@ -353,27 +329,25 @@ trainer.train(
 ## 应用模型
-可以使用训练好的模型对手写体数字图片进行分类，下面程序展示了如何使用paddle.infer接口进行推断。
+可以使用训练好的模型对手写体数字图片进行分类，下面程序展示了如何使用 `fluid.Inferencer` 接口进行推断。
 ```python
-from PIL import Image
+inferencer = fluid.Inferencer(
-import numpy as np
+    # infer_func=softmax_regression, # uncomment for softmax regression
-import os
+    # infer_func=multilayer_perceptron, # uncomment for MLP
-def load_image(file):
+    infer_func=convolutional_neural_network, # uncomment for LeNet5
-    im = Image.open(file).convert('L')
+    param_path=params_dirname,
-    im = im.resize((28, 28), Image.ANTIALIAS)
+    place=place)
-    im = np.array(im).astype(np.float32).flatten()
-    im = im / 255.0 * 2.0 - 1.0
+batch_size = 1
-    return im
+import numpy
+tensor_img = numpy.random.uniform(-1.0, 1.0,
-test_data = []
+                                  [batch_size, 1, 28, 28]).astype("float32")
-cur_dir = os.getcwd()
-test_data.append((load_image(cur_dir + '/image/infer_3.png'),))
+results = inferencer.infer({'img': tensor_img})
-probs = paddle.infer(
+print("infer results: ", results[0])
-    output_layer=predict, parameters=parameters, input=test_data)
-lab = np.argsort(-probs) # probs and lab are the results of one batch data
-print "Label of image/infer_3.png is: %d" % lab[0][0]
 ```
 ## 总结

--- a/02.recognize_digits/index.html
+++ b/02.recognize_digits/index.html
@@ -42,19 +42,31 @@
 <div id="markdown" style='display:none'>
 # Recognize Digits
-The source code for this tutorial is here:  [book/recognize_digits](https://github.com/PaddlePaddle/book/tree/develop/02.recognize_digits). For instructions on getting started with Paddle, please refer to [installation instructions](https://github.com/PaddlePaddle/book/blob/develop/README.md#running-the-book).
+The source code for this tutorial is here:  [book/recognize_digits](https://github.com/PaddlePaddle/book/tree/develop/02.recognize_digits).
+For instructions on getting started with Paddle, please refer to [installation instructions](https://github.com/PaddlePaddle/book/blob/develop/README.md#running-the-book).
 ## Introduction
-When one learns to program, the first task is usually to write a program that prints "Hello World!". In Machine Learning or Deep Learning, an equivalent task is to train a model to recognize hand-written digits using the [MNIST](http://yann.lecun.com/exdb/mnist/) dataset. Handwriting recognition is a classic image classification problem. The problem is relatively easy and MNIST is a complete dataset. As a simple Computer Vision dataset, MNIST contains images of handwritten digits and their corresponding labels (Fig. 1). The input image is a $28\times28$ matrix, and the label is one of the digits from $0$ to $9$. All images are normalized, meaning that they are both rescaled and centered.
+When one learns to program, the first task is usually to write a program that prints "Hello World!".
+In Machine Learning or Deep Learning, an equivalent task is to train a model to recognize hand-written digits using the [MNIST](http://yann.lecun.com/exdb/mnist/) dataset.
+Handwriting recognition is a classic image classification problem. The problem is relatively easy and MNIST is a complete dataset.
+As a simple Computer Vision dataset, MNIST contains images of handwritten digits and their corresponding labels (Fig. 1).
+The input image is a $28\times28$ matrix, and the label is one of the digits from $0$ to $9$. All images are normalized, meaning that they are both rescaled and centered.
 <p align="center">
 <img src="image/mnist_example_image.png" width="400"><br/>
 Fig. 1. Examples of MNIST images
 </p>
-The MNIST dataset is from the [NIST](https://www.nist.gov/srd/nist-special-database-19) Special Database 3 (SD-3) and the Special Database 1 (SD-1). The SD-3 is labeled by the staff of the U.S. Census Bureau, while SD-1 is labeled by high school students. Therefore the SD-3 is cleaner and easier to recognize than the SD-1 dataset. Yann LeCun et al. used half of the samples from each of SD-1 and SD-3 to create the MNIST training set of 60,000 samples and test set of 10,000 samples. 250 annotators labeled the training set, thus guaranteed that there wasn't a complete overlap of annotators of training set and test set.
+The MNIST dataset is from the [NIST](https://www.nist.gov/srd/nist-special-database-19) Special Database 3 (SD-3) and the Special Database 1 (SD-1).
+The SD-3 is labeled by the staff of the U.S. Census Bureau, while SD-1 is labeled by high school students. Therefore the SD-3 is cleaner and easier to recognize than the SD-1 dataset.
+Yann LeCun et al. used half of the samples from each of SD-1 and SD-3 to create the MNIST training set of 60,000 samples and test set of 10,000 samples.
+250 annotators labeled the training set, thus guaranteeing that there wasn't a complete overlap between the annotators of the training set and those of the test set.
-The MNIST dataset has been used for evaluating many image recognition algorithms such as a single layer linear classifier, Multilayer Perceptron (MLP) and Multilayer CNN LeNet\[[1](#references)\], K-Nearest Neighbors (k-NN) \[[2](#references)\], Support Vector Machine (SVM) \[[3](#references)\], Neural Networks \[[4-7](#references)\], Boosting \[[8](#references)\] and preprocessing methods like distortion removal, noise removal, and blurring.  Among these algorithms, the *Convolutional Neural Network* (CNN) has achieved a series of impressive results in Image Classification tasks, including VGGNet, GoogLeNet, and ResNet (See [Image Classification](https://github.com/PaddlePaddle/book/tree/develop/03.image_classification) tutorial).
+The MNIST dataset has been used for evaluating many image recognition algorithms such as a single layer linear classifier,
+Multilayer Perceptron (MLP) and Multilayer CNN LeNet\[[1](#references)\], K-Nearest Neighbors (k-NN) \[[2](#references)\], Support Vector Machine (SVM) \[[3](#references)\],
+Neural Networks \[[4-7](#references)\], Boosting \[[8](#references)\] and preprocessing methods like distortion removal, noise removal, and blurring.
+Among these algorithms, the *Convolutional Neural Network* (CNN) has achieved a series of impressive results in Image Classification tasks, including VGGNet, GoogLeNet,
+and ResNet (See [Image Classification](https://github.com/PaddlePaddle/book/tree/develop/03.image_classification) tutorial).
 In this tutorial, we start with a simple **softmax** regression model and go on with MLP and CNN.  Readers will see how these methods improve the recognition accuracy step-by-step.
@@ -166,183 +178,177 @@ PaddlePaddle provides a Python module, `paddle.dataset.mnist`, which downloads a
 |t10k-labels-idx1-ubyte |  Evaluation labels | 10,000 |
+## Fluid API Overview
+This demo uses the Fluid API, the latest PaddlePaddle API. It simplifies model configuration without sacrificing performance.
+We recommend using Fluid API as it is much easier to pick up.
+Here is a quick overview of the major Fluid API components.
+1. `inference_program`: A function that specifies how to get the prediction from the data input.
+This is where you specify the network flow.
+1. `train_program`: A function that specifies how to get avg_cost from `inference_program` and labels.
+This is where you specify the loss calculations.
+1. `optimizer`: Configure how to minimize the loss. Paddle supports most major optimization methods.
+1. `Trainer`: Fluid trainer manages the training process specified by the `train_program` and `optimizer`. Users can monitor the training
+progress through the `event_handler` callback function.
+1. `Inferencer`: Fluid inferencer loads the `inference_program` and the parameters trained by the Trainer.
+It can then run inference on the data and return the prediction.
+We will go through all of them and dig deeper into the configurations in this demo.
 ## Model Configuration
 A PaddlePaddle program starts from importing the API package:
 ```python
-import paddle.v2 as paddle
+import paddle
+import paddle.fluid as fluid
 ```
-We want to use this program to demonstrate three different classifiers, each defined as a Python function:
+### Program Functions Configuration
+First, we need to set up the `inference_program` function. We want to use this program to demonstrate three different classifiers, each defined as a Python function.
+We need to feed image data to the classifier. PaddlePaddle provides a special layer `layer.data` for reading data.
+Let us create a data layer for reading images and connect it to the classification network.
 - Softmax regression: the network has a fully-connected layer with softmax activation:
 ```python
-def softmax_regression(img):
+def softmax_regression():
+    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
    predict = paddle.layer.fc(input=img,
                              size=10,
                              act=paddle.activation.Softmax())
    return predict
 ```
- Multi-Layer Perceptron: this network has two hidden fully-connected layers, one with ReLU and the other with softmax activation:
+- Multi-Layer Perceptron: this network has two hidden fully-connected layers, both using ReLU as the activation function. The output layer uses softmax activation:
 ```python
-def multilayer_perceptron(img):
+def multilayer_perceptron():
-    hidden1 = paddle.layer.fc(input=img, size=128, act=paddle.activation.Relu())
+    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
-    hidden2 = paddle.layer.fc(input=hidden1,
+    # first fully-connected layer, using ReLu as its activation function
-                              size=64,
+    hidden = fluid.layers.fc(input=img, size=200, act='relu')
-                              act=paddle.activation.Relu())
+    # second fully-connected layer, using ReLu as its activation function
-    predict = paddle.layer.fc(input=hidden2,
+    hidden = fluid.layers.fc(input=hidden, size=200, act='relu')
-                              size=10,
+    prediction = fluid.layers.fc(input=hidden, size=10, act='softmax')
-                              act=paddle.activation.Softmax())
+    return prediction
-    return predict
 ```
 - Convolution network LeNet-5: the input image is fed through two convolution-pooling layers, a fully-connected layer, and the softmax output layer:
 ```python
-def convolutional_neural_network(img):
+def convolutional_neural_network():
+    img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
-    conv_pool_1 = paddle.networks.simple_img_conv_pool(
+    # first conv pool
+    conv_pool_1 = fluid.nets.simple_img_conv_pool(
        input=img,
        filter_size=5,
        num_filters=20,
-        num_channel=1,
        pool_size=2,
        pool_stride=2,
-        act=paddle.activation.Relu())
+        act="relu")
+    conv_pool_1 = fluid.layers.batch_norm(conv_pool_1)
-    conv_pool_2 = paddle.networks.simple_img_conv_pool(
+    # second conv pool
+    conv_pool_2 = fluid.nets.simple_img_conv_pool(
        input=conv_pool_1,
        filter_size=5,
        num_filters=50,
-        num_channel=20,
        pool_size=2,
        pool_stride=2,
-        act=paddle.activation.Relu())
+        act="relu")
+    # output layer with softmax activation function. size = 10 since there are only 10 possible digits.
-    predict = paddle.layer.fc(input=conv_pool_2,
+    prediction = fluid.layers.fc(input=conv_pool_2, size=10, act='softmax')
-                              size=10,
+    return prediction
-                              act=paddle.activation.Softmax())
-    return predict
 ```
-PaddlePaddle provides a special layer `layer.data` for reading data. Let us create a data layer for reading images and connect it to a classification network created using one of above three functions.  We also need a cost layer for training the model.
+#### Train Program Configuration
+Then we need to set up the `train_program`. It first takes the prediction from the classifier. During training, it calculates the `avg_cost` from the prediction.
+Please feel free to modify the code to test different results between `softmax regression`, `mlp`, and `convolutional neural network` classifier.
 ```python
-paddle.init(use_gpu=False, trainer_count=1)
+def train_program():
+    label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+    # predict = softmax_regression() # uncomment for Softmax
+    # predict = multilayer_perceptron() # uncomment for MLP
+    predict = convolutional_neural_network() # uncomment for LeNet5
+    # Calculate the cost from the prediction and label.
+    cost = fluid.layers.cross_entropy(input=predict, label=label)
+    avg_cost = fluid.layers.mean(cost)
+    acc = fluid.layers.accuracy(input=predict, label=label)
+    return [avg_cost, acc]
+```
-images = paddle.layer.data(
+### Data Feeders Configuration
-    name='pixel', type=paddle.data_type.dense_vector(784))
-label = paddle.layer.data(
-    name='label', type=paddle.data_type.integer_value(10))
-# predict = softmax_regression(images)
+Then we specify the training data `paddle.dataset.mnist.train()` and testing data `paddle.dataset.mnist.test()`. These two methods are *reader creators*. Once called, a reader creator returns a *reader*.  A reader is a Python method, which, once called, returns a Python generator, which yields instances of data.
-# predict = multilayer_perceptron(images) # uncomment for MLP
-predict = convolutional_neural_network(images) # uncomment for LeNet5
-cost = paddle.layer.classification_cost(input=predict, label=label)
+`shuffle` is a reader decorator. It takes a reader A as input and returns a new reader B. Under the hood, B calls A to read data in the following fashion: it copies in `buf_size` instances at a time into a buffer, shuffles the data, and yields the shuffled instances one at a time. A larger buffer size yields more thoroughly shuffled data.
-```
-Now, it is time to specify training parameters. In the following `Momentum` optimizer, `momentum=0.9` means that 90% of the current momentum comes from that of the previous iteration. The learning rate relates to the speed at which the network training converges. Regularization is meant to prevent over-fitting; here we use the L2 regularization.
+`batch` is a special decorator, which takes a reader and outputs a *batch reader*, which doesn't yield an instance, but a minibatch at a time.
 ```python
-parameters = paddle.parameters.create(cost)
+train_reader = paddle.batch(
+        paddle.reader.shuffle(
-optimizer = paddle.optimizer.Momentum(
+            paddle.dataset.mnist.train(), buf_size=500),
-    learning_rate=0.1 / 128.0,
+        batch_size=64)
-    momentum=0.9,
-    regularization=paddle.optimizer.L2Regularization(rate=0.0005 * 128))
-trainer = paddle.trainer.SGD(cost=cost,
+test_reader = paddle.batch(
-                             parameters=parameters,
+            paddle.dataset.mnist.test(), batch_size=64)
-                             update_equation=optimizer)
 ```
-Then we specify the training data `paddle.dataset.mnist.train()` and testing data `paddle.dataset.mnist.test()`. These two methods are *reader creators*. Once called, a reader creator returns a *reader*.  A reader is a Python method, which, once called, returns a Python generator, which yields instances of data.
+### Trainer Configuration
-`shuffle` is a reader decorator. It takes a reader A as input and returns a new reader B. Under the hood, B calls A to read data in the following fashion: it copies in `buffer_size` instances at a time into a buffer, shuffles the data, and yields the shuffled instances one at a time. A large buffer size would yield very shuffled data.
+Now, we need to set up the trainer. The trainer needs to take in `train_program`, `place`, and `optimizer`.
+In the following `Adam` optimizer, `learning_rate` controls the speed at which the network training converges.
-`batch` is a special decorator, which takes a reader and outputs a *batch reader*, which doesn't yield an instance, but a minibatch at a time.
+```python
+use_cuda = False # set to True if training with GPU
+place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace()
+optimizer = fluid.optimizer.Adam(learning_rate=0.001)
-`event_handler_plot` is used to plot a figure like below：
+trainer = fluid.Trainer(
+    train_func=train_program, place=place, optimizer=optimizer)
+ ```
-![png](./image/train_and_test.png)
+#### Event Handler
-```python
-from paddle.v2.plot import Ploter
-train_title = "Train cost"
-test_title = "Test cost"
-cost_ploter = Ploter(train_title, test_title)
-step = 0
-# event_handler to plot a figure
-def event_handler_plot(event):
-    global step
-    if isinstance(event, paddle.event.EndIteration):
-        if step % 100 == 0:
-            cost_ploter.append(train_title, step, event.cost)
-            cost_ploter.plot()
-        step += 1
-    if isinstance(event, paddle.event.EndPass):
-        # save parameters
-        with open('params_pass_%d.tar' % event.pass_id, 'w') as f:
-            trainer.save_parameter_to_tar(f)
-        result = trainer.test(reader=paddle.batch(
-            paddle.dataset.mnist.test(), batch_size=128))
-        cost_ploter.append(test_title, step, result.cost)
-```
 `event_handler` is used to print some text output during training.
 ```python
-lists = []
+# Save the parameters into a directory. The Inferencer can load the parameters from it to run inference.
+params_dirname = "recognize_digits_network.inference.model"
-# event handler to print the progress
 def event_handler(event):
-    if isinstance(event, paddle.event.EndIteration):
+    if isinstance(event, fluid.EndEpochEvent):
-        if event.batch_id % 100 == 0:
+        avg_cost, acc = trainer.test(
-            print "Pass %d, Batch %d, Cost %f, %s" % (
+            reader=test_reader, feed_order=['img', 'label'])
-                event.pass_id, event.batch_id, event.cost, event.metrics)
+        print("avg_cost: %s, acc: %s" % (avg_cost, acc))
-    if isinstance(event, paddle.event.EndPass):
+        trainer.save_params(params_dirname)
-        # save parameters
-        with open('params_pass_%d.tar' % event.pass_id, 'w') as f:
-            trainer.save_parameter_to_tar(f)
-        result = trainer.test(reader=paddle.batch(
-            paddle.dataset.mnist.test(), batch_size=128))
-        print "Test with Pass %d, Cost %f, %s\n" % (
-            event.pass_id, result.cost, result.metrics)
-        lists.append((event.pass_id, result.cost,
-                      result.metrics['classification_error_evaluator']))
 ```
+Now that we have set up the event_handler and the reader, we can start training the model. `feed_order` is used to map the data yielded by the reader to the data layers of the train_program.
 ```python
 # Train the model now
 trainer.train(
-    reader=paddle.batch(
+    num_epochs=1,
-        paddle.reader.shuffle(
+    event_handler=event_handler,
-            paddle.dataset.mnist.train(), buf_size=8192),
+    reader=train_reader,
-        batch_size=128),
+    feed_order=['img', 'label'])
-    event_handler=event_handler_plot,
-    num_passes=5)
 ```
 During training, `trainer.train` invokes `event_handler` for certain events. This gives us a chance to print the training progress.
-```
+ ```
-# Pass 0, Batch 0, Cost 2.780790, {'classification_error_evaluator': 0.9453125}
+ # Pass 0, Batch 0, Cost 2.780790, {'classification_error_evaluator': 0.9453125}
-# Pass 0, Batch 100, Cost 0.635356, {'classification_error_evaluator': 0.2109375}
+ # Pass 0, Batch 100, Cost 0.635356, {'classification_error_evaluator': 0.2109375}
-# Pass 0, Batch 200, Cost 0.326094, {'classification_error_evaluator': 0.1328125}
+ # Pass 0, Batch 200, Cost 0.326094, {'classification_error_evaluator': 0.1328125}
-# Pass 0, Batch 300, Cost 0.361920, {'classification_error_evaluator': 0.1015625}
+ # Pass 0, Batch 300, Cost 0.361920, {'classification_error_evaluator': 0.1015625}
-# Pass 0, Batch 400, Cost 0.410101, {'classification_error_evaluator': 0.125}
+ # Pass 0, Batch 400, Cost 0.410101, {'classification_error_evaluator': 0.125}
-# Test with Pass 0, Cost 0.326659, {'classification_error_evaluator': 0.09470000118017197}
+ # Test with Pass 0, Cost 0.326659, {'classification_error_evaluator': 0.09470000118017197}
-```
+ ```
 After the training, we can check the model's prediction accuracy.
@@ -357,27 +363,25 @@ Usually, with MNIST data, the softmax regression model achieves an accuracy arou
 ## Application
-After training, users can use the trained model to classify images. The following code shows how to inference MNIST images through `paddle.infer` interface.
+After training, users can use the trained model to classify images. The following code shows how to run inference on MNIST images through `fluid.Inferencer`.
 ```python
-from PIL import Image
+inferencer = fluid.Inferencer(
-import numpy as np
+    # infer_func=softmax_regression, # uncomment for softmax regression
-import os
+    # infer_func=multilayer_perceptron, # uncomment for MLP
-def load_image(file):
+    infer_func=convolutional_neural_network, # uncomment for LeNet5
-    im = Image.open(file).convert('L')
+    param_path=params_dirname,
-    im = im.resize((28, 28), Image.ANTIALIAS)
+    place=place)
-    im = np.array(im).astype(np.float32).flatten()
-    im = im / 255.0 * 2.0 - 1.0
+batch_size = 1
-    return im
+import numpy
+tensor_img = numpy.random.uniform(-1.0, 1.0,
-test_data = []
+                                  [batch_size, 1, 28, 28]).astype("float32")
-cur_dir = os.getcwd()
-test_data.append((load_image(cur_dir + '/image/infer_3.png'),))
+results = inferencer.infer({'img': tensor_img})
-probs = paddle.infer(
+print("infer results: ", results[0])
-    output_layer=predict, parameters=parameters, input=test_data)
-lab = np.argsort(-probs) # probs and lab are the results of one batch data
-print "Label of image/infer_3.png is: %d" % lab[0][0]
 ```