solve conflict in 01

e89151d4 · JiabinYang · 90988fbe · 804beca1 · e89151d4 · e89151d4
27 changed files
--- a/01.fit_a_line/README.cn.md
+++ b/01.fit_a_line/README.cn.md
@@ -100,6 +100,7 @@ $$MSE=\frac{1}{n}\sum_{i=1}^{n}{(\hat{Y_i}-Y_i)}^2$$
+from __future__ import print_function
 import paddle
 import paddle.fluid as fluid
 import numpy
 ```

 我们通过uci_housing模块引入了数据集合[UCI Housing Data Set](https://archive.ics.uci.edu/ml/datasets/Housing)
@@ -251,10 +252,20 @@ inferencer = fluid.Inferencer(
    infer_func=inference_program, param_path=params_dirname, place=place)

 batch_size = 10
-tensor_x = numpy.random.uniform(0, 10, [batch_size, 13]).astype("float32")
+test_reader = paddle.batch(paddle.dataset.uci_housing.test(),batch_size=batch_size)
+test_data = next(test_reader())
+test_feat = numpy.array([data[0] for data in test_data]).astype("float32")
+test_label = numpy.array([data[1] for data in test_data]).astype("float32")

-results = inferencer.infer({'x': tensor_x})
-print("infer results: ", results[0])
+results = inferencer.infer({'x': test_feat})
+
+print("infer results: (House Price)")
+for k in range(batch_size):
+    print("%d. %f" % (k, results[0][k]))
+
+print("\nground truth:")
+for k in range(batch_size):
+    print("%d. %f" % (k, test_label[k]))
 ```

 ## 总结

--- a/01.fit_a_line/README.md
+++ b/01.fit_a_line/README.md
@@ -108,6 +108,7 @@ Our program starts with importing necessary packages:
+from __future__ import print_function
 import paddle
 import paddle.fluid as fluid
 import numpy
 ```

 We encapsulated the [UCI Housing Data Set](https://archive.ics.uci.edu/ml/datasets/Housing) in our Python module `uci_housing`.  This module can
@@ -189,7 +190,7 @@ feed_order=['x', 'y']
 Moreover, an event handler is provided to print the training progress:

 ```python
-# Specify the directory path to save the parameters
+# Specify the directory to save the parameters
 params_dirname = "fit_a_line.inference.model"

 # Plot data
@@ -200,11 +201,11 @@ plot_cost = Ploter(train_title, test_title)

 step = 0

-# event_handler to print training and testing info
+# event_handler prints training and testing info
 def event_handler_plot(event):
    global step
    if isinstance(event, fluid.EndStepEvent):
-        if event.step % 10 == 0: # every 10 batches, record a test cost
+        if event.step % 10 == 0:  # record a test cost every 10 batches
            test_metrics = trainer.test(
                reader=test_reader, feed_order=feed_order)

@@ -263,10 +264,20 @@ inferencer = fluid.Inferencer(
    infer_func=inference_program, param_path=params_dirname, place=place)

 batch_size = 10
-tensor_x = numpy.random.uniform(0, 10, [batch_size, 13]).astype("float32")
+test_reader = paddle.batch(paddle.dataset.uci_housing.test(),batch_size=batch_size)
+test_data = next(test_reader())
+test_feat = numpy.array([data[0] for data in test_data]).astype("float32")
+test_label = numpy.array([data[1] for data in test_data]).astype("float32")

-results = inferencer.infer({'x': tensor_x})
-print("infer results: ", results[0])
+results = inferencer.infer({'x': test_feat})
+
+print("infer results: (House Price)")
+for k in range(batch_size):
+    print("%d. %f" % (k, results[0][k]))
+
+print("\nground truth:")
+for k in range(batch_size):
+    print("%d. %f" % (k, test_label[k]))
 ```

 ## Summary

--- a/01.fit_a_line/index.cn.html
+++ b/01.fit_a_line/index.cn.html
@@ -142,6 +142,7 @@ $$MSE=\frac{1}{n}\sum_{i=1}^{n}{(\hat{Y_i}-Y_i)}^2$$
+from __future__ import print_function
 import paddle
 import paddle.fluid as fluid
 import numpy
 ```

 我们通过uci_housing模块引入了数据集合[UCI Housing Data Set](https://archive.ics.uci.edu/ml/datasets/Housing)
@@ -293,10 +294,20 @@ inferencer = fluid.Inferencer(
    infer_func=inference_program, param_path=params_dirname, place=place)

 batch_size = 10
-tensor_x = numpy.random.uniform(0, 10, [batch_size, 13]).astype("float32")
+test_reader = paddle.batch(paddle.dataset.uci_housing.test(),batch_size=batch_size)
+test_data = next(test_reader())
+test_feat = numpy.array([data[0] for data in test_data]).astype("float32")
+test_label = numpy.array([data[1] for data in test_data]).astype("float32")

-results = inferencer.infer({'x': tensor_x})
-print("infer results: ", results[0])
+results = inferencer.infer({'x': test_feat})
+
+print("infer results: (House Price)")
+for k in range(batch_size):
+    print("%d. %f" % (k, results[0][k]))
+
+print("\nground truth:")
+for k in range(batch_size):
+    print("%d. %f" % (k, test_label[k]))
 ```

 ## 总结

--- a/01.fit_a_line/index.html
+++ b/01.fit_a_line/index.html
@@ -150,6 +150,7 @@ Our program starts with importing necessary packages:
+from __future__ import print_function
 import paddle
 import paddle.fluid as fluid
 import numpy
 ```

 We encapsulated the [UCI Housing Data Set](https://archive.ics.uci.edu/ml/datasets/Housing) in our Python module `uci_housing`.  This module can
@@ -231,7 +232,7 @@ feed_order=['x', 'y']
 Moreover, an event handler is provided to print the training progress:

 ```python
-# Specify the directory path to save the parameters
+# Specify the directory to save the parameters
 params_dirname = "fit_a_line.inference.model"

 # Plot data
@@ -242,11 +243,11 @@ plot_cost = Ploter(train_title, test_title)

 step = 0

-# event_handler to print training and testing info
+# event_handler prints training and testing info
 def event_handler_plot(event):
    global step
    if isinstance(event, fluid.EndStepEvent):
-        if event.step % 10 == 0: # every 10 batches, record a test cost
+        if event.step % 10 == 0:  # record a test cost every 10 batches
            test_metrics = trainer.test(
                reader=test_reader, feed_order=feed_order)

@@ -305,10 +306,20 @@ inferencer = fluid.Inferencer(
    infer_func=inference_program, param_path=params_dirname, place=place)

 batch_size = 10
-tensor_x = numpy.random.uniform(0, 10, [batch_size, 13]).astype("float32")
+test_reader = paddle.batch(paddle.dataset.uci_housing.test(),batch_size=batch_size)
+test_data = next(test_reader())
+test_feat = numpy.array([data[0] for data in test_data]).astype("float32")
+test_label = numpy.array([data[1] for data in test_data]).astype("float32")

-results = inferencer.infer({'x': tensor_x})
-print("infer results: ", results[0])
+results = inferencer.infer({'x': test_feat})
+
+print("infer results: (House Price)")
+for k in range(batch_size):
+    print("%d. %f" % (k, results[0][k]))
+
+print("\nground truth:")
+for k in range(batch_size):
+    print("%d. %f" % (k, test_label[k]))
 ```

 ## Summary

--- a/03.image_classification/README.cn.md
+++ b/03.image_classification/README.cn.md
@@ -160,6 +160,7 @@ import paddle
 import paddle.fluid as fluid
 import numpy
 import sys
+from __future__ import print_function
 ```

 本教程中我们提供了VGG和ResNet两个模型的配置。
@@ -426,7 +427,7 @@ def event_handler(event):

 通过`trainer.train`函数训练:

-**注意:** CPU，每个 Epoch 将花费大约15～20分钟。这部分可能需要一段时间。请随意修改代码，在GPU上运行测试，以提高培训速度。
+**注意:** CPU，每个 Epoch 将花费大约15～20分钟。这部分可能需要一段时间。请随意修改代码，在GPU上运行测试，以提高训练速度。

 ```python
 trainer.train(
@@ -499,10 +500,10 @@ img = load_image(cur_dir + '/image/dog.png')
 ```python
 inferencer = fluid.Inferencer(
    infer_func=inference_program, param_path=params_dirname, place=place)
-
+label_list = ["airplane", "automobile", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck"]
 # inference
 results = inferencer.infer({'pixel': img})
-print("infer results: ", results)
+print("infer results: %s" % label_list[numpy.argmax(results[0])])
 ```

 ## 总结

--- a/03.image_classification/README.md
+++ b/03.image_classification/README.md
@@ -171,6 +171,7 @@ import paddle
 import paddle.fluid as fluid
 import numpy
 import sys
+from __future__ import print_function
 ```

 Now we are going to walk you through the implementations of the VGG and ResNet.
@@ -514,9 +515,10 @@ Now we are ready to do inference.
 inferencer = fluid.Inferencer(
    infer_func=inference_program, param_path=params_dirname, place=place)

+label_list = ["airplane", "automobile", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck"]
 # inference
 results = inferencer.infer({'pixel': img})
-print("infer results: ", results)
+print("infer results: %s" % label_list[numpy.argmax(results[0])])
 ```



--- a/03.image_classification/index.cn.html
+++ b/03.image_classification/index.cn.html
@@ -202,6 +202,7 @@ import paddle
 import paddle.fluid as fluid
 import numpy
 import sys
+from __future__ import print_function
 ```

 本教程中我们提供了VGG和ResNet两个模型的配置。
@@ -468,7 +469,7 @@ def event_handler(event):

 通过`trainer.train`函数训练:

-**注意:** CPU，每个 Epoch 将花费大约15～20分钟。这部分可能需要一段时间。请随意修改代码，在GPU上运行测试，以提高培训速度。
+**注意:** CPU，每个 Epoch 将花费大约15～20分钟。这部分可能需要一段时间。请随意修改代码，在GPU上运行测试，以提高训练速度。

 ```python
 trainer.train(
@@ -541,10 +542,10 @@ img = load_image(cur_dir + '/image/dog.png')
 ```python
 inferencer = fluid.Inferencer(
    infer_func=inference_program, param_path=params_dirname, place=place)
-
+label_list = ["airplane", "automobile", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck"]
 # inference
 results = inferencer.infer({'pixel': img})
-print("infer results: ", results)
+print("infer results: %s" % label_list[numpy.argmax(results[0])])
 ```

 ## 总结

--- a/03.image_classification/index.html
+++ b/03.image_classification/index.html
@@ -213,6 +213,7 @@ import paddle
 import paddle.fluid as fluid
 import numpy
 import sys
+from __future__ import print_function
 ```

 Now we are going to walk you through the implementations of the VGG and ResNet.
@@ -556,9 +557,10 @@ Now we are ready to do inference.
 inferencer = fluid.Inferencer(
    infer_func=inference_program, param_path=params_dirname, place=place)

+label_list = ["airplane", "automobile", "bird", "cat", "deer", "dog", "frog", "horse", "ship", "truck"]
 # inference
 results = inferencer.infer({'pixel': img})
-print("infer results: ", results)
+print("infer results: %s" % label_list[numpy.argmax(results[0])])
 ```



--- a/05.recommender_system/README.cn.md
+++ b/05.recommender_system/README.cn.md
@@ -215,6 +215,7 @@ print "User %s rates Movie %s with Score %s"%(user_info[uid], movie_info[mov_id]


 ```python
+from __future__ import print_function
 import math
 import sys
 import numpy as np
@@ -508,6 +509,11 @@ results = inferencer.infer(
        'movie_title': movie_title
    },
    return_numpy=False)
+
+predict_rating = np.array(results[0])
+print("Predict Rating of user id 1 on movie \"" + infer_movie_name + "\" is " + str(predict_rating[0][0]))
+print("Actual Rating of user id 1 on movie \"" + infer_movie_name + "\" is 4.")
+
 ```

 ## 总结

--- a/05.recommender_system/README.md
+++ b/05.recommender_system/README.md
@@ -185,6 +185,7 @@ After issuing a command `python train.py`, training will start immediately. The

 Our program starts with importing necessary packages and initializing some global variables:
 ```python
+from __future__ import print_function
 import math
 import sys
 import numpy as np

--- a/05.recommender_system/index.cn.html
+++ b/05.recommender_system/index.cn.html
@@ -257,6 +257,7 @@ print "User %s rates Movie %s with Score %s"%(user_info[uid], movie_info[mov_id]


 ```python
+from __future__ import print_function
 import math
 import sys
 import numpy as np
@@ -550,6 +551,11 @@ results = inferencer.infer(
        'movie_title': movie_title
    },
    return_numpy=False)
+
+predict_rating = np.array(results[0])
+print("Predict Rating of user id 1 on movie \"" + infer_movie_name + "\" is " + str(predict_rating[0][0]))
+print("Actual Rating of user id 1 on movie \"" + infer_movie_name + "\" is 4.")
+
 ```

 ## 总结

--- a/05.recommender_system/index.html
+++ b/05.recommender_system/index.html
@@ -227,6 +227,7 @@ After issuing a command `python train.py`, training will start immediately. The

 Our program starts with importing necessary packages and initializing some global variables:
 ```python
+from __future__ import print_function
 import math
 import sys
 import numpy as np

--- a/05.recommender_system/train.py
+++ b/05.recommender_system/train.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+from __future__ import print_function
 import math
 import sys
 import numpy as np
@@ -233,7 +234,11 @@ def infer(use_cuda, inference_program, params_dirname):
        },
        return_numpy=False)

-    print("infer results: ", np.array(results[0]))
+    predict_rating = np.array(results[0])
+    print("Predict Rating of user id 1 on movie \"" + infer_movie_name +
+          "\" is " + str(predict_rating[0][0]))
+    print("Actual Rating of user id 1 on movie \"" + infer_movie_name +
+          "\" is 4.")


 def main(use_cuda):

--- a/06.understand_sentiment/README.cn.md
+++ b/06.understand_sentiment/README.cn.md
@@ -107,6 +107,7 @@ Paddle在`dataset/imdb.py`中提实现了imdb数据集的自动下载和读取
 在该示例中，我们实现了两种文本分类算法，分别基于[推荐系统](https://github.com/PaddlePaddle/book/tree/develop/05.recommender_system)一节介绍过的文本卷积神经网络，以及[栈式双向LSTM](#栈式双向LSTM（Stacked Bidirectional LSTM）)。我们首先引入要用到的库和定义全局变量：

 ```python
+from __future__ import print_function
 import paddle
 import paddle.fluid as fluid
 from functools import partial
@@ -115,6 +116,7 @@ import numpy as np
 CLASS_DIM = 2
 EMB_DIM = 128
 HID_DIM = 512
+STACKED_NUM = 3
 BATCH_SIZE = 128
 USE_GPU = False
 ```
@@ -168,17 +170,12 @@ def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num):
            input=fc, size=hid_dim, is_reverse=(i % 2) == 0)
        inputs = [fc, lstm]

-    fc_last = paddle.layer.pooling(input=inputs[0], pooling_type=paddle.pooling.Max())
-    lstm_last = paddle.layer.pooling(input=inputs[1], pooling_type=paddle.pooling.Max())
-    output = paddle.layer.fc(input=[fc_last, lstm_last],
-                             size=class_dim,
-                             act=paddle.activation.Softmax(),
-                             bias_attr=bias_attr,
-                             param_attr=para_attr)
-
-    lbl = paddle.layer.data("label", paddle.data_type.integer_value(2))
-    cost = paddle.layer.classification_cost(input=output, label=lbl)
-    return cost, output
+    fc_last = fluid.layers.sequence_pool(input=inputs[0], pool_type='max')
+    lstm_last = fluid.layers.sequence_pool(input=inputs[1], pool_type='max')
+
+    prediction = fluid.layers.fc(
+        input=[fc_last, lstm_last], size=class_dim, act='softmax')
+    return prediction
 ```
 以上的栈式双向LSTM抽象出了高级特征并把其映射到和分类类别数同样大小的向量上。`paddle.activation.Softmax`函数用来计算分类属于某个类别的概率。

@@ -193,6 +190,7 @@ def inference_program(word_dict):

    dict_dim = len(word_dict)
    net = convolution_net(data, dict_dim, CLASS_DIM, EMB_DIM, HID_DIM)
+    # net = stacked_lstm_net(data, dict_dim, CLASS_DIM, EMB_DIM, HID_DIM, STACKED_NUM)
    return net
 ```

@@ -301,7 +299,7 @@ trainer.train(

 ```python
 inferencer = fluid.Inferencer(
-        inference_program, param_path=params_dirname, place=place)
+        infer_func=partial(inference_program, word_dict), param_path=params_dirname, place=place)
 ```

 ### 生成测试用输入数据

--- a/06.understand_sentiment/README.md
+++ b/06.understand_sentiment/README.md
@@ -103,6 +103,7 @@ After issuing a command `python train.py`, training will start immediately. The
 Our program starts with importing necessary packages and initializing some global variables:

 ```python
+from __future__ import print_function
 import paddle
 import paddle.fluid as fluid
 from functools import partial
@@ -111,6 +112,7 @@ import numpy as np
 CLASS_DIM = 2
 EMB_DIM = 128
 HID_DIM = 512
+STACKED_NUM = 3
 BATCH_SIZE = 128
 USE_GPU = False
 ```
@@ -192,6 +194,7 @@ def inference_program(word_dict):

    dict_dim = len(word_dict)
    net = convolution_net(data, dict_dim, CLASS_DIM, EMB_DIM, HID_DIM)
+    # net = stacked_lstm_net(data, dict_dim, CLASS_DIM, EMB_DIM, HID_DIM, STACKED_NUM)
    return net
 ```


--- a/06.understand_sentiment/index.cn.html
+++ b/06.understand_sentiment/index.cn.html
@@ -149,6 +149,7 @@ Paddle在`dataset/imdb.py`中提实现了imdb数据集的自动下载和读取
 在该示例中，我们实现了两种文本分类算法，分别基于[推荐系统](https://github.com/PaddlePaddle/book/tree/develop/05.recommender_system)一节介绍过的文本卷积神经网络，以及[栈式双向LSTM](#栈式双向LSTM（Stacked Bidirectional LSTM）)。我们首先引入要用到的库和定义全局变量：

 ```python
+from __future__ import print_function
 import paddle
 import paddle.fluid as fluid
 from functools import partial
@@ -157,6 +158,7 @@ import numpy as np
 CLASS_DIM = 2
 EMB_DIM = 128
 HID_DIM = 512
+STACKED_NUM = 3
 BATCH_SIZE = 128
 USE_GPU = False
 ```
@@ -210,17 +212,12 @@ def stacked_lstm_net(data, input_dim, class_dim, emb_dim, hid_dim, stacked_num):
            input=fc, size=hid_dim, is_reverse=(i % 2) == 0)
        inputs = [fc, lstm]

-    fc_last = paddle.layer.pooling(input=inputs[0], pooling_type=paddle.pooling.Max())
-    lstm_last = paddle.layer.pooling(input=inputs[1], pooling_type=paddle.pooling.Max())
-    output = paddle.layer.fc(input=[fc_last, lstm_last],
-                             size=class_dim,
-                             act=paddle.activation.Softmax(),
-                             bias_attr=bias_attr,
-                             param_attr=para_attr)
-
-    lbl = paddle.layer.data("label", paddle.data_type.integer_value(2))
-    cost = paddle.layer.classification_cost(input=output, label=lbl)
-    return cost, output
+    fc_last = fluid.layers.sequence_pool(input=inputs[0], pool_type='max')
+    lstm_last = fluid.layers.sequence_pool(input=inputs[1], pool_type='max')
+
+    prediction = fluid.layers.fc(
+        input=[fc_last, lstm_last], size=class_dim, act='softmax')
+    return prediction
 ```
 以上的栈式双向LSTM抽象出了高级特征并把其映射到和分类类别数同样大小的向量上。`paddle.activation.Softmax`函数用来计算分类属于某个类别的概率。

@@ -235,6 +232,7 @@ def inference_program(word_dict):

    dict_dim = len(word_dict)
    net = convolution_net(data, dict_dim, CLASS_DIM, EMB_DIM, HID_DIM)
+    # net = stacked_lstm_net(data, dict_dim, CLASS_DIM, EMB_DIM, HID_DIM, STACKED_NUM)
    return net
 ```

@@ -343,7 +341,7 @@ trainer.train(

 ```python
 inferencer = fluid.Inferencer(
-        inference_program, param_path=params_dirname, place=place)
+        infer_func=partial(inference_program, word_dict), param_path=params_dirname, place=place)
 ```

 ### 生成测试用输入数据

--- a/06.understand_sentiment/index.html
+++ b/06.understand_sentiment/index.html
@@ -145,6 +145,7 @@ After issuing a command `python train.py`, training will start immediately. The
 Our program starts with importing necessary packages and initializing some global variables:

 ```python
+from __future__ import print_function
 import paddle
 import paddle.fluid as fluid
 from functools import partial
@@ -153,6 +154,7 @@ import numpy as np
 CLASS_DIM = 2
 EMB_DIM = 128
 HID_DIM = 512
+STACKED_NUM = 3
 BATCH_SIZE = 128
 USE_GPU = False
 ```
@@ -234,6 +236,7 @@ def inference_program(word_dict):

    dict_dim = len(word_dict)
    net = convolution_net(data, dict_dim, CLASS_DIM, EMB_DIM, HID_DIM)
+    # net = stacked_lstm_net(data, dict_dim, CLASS_DIM, EMB_DIM, HID_DIM, STACKED_NUM)
    return net
 ```


--- a/07.label_semantic_roles/README.cn.md
+++ b/07.label_semantic_roles/README.cn.md
@@ -25,7 +25,7 @@ $$\mbox{[小明]}_{\mbox{Agent}}\mbox{[昨天]}_{\mbox{Time}}\mbox{[晚上]}_\mb
 图1. 依存句法分析句法树示例
 </div>

-然而，完全句法分析需要确定句子所包含的全部句法信息，并确定句子各成分之间的关系，是一个非常困难的任务，目前技术下的句法分析准确率并不高，句法分析的细微错误都会导致SRL的错误。为了降低问题的复杂度，同时获得一定的句法结构信息，“浅层句法分析”的思想应运而生。浅层句法分析也称为部分句法分析（partial parsing）或语块划分（chunking）。和完全句法分析得到一颗完整的句法树不同，浅层句法分析只需要识别句子中某些结构相对简单的独立成分，例如：动词短语，这些被识别出来的结构称为语块。为了回避 “无法获得准确率较高的句法树” 所带来的困难，一些研究\[[1](#参考文献)\]也提出了基于语块（chunk）的SRL方法。基于语块的SRL方法将SRL作为一个序列标注问题来解决。序列标注任务一般都会采用BIO表示方式来定义序列标注的标签集，我们先来介绍这种表示方法。在BIO表示法中，B代表语块的开始，I代表语块的中间，O代表语块结束。通过B、I、O 三种标记将不同的语块赋予不同的标签，例如：对于一个角色为A的论元，将它所包含的第一个语块赋予标签B-A，将它所包含的其它语块赋予标签I-A，不属于任何论元的语块赋予标签O。
+然而，完全句法分析需要确定句子所包含的全部句法信息，并确定句子各成分之间的关系，是一个非常困难的任务，目前技术下的句法分析准确率并不高，句法分析的细微错误都会导致SRL的错误。为了降低问题的复杂度，同时获得一定的句法结构信息，“浅层句法分析”的思想应运而生。浅层句法分析也称为部分句法分析（partial parsing）或语块划分（chunking）。和完全句法分析得到一颗完整的句法树不同，浅层句法分析只需要识别句子中某些结构相对简单的独立成分，例如：动词短语，这些被识别出来的结构称为语块。为了回避 “无法获得准确率较高的句法树” 所带来的困难，一些研究\[[1](#参考文献)\]也提出了基于语块（chunk）的SRL方法。基于语块的SRL方法将SRL作为一个序列标注问题来解决。序列标注任务一般都会采用BIO表示方式来定义序列标注的标签集，我们先来介绍这种表示方法。在BIO表示法中，B代表语块的开始，I代表语块的中间，O代表语块结束。通过B、I、O 三种标记将不同的语块赋予不同的标签，例如：对于一个由角色A拓展得到的语块组，将它所包含的第一个语块赋予标签B-A，将它所包含的其它语块赋予标签I-A，不属于任何论元的语块赋予标签O。

 我们继续以上面的这句话为例，图1展示了BIO表示方法。

@@ -151,14 +151,6 @@ conll05st-release/
 4. 构造以BIO法表示的标记；
 5. 依据词典获取词对应的整数索引。

-
-```python
-# import paddle.v2.dataset.conll05 as conll05
-# conll05.corpus_reader函数完成上面第1步和第2步.
-# conll05.reader_creator函数完成上面第3步到第5步.
-# conll05.test函数可以获取处理之后的每条样本来供PaddlePaddle训练.
-```
-
 预处理完成之后一条训练样本包含9个特征，分别是：句子序列、谓词、谓词上下文（占 5 列）、谓词上下区域标志、标注序列。下表是一条训练样本的示例。

 | 句子序列 | 谓词 | 谓词上下文（窗口 = 5） | 谓词上下文区域标记 | 标注序列 |
@@ -187,6 +179,8 @@ conll05st-release/
 获取词典，打印词典大小：

 ```python
+from __future__ import print_function
+
 import math, os
 import numpy as np
 import paddle
@@ -201,9 +195,9 @@ word_dict_len = len(word_dict)
 label_dict_len = len(label_dict)
 pred_dict_len = len(verb_dict)

-print word_dict_len
-print label_dict_len
-print pred_dict_len
+print('word_dict_len: ', word_dict_len)
+print('label_dict_len: ', label_dict_len)
+print('pred_dict_len: ', pred_dict_len)
 ```

 ## 模型配置说明
@@ -431,7 +425,7 @@ def train(use_cuda, save_dirname=None, is_local=True):
                cost = cost[0]

                if batch_id % 10 == 0:
-                    print("avg_cost:" + str(cost))
+                    print("avg_cost: " + str(cost))
                    if batch_id != 0:
                        print("second per batch: " + str((time.time(
                        ) - start_time) / batch_id))

--- a/07.label_semantic_roles/README.md
+++ b/07.label_semantic_roles/README.md
@@ -175,13 +175,6 @@ The raw data needs to be preprocessed into formats that PaddlePaddle can handle.
 4. Construct the markings in BIO format;
 5. Obtain the integer index corresponding to the word according to the dictionary.

-```python
-# import paddle.v2.dataset.conll05 as conll05
-# conll05.corpus_reader does step 1 and 2 as mentioned above.
-# conll05.reader_creator does step 3 to 5.
-# conll05.test gets preprocessed training instances.
-```
-
 After preprocessing, a training sample contains nine features, namely: word sequence, predicate, predicate context (5 columns), region mark sequence, label sequence. The following table is an example of a training sample.

 | word sequence | predicate | predicate context（5 columns） | region mark sequence | label sequence|
@@ -209,6 +202,8 @@ We trained a language model on the English Wikipedia to get a word vector lookup
 Here we fetch the dictionary, and print its size:

 ```python
+from __future__ import print_function
+
 import math, os
 import numpy as np
 import paddle
@@ -223,9 +218,9 @@ word_dict_len = len(word_dict)
 label_dict_len = len(label_dict)
 pred_dict_len = len(verb_dict)

-print word_dict_len
-print label_dict_len
-print pred_dict_len
+print('word_dict_len: ', word_dict_len)
+print('label_dict_len: ', label_dict_len)
+print('pred_dict_len: ', pred_dict_len)
 ```

 ## Model Configuration
@@ -440,7 +435,7 @@ def train(use_cuda, save_dirname=None, is_local=True):
                cost = cost[0]

                if batch_id % 10 == 0:
-                    print("avg_cost:" + str(cost))
+                    print("avg_cost: " + str(cost))
                    if batch_id != 0:
                        print("second per batch: " + str((time.time(
                        ) - start_time) / batch_id))

--- a/07.label_semantic_roles/index.cn.html
+++ b/07.label_semantic_roles/index.cn.html
@@ -67,7 +67,7 @@ $$\mbox{[小明]}_{\mbox{Agent}}\mbox{[昨天]}_{\mbox{Time}}\mbox{[晚上]}_\mb
 图1. 依存句法分析句法树示例
 </div>

-然而，完全句法分析需要确定句子所包含的全部句法信息，并确定句子各成分之间的关系，是一个非常困难的任务，目前技术下的句法分析准确率并不高，句法分析的细微错误都会导致SRL的错误。为了降低问题的复杂度，同时获得一定的句法结构信息，“浅层句法分析”的思想应运而生。浅层句法分析也称为部分句法分析（partial parsing）或语块划分（chunking）。和完全句法分析得到一颗完整的句法树不同，浅层句法分析只需要识别句子中某些结构相对简单的独立成分，例如：动词短语，这些被识别出来的结构称为语块。为了回避 “无法获得准确率较高的句法树” 所带来的困难，一些研究\[[1](#参考文献)\]也提出了基于语块（chunk）的SRL方法。基于语块的SRL方法将SRL作为一个序列标注问题来解决。序列标注任务一般都会采用BIO表示方式来定义序列标注的标签集，我们先来介绍这种表示方法。在BIO表示法中，B代表语块的开始，I代表语块的中间，O代表语块结束。通过B、I、O 三种标记将不同的语块赋予不同的标签，例如：对于一个角色为A的论元，将它所包含的第一个语块赋予标签B-A，将它所包含的其它语块赋予标签I-A，不属于任何论元的语块赋予标签O。
+然而，完全句法分析需要确定句子所包含的全部句法信息，并确定句子各成分之间的关系，是一个非常困难的任务，目前技术下的句法分析准确率并不高，句法分析的细微错误都会导致SRL的错误。为了降低问题的复杂度，同时获得一定的句法结构信息，“浅层句法分析”的思想应运而生。浅层句法分析也称为部分句法分析（partial parsing）或语块划分（chunking）。和完全句法分析得到一颗完整的句法树不同，浅层句法分析只需要识别句子中某些结构相对简单的独立成分，例如：动词短语，这些被识别出来的结构称为语块。为了回避 “无法获得准确率较高的句法树” 所带来的困难，一些研究\[[1](#参考文献)\]也提出了基于语块（chunk）的SRL方法。基于语块的SRL方法将SRL作为一个序列标注问题来解决。序列标注任务一般都会采用BIO表示方式来定义序列标注的标签集，我们先来介绍这种表示方法。在BIO表示法中，B代表语块的开始，I代表语块的中间，O代表语块结束。通过B、I、O 三种标记将不同的语块赋予不同的标签，例如：对于一个由角色A拓展得到的语块组，将它所包含的第一个语块赋予标签B-A，将它所包含的其它语块赋予标签I-A，不属于任何论元的语块赋予标签O。

 我们继续以上面的这句话为例，图1展示了BIO表示方法。

@@ -193,14 +193,6 @@ conll05st-release/
 4. 构造以BIO法表示的标记；
 5. 依据词典获取词对应的整数索引。

-
-```python
-# import paddle.v2.dataset.conll05 as conll05
-# conll05.corpus_reader函数完成上面第1步和第2步.
-# conll05.reader_creator函数完成上面第3步到第5步.
-# conll05.test函数可以获取处理之后的每条样本来供PaddlePaddle训练.
-```
-
 预处理完成之后一条训练样本包含9个特征，分别是：句子序列、谓词、谓词上下文（占 5 列）、谓词上下区域标志、标注序列。下表是一条训练样本的示例。

 | 句子序列 | 谓词 | 谓词上下文（窗口 = 5） | 谓词上下文区域标记 | 标注序列 |
@@ -229,6 +221,8 @@ conll05st-release/
 获取词典，打印词典大小：

 ```python
+from __future__ import print_function
+
 import math, os
 import numpy as np
 import paddle
@@ -243,9 +237,9 @@ word_dict_len = len(word_dict)
 label_dict_len = len(label_dict)
 pred_dict_len = len(verb_dict)

-print word_dict_len
-print label_dict_len
-print pred_dict_len
+print('word_dict_len: ', word_dict_len)
+print('label_dict_len: ', label_dict_len)
+print('pred_dict_len: ', pred_dict_len)
 ```

 ## 模型配置说明
@@ -473,7 +467,7 @@ def train(use_cuda, save_dirname=None, is_local=True):
                cost = cost[0]

                if batch_id % 10 == 0:
-                    print("avg_cost:" + str(cost))
+                    print("avg_cost: " + str(cost))
                    if batch_id != 0:
                        print("second per batch: " + str((time.time(
                        ) - start_time) / batch_id))

--- a/07.label_semantic_roles/index.html
+++ b/07.label_semantic_roles/index.html
@@ -217,13 +217,6 @@ The raw data needs to be preprocessed into formats that PaddlePaddle can handle.
 4. Construct the markings in BIO format;
 5. Obtain the integer index corresponding to the word according to the dictionary.

-```python
-# import paddle.v2.dataset.conll05 as conll05
-# conll05.corpus_reader does step 1 and 2 as mentioned above.
-# conll05.reader_creator does step 3 to 5.
-# conll05.test gets preprocessed training instances.
-```
-
 After preprocessing, a training sample contains nine features, namely: word sequence, predicate, predicate context (5 columns), region mark sequence, label sequence. The following table is an example of a training sample.

 | word sequence | predicate | predicate context（5 columns） | region mark sequence | label sequence|
@@ -251,6 +244,8 @@ We trained a language model on the English Wikipedia to get a word vector lookup
 Here we fetch the dictionary, and print its size:

 ```python
+from __future__ import print_function
+
 import math, os
 import numpy as np
 import paddle
@@ -265,9 +260,9 @@ word_dict_len = len(word_dict)
 label_dict_len = len(label_dict)
 pred_dict_len = len(verb_dict)

-print word_dict_len
-print label_dict_len
-print pred_dict_len
+print('word_dict_len: ', word_dict_len)
+print('label_dict_len: ', label_dict_len)
+print('pred_dict_len: ', pred_dict_len)
 ```

 ## Model Configuration
@@ -482,7 +477,7 @@ def train(use_cuda, save_dirname=None, is_local=True):
                cost = cost[0]

                if batch_id % 10 == 0:
-                    print("avg_cost:" + str(cost))
+                    print("avg_cost: " + str(cost))
                    if batch_id != 0:
                        print("second per batch: " + str((time.time(
                        ) - start_time) / batch_id))

--- a/07.label_semantic_roles/train.py
+++ b/07.label_semantic_roles/train.py
+from __future__ import print_function
+
 import math, os
 import numpy as np
 import paddle

--- a/08.machine_translation/README.cn.md
+++ b/08.machine_translation/README.cn.md
@@ -85,7 +85,7 @@

 2. 将$z_{i+1}$通过`softmax`归一化，得到目标语言序列的第$i+1$个单词的概率分布$p_{i+1}$。概率分布公式如下：

-   $$p\left ( u_{i+1}|u_{&lt;i+1},\mathbf{x} \right )=softmax(W_sz_{i+1}+b_z)$$
+   $$p\left ( u_{i+1}|u_{<i+1},\mathbf{x} \right )=softmax(W_sz_{i+1}+b_z)$$

   其中$W_sz_{i+1}+b_z$是对每个可能的输出单词进行打分，再用softmax归一化就可以得到第$i+1$个词的概率$p_{i+1}$。

@@ -132,6 +132,7 @@
 下面我们开始根据输入数据的形式配置模型。首先引入所需的库函数以及定义全局变量。

 ```python
+from __future__ import print_function
 import contextlib

 import numpy as np
@@ -437,10 +438,13 @@ for data in test_data():
    result_scores = np.array(results[1])

    print("Original sentence:")
-    print(" ".join([src_dict[w] for w in feed_data[0][0]]))
-    print("Translated sentence:")
-    print(" ".join([trg_dict[w] for w in result_ids]))
-    print("Corresponding score: ", result_scores)
+    print(" ".join([src_dict[w] for w in feed_data[0][0][1:-1]]))
+    print("Translated score and sentence:")
+    for i in range(beam_size):
+        start_pos = result_ids_lod[1][i] + 1
+        end_pos = result_ids_lod[1][i+1]
+        print("%d\t%.4f\t%s\n" % (i+1, result_scores[end_pos-1],
+                " ".join([trg_dict[w] for w in result_ids[start_pos:end_pos]])))

    break
 ```

--- a/08.machine_translation/README.md
+++ b/08.machine_translation/README.md
@@ -114,7 +114,7 @@ The goal of the decoder is to maximize the probability of the next correct word

 2. Calculate the probability $p_{i+1}$ for the $i+1$-th word in the target language sequence by normalizing $z_{i+1}$ using `softmax` as follows

-   $$p\left ( u_{i+1}|u_{&lt;i+1},\mathbf{x} \right )=softmax(W_sz_{i+1}+b_z)$$
+   $$p\left ( u_{i+1}|u_{<i+1},\mathbf{x} \right )=softmax(W_sz_{i+1}+b_z)$$

   where $W_sz_{i+1}+b_z$ scores each possible words and is then normalized via softmax to produce the probability $p_{i+1}$ for the $i+1$-th word.

@@ -169,6 +169,7 @@ This subset has 193319 instances of training data and 6003 instances of test dat
 Our program starts with importing necessary packages and initializing some global variables:

 ```python
+from __future__ import print_function
 import contextlib

 import numpy as np
@@ -485,10 +486,13 @@ for data in test_data():
    result_scores = np.array(results[1])

    print("Original sentence:")
-    print(" ".join([src_dict[w] for w in feed_data[0][0]]))
-    print("Translated sentence:")
-    print(" ".join([trg_dict[w] for w in result_ids]))
-    print("Corresponding score: ", result_scores)
+    print(" ".join([src_dict[w] for w in feed_data[0][0][1:-1]]))
+    print("Translated score and sentence:")
+    for i in range(beam_size):
+        start_pos = result_ids_lod[1][i] + 1
+        end_pos = result_ids_lod[1][i+1]
+        print("%d\t%.4f\t%s\n" % (i+1, result_scores[end_pos-1],
+                " ".join([trg_dict[w] for w in result_ids[start_pos:end_pos]])))

    break
 ```

--- a/08.machine_translation/index.cn.html
+++ b/08.machine_translation/index.cn.html
@@ -127,7 +127,7 @@

 2. 将$z_{i+1}$通过`softmax`归一化，得到目标语言序列的第$i+1$个单词的概率分布$p_{i+1}$。概率分布公式如下：

-   $$p\left ( u_{i+1}|u_{&lt;i+1},\mathbf{x} \right )=softmax(W_sz_{i+1}+b_z)$$
+   $$p\left ( u_{i+1}|u_{<i+1},\mathbf{x} \right )=softmax(W_sz_{i+1}+b_z)$$

   其中$W_sz_{i+1}+b_z$是对每个可能的输出单词进行打分，再用softmax归一化就可以得到第$i+1$个词的概率$p_{i+1}$。

@@ -174,6 +174,7 @@
 下面我们开始根据输入数据的形式配置模型。首先引入所需的库函数以及定义全局变量。

 ```python
+from __future__ import print_function
 import contextlib

 import numpy as np
@@ -479,10 +480,13 @@ for data in test_data():
    result_scores = np.array(results[1])

    print("Original sentence:")
-    print(" ".join([src_dict[w] for w in feed_data[0][0]]))
-    print("Translated sentence:")
-    print(" ".join([trg_dict[w] for w in result_ids]))
-    print("Corresponding score: ", result_scores)
+    print(" ".join([src_dict[w] for w in feed_data[0][0][1:-1]]))
+    print("Translated score and sentence:")
+    for i in range(beam_size):
+        start_pos = result_ids_lod[1][i] + 1
+        end_pos = result_ids_lod[1][i+1]
+        print("%d\t%.4f\t%s\n" % (i+1, result_scores[end_pos-1],
+                " ".join([trg_dict[w] for w in result_ids[start_pos:end_pos]])))

    break
 ```

--- a/08.machine_translation/index.html
+++ b/08.machine_translation/index.html
@@ -156,7 +156,7 @@ The goal of the decoder is to maximize the probability of the next correct word

 2. Calculate the probability $p_{i+1}$ for the $i+1$-th word in the target language sequence by normalizing $z_{i+1}$ using `softmax` as follows

-   $$p\left ( u_{i+1}|u_{&lt;i+1},\mathbf{x} \right )=softmax(W_sz_{i+1}+b_z)$$
+   $$p\left ( u_{i+1}|u_{<i+1},\mathbf{x} \right )=softmax(W_sz_{i+1}+b_z)$$

   where $W_sz_{i+1}+b_z$ scores each possible words and is then normalized via softmax to produce the probability $p_{i+1}$ for the $i+1$-th word.

@@ -211,6 +211,7 @@ This subset has 193319 instances of training data and 6003 instances of test dat
 Our program starts with importing necessary packages and initializing some global variables:

 ```python
+from __future__ import print_function
 import contextlib

 import numpy as np
@@ -527,10 +528,13 @@ for data in test_data():
    result_scores = np.array(results[1])

    print("Original sentence:")
-    print(" ".join([src_dict[w] for w in feed_data[0][0]]))
-    print("Translated sentence:")
-    print(" ".join([trg_dict[w] for w in result_ids]))
-    print("Corresponding score: ", result_scores)
+    print(" ".join([src_dict[w] for w in feed_data[0][0][1:-1]]))
+    print("Translated score and sentence:")
+    for i in range(beam_size):
+        start_pos = result_ids_lod[1][i] + 1
+        end_pos = result_ids_lod[1][i+1]
+        print("%d\t%.4f\t%s\n" % (i+1, result_scores[end_pos-1],
+                " ".join([trg_dict[w] for w in result_ids[start_pos:end_pos]])))

    break
 ```

--- a/08.machine_translation/infer.py
+++ b/08.machine_translation/infer.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+from __future__ import print_function
 import numpy as np
 import paddle
 import paddle.fluid as fluid
@@ -187,10 +188,14 @@ def decode_main(use_cuda):
        result_scores = np.array(results[1])

        print("Original sentence:")
-        print(" ".join([src_dict[w] for w in feed_data[0][0]]))
-        print("Translated sentence:")
-        print(" ".join([trg_dict[w] for w in result_ids]))
-        print("Corresponding score: ", result_scores)
+        print(" ".join([src_dict[w] for w in feed_data[0][0][1:-1]]))
+        print("Translated score and sentence:")
+        for i in range(beam_size):
+            start_pos = result_ids_lod[1][i] + 1
+            end_pos = result_ids_lod[1][i + 1]
+            print("%d\t%.4f\t%s\n" % (
+                i + 1, result_scores[end_pos - 1],
+                " ".join([trg_dict[w] for w in result_ids[start_pos:end_pos]])))

        break