Merge pull request #581 from chenwhql/book08_refine

08 Style: polish formula and print format

Merge pull request #581 from chenwhql/book08_refine
08 Style: polish formula and print format
7eaa3f45 · Chen Weihang · GitHub · 6a2eecc2 · 08d5d2e9 · 7eaa3f45
5 changed files
--- a/08.machine_translation/README.cn.md
+++ b/08.machine_translation/README.cn.md
@@ -85,7 +85,7 @@

 2. 将$z_{i+1}$通过`softmax`归一化，得到目标语言序列的第$i+1$个单词的概率分布$p_{i+1}$。概率分布公式如下：

-   $$p\left ( u_{i+1}|u_{&lt;i+1},\mathbf{x} \right )=softmax(W_sz_{i+1}+b_z)$$
+   $$p\left ( u_{i+1}|u_{<i+1},\mathbf{x} \right )=softmax(W_sz_{i+1}+b_z)$$

   其中$W_sz_{i+1}+b_z$是对每个可能的输出单词进行打分，再用softmax归一化就可以得到第$i+1$个词的概率$p_{i+1}$。

@@ -132,6 +132,7 @@
 下面我们开始根据输入数据的形式配置模型。首先引入所需的库函数以及定义全局变量。

 ```python
+from __future__ import print_function
 import contextlib

 import numpy as np
@@ -437,10 +438,13 @@ for data in test_data():
    result_scores = np.array(results[1])

    print("Original sentence:")
-    print(" ".join([src_dict[w] for w in feed_data[0][0]]))
-    print("Translated sentence:")
-    print(" ".join([trg_dict[w] for w in result_ids]))
-    print("Corresponding score: ", result_scores)
+    print(" ".join([src_dict[w] for w in feed_data[0][0][1:-1]]))
+    print("Translated score and sentence:")
+    for i in xrange(beam_size):
+        start_pos = result_ids_lod[1][i] + 1
+        end_pos = result_ids_lod[1][i+1]
+        print("%d\t%.4f\t%s\n" % (i+1, result_scores[end_pos-1],
+                " ".join([trg_dict[w] for w in result_ids[start_pos:end_pos]])))

    break
 ```

--- a/08.machine_translation/README.md
+++ b/08.machine_translation/README.md
@@ -114,7 +114,7 @@ The goal of the decoder is to maximize the probability of the next correct word

 2. Calculate the probability $p_{i+1}$ for the $i+1$-th word in the target language sequence by normalizing $z_{i+1}$ using `softmax` as follows

-   $$p\left ( u_{i+1}|u_{&lt;i+1},\mathbf{x} \right )=softmax(W_sz_{i+1}+b_z)$$
+   $$p\left ( u_{i+1}|u_{<i+1},\mathbf{x} \right )=softmax(W_sz_{i+1}+b_z)$$

   where $W_sz_{i+1}+b_z$ scores each possible word and is then normalized via softmax to produce the probability $p_{i+1}$ for the $i+1$-th word.

@@ -169,6 +169,7 @@ This subset has 193319 instances of training data and 6003 instances of test dat
 Our program starts with importing necessary packages and initializing some global variables:

 ```python
+from __future__ import print_function
 import contextlib

 import numpy as np
@@ -485,10 +486,13 @@ for data in test_data():
    result_scores = np.array(results[1])

    print("Original sentence:")
-    print(" ".join([src_dict[w] for w in feed_data[0][0]]))
-    print("Translated sentence:")
-    print(" ".join([trg_dict[w] for w in result_ids]))
-    print("Corresponding score: ", result_scores)
+    print(" ".join([src_dict[w] for w in feed_data[0][0][1:-1]]))
+    print("Translated score and sentence:")
+    for i in xrange(beam_size):
+        start_pos = result_ids_lod[1][i] + 1
+        end_pos = result_ids_lod[1][i+1]
+        print("%d\t%.4f\t%s\n" % (i+1, result_scores[end_pos-1],
+                " ".join([trg_dict[w] for w in result_ids[start_pos:end_pos]])))

    break
 ```

--- a/08.machine_translation/index.cn.html
+++ b/08.machine_translation/index.cn.html
@@ -127,7 +127,7 @@

 2. 将$z_{i+1}$通过`softmax`归一化，得到目标语言序列的第$i+1$个单词的概率分布$p_{i+1}$。概率分布公式如下：

-   $$p\left ( u_{i+1}|u_{&lt;i+1},\mathbf{x} \right )=softmax(W_sz_{i+1}+b_z)$$
+   $$p\left ( u_{i+1}|u_{<i+1},\mathbf{x} \right )=softmax(W_sz_{i+1}+b_z)$$

   其中$W_sz_{i+1}+b_z$是对每个可能的输出单词进行打分，再用softmax归一化就可以得到第$i+1$个词的概率$p_{i+1}$。

@@ -174,6 +174,7 @@
 下面我们开始根据输入数据的形式配置模型。首先引入所需的库函数以及定义全局变量。

 ```python
+from __future__ import print_function
 import contextlib

 import numpy as np
@@ -479,10 +480,13 @@ for data in test_data():
    result_scores = np.array(results[1])

    print("Original sentence:")
-    print(" ".join([src_dict[w] for w in feed_data[0][0]]))
-    print("Translated sentence:")
-    print(" ".join([trg_dict[w] for w in result_ids]))
-    print("Corresponding score: ", result_scores)
+    print(" ".join([src_dict[w] for w in feed_data[0][0][1:-1]]))
+    print("Translated score and sentence:")
+    for i in xrange(beam_size):
+        start_pos = result_ids_lod[1][i] + 1
+        end_pos = result_ids_lod[1][i+1]
+        print("%d\t%.4f\t%s\n" % (i+1, result_scores[end_pos-1],
+                " ".join([trg_dict[w] for w in result_ids[start_pos:end_pos]])))

    break
 ```

--- a/08.machine_translation/index.html
+++ b/08.machine_translation/index.html
@@ -156,7 +156,7 @@ The goal of the decoder is to maximize the probability of the next correct word

 2. Calculate the probability $p_{i+1}$ for the $i+1$-th word in the target language sequence by normalizing $z_{i+1}$ using `softmax` as follows

-   $$p\left ( u_{i+1}|u_{&lt;i+1},\mathbf{x} \right )=softmax(W_sz_{i+1}+b_z)$$
+   $$p\left ( u_{i+1}|u_{<i+1},\mathbf{x} \right )=softmax(W_sz_{i+1}+b_z)$$

   where $W_sz_{i+1}+b_z$ scores each possible word and is then normalized via softmax to produce the probability $p_{i+1}$ for the $i+1$-th word.

@@ -211,6 +211,7 @@ This subset has 193319 instances of training data and 6003 instances of test dat
 Our program starts with importing necessary packages and initializing some global variables:

 ```python
+from __future__ import print_function
 import contextlib

 import numpy as np
@@ -527,10 +528,13 @@ for data in test_data():
    result_scores = np.array(results[1])

    print("Original sentence:")
-    print(" ".join([src_dict[w] for w in feed_data[0][0]]))
-    print("Translated sentence:")
-    print(" ".join([trg_dict[w] for w in result_ids]))
-    print("Corresponding score: ", result_scores)
+    print(" ".join([src_dict[w] for w in feed_data[0][0][1:-1]]))
+    print("Translated score and sentence:")
+    for i in xrange(beam_size):
+        start_pos = result_ids_lod[1][i] + 1
+        end_pos = result_ids_lod[1][i+1]
+        print("%d\t%.4f\t%s\n" % (i+1, result_scores[end_pos-1],
+                " ".join([trg_dict[w] for w in result_ids[start_pos:end_pos]])))

    break
 ```

--- a/08.machine_translation/infer.py
+++ b/08.machine_translation/infer.py
@@ -12,6 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

+from __future__ import print_function
 import numpy as np
 import paddle
 import paddle.fluid as fluid
@@ -187,10 +188,14 @@ def decode_main(use_cuda):
        result_scores = np.array(results[1])

        print("Original sentence:")
-        print(" ".join([src_dict[w] for w in feed_data[0][0]]))
-        print("Translated sentence:")
-        print(" ".join([trg_dict[w] for w in result_ids]))
-        print("Corresponding score: ", result_scores)
+        print(" ".join([src_dict[w] for w in feed_data[0][0][1:-1]]))
+        print("Translated score and sentence:")
+        for i in xrange(beam_size):
+            start_pos = result_ids_lod[1][i] + 1
+            end_pos = result_ids_lod[1][i + 1]
+            print("%d\t%.4f\t%s\n" % (
+                i + 1, result_scores[end_pos - 1],
+                " ".join([trg_dict[w] for w in result_ids[start_pos:end_pos]])))

        break