From 801e400d062ae3e77e4a01136eff7521ed0d1230 Mon Sep 17 00:00:00 2001
From: Nicky
Date: Wed, 27 Jun 2018 17:47:53 -0700
Subject: [PATCH] update beam search API in machine translation book example

---
 08.machine_translation/README.cn.md  | 21 +++++++++++++++++----
 08.machine_translation/README.md     | 21 +++++++++++++++++----
 08.machine_translation/index.cn.html | 23 +++++++++++++++++------
 08.machine_translation/index.html    | 23 +++++++++++++++++------
 08.machine_translation/infer.py      | 21 +++++++++++++++++----
 5 files changed, 85 insertions(+), 24 deletions(-)

diff --git a/08.machine_translation/README.cn.md b/08.machine_translation/README.cn.md
index c4c2a4c..014c9d7 100644
--- a/08.machine_translation/README.cn.md
+++ b/08.machine_translation/README.cn.md
@@ -253,9 +253,18 @@ def decode(context, is_sparse):
         current_score = pd.fc(input=current_state_with_lod,
                               size=target_dict_dim,
                               act='softmax')
-        topk_scores, topk_indices = pd.topk(current_score, k=topk_size)
+        topk_scores, topk_indices = pd.topk(current_score, k=beam_size)
+        # calculate accumulated scores after topk to reduce computation cost
+        accu_scores = pd.elementwise_add(
+            x=pd.log(topk_scores), y=pd.reshape(pre_score, shape=[-1]), axis=0)
         selected_ids, selected_scores = pd.beam_search(
-            pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0)
+            pre_ids,
+            pre_score,
+            topk_indices,
+            accu_scores,
+            beam_size,
+            end_id=10,
+            level=0)

         pd.increment(x=counter, value=1, in_place=True)

@@ -264,10 +273,14 @@ def decode(context, is_sparse):
         pd.array_write(selected_ids, array=ids_array, i=counter)
         pd.array_write(selected_scores, array=scores_array, i=counter)

-        pd.less_than(x=counter, y=array_len, cond=cond)
+        # update the break condition: up to the max length or all candidates of
+        # source sentences have ended.
+        length_cond = pd.less_than(x=counter, y=array_len)
+        finish_cond = pd.logical_not(pd.is_empty(x=selected_ids))
+        pd.logical_and(x=length_cond, y=finish_cond, out=cond)

     translation_ids, translation_scores = pd.beam_search_decode(
-        ids=ids_array, scores=scores_array)
+        ids=ids_array, scores=scores_array, beam_size=beam_size, end_id=10)

     return translation_ids, translation_scores
 ```
diff --git a/08.machine_translation/README.md b/08.machine_translation/README.md
index 1c4ed59..6cc7232 100644
--- a/08.machine_translation/README.md
+++ b/08.machine_translation/README.md
@@ -290,9 +290,18 @@ def decode(context, is_sparse):
         current_score = pd.fc(input=current_state_with_lod,
                               size=target_dict_dim,
                               act='softmax')
-        topk_scores, topk_indices = pd.topk(current_score, k=topk_size)
+        topk_scores, topk_indices = pd.topk(current_score, k=beam_size)
+        # calculate accumulated scores after topk to reduce computation cost
+        accu_scores = pd.elementwise_add(
+            x=pd.log(topk_scores), y=pd.reshape(pre_score, shape=[-1]), axis=0)
         selected_ids, selected_scores = pd.beam_search(
-            pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0)
+            pre_ids,
+            pre_score,
+            topk_indices,
+            accu_scores,
+            beam_size,
+            end_id=10,
+            level=0)

         pd.increment(x=counter, value=1, in_place=True)

@@ -301,10 +310,14 @@ def decode(context, is_sparse):
         pd.array_write(selected_ids, array=ids_array, i=counter)
         pd.array_write(selected_scores, array=scores_array, i=counter)

-        pd.less_than(x=counter, y=array_len, cond=cond)
+        # update the break condition: up to the max length or all candidates of
+        # source sentences have ended.
+        length_cond = pd.less_than(x=counter, y=array_len)
+        finish_cond = pd.logical_not(pd.is_empty(x=selected_ids))
+        pd.logical_and(x=length_cond, y=finish_cond, out=cond)

     translation_ids, translation_scores = pd.beam_search_decode(
-        ids=ids_array, scores=scores_array)
+        ids=ids_array, scores=scores_array, beam_size=beam_size, end_id=10)

     return translation_ids, translation_scores
 ```
diff --git a/08.machine_translation/index.cn.html b/08.machine_translation/index.cn.html
index 838181a..0bf14c0 100644
--- a/08.machine_translation/index.cn.html
+++ b/08.machine_translation/index.cn.html
@@ -201,7 +201,6 @@ decoder_size = hidden_dim

 ```python
 def encoder(is_sparse):
-    # encoder
     src_word_id = pd.data(
         name="src_word_id", shape=[1], dtype='int64', lod_level=1)
     src_embedding = pd.embedding(
@@ -221,7 +220,6 @@ decoder_size = hidden_dim

 ```python
 def train_decoder(context, is_sparse):
-    # decoder
     trg_language_word = pd.data(
         name="target_language_word", shape=[1], dtype='int64', lod_level=1)
     trg_embedding = pd.embedding(
@@ -297,9 +295,18 @@ def decode(context, is_sparse):
         current_score = pd.fc(input=current_state_with_lod,
                               size=target_dict_dim,
                               act='softmax')
-        topk_scores, topk_indices = pd.topk(current_score, k=topk_size)
+        topk_scores, topk_indices = pd.topk(current_score, k=beam_size)
+        # calculate accumulated scores after topk to reduce computation cost
+        accu_scores = pd.elementwise_add(
+            x=pd.log(topk_scores), y=pd.reshape(pre_score, shape=[-1]), axis=0)
         selected_ids, selected_scores = pd.beam_search(
-            pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0)
+            pre_ids,
+            pre_score,
+            topk_indices,
+            accu_scores,
+            beam_size,
+            end_id=10,
+            level=0)

         pd.increment(x=counter, value=1, in_place=True)

@@ -308,10 +315,14 @@ def decode(context, is_sparse):
         pd.array_write(selected_ids, array=ids_array, i=counter)
         pd.array_write(selected_scores, array=scores_array, i=counter)

-        pd.less_than(x=counter, y=array_len, cond=cond)
+        # update the break condition: up to the max length or all candidates of
+        # source sentences have ended.
+        length_cond = pd.less_than(x=counter, y=array_len)
+        finish_cond = pd.logical_not(pd.is_empty(x=selected_ids))
+        pd.logical_and(x=length_cond, y=finish_cond, out=cond)

     translation_ids, translation_scores = pd.beam_search_decode(
-        ids=ids_array, scores=scores_array)
+        ids=ids_array, scores=scores_array, beam_size=beam_size, end_id=10)

     return translation_ids, translation_scores
 ```
diff --git a/08.machine_translation/index.html b/08.machine_translation/index.html
index 1ad35ac..a7c2f31 100644
--- a/08.machine_translation/index.html
+++ b/08.machine_translation/index.html
@@ -238,7 +238,6 @@ Then we implement encoder as follows:

 ```python
 def encoder(is_sparse):
-    # encoder
     src_word_id = pd.data(
         name="src_word_id", shape=[1], dtype='int64', lod_level=1)
     src_embedding = pd.embedding(
@@ -258,7 +257,6 @@ Implement the decoder for training as follows:

 ```python
 def train_decoder(context, is_sparse):
-    # decoder
     trg_language_word = pd.data(
         name="target_language_word", shape=[1], dtype='int64', lod_level=1)
     trg_embedding = pd.embedding(
@@ -334,9 +332,18 @@ def decode(context, is_sparse):
         current_score = pd.fc(input=current_state_with_lod,
                               size=target_dict_dim,
                               act='softmax')
-        topk_scores, topk_indices = pd.topk(current_score, k=topk_size)
+        topk_scores, topk_indices = pd.topk(current_score, k=beam_size)
+        # calculate accumulated scores after topk to reduce computation cost
+        accu_scores = pd.elementwise_add(
+            x=pd.log(topk_scores), y=pd.reshape(pre_score, shape=[-1]), axis=0)
         selected_ids, selected_scores = pd.beam_search(
-            pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0)
+            pre_ids,
+            pre_score,
+            topk_indices,
+            accu_scores,
+            beam_size,
+            end_id=10,
+            level=0)

         pd.increment(x=counter, value=1, in_place=True)

@@ -345,10 +352,14 @@ def decode(context, is_sparse):
         pd.array_write(selected_ids, array=ids_array, i=counter)
         pd.array_write(selected_scores, array=scores_array, i=counter)

-        pd.less_than(x=counter, y=array_len, cond=cond)
+        # update the break condition: up to the max length or all candidates of
+        # source sentences have ended.
+        length_cond = pd.less_than(x=counter, y=array_len)
+        finish_cond = pd.logical_not(pd.is_empty(x=selected_ids))
+        pd.logical_and(x=length_cond, y=finish_cond, out=cond)

     translation_ids, translation_scores = pd.beam_search_decode(
-        ids=ids_array, scores=scores_array)
+        ids=ids_array, scores=scores_array, beam_size=beam_size, end_id=10)

     return translation_ids, translation_scores
 ```
diff --git a/08.machine_translation/infer.py b/08.machine_translation/infer.py
index 263bb73..9f9172e 100644
--- a/08.machine_translation/infer.py
+++ b/08.machine_translation/infer.py
@@ -97,9 +97,18 @@ def decode(context):
         # use score to do beam search
         current_score = pd.fc(
             input=current_state_with_lod, size=target_dict_dim, act='softmax')
-        topk_scores, topk_indices = pd.topk(current_score, k=topk_size)
+        topk_scores, topk_indices = pd.topk(current_score, k=beam_size)
+        # calculate accumulated scores after topk to reduce computation cost
+        accu_scores = pd.elementwise_add(
+            x=pd.log(topk_scores), y=pd.reshape(pre_score, shape=[-1]), axis=0)
         selected_ids, selected_scores = pd.beam_search(
-            pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0)
+            pre_ids,
+            pre_score,
+            topk_indices,
+            accu_scores,
+            beam_size,
+            end_id=10,
+            level=0)

         with pd.Switch() as switch:
             with switch.case(pd.is_empty(selected_ids)):
@@ -113,10 +122,14 @@ def decode(context):
         pd.array_write(selected_ids, array=ids_array, i=counter)
         pd.array_write(selected_scores, array=scores_array, i=counter)

-        pd.less_than(x=counter, y=array_len, cond=cond)
+        # update the break condition: up to the max length or all candidates of
+        # source sentences have ended.
+        length_cond = pd.less_than(x=counter, y=array_len)
+        finish_cond = pd.logical_not(pd.is_empty(x=selected_ids))
+        pd.logical_and(x=length_cond, y=finish_cond, out=cond)

     translation_ids, translation_scores = pd.beam_search_decode(
-        ids=ids_array, scores=scores_array)
+        ids=ids_array, scores=scores_array, beam_size=beam_size, end_id=10)

     return translation_ids, translation_scores

--
GitLab