Unverified commit ef10dd94, authored by Nicky Chan, committed by GitHub

Merge pull request #559 from nickyfantasy/updateBeamSearch

update beam search API in machine translation book example
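In summary, the change migrates the example to the newer `pd.beam_search` API: candidate scores are now log-probabilities accumulated *after* top-k pruning, and the previous step's scores (`pre_score`) are passed through explicitly. A minimal sketch of the updated step, assembled from the hunks below and assuming `pd` aliases `paddle.fluid.layers` and that `pre_ids`, `pre_score`, `current_state_with_lod`, `target_dict_dim`, and `beam_size` are defined earlier in `decode` as in the book example:

```python
# Softmax over the target vocabulary for each live hypothesis.
current_score = pd.fc(input=current_state_with_lod,
                      size=target_dict_dim,
                      act='softmax')

# Prune to the beam_size best next words per hypothesis first...
topk_scores, topk_indices = pd.topk(current_score, k=beam_size)

# ...then accumulate log-probabilities only for the survivors, which is
# cheaper than accumulating over the whole vocabulary.
accu_scores = pd.elementwise_add(
    x=pd.log(topk_scores), y=pd.reshape(pre_score, shape=[-1]), axis=0)

# The updated API takes the previous step's scores and the accumulated
# scores as separate arguments; end_id=10 marks the end-of-sentence token.
selected_ids, selected_scores = pd.beam_search(
    pre_ids,
    pre_score,
    topk_indices,
    accu_scores,
    beam_size,
    end_id=10,
    level=0)
```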
@@ -253,9 +253,18 @@ def decode(context, is_sparse):
         current_score = pd.fc(input=current_state_with_lod,
                               size=target_dict_dim,
                               act='softmax')
-        topk_scores, topk_indices = pd.topk(current_score, k=topk_size)
+        topk_scores, topk_indices = pd.topk(current_score, k=beam_size)
+        # calculate accumulated scores after topk to reduce computation cost
+        accu_scores = pd.elementwise_add(
+            x=pd.log(topk_scores), y=pd.reshape(pre_score, shape=[-1]), axis=0)
         selected_ids, selected_scores = pd.beam_search(
-            pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0)
+            pre_ids,
+            pre_score,
+            topk_indices,
+            accu_scores,
+            beam_size,
+            end_id=10,
+            level=0)
         pd.increment(x=counter, value=1, in_place=True)
@@ -264,10 +273,14 @@ def decode(context, is_sparse):
         pd.array_write(selected_ids, array=ids_array, i=counter)
         pd.array_write(selected_scores, array=scores_array, i=counter)
-        pd.less_than(x=counter, y=array_len, cond=cond)
+        # update the break condition: up to the max length or all candidates of
+        # source sentences have ended.
+        length_cond = pd.less_than(x=counter, y=array_len)
+        finish_cond = pd.logical_not(pd.is_empty(x=selected_ids))
+        pd.logical_and(x=length_cond, y=finish_cond, out=cond)
     translation_ids, translation_scores = pd.beam_search_decode(
-        ids=ids_array, scores=scores_array)
+        ids=ids_array, scores=scores_array, beam_size=beam_size, end_id=10)
     return translation_ids, translation_scores
 ```
......
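The loop-exit condition is also strengthened in each file: besides the maximum-length check, decoding now stops once `selected_ids` comes back empty, i.e. once every candidate has emitted the end token. A sketch of the replacement logic, under the same assumptions as above:

```python
# Keep decoding while the step counter is below the length limit...
length_cond = pd.less_than(x=counter, y=array_len)
# ...and beam_search still returned at least one live hypothesis
# (it returns an empty selection once all candidates reach end_id).
finish_cond = pd.logical_not(pd.is_empty(x=selected_ids))
# The while-loop condition is the conjunction of the two, written
# into the existing `cond` variable in place.
pd.logical_and(x=length_cond, y=finish_cond, out=cond)
```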
@@ -290,9 +290,18 @@ def decode(context, is_sparse):
         current_score = pd.fc(input=current_state_with_lod,
                               size=target_dict_dim,
                               act='softmax')
-        topk_scores, topk_indices = pd.topk(current_score, k=topk_size)
+        topk_scores, topk_indices = pd.topk(current_score, k=beam_size)
+        # calculate accumulated scores after topk to reduce computation cost
+        accu_scores = pd.elementwise_add(
+            x=pd.log(topk_scores), y=pd.reshape(pre_score, shape=[-1]), axis=0)
         selected_ids, selected_scores = pd.beam_search(
-            pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0)
+            pre_ids,
+            pre_score,
+            topk_indices,
+            accu_scores,
+            beam_size,
+            end_id=10,
+            level=0)
         pd.increment(x=counter, value=1, in_place=True)
@@ -301,10 +310,14 @@ def decode(context, is_sparse):
         pd.array_write(selected_ids, array=ids_array, i=counter)
         pd.array_write(selected_scores, array=scores_array, i=counter)
-        pd.less_than(x=counter, y=array_len, cond=cond)
+        # update the break condition: up to the max length or all candidates of
+        # source sentences have ended.
+        length_cond = pd.less_than(x=counter, y=array_len)
+        finish_cond = pd.logical_not(pd.is_empty(x=selected_ids))
+        pd.logical_and(x=length_cond, y=finish_cond, out=cond)
     translation_ids, translation_scores = pd.beam_search_decode(
-        ids=ids_array, scores=scores_array)
+        ids=ids_array, scores=scores_array, beam_size=beam_size, end_id=10)
     return translation_ids, translation_scores
 ```
......
@@ -201,7 +201,6 @@ decoder_size = hidden_dim
 ```python
 def encoder(is_sparse):
-    # encoder
     src_word_id = pd.data(
         name="src_word_id", shape=[1], dtype='int64', lod_level=1)
     src_embedding = pd.embedding(
@@ -221,7 +220,6 @@ decoder_size = hidden_dim
 ```python
 def train_decoder(context, is_sparse):
-    # decoder
     trg_language_word = pd.data(
         name="target_language_word", shape=[1], dtype='int64', lod_level=1)
     trg_embedding = pd.embedding(
@@ -297,9 +295,18 @@ def decode(context, is_sparse):
         current_score = pd.fc(input=current_state_with_lod,
                               size=target_dict_dim,
                               act='softmax')
-        topk_scores, topk_indices = pd.topk(current_score, k=topk_size)
+        topk_scores, topk_indices = pd.topk(current_score, k=beam_size)
+        # calculate accumulated scores after topk to reduce computation cost
+        accu_scores = pd.elementwise_add(
+            x=pd.log(topk_scores), y=pd.reshape(pre_score, shape=[-1]), axis=0)
         selected_ids, selected_scores = pd.beam_search(
-            pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0)
+            pre_ids,
+            pre_score,
+            topk_indices,
+            accu_scores,
+            beam_size,
+            end_id=10,
+            level=0)
         pd.increment(x=counter, value=1, in_place=True)
@@ -308,10 +315,14 @@ def decode(context, is_sparse):
         pd.array_write(selected_ids, array=ids_array, i=counter)
         pd.array_write(selected_scores, array=scores_array, i=counter)
-        pd.less_than(x=counter, y=array_len, cond=cond)
+        # update the break condition: up to the max length or all candidates of
+        # source sentences have ended.
+        length_cond = pd.less_than(x=counter, y=array_len)
+        finish_cond = pd.logical_not(pd.is_empty(x=selected_ids))
+        pd.logical_and(x=length_cond, y=finish_cond, out=cond)
     translation_ids, translation_scores = pd.beam_search_decode(
-        ids=ids_array, scores=scores_array)
+        ids=ids_array, scores=scores_array, beam_size=beam_size, end_id=10)
     return translation_ids, translation_scores
 ```
......
@@ -238,7 +238,6 @@ Then we implement encoder as follows:
 ```python
 def encoder(is_sparse):
-    # encoder
     src_word_id = pd.data(
         name="src_word_id", shape=[1], dtype='int64', lod_level=1)
     src_embedding = pd.embedding(
@@ -258,7 +257,6 @@ Implement the decoder for training as follows:
 ```python
 def train_decoder(context, is_sparse):
-    # decoder
     trg_language_word = pd.data(
         name="target_language_word", shape=[1], dtype='int64', lod_level=1)
     trg_embedding = pd.embedding(
@@ -334,9 +332,18 @@ def decode(context, is_sparse):
         current_score = pd.fc(input=current_state_with_lod,
                               size=target_dict_dim,
                               act='softmax')
-        topk_scores, topk_indices = pd.topk(current_score, k=topk_size)
+        topk_scores, topk_indices = pd.topk(current_score, k=beam_size)
+        # calculate accumulated scores after topk to reduce computation cost
+        accu_scores = pd.elementwise_add(
+            x=pd.log(topk_scores), y=pd.reshape(pre_score, shape=[-1]), axis=0)
         selected_ids, selected_scores = pd.beam_search(
-            pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0)
+            pre_ids,
+            pre_score,
+            topk_indices,
+            accu_scores,
+            beam_size,
+            end_id=10,
+            level=0)
         pd.increment(x=counter, value=1, in_place=True)
@@ -345,10 +352,14 @@ def decode(context, is_sparse):
         pd.array_write(selected_ids, array=ids_array, i=counter)
         pd.array_write(selected_scores, array=scores_array, i=counter)
-        pd.less_than(x=counter, y=array_len, cond=cond)
+        # update the break condition: up to the max length or all candidates of
+        # source sentences have ended.
+        length_cond = pd.less_than(x=counter, y=array_len)
+        finish_cond = pd.logical_not(pd.is_empty(x=selected_ids))
+        pd.logical_and(x=length_cond, y=finish_cond, out=cond)
     translation_ids, translation_scores = pd.beam_search_decode(
-        ids=ids_array, scores=scores_array)
+        ids=ids_array, scores=scores_array, beam_size=beam_size, end_id=10)
     return translation_ids, translation_scores
 ```
......
@@ -97,9 +97,18 @@ def decode(context):
         # use score to do beam search
         current_score = pd.fc(
             input=current_state_with_lod, size=target_dict_dim, act='softmax')
-        topk_scores, topk_indices = pd.topk(current_score, k=topk_size)
+        topk_scores, topk_indices = pd.topk(current_score, k=beam_size)
+        # calculate accumulated scores after topk to reduce computation cost
+        accu_scores = pd.elementwise_add(
+            x=pd.log(topk_scores), y=pd.reshape(pre_score, shape=[-1]), axis=0)
         selected_ids, selected_scores = pd.beam_search(
-            pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0)
+            pre_ids,
+            pre_score,
+            topk_indices,
+            accu_scores,
+            beam_size,
+            end_id=10,
+            level=0)
         with pd.Switch() as switch:
             with switch.case(pd.is_empty(selected_ids)):
@@ -113,10 +122,14 @@ def decode(context):
             pd.array_write(selected_ids, array=ids_array, i=counter)
             pd.array_write(selected_scores, array=scores_array, i=counter)
-            pd.less_than(x=counter, y=array_len, cond=cond)
+            # update the break condition: up to the max length or all candidates of
+            # source sentences have ended.
+            length_cond = pd.less_than(x=counter, y=array_len)
+            finish_cond = pd.logical_not(pd.is_empty(x=selected_ids))
+            pd.logical_and(x=length_cond, y=finish_cond, out=cond)
     translation_ids, translation_scores = pd.beam_search_decode(
-        ids=ids_array, scores=scores_array)
+        ids=ids_array, scores=scores_array, beam_size=beam_size, end_id=10)
     return translation_ids, translation_scores
......
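Correspondingly, `pd.beam_search_decode` now receives `beam_size` and `end_id` explicitly, presumably so the backtracking over the stored ids and scores knows the beam width and the end-of-sentence token. The updated call, as it appears in each file:

```python
translation_ids, translation_scores = pd.beam_search_decode(
    ids=ids_array, scores=scores_array, beam_size=beam_size, end_id=10)
```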