From 801e400d062ae3e77e4a01136eff7521ed0d1230 Mon Sep 17 00:00:00 2001
From: Nicky
Date: Wed, 27 Jun 2018 17:47:53 -0700
Subject: [PATCH] update beam search API in machine translation book example

---
 08.machine_translation/README.cn.md  | 21 +++++++++++++++++----
 08.machine_translation/README.md     | 21 +++++++++++++++++----
 08.machine_translation/index.cn.html | 23 +++++++++++++++++------
 08.machine_translation/index.html    | 23 +++++++++++++++++------
 08.machine_translation/infer.py      | 21 +++++++++++++++++----
 5 files changed, 85 insertions(+), 24 deletions(-)

diff --git a/08.machine_translation/README.cn.md b/08.machine_translation/README.cn.md
index c4c2a4c..014c9d7 100644
--- a/08.machine_translation/README.cn.md
+++ b/08.machine_translation/README.cn.md
@@ -253,9 +253,18 @@ def decode(context, is_sparse):
         current_score = pd.fc(input=current_state_with_lod,
                               size=target_dict_dim,
                               act='softmax')
-        topk_scores, topk_indices = pd.topk(current_score, k=topk_size)
+        topk_scores, topk_indices = pd.topk(current_score, k=beam_size)
+        # calculate accumulated scores after topk to reduce computation cost
+        accu_scores = pd.elementwise_add(
+            x=pd.log(topk_scores), y=pd.reshape(pre_score, shape=[-1]), axis=0)
         selected_ids, selected_scores = pd.beam_search(
-            pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0)
+            pre_ids,
+            pre_score,
+            topk_indices,
+            accu_scores,
+            beam_size,
+            end_id=10,
+            level=0)

         pd.increment(x=counter, value=1, in_place=True)

@@ -264,10 +273,14 @@ def decode(context, is_sparse):
         pd.array_write(selected_ids, array=ids_array, i=counter)
         pd.array_write(selected_scores, array=scores_array, i=counter)

-        pd.less_than(x=counter, y=array_len, cond=cond)
+        # update the break condition: up to the max length or all candidates of
+        # source sentences have ended.
+        length_cond = pd.less_than(x=counter, y=array_len)
+        finish_cond = pd.logical_not(pd.is_empty(x=selected_ids))
+        pd.logical_and(x=length_cond, y=finish_cond, out=cond)

     translation_ids, translation_scores = pd.beam_search_decode(
-        ids=ids_array, scores=scores_array)
+        ids=ids_array, scores=scores_array, beam_size=beam_size, end_id=10)

     return translation_ids, translation_scores
 ```
diff --git a/08.machine_translation/README.md b/08.machine_translation/README.md
index 1c4ed59..6cc7232 100644
--- a/08.machine_translation/README.md
+++ b/08.machine_translation/README.md
@@ -290,9 +290,18 @@ def decode(context, is_sparse):
         current_score = pd.fc(input=current_state_with_lod,
                               size=target_dict_dim,
                               act='softmax')
-        topk_scores, topk_indices = pd.topk(current_score, k=topk_size)
+        topk_scores, topk_indices = pd.topk(current_score, k=beam_size)
+        # calculate accumulated scores after topk to reduce computation cost
+        accu_scores = pd.elementwise_add(
+            x=pd.log(topk_scores), y=pd.reshape(pre_score, shape=[-1]), axis=0)
         selected_ids, selected_scores = pd.beam_search(
-            pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0)
+            pre_ids,
+            pre_score,
+            topk_indices,
+            accu_scores,
+            beam_size,
+            end_id=10,
+            level=0)

         pd.increment(x=counter, value=1, in_place=True)

@@ -301,10 +310,14 @@ def decode(context, is_sparse):
         pd.array_write(selected_ids, array=ids_array, i=counter)
         pd.array_write(selected_scores, array=scores_array, i=counter)

-        pd.less_than(x=counter, y=array_len, cond=cond)
+        # update the break condition: up to the max length or all candidates of
+        # source sentences have ended.
+        length_cond = pd.less_than(x=counter, y=array_len)
+        finish_cond = pd.logical_not(pd.is_empty(x=selected_ids))
+        pd.logical_and(x=length_cond, y=finish_cond, out=cond)

     translation_ids, translation_scores = pd.beam_search_decode(
-        ids=ids_array, scores=scores_array)
+        ids=ids_array, scores=scores_array, beam_size=beam_size, end_id=10)

     return translation_ids, translation_scores
 ```
diff --git a/08.machine_translation/index.cn.html b/08.machine_translation/index.cn.html
index 838181a..0bf14c0 100644
--- a/08.machine_translation/index.cn.html
+++ b/08.machine_translation/index.cn.html
@@ -201,7 +201,6 @@ decoder_size = hidden_dim

 ```python
 def encoder(is_sparse):
-    # encoder
     src_word_id = pd.data(
         name="src_word_id", shape=[1], dtype='int64', lod_level=1)
     src_embedding = pd.embedding(
@@ -221,7 +220,6 @@ decoder_size = hidden_dim

 ```python
 def train_decoder(context, is_sparse):
-    # decoder
     trg_language_word = pd.data(
         name="target_language_word", shape=[1], dtype='int64', lod_level=1)
     trg_embedding = pd.embedding(
@@ -297,9 +295,18 @@ def decode(context, is_sparse):
         current_score = pd.fc(input=current_state_with_lod,
                               size=target_dict_dim,
                               act='softmax')
-        topk_scores, topk_indices = pd.topk(current_score, k=topk_size)
+        topk_scores, topk_indices = pd.topk(current_score, k=beam_size)
+        # calculate accumulated scores after topk to reduce computation cost
+        accu_scores = pd.elementwise_add(
+            x=pd.log(topk_scores), y=pd.reshape(pre_score, shape=[-1]), axis=0)
         selected_ids, selected_scores = pd.beam_search(
-            pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0)
+            pre_ids,
+            pre_score,
+            topk_indices,
+            accu_scores,
+            beam_size,
+            end_id=10,
+            level=0)

         pd.increment(x=counter, value=1, in_place=True)

@@ -308,10 +315,14 @@ def decode(context, is_sparse):
         pd.array_write(selected_ids, array=ids_array, i=counter)
         pd.array_write(selected_scores, array=scores_array, i=counter)

-        pd.less_than(x=counter, y=array_len, cond=cond)
+        # update the break condition: up to the max length or all candidates of
+        # source sentences have ended.
+        length_cond = pd.less_than(x=counter, y=array_len)
+        finish_cond = pd.logical_not(pd.is_empty(x=selected_ids))
+        pd.logical_and(x=length_cond, y=finish_cond, out=cond)

     translation_ids, translation_scores = pd.beam_search_decode(
-        ids=ids_array, scores=scores_array)
+        ids=ids_array, scores=scores_array, beam_size=beam_size, end_id=10)

     return translation_ids, translation_scores
 ```
diff --git a/08.machine_translation/index.html b/08.machine_translation/index.html
index 1ad35ac..a7c2f31 100644
--- a/08.machine_translation/index.html
+++ b/08.machine_translation/index.html
@@ -238,7 +238,6 @@ Then we implement encoder as follows:

 ```python
 def encoder(is_sparse):
-    # encoder
     src_word_id = pd.data(
         name="src_word_id", shape=[1], dtype='int64', lod_level=1)
     src_embedding = pd.embedding(
@@ -258,7 +257,6 @@ Implement the decoder for training as follows:

 ```python
 def train_decoder(context, is_sparse):
-    # decoder
     trg_language_word = pd.data(
         name="target_language_word", shape=[1], dtype='int64', lod_level=1)
     trg_embedding = pd.embedding(
@@ -334,9 +332,18 @@ def decode(context, is_sparse):
         current_score = pd.fc(input=current_state_with_lod,
                               size=target_dict_dim,
                               act='softmax')
-        topk_scores, topk_indices = pd.topk(current_score, k=topk_size)
+        topk_scores, topk_indices = pd.topk(current_score, k=beam_size)
+        # calculate accumulated scores after topk to reduce computation cost
+        accu_scores = pd.elementwise_add(
+            x=pd.log(topk_scores), y=pd.reshape(pre_score, shape=[-1]), axis=0)
         selected_ids, selected_scores = pd.beam_search(
-            pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0)
+            pre_ids,
+            pre_score,
+            topk_indices,
+            accu_scores,
+            beam_size,
+            end_id=10,
+            level=0)

         pd.increment(x=counter, value=1, in_place=True)

@@ -345,10 +352,14 @@ def decode(context, is_sparse):
         pd.array_write(selected_ids, array=ids_array, i=counter)
         pd.array_write(selected_scores, array=scores_array, i=counter)

-        pd.less_than(x=counter, y=array_len, cond=cond)
+        # update the break condition: up to the max length or all candidates of
+        # source sentences have ended.
+        length_cond = pd.less_than(x=counter, y=array_len)
+        finish_cond = pd.logical_not(pd.is_empty(x=selected_ids))
+        pd.logical_and(x=length_cond, y=finish_cond, out=cond)

     translation_ids, translation_scores = pd.beam_search_decode(
-        ids=ids_array, scores=scores_array)
+        ids=ids_array, scores=scores_array, beam_size=beam_size, end_id=10)

     return translation_ids, translation_scores
 ```
diff --git a/08.machine_translation/infer.py b/08.machine_translation/infer.py
index 263bb73..9f9172e 100644
--- a/08.machine_translation/infer.py
+++ b/08.machine_translation/infer.py
@@ -97,9 +97,18 @@ def decode(context):
         # use score to do beam search
         current_score = pd.fc(
             input=current_state_with_lod, size=target_dict_dim, act='softmax')
-        topk_scores, topk_indices = pd.topk(current_score, k=topk_size)
+        topk_scores, topk_indices = pd.topk(current_score, k=beam_size)
+        # calculate accumulated scores after topk to reduce computation cost
+        accu_scores = pd.elementwise_add(
+            x=pd.log(topk_scores), y=pd.reshape(pre_score, shape=[-1]), axis=0)
         selected_ids, selected_scores = pd.beam_search(
-            pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0)
+            pre_ids,
+            pre_score,
+            topk_indices,
+            accu_scores,
+            beam_size,
+            end_id=10,
+            level=0)

         with pd.Switch() as switch:
             with switch.case(pd.is_empty(selected_ids)):
@@ -113,10 +122,14 @@ def decode(context):
         pd.array_write(selected_ids, array=ids_array, i=counter)
         pd.array_write(selected_scores, array=scores_array, i=counter)

-        pd.less_than(x=counter, y=array_len, cond=cond)
+        # update the break condition: up to the max length or all candidates of
+        # source sentences have ended.
+        length_cond = pd.less_than(x=counter, y=array_len)
+        finish_cond = pd.logical_not(pd.is_empty(x=selected_ids))
+        pd.logical_and(x=length_cond, y=finish_cond, out=cond)

     translation_ids, translation_scores = pd.beam_search_decode(
-        ids=ids_array, scores=scores_array)
+        ids=ids_array, scores=scores_array, beam_size=beam_size, end_id=10)

     return translation_ids, translation_scores

--
GitLab