diff --git a/08.machine_translation/README.cn.md b/08.machine_translation/README.cn.md index c4c2a4ca8350f379291156df8c6a2b2ca5ffc39b..014c9d79580bff9198bf26a0d38e03ac637dd84e 100644 --- a/08.machine_translation/README.cn.md +++ b/08.machine_translation/README.cn.md @@ -253,9 +253,18 @@ def decode(context, is_sparse): current_score = pd.fc(input=current_state_with_lod, size=target_dict_dim, act='softmax') - topk_scores, topk_indices = pd.topk(current_score, k=topk_size) + topk_scores, topk_indices = pd.topk(current_score, k=beam_size) + # calculate accumulated scores after topk to reduce computation cost + accu_scores = pd.elementwise_add( + x=pd.log(topk_scores), y=pd.reshape(pre_score, shape=[-1]), axis=0) selected_ids, selected_scores = pd.beam_search( - pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0) + pre_ids, + pre_score, + topk_indices, + accu_scores, + beam_size, + end_id=10, + level=0) pd.increment(x=counter, value=1, in_place=True) @@ -264,10 +273,14 @@ def decode(context, is_sparse): pd.array_write(selected_ids, array=ids_array, i=counter) pd.array_write(selected_scores, array=scores_array, i=counter) - pd.less_than(x=counter, y=array_len, cond=cond) + # update the break condition: up to the max length or all candidates of + # source sentences have ended. + length_cond = pd.less_than(x=counter, y=array_len) + finish_cond = pd.logical_not(pd.is_empty(x=selected_ids)) + pd.logical_and(x=length_cond, y=finish_cond, out=cond) translation_ids, translation_scores = pd.beam_search_decode( - ids=ids_array, scores=scores_array) + ids=ids_array, scores=scores_array, beam_size=beam_size, end_id=10) return translation_ids, translation_scores ``` diff --git a/08.machine_translation/README.md b/08.machine_translation/README.md index 1c4ed599908ecf8fa2af6172797c9a8b16a57678..6cc7232454fad05b9fa61a3166a105b5b0aaaf01 100644 --- a/08.machine_translation/README.md +++ b/08.machine_translation/README.md @@ -290,9 +290,18 @@ def decode(context, is_sparse): current_score = pd.fc(input=current_state_with_lod, size=target_dict_dim, act='softmax') - topk_scores, topk_indices = pd.topk(current_score, k=topk_size) + topk_scores, topk_indices = pd.topk(current_score, k=beam_size) + # calculate accumulated scores after topk to reduce computation cost + accu_scores = pd.elementwise_add( + x=pd.log(topk_scores), y=pd.reshape(pre_score, shape=[-1]), axis=0) selected_ids, selected_scores = pd.beam_search( - pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0) + pre_ids, + pre_score, + topk_indices, + accu_scores, + beam_size, + end_id=10, + level=0) pd.increment(x=counter, value=1, in_place=True) @@ -301,10 +310,14 @@ def decode(context, is_sparse): pd.array_write(selected_ids, array=ids_array, i=counter) pd.array_write(selected_scores, array=scores_array, i=counter) - pd.less_than(x=counter, y=array_len, cond=cond) + # update the break condition: up to the max length or all candidates of + # source sentences have ended. + length_cond = pd.less_than(x=counter, y=array_len) + finish_cond = pd.logical_not(pd.is_empty(x=selected_ids)) + pd.logical_and(x=length_cond, y=finish_cond, out=cond) translation_ids, translation_scores = pd.beam_search_decode( - ids=ids_array, scores=scores_array) + ids=ids_array, scores=scores_array, beam_size=beam_size, end_id=10) return translation_ids, translation_scores ``` diff --git a/08.machine_translation/index.cn.html b/08.machine_translation/index.cn.html index 838181a25321e8f0a74d7d611cccda50be4ba231..0bf14c08b4ef76f99c60ed2662fe81386a0ff254 100644 --- a/08.machine_translation/index.cn.html +++ b/08.machine_translation/index.cn.html @@ -201,7 +201,6 @@ decoder_size = hidden_dim ```python def encoder(is_sparse): - # encoder src_word_id = pd.data( name="src_word_id", shape=[1], dtype='int64', lod_level=1) src_embedding = pd.embedding( @@ -221,7 +220,6 @@ decoder_size = hidden_dim ```python def train_decoder(context, is_sparse): - # decoder trg_language_word = pd.data( name="target_language_word", shape=[1], dtype='int64', lod_level=1) trg_embedding = pd.embedding( @@ -297,9 +295,18 @@ def decode(context, is_sparse): current_score = pd.fc(input=current_state_with_lod, size=target_dict_dim, act='softmax') - topk_scores, topk_indices = pd.topk(current_score, k=topk_size) + topk_scores, topk_indices = pd.topk(current_score, k=beam_size) + # calculate accumulated scores after topk to reduce computation cost + accu_scores = pd.elementwise_add( + x=pd.log(topk_scores), y=pd.reshape(pre_score, shape=[-1]), axis=0) selected_ids, selected_scores = pd.beam_search( - pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0) + pre_ids, + pre_score, + topk_indices, + accu_scores, + beam_size, + end_id=10, + level=0) pd.increment(x=counter, value=1, in_place=True) @@ -308,10 +315,14 @@ def decode(context, is_sparse): pd.array_write(selected_ids, array=ids_array, i=counter) pd.array_write(selected_scores, array=scores_array, i=counter) - pd.less_than(x=counter, y=array_len, cond=cond) + # update the break condition: up to the max length or all candidates of + # source sentences have ended. + length_cond = pd.less_than(x=counter, y=array_len) + finish_cond = pd.logical_not(pd.is_empty(x=selected_ids)) + pd.logical_and(x=length_cond, y=finish_cond, out=cond) translation_ids, translation_scores = pd.beam_search_decode( - ids=ids_array, scores=scores_array) + ids=ids_array, scores=scores_array, beam_size=beam_size, end_id=10) return translation_ids, translation_scores ``` diff --git a/08.machine_translation/index.html b/08.machine_translation/index.html index 1ad35ac646a5db75c7bc41e7305bf19809e2dd27..a7c2f31b0bdacd5cebe0c25e6cf0ca5c33fefe11 100644 --- a/08.machine_translation/index.html +++ b/08.machine_translation/index.html @@ -238,7 +238,6 @@ Then we implement encoder as follows: ```python def encoder(is_sparse): - # encoder src_word_id = pd.data( name="src_word_id", shape=[1], dtype='int64', lod_level=1) src_embedding = pd.embedding( @@ -258,7 +257,6 @@ Implement the decoder for training as follows: ```python def train_decoder(context, is_sparse): - # decoder trg_language_word = pd.data( name="target_language_word", shape=[1], dtype='int64', lod_level=1) trg_embedding = pd.embedding( @@ -334,9 +332,18 @@ def decode(context, is_sparse): current_score = pd.fc(input=current_state_with_lod, size=target_dict_dim, act='softmax') - topk_scores, topk_indices = pd.topk(current_score, k=topk_size) + topk_scores, topk_indices = pd.topk(current_score, k=beam_size) + # calculate accumulated scores after topk to reduce computation cost + accu_scores = pd.elementwise_add( + x=pd.log(topk_scores), y=pd.reshape(pre_score, shape=[-1]), axis=0) selected_ids, selected_scores = pd.beam_search( - pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0) + pre_ids, + pre_score, + topk_indices, + accu_scores, + beam_size, + end_id=10, + level=0) pd.increment(x=counter, value=1, in_place=True) @@ -345,10 +352,14 @@ def decode(context, is_sparse): pd.array_write(selected_ids, array=ids_array, i=counter) pd.array_write(selected_scores, array=scores_array, i=counter) - pd.less_than(x=counter, y=array_len, cond=cond) + # update the break condition: up to the max length or all candidates of + # source sentences have ended. + length_cond = pd.less_than(x=counter, y=array_len) + finish_cond = pd.logical_not(pd.is_empty(x=selected_ids)) + pd.logical_and(x=length_cond, y=finish_cond, out=cond) translation_ids, translation_scores = pd.beam_search_decode( - ids=ids_array, scores=scores_array) + ids=ids_array, scores=scores_array, beam_size=beam_size, end_id=10) return translation_ids, translation_scores ``` diff --git a/08.machine_translation/infer.py b/08.machine_translation/infer.py index 263bb730b0c6128c4378e7cdd779ae7765076414..9f9172e9bb4fe9cf25d50cb05c1fab6bc0715b9d 100644 --- a/08.machine_translation/infer.py +++ b/08.machine_translation/infer.py @@ -97,9 +97,18 @@ def decode(context): # use score to do beam search current_score = pd.fc( input=current_state_with_lod, size=target_dict_dim, act='softmax') - topk_scores, topk_indices = pd.topk(current_score, k=topk_size) + topk_scores, topk_indices = pd.topk(current_score, k=beam_size) + # calculate accumulated scores after topk to reduce computation cost + accu_scores = pd.elementwise_add( + x=pd.log(topk_scores), y=pd.reshape(pre_score, shape=[-1]), axis=0) selected_ids, selected_scores = pd.beam_search( - pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0) + pre_ids, + pre_score, + topk_indices, + accu_scores, + beam_size, + end_id=10, + level=0) with pd.Switch() as switch: with switch.case(pd.is_empty(selected_ids)): @@ -113,10 +122,14 @@ def decode(context): pd.array_write(selected_ids, array=ids_array, i=counter) pd.array_write(selected_scores, array=scores_array, i=counter) - pd.less_than(x=counter, y=array_len, cond=cond) + # update the break condition: up to the max length or all candidates of + # source sentences have ended. + length_cond = pd.less_than(x=counter, y=array_len) + finish_cond = pd.logical_not(pd.is_empty(x=selected_ids)) + pd.logical_and(x=length_cond, y=finish_cond, out=cond) translation_ids, translation_scores = pd.beam_search_decode( - ids=ids_array, scores=scores_array) + ids=ids_array, scores=scores_array, beam_size=beam_size, end_id=10) return translation_ids, translation_scores