Unverified commit ef10dd94, authored by Nicky Chan, committed by GitHub

Merge pull request #559 from nickyfantasy/updateBeamSearch

update beam search API in machine translation book example
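In summary, the change migrates the example to the newer `pd.beam_search` API: candidate scores are now log-probabilities accumulated *after* top-k pruning, and the previous step's scores (`pre_score`) are passed through explicitly. A minimal sketch of the updated step, assembled from the hunks below and assuming `pd` aliases `paddle.fluid.layers` and that `pre_ids`, `pre_score`, `current_state_with_lod`, `target_dict_dim`, and `beam_size` are defined earlier in `decode` as in the book example:

```python
# Softmax over the target vocabulary for each live hypothesis.
current_score = pd.fc(input=current_state_with_lod,
                      size=target_dict_dim,
                      act='softmax')

# Prune to the beam_size best next words per hypothesis first...
topk_scores, topk_indices = pd.topk(current_score, k=beam_size)

# ...then accumulate log-probabilities only for the survivors, which is
# cheaper than accumulating over the whole vocabulary.
accu_scores = pd.elementwise_add(
    x=pd.log(topk_scores), y=pd.reshape(pre_score, shape=[-1]), axis=0)

# The updated API takes the previous step's scores and the accumulated
# scores as separate arguments; end_id=10 marks the end-of-sentence token.
selected_ids, selected_scores = pd.beam_search(
    pre_ids,
    pre_score,
    topk_indices,
    accu_scores,
    beam_size,
    end_id=10,
    level=0)
```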
@@ -253,9 +253,18 @@ def decode(context, is_sparse):
         current_score = pd.fc(input=current_state_with_lod,
                               size=target_dict_dim,
                               act='softmax')
-        topk_scores, topk_indices = pd.topk(current_score, k=topk_size)
+        topk_scores, topk_indices = pd.topk(current_score, k=beam_size)
+        # calculate accumulated scores after topk to reduce computation cost
+        accu_scores = pd.elementwise_add(
+            x=pd.log(topk_scores), y=pd.reshape(pre_score, shape=[-1]), axis=0)
         selected_ids, selected_scores = pd.beam_search(
-            pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0)
+            pre_ids,
+            pre_score,
+            topk_indices,
+            accu_scores,
+            beam_size,
+            end_id=10,
+            level=0)
         pd.increment(x=counter, value=1, in_place=True)
@@ -264,10 +273,14 @@ def decode(context, is_sparse):
         pd.array_write(selected_ids, array=ids_array, i=counter)
         pd.array_write(selected_scores, array=scores_array, i=counter)
-        pd.less_than(x=counter, y=array_len, cond=cond)
+        # update the break condition: up to the max length or all candidates of
+        # source sentences have ended.
+        length_cond = pd.less_than(x=counter, y=array_len)
+        finish_cond = pd.logical_not(pd.is_empty(x=selected_ids))
+        pd.logical_and(x=length_cond, y=finish_cond, out=cond)
     translation_ids, translation_scores = pd.beam_search_decode(
-        ids=ids_array, scores=scores_array)
+        ids=ids_array, scores=scores_array, beam_size=beam_size, end_id=10)
     return translation_ids, translation_scores
 ```
......
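The loop-exit condition is also strengthened in each file: besides the maximum-length check, decoding now stops once `selected_ids` comes back empty, i.e. once every candidate has emitted the end token. A sketch of the replacement logic, under the same assumptions as above:

```python
# Keep decoding while the step counter is below the length limit...
length_cond = pd.less_than(x=counter, y=array_len)
# ...and beam_search still returned at least one live hypothesis
# (it returns an empty selection once all candidates reach end_id).
finish_cond = pd.logical_not(pd.is_empty(x=selected_ids))
# The while-loop condition is the conjunction of the two, written
# into the existing `cond` variable in place.
pd.logical_and(x=length_cond, y=finish_cond, out=cond)
```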
@@ -290,9 +290,18 @@ def decode(context, is_sparse):
         current_score = pd.fc(input=current_state_with_lod,
                               size=target_dict_dim,
                               act='softmax')
-        topk_scores, topk_indices = pd.topk(current_score, k=topk_size)
+        topk_scores, topk_indices = pd.topk(current_score, k=beam_size)
+        # calculate accumulated scores after topk to reduce computation cost
+        accu_scores = pd.elementwise_add(
+            x=pd.log(topk_scores), y=pd.reshape(pre_score, shape=[-1]), axis=0)
         selected_ids, selected_scores = pd.beam_search(
-            pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0)
+            pre_ids,
+            pre_score,
+            topk_indices,
+            accu_scores,
+            beam_size,
+            end_id=10,
+            level=0)
         pd.increment(x=counter, value=1, in_place=True)
@@ -301,10 +310,14 @@ def decode(context, is_sparse):
         pd.array_write(selected_ids, array=ids_array, i=counter)
         pd.array_write(selected_scores, array=scores_array, i=counter)
-        pd.less_than(x=counter, y=array_len, cond=cond)
+        # update the break condition: up to the max length or all candidates of
+        # source sentences have ended.
+        length_cond = pd.less_than(x=counter, y=array_len)
+        finish_cond = pd.logical_not(pd.is_empty(x=selected_ids))
+        pd.logical_and(x=length_cond, y=finish_cond, out=cond)
     translation_ids, translation_scores = pd.beam_search_decode(
-        ids=ids_array, scores=scores_array)
+        ids=ids_array, scores=scores_array, beam_size=beam_size, end_id=10)
     return translation_ids, translation_scores
 ```
......
@@ -201,7 +201,6 @@ decoder_size = hidden_dim
 ```python
 def encoder(is_sparse):
-    # encoder
     src_word_id = pd.data(
         name="src_word_id", shape=[1], dtype='int64', lod_level=1)
     src_embedding = pd.embedding(
@@ -221,7 +220,6 @@ decoder_size = hidden_dim
 ```python
 def train_decoder(context, is_sparse):
-    # decoder
     trg_language_word = pd.data(
         name="target_language_word", shape=[1], dtype='int64', lod_level=1)
     trg_embedding = pd.embedding(
@@ -297,9 +295,18 @@ def decode(context, is_sparse):
         current_score = pd.fc(input=current_state_with_lod,
                               size=target_dict_dim,
                               act='softmax')
-        topk_scores, topk_indices = pd.topk(current_score, k=topk_size)
+        topk_scores, topk_indices = pd.topk(current_score, k=beam_size)
+        # calculate accumulated scores after topk to reduce computation cost
+        accu_scores = pd.elementwise_add(
+            x=pd.log(topk_scores), y=pd.reshape(pre_score, shape=[-1]), axis=0)
         selected_ids, selected_scores = pd.beam_search(
-            pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0)
+            pre_ids,
+            pre_score,
+            topk_indices,
+            accu_scores,
+            beam_size,
+            end_id=10,
+            level=0)
         pd.increment(x=counter, value=1, in_place=True)
@@ -308,10 +315,14 @@ def decode(context, is_sparse):
         pd.array_write(selected_ids, array=ids_array, i=counter)
         pd.array_write(selected_scores, array=scores_array, i=counter)
-        pd.less_than(x=counter, y=array_len, cond=cond)
+        # update the break condition: up to the max length or all candidates of
+        # source sentences have ended.
+        length_cond = pd.less_than(x=counter, y=array_len)
+        finish_cond = pd.logical_not(pd.is_empty(x=selected_ids))
+        pd.logical_and(x=length_cond, y=finish_cond, out=cond)
     translation_ids, translation_scores = pd.beam_search_decode(
-        ids=ids_array, scores=scores_array)
+        ids=ids_array, scores=scores_array, beam_size=beam_size, end_id=10)
     return translation_ids, translation_scores
 ```
......
@@ -238,7 +238,6 @@ Then we implement encoder as follows:
 ```python
 def encoder(is_sparse):
-    # encoder
     src_word_id = pd.data(
         name="src_word_id", shape=[1], dtype='int64', lod_level=1)
     src_embedding = pd.embedding(
@@ -258,7 +257,6 @@ Implement the decoder for training as follows:
 ```python
 def train_decoder(context, is_sparse):
-    # decoder
     trg_language_word = pd.data(
         name="target_language_word", shape=[1], dtype='int64', lod_level=1)
     trg_embedding = pd.embedding(
@@ -334,9 +332,18 @@ def decode(context, is_sparse):
         current_score = pd.fc(input=current_state_with_lod,
                               size=target_dict_dim,
                               act='softmax')
-        topk_scores, topk_indices = pd.topk(current_score, k=topk_size)
+        topk_scores, topk_indices = pd.topk(current_score, k=beam_size)
+        # calculate accumulated scores after topk to reduce computation cost
+        accu_scores = pd.elementwise_add(
+            x=pd.log(topk_scores), y=pd.reshape(pre_score, shape=[-1]), axis=0)
         selected_ids, selected_scores = pd.beam_search(
-            pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0)
+            pre_ids,
+            pre_score,
+            topk_indices,
+            accu_scores,
+            beam_size,
+            end_id=10,
+            level=0)
         pd.increment(x=counter, value=1, in_place=True)
@@ -345,10 +352,14 @@ def decode(context, is_sparse):
         pd.array_write(selected_ids, array=ids_array, i=counter)
         pd.array_write(selected_scores, array=scores_array, i=counter)
-        pd.less_than(x=counter, y=array_len, cond=cond)
+        # update the break condition: up to the max length or all candidates of
+        # source sentences have ended.
+        length_cond = pd.less_than(x=counter, y=array_len)
+        finish_cond = pd.logical_not(pd.is_empty(x=selected_ids))
+        pd.logical_and(x=length_cond, y=finish_cond, out=cond)
     translation_ids, translation_scores = pd.beam_search_decode(
-        ids=ids_array, scores=scores_array)
+        ids=ids_array, scores=scores_array, beam_size=beam_size, end_id=10)
     return translation_ids, translation_scores
 ```
......
@@ -97,9 +97,18 @@ def decode(context):
         # use score to do beam search
         current_score = pd.fc(
             input=current_state_with_lod, size=target_dict_dim, act='softmax')
-        topk_scores, topk_indices = pd.topk(current_score, k=topk_size)
+        topk_scores, topk_indices = pd.topk(current_score, k=beam_size)
+        # calculate accumulated scores after topk to reduce computation cost
+        accu_scores = pd.elementwise_add(
+            x=pd.log(topk_scores), y=pd.reshape(pre_score, shape=[-1]), axis=0)
         selected_ids, selected_scores = pd.beam_search(
-            pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0)
+            pre_ids,
+            pre_score,
+            topk_indices,
+            accu_scores,
+            beam_size,
+            end_id=10,
+            level=0)
         with pd.Switch() as switch:
             with switch.case(pd.is_empty(selected_ids)):
@@ -113,10 +122,14 @@ def decode(context):
             pd.array_write(selected_ids, array=ids_array, i=counter)
             pd.array_write(selected_scores, array=scores_array, i=counter)
-            pd.less_than(x=counter, y=array_len, cond=cond)
+            # update the break condition: up to the max length or all candidates of
+            # source sentences have ended.
+            length_cond = pd.less_than(x=counter, y=array_len)
+            finish_cond = pd.logical_not(pd.is_empty(x=selected_ids))
+            pd.logical_and(x=length_cond, y=finish_cond, out=cond)
     translation_ids, translation_scores = pd.beam_search_decode(
-        ids=ids_array, scores=scores_array)
+        ids=ids_array, scores=scores_array, beam_size=beam_size, end_id=10)
     return translation_ids, translation_scores
......
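Correspondingly, `pd.beam_search_decode` now receives `beam_size` and `end_id` explicitly, presumably so the backtracking over the stored ids and scores knows the beam width and the end-of-sentence token. The updated call, as it appears in each file:

```python
translation_ids, translation_scores = pd.beam_search_decode(
    ids=ids_array, scores=scores_array, beam_size=beam_size, end_id=10)
```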