From 54a47cd271156c4d302ff8421a9c23bb43aa9a96 Mon Sep 17 00:00:00 2001 From: Guo Sheng Date: Wed, 22 Apr 2020 23:40:22 +0800 Subject: [PATCH] Add tracks_own_finished to Decoder to avoid mismanagement of the finished state in dynamic_decode. (#23664) test=develop --- python/paddle/fluid/layers/rnn.py | 49 +++++++++++++++++++++++++++++-- 1 file changed, 46 insertions(+), 3 deletions(-) diff --git a/python/paddle/fluid/layers/rnn.py b/python/paddle/fluid/layers/rnn.py index 4c048355d2f..762e79e648b 100644 --- a/python/paddle/fluid/layers/rnn.py +++ b/python/paddle/fluid/layers/rnn.py @@ -601,6 +601,28 @@ class Decoder(object): """ raise NotImplementedError + @property + def tracks_own_finished(self): + """ + Describes whether the Decoder keeps track of finished states by itself. + + `decoder.step()` would emit a bool `finished` value at each decoding + step. The emited `finished` can be used to determine whether every + batch entries is finished directly, or it can be combined with the + finished tracker keeped in `dynamic_decode` by performing a logical OR + to take the already finished into account. + + If `False`, the latter would be took when performing `dynamic_decode`, + which is the default. Otherwise, the former would be took, which uses + the finished value emited by the decoder as all batch entry finished + status directly, and it is the case when batch entries might be + reordered such as beams in BeamSearchDecoder. + + Returns: + bool: A python bool `False`. + """ + return False + class BeamSearchDecoder(Decoder): """ @@ -1048,6 +1070,19 @@ class BeamSearchDecoder(Decoder): # TODO: use FinalBeamSearchDecoderOutput as output return predicted_ids, final_states + @property + def tracks_own_finished(self): + """ + BeamSearchDecoder reorders its beams and their finished state. Thus it + conflicts with `dynamic_decode` function's tracking of finished states. + Setting this property to true to avoid early stopping of decoding due + to mismanagement of the finished state. + + Returns: + bool: A python bool `True`. + """ + return True + def dynamic_decode(decoder, inits=None, @@ -1205,7 +1240,13 @@ def dynamic_decode(decoder, states_arrays) (outputs, next_states, next_inputs, next_finished) = decoder.step(step_idx, inputs, states, **kwargs) - next_finished = control_flow.logical_or(next_finished, global_finished) + if not decoder.tracks_own_finished: + # BeamSearchDecoder would track it own finished, since beams would + # be reordered and the finished status of each entry might change. + # Otherwise, perform logical OR which would not change the already + # finished. + next_finished = control_flow.logical_or(next_finished, + global_finished) next_sequence_lengths = nn.elementwise_add( sequence_lengths, tensor.cast( @@ -1226,6 +1267,10 @@ def dynamic_decode(decoder, lambda x, x_array: control_flow.array_write( x, i=step_idx, array=x_array), outputs, outputs_arrays) control_flow.increment(x=step_idx, value=1.0, in_place=True) + # update the global_finished first, since it might be also in states of + # decoder, which otherwise would write a stale finished status to array + tensor.assign(next_finished, global_finished) + tensor.assign(next_sequence_lengths, sequence_lengths) if is_test: map_structure(tensor.assign, next_inputs, global_inputs) map_structure(tensor.assign, next_states, global_states) @@ -1236,8 +1281,6 @@ def dynamic_decode(decoder, map_structure( lambda x, x_array: control_flow.array_write( x, i=step_idx, array=x_array), next_states, states_arrays) - tensor.assign(next_finished, global_finished) - tensor.assign(next_sequence_lengths, sequence_lengths) if max_step_num is not None: control_flow.logical_and( control_flow.logical_not(nn.reduce_all(global_finished)), -- GitLab