From 2e8425b693eb43141a7cac2d200de9d8ab832896 Mon Sep 17 00:00:00 2001
From: Jiaqi Liu <liujiaqi06@baidu.com>
Date: Wed, 6 Jan 2021 10:52:14 +0800
Subject: [PATCH] Fix beam search bug (#29824)

* fix beam search bug

* add dygraph unittest

* update dynamic_decode argument doc

* add warning info for state which has no lengths attribute
---
 python/paddle/fluid/layers/rnn.py             | 65 +++++++++++--------
 .../tests/unittests/test_rnn_decode_api.py    | 32 +++++----
 2 files changed, 58 insertions(+), 39 deletions(-)

diff --git a/python/paddle/fluid/layers/rnn.py b/python/paddle/fluid/layers/rnn.py
index 6e1f91a1f28..7f815e1c74d 100644
--- a/python/paddle/fluid/layers/rnn.py
+++ b/python/paddle/fluid/layers/rnn.py
@@ -16,6 +16,7 @@ from __future__ import print_function
 
 import sys
 from functools import partial, reduce
+import warnings
 
 import paddle
 from paddle.utils import deprecated
@@ -1378,14 +1379,21 @@ def _dynamic_decode_imperative(decoder,
             # To confirm states.finished/finished be consistent with
             # next_finished.
             tensor.assign(next_finished, finished)
-        next_sequence_lengths = nn.elementwise_add(
-            sequence_lengths,
-            tensor.cast(
-                control_flow.logical_not(finished), sequence_lengths.dtype))
-
-        if impute_finished:  # rectify the states for the finished.
-            next_states = map_structure(
-                lambda x, y: _maybe_copy(x, y, finished), states, next_states)
+            next_sequence_lengths = nn.elementwise_add(
+                sequence_lengths,
+                tensor.cast(
+                    control_flow.logical_not(finished), sequence_lengths.dtype))
+            if impute_finished:  # rectify the states for the finished.
+                next_states = map_structure(
+                    lambda x, y: _maybe_copy(x, y, finished), states,
+                    next_states)
+        else:
+            warnings.warn(
+                "`next_states` has no `lengths` attribute, the returned `sequence_lengths` would be all zeros."
+            ) if not hasattr(next_states, "lengths") else None
+            next_sequence_lengths = getattr(next_states, "lengths",
+                                            sequence_lengths)
+
         outputs = map_structure(
             lambda x: ArrayWrapper(x),
             step_outputs) if step_idx == 0 else map_structure(
@@ -1500,17 +1508,22 @@ def _dynamic_decode_declarative(decoder,
             # finished.
             next_finished = control_flow.logical_or(next_finished,
                                                     global_finished)
-        next_sequence_lengths = nn.elementwise_add(
-            sequence_lengths,
-            tensor.cast(
-                control_flow.logical_not(global_finished),
-                sequence_lengths.dtype))
-
-        if impute_finished:  # rectify the states for the finished.
-            next_states = map_structure(
-                lambda x, y: _maybe_copy(x, y, global_finished),
-                states,
-                next_states, )
+            next_sequence_lengths = nn.elementwise_add(
+                sequence_lengths,
+                tensor.cast(
+                    control_flow.logical_not(global_finished),
+                    sequence_lengths.dtype))
+            if impute_finished:  # rectify the states for the finished.
+                next_states = map_structure(
+                    lambda x, y: _maybe_copy(x, y, global_finished),
+                    states,
+                    next_states, )
+        else:
+            warnings.warn(
+                "`next_states` has no `lengths` attribute, the returned `sequence_lengths` would be all zeros."
+            ) if not hasattr(next_states, "lengths") else None
+            next_sequence_lengths = getattr(next_states, "lengths",
+                                            sequence_lengths)
 
         # create tensor array in global block after dtype[s] of outputs can be got
         outputs_arrays = map_structure(
@@ -1595,13 +1608,13 @@ def dynamic_decode(decoder,
             attr:`False`, the data layout would be batch major with shape
             `[batch_size, seq_len, ...]`.  If attr:`True`, the data layout would
             be time major with shape `[seq_len, batch_size, ...]`. Default: `False`.
-        impute_finished(bool, optional): If `True`, then states get copied through
-            for batch entries which are marked as finished, which differs with the
-            unfinished using the new states returned by :code:`decoder.step()` and
-            ensures that the final states have the correct values. Otherwise, states
-            wouldn't be copied through when finished. If the returned `final_states`
-            is needed, it should be set as True, which causes some slowdown.
-            Default `False`.
+        impute_finished(bool, optional): If `True` and `decoder.tracks_own_finished`
+            is False, then states get copied through for batch entries which are
+            marked as finished, which differs with the unfinished using the new states
+            returned by :code:`decoder.step()` and ensures that the final states have
+            the correct values. Otherwise, states wouldn't be copied through when
+            finished. If the returned `final_states` is needed, it should be set as
+            True, which causes some slowdown. Default `False`.
         is_test(bool, optional): A flag indicating whether to use test mode. In
             test mode, it is more memory saving. Default `False`.
         return_length(bool, optional):  A flag indicating whether to return an
diff --git a/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py b/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py
index da25bc8d1fb..a0009a71b3e 100644
--- a/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py
+++ b/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py
@@ -178,16 +178,14 @@ class Seq2SeqModel(object):
                  beam_size=4):
         self.start_token, self.end_token = start_token, end_token
         self.max_decoding_length, self.beam_size = max_decoding_length, beam_size
-        self.src_embeder = lambda x: fluid.embedding(
-            input=x,
-            size=[src_vocab_size, hidden_size],
-            dtype="float32",
-            param_attr=fluid.ParamAttr(name="source_embedding"))
-        self.trg_embeder = lambda x: fluid.embedding(
-            input=x,
-            size=[trg_vocab_size, hidden_size],
-            dtype="float32",
-            param_attr=fluid.ParamAttr(name="target_embedding"))
+        self.src_embeder = paddle.nn.Embedding(
+            src_vocab_size,
+            hidden_size,
+            weight_attr=fluid.ParamAttr(name="source_embedding"))
+        self.trg_embeder = paddle.nn.Embedding(
+            trg_vocab_size,
+            hidden_size,
+            weight_attr=fluid.ParamAttr(name="target_embedding"))
         self.encoder = Encoder(num_layers, hidden_size, dropout_prob)
         self.decoder = Decoder(num_layers, hidden_size, dropout_prob,
                                decoding_strategy, max_decoding_length)
@@ -195,7 +193,7 @@ class Seq2SeqModel(object):
             x,
             size=trg_vocab_size,
             num_flatten_dims=len(x.shape) - 1,
-            param_attr=fluid.ParamAttr(name="output_w"),
+            param_attr=fluid.ParamAttr(),
             bias_attr=False)
 
     def __call__(self, src, src_length, trg=None, trg_length=None):
@@ -556,6 +554,14 @@ class TestDynamicDecode(unittest.TestCase):
                 },
                 fetch_list=[output])[0]
 
+    def test_dynamic_basic_decoder(self):
+        paddle.disable_static()
+        src = paddle.to_tensor(np.random.randint(8, size=(8, 4)))
+        src_length = paddle.to_tensor(np.random.randint(8, size=(8)))
+        model = Seq2SeqModel(**self.model_hparams)
+        probs, samples, sample_length = model(src, src_length)
+        paddle.enable_static()
+
 
 class ModuleApiTest(unittest.TestCase):
     @classmethod
@@ -672,8 +678,8 @@ class TestBeamSearch(ModuleApiTest):
                    hidden_size,
                    bos_id=0,
                    eos_id=1,
-                   beam_size=2,
-                   max_step_num=2):
+                   beam_size=4,
+                   max_step_num=20):
         embedder = paddle.fluid.dygraph.Embedding(
             size=[vocab_size, embed_dim], dtype="float64")
         output_layer = nn.Linear(hidden_size, vocab_size)
-- 
GitLab