Clean text.py and decode.py for API 2.0 (#26853)

* Make dynamic_decode support dygraph and expose to API 2.0 test=develop
* update info about BeamSearchDecoder and dynamic_decode
* remove all APIs in paddle.text, expose BeamSearchDecoder and dynamic_decode
* update example code
* delete test_text.py, decode.py, update some doc, fix example code float64
* delete decode import from paddle.nn
* fix unittest bugs
* use dygraph.Embedding instead of nn.Embedding, add paddle.enable_static()
* update, correct doc
* move dynamic_decode, BeamSearchDecoder API to paddle.nn
* fix code style
* update unittest param, delete import of text.py
* set dtype of beamsearchtest float64
* update example code of BeamSearchDecoder, dynamic_decode

Co-authored-by: N LiuChiaChi <709153940@qq.com>

Clean text.py and decode.py for API 2.0 (#26853)
* Make dynamic_decode support dygraph and expose to API 2.0 test=develop
* update info about BeamSearchDecoder and dynamic_decode
* remove all APIs in paddle.text, expose BeamSearchDecoder and dynamic_decode
* update example code
* delete test_text.py, decode.py, update some doc, fix example code float64
* delete decode import from paddle.nn
* fix unittest bugs
* use dygraph.Embedding instead of nn.Embedding, add paddle.enable_static()
* update, correct doc
* move dynamic_decode, BeamSearchDecoder API to paddle.nn
* fix code style
* update unittest param, delete import of text.py
* set dtype of beamsearchtest float64
* update example code of BeamSearchDecoder, dynamic_decode

Co-authored-by: N LiuChiaChi <709153940@qq.com>
01335815 · Guo Sheng · GitHub · 832458d5 · 01335815 · 01335815
8 changed files
--- a/python/paddle/fluid/layers/rnn.py
+++ b/python/paddle/fluid/layers/rnn.py
@@ -17,6 +17,7 @@ from __future__ import print_function
 import sys
 from functools import partial, reduce

+import paddle
 from . import nn
 from . import tensor
 from . import control_flow
@@ -507,6 +508,9 @@ class ArrayWrapper(object):
        self.array.append(x)
        return self

+    def __getitem__(self, item):
+        return self.array.__getitem__(item)
+

 def _maybe_copy(state, new_state, step_mask):
    """update rnn state or just pass the old state through"""
@@ -859,8 +863,6 @@ class Decoder(object):

 class BeamSearchDecoder(Decoder):
    """
-	:api_attr: Static Graph
-
    Decoder with beam search decoding strategy. It wraps a cell to get probabilities,
    and follows a beam search step to calculate scores and select candidate
    token ids for each decoding step.
@@ -881,24 +883,20 @@ class BeamSearchDecoder(Decoder):

        .. code-block:: python
            
-            import paddle.fluid as fluid
-            from paddle.fluid.layers import GRUCell, BeamSearchDecoder
-
-            trg_embeder = lambda x: fluid.embedding(
-                x, size=[10000, 128], param_attr=fluid.ParamAttr(name="trg_embedding"))
-            output_layer = lambda x: layers.fc(x,
-                                            size=10000,
-                                            num_flatten_dims=len(x.shape) - 1,
-                                            param_attr=fluid.ParamAttr(name=
-                                                                        "output_w"),
-                                            bias_attr=False)
-            decoder_cell = GRUCell(hidden_size=128)
+            import numpy as np
+            import paddle
+            from paddle.nn import BeamSearchDecoder, dynamic_decode
+            from paddle.nn import GRUCell, Linear, Embedding
+            trg_embeder = Embedding(100, 32)
+            output_layer = Linear(32, 32)
+            decoder_cell = GRUCell(input_size=32, hidden_size=32)
            decoder = BeamSearchDecoder(decoder_cell,
                                        start_token=0,
                                        end_token=1,
                                        beam_size=4,
                                        embedding_fn=trg_embeder,
                                        output_fn=output_layer)
+
    """

    def __init__(self,
@@ -912,16 +910,13 @@ class BeamSearchDecoder(Decoder):
        Constructor of BeamSearchDecoder.

        Parameters:
-            cell(RNNCell): An instance of `RNNCell` or object with the same interface.
+            cell(RNNCellBase): An instance of `RNNCellBase` or object with the same interface.
            start_token(int): The start token id.
            end_token(int): The end token id.
            beam_size(int): The beam width used in beam search.
            embedding_fn(optional): A callable to apply to selected candidate ids. 
                Mostly it is an embedding layer to transform ids to embeddings,
                and the returned value acts as the `input` argument for `cell.call`.
-                **Note that fluid.embedding should be used here rather than
-                fluid.layers.embedding, since shape of ids is [batch_size, beam_size].
-                when using fluid.layers.embedding, must unsqueeze in embedding_fn.**
                If not provided, the id to embedding transformation must be built into
                `cell.call`. Default None.
            output_fn(optional): A callable to apply to the cell's output prior to
@@ -1150,6 +1145,8 @@ class BeamSearchDecoder(Decoder):
                np.array(
                    [[0.] + [-self.kinf] * (self.beam_size - 1)],
                    dtype="float32")), [self.batch_size, 1])
+        if paddle.get_default_dtype() == "float64":
+            log_probs = tensor.cast(log_probs, "float64")
        # TODO: remove the restriction of force_cpu
        init_finished = tensor.fill_constant_batch_size_like(
            input=state,
@@ -1197,7 +1194,11 @@ class BeamSearchDecoder(Decoder):
            shape=[1], dtype="int64", value=self.vocab_size)
        noend_array = [-self.kinf] * self.vocab_size
        noend_array[self.end_token] = 0
+
        self.noend_mask_tensor = tensor.assign(np.array(noend_array, "float32"))
+        if paddle.get_default_dtype() == "float64":
+            self.noend_mask_tensor = tensor.cast(self.noend_mask_tensor,
+                                                 "float64")

        step_log_probs = nn.log(nn.softmax(logits))
        step_log_probs = self._mask_probs(step_log_probs, beam_state.finished)
@@ -1328,98 +1329,103 @@ class BeamSearchDecoder(Decoder):
        return True


-def dynamic_decode(decoder,
-                   inits=None,
-                   max_step_num=None,
-                   output_time_major=False,
-                   impute_finished=False,
-                   is_test=False,
-                   return_length=False,
-                   **kwargs):
-    """
-	:api_attr: Static Graph
+def _dynamic_decode_imperative(decoder,
+                               inits=None,
+                               max_step_num=None,
+                               output_time_major=False,
+                               impute_finished=False,
+                               is_test=False,
+                               return_length=False,
+                               **kwargs):
+    def _maybe_copy(state, new_state, step_mask):
+        # TODO: use where_op
+        state_dtype = state.dtype
+        if convert_dtype(state_dtype) in ["bool"]:
+            state = tensor.cast(state, dtype="float32")
+            new_state = tensor.cast(new_state, dtype="float32")
+        if step_mask.dtype != state.dtype:
+            step_mask = tensor.cast(step_mask, dtype=state.dtype)
+            # otherwise, renamed bool gradients of would be summed up leading
+            # to sum(bool) error.
+            step_mask.stop_gradient = True
+        new_state = nn.elementwise_mul(
+            state, step_mask, axis=0) - nn.elementwise_mul(
+                new_state, (step_mask - 1), axis=0)
+        if convert_dtype(state_dtype) in ["bool"]:
+            new_state = tensor.cast(new_state, dtype=state_dtype)
+        return new_state

-    Dynamic decoding performs :code:`decoder.step()` repeatedly until the returned
-    Tensor indicating finished status contains all True values or the number of
-    decoding step reaches to :attr:`max_step_num`.
+    initial_inputs, initial_states, initial_finished = decoder.initialize(inits)
+    inputs, states, finished = (initial_inputs, initial_states,
+                                initial_finished)
+    cond = control_flow.logical_not((nn.reduce_all(initial_finished)))
+    sequence_lengths = tensor.cast(tensor.zeros_like(initial_finished), "int64")
+    outputs = None
+
+    step_idx = 0
+    step_idx_tensor = tensor.fill_constant(
+        shape=[1], dtype="int64", value=step_idx)
+    while cond.numpy():
+        (step_outputs, next_states, next_inputs, next_finished) = decoder.step(
+            step_idx_tensor, inputs, states, **kwargs)
+        if not decoder.tracks_own_finished:
+            # BeamSearchDecoder would track its own finished, since
+            # beams would be reordered and the finished status of each
+            # entry might change. Otherwise, perform logical OR which
+            # would not change the already finished.
+            next_finished = control_flow.logical_or(next_finished, finished)
+            # To confirm states.finished/finished be consistent with
+            # next_finished.
+            tensor.assign(next_finished, finished)
+        next_sequence_lengths = nn.elementwise_add(
+            sequence_lengths,
+            tensor.cast(
+                control_flow.logical_not(finished), sequence_lengths.dtype))

-    :code:`decoder.initialize()` would be called once before the decoding loop.
-    If the `decoder` has implemented `finalize` method, :code:`decoder.finalize()`
-    would be called once after the decoding loop.
+        if impute_finished:  # rectify the states for the finished.
+            next_states = map_structure(
+                lambda x, y: _maybe_copy(x, y, finished), states, next_states)
+        outputs = map_structure(
+            lambda x: ArrayWrapper(x),
+            step_outputs) if step_idx == 0 else map_structure(
+                lambda x, x_array: x_array.append(x), step_outputs, outputs)
+        inputs, states, finished, sequence_lengths = (
+            next_inputs, next_states, next_finished, next_sequence_lengths)

-    Parameters:
-        decoder(Decoder): An instance of `Decoder`.
-        inits(object, optional): Argument passed to `decoder.initialize`. 
-            Default `None`.
-        max_step_num(int, optional): The maximum number of steps. If not provided,
-            decode until the decoder is fully done, or in other words, the returned
-            Tensor by :code:`decoder.step()` indicating finished status contains
-            all True. Default `None`.
-        output_time_major(bool, optional): Indicate the data layout of Tensor included
-            in the final outputs(the first returned value of this method). If
-            attr:`False`, the data layout would be batch major with shape
-            `[batch_size, seq_len, ...]`.  If attr:`True`, the data layout would
-            be time major with shape `[seq_len, batch_size, ...]`. Default: `False`.
-        impute_finished(bool, optional): If `True`, then states get copied through
-            for batch entries which are marked as finished, which differs with the
-            unfinished using the new states returned by :code:`decoder.step()` and
-            ensures that the final states have the correct values. Otherwise, states
-            wouldn't be copied through when finished. If the returned `final_states`
-            is needed, it should be set as True, which causes some slowdown.
-            Default `False`.
-        is_test(bool, optional): A flag indicating whether to use test mode. In
-            test mode, it is more memory saving. Default `False`.
-        return_length(bool, optional):  A flag indicating whether to return an
-            extra Tensor variable in the output tuple, which stores the actual
-            lengths of all decoded sequences. Default `False`.
-        **kwargs: Additional keyword arguments. Arguments passed to `decoder.step`. 
+        control_flow.increment(x=step_idx_tensor, value=1.0, in_place=True)
+        step_idx += 1

-    Returns:
-        tuple: A tuple( :code:`(final_outputs, final_states, sequence_lengths)` ) \
-            when `return_length` is True, otherwise a tuple( :code:`(final_outputs, final_states)` ). \
-            The final outputs and states, both are Tensor or nested structure of Tensor. \
-            `final_outputs` has the same structure and data types as the :code:`outputs` \
-            returned by :code:`decoder.step()` , and each Tenser in `final_outputs` \
-            is the stacked of all decoding steps' outputs, which might be revised \
-            by :code:`decoder.finalize()` if the decoder has implemented `finalize`. \
-            `final_states` is the counterpart at last time step of initial states \
-            returned by :code:`decoder.initialize()` , thus has the same structure \
-            with it and has tensors with same shapes and data types. `sequence_lengths` \
-            is an `int64` tensor with the same shape as `finished` returned \
-            by :code:`decoder.initialize()` , and it stores the actual lengths of \
-            all decoded sequences.
-            
+        control_flow.logical_not(nn.reduce_all(finished), cond)
+        if max_step_num is not None and step_idx > max_step_num:
+            break

-    Examples:
+    final_outputs = map_structure(lambda x: nn.stack(x.array, axis=0), outputs)
+    final_states = states

-        .. code-block:: python
-            
-            import paddle.fluid as fluid
-            import paddle.fluid.layers as layers
-            from paddle.fluid.layers import GRUCell, BeamSearchDecoder, dynamic_decode
+    try:
+        final_outputs, final_states = decoder.finalize(
+            final_outputs, final_states, sequence_lengths)
+    except NotImplementedError:
+        pass

-            encoder_output = fluid.data(name="encoder_output",
-                                    shape=[-1, 32, 128],
-                                    dtype="float32")
-            trg_embeder = lambda x: fluid.embedding(
-                x, size=[10000, 128], param_attr=fluid.ParamAttr(name="trg_embedding"))
-            output_layer = lambda x: layers.fc(x,
-                                            size=10000,
-                                            num_flatten_dims=len(x.shape) - 1,
-                                            param_attr=fluid.ParamAttr(name=
-                                                                        "output_w"),
-                                            bias_attr=False)
-            decoder_cell = GRUCell(hidden_size=128)
-            decoder = BeamSearchDecoder(decoder_cell,
-                                        start_token=0,
-                                        end_token=1,
-                                        beam_size=4,
-                                        embedding_fn=trg_embeder,
-                                        output_fn=output_layer)
+    if not output_time_major:
+        final_outputs = map_structure(
+            lambda x: nn.transpose(x, [1, 0] + list(range(2, len(x.shape)))),
+            final_outputs)

-            outputs = dynamic_decode(
-                decoder=decoder, inits=decoder_cell.get_initial_states(encoder_output))
-    """
+    return (final_outputs, final_states,
+            sequence_lengths) if return_length else (final_outputs,
+                                                     final_states)
+
+
+def _dynamic_decode_declarative(decoder,
+                                inits=None,
+                                max_step_num=None,
+                                output_time_major=False,
+                                impute_finished=False,
+                                is_test=False,
+                                return_length=False,
+                                **kwargs):
    initial_inputs, initial_states, initial_finished = decoder.initialize(inits)
    global_inputs, global_states, global_finished = (
        initial_inputs, initial_states, initial_finished)
@@ -1558,6 +1564,98 @@ def dynamic_decode(decoder,
                                                     final_states)


+def dynamic_decode(decoder,
+                   inits=None,
+                   max_step_num=None,
+                   output_time_major=False,
+                   impute_finished=False,
+                   is_test=False,
+                   return_length=False,
+                   **kwargs):
+    """
+    Dynamic decoding performs :code:`decoder.step()` repeatedly until the returned
+    Tensor indicating finished status contains all True values or the number of
+    decoding step reaches to :attr:`max_step_num`.
+
+    :code:`decoder.initialize()` would be called once before the decoding loop.
+    If the `decoder` has implemented `finalize` method, :code:`decoder.finalize()`
+    would be called once after the decoding loop.
+
+    Parameters:
+        decoder(Decoder): An instance of `Decoder`.
+        inits(object, optional): Argument passed to `decoder.initialize`. 
+            Default `None`.
+        max_step_num(int, optional): The maximum number of steps. If not provided,
+            decode until the decoder is fully done, or in other words, the returned
+            Tensor by :code:`decoder.step()` indicating finished status contains
+            all True. Default `None`.
+        output_time_major(bool, optional): Indicate the data layout of Tensor included
+            in the final outputs(the first returned value of this method). If
+            attr:`False`, the data layout would be batch major with shape
+            `[batch_size, seq_len, ...]`.  If attr:`True`, the data layout would
+            be time major with shape `[seq_len, batch_size, ...]`. Default: `False`.
+        impute_finished(bool, optional): If `True`, then states get copied through
+            for batch entries which are marked as finished, which differs with the
+            unfinished using the new states returned by :code:`decoder.step()` and
+            ensures that the final states have the correct values. Otherwise, states
+            wouldn't be copied through when finished. If the returned `final_states`
+            is needed, it should be set as True, which causes some slowdown.
+            Default `False`.
+        is_test(bool, optional): A flag indicating whether to use test mode. In
+            test mode, it is more memory saving. Default `False`.
+        return_length(bool, optional):  A flag indicating whether to return an
+            extra Tensor variable in the output tuple, which stores the actual
+            lengths of all decoded sequences. Default `False`.
+        **kwargs: Additional keyword arguments. Arguments passed to `decoder.step`. 
+
+    Returns:
+        tuple: A tuple( :code:`(final_outputs, final_states, sequence_lengths)` ) \
+            when `return_length` is True, otherwise a tuple( :code:`(final_outputs, final_states)` ). \
+            The final outputs and states, both are Tensor or nested structure of Tensor. \
+            `final_outputs` has the same structure and data types as the :code:`outputs` \
+            returned by :code:`decoder.step()` , and each Tensor in `final_outputs` \
+            is the stacked of all decoding steps' outputs, which might be revised \
+            by :code:`decoder.finalize()` if the decoder has implemented `finalize`. \
+            `final_states` is the counterpart at last time step of initial states \
+            returned by :code:`decoder.initialize()` , thus has the same structure \
+            with it and has tensors with same shapes and data types. `sequence_lengths` \
+            is an `int64` tensor with the same shape as `finished` returned \
+            by :code:`decoder.initialize()` , and it stores the actual lengths of \
+            all decoded sequences.
+            
+
+    Examples:
+
+        .. code-block:: python
+            
+            import numpy as np
+            import paddle
+            from paddle.nn import BeamSearchDecoder, dynamic_decode
+            from paddle.nn import GRUCell, Linear, Embedding
+            trg_embeder = Embedding(100, 32)
+            output_layer = Linear(32, 32)
+            decoder_cell = GRUCell(input_size=32, hidden_size=32)
+            decoder = BeamSearchDecoder(decoder_cell,
+                                        start_token=0,
+                                        end_token=1,
+                                        beam_size=4,
+                                        embedding_fn=trg_embeder,
+                                        output_fn=output_layer)
+            encoder_output = paddle.ones((4, 8, 32), dtype=paddle.get_default_dtype())
+            outputs = dynamic_decode(decoder=decoder,
+                                    inits=decoder_cell.get_initial_states(encoder_output),
+                                    max_step_num=10)
+    """
+    if in_dygraph_mode():
+        return _dynamic_decode_imperative(decoder, inits, max_step_num,
+                                          output_time_major, impute_finished,
+                                          is_test, return_length, **kwargs)
+    else:
+        return _dynamic_decode_declarative(decoder, inits, max_step_num,
+                                           output_time_major, impute_finished,
+                                           is_test, return_length, **kwargs)
+
+
 class DecodeHelper(object):
    """
    DecodeHelper is the base class for any helper instance used in `BasicDecoder`.

--- a/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py
+++ b/python/paddle/fluid/tests/unittests/test_rnn_decode_api.py
-# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
 #
 # Licensed under the Apache License, Version 2.0 (the "License");
 # you may not use this file except in compliance with the License.
@@ -14,9 +14,17 @@

 from __future__ import print_function

+import random
 import unittest
 import numpy as np

+import paddle
+import paddle.nn as nn
+from paddle import Model, set_device
+from paddle.static import InputSpec as Input
+from paddle.fluid.dygraph import Layer
+from paddle.nn import BeamSearchDecoder, dynamic_decode
+
 import paddle.fluid as fluid
 import paddle.fluid.layers as layers
 import paddle.fluid.core as core
@@ -24,6 +32,8 @@ import paddle.fluid.core as core
 from paddle.fluid.executor import Executor
 from paddle.fluid import framework

+paddle.enable_static()
+

 class EncoderCell(layers.RNNCell):
    def __init__(self, num_layers, hidden_size, dropout_prob=0.):
@@ -436,6 +446,7 @@ class TestDynamicDecode(unittest.TestCase):
        self.exe = Executor(place)

    def test_mle_train(self):
+        paddle.enable_static()
        self.model_hparams["decoding_strategy"] = "train_greedy"
        agent = SeqPGAgent(
            model_cls=Seq2SeqModel,
@@ -468,6 +479,7 @@ class TestDynamicDecode(unittest.TestCase):
                  (iter_idx, reward.mean(), cost))

    def test_greedy_train(self):
+        paddle.enable_static()
        self.model_hparams["decoding_strategy"] = "infer_greedy"
        agent = SeqPGAgent(
            model_cls=Seq2SeqModel,
@@ -493,6 +505,7 @@ class TestDynamicDecode(unittest.TestCase):
                  (iter_idx, reward.mean(), cost))

    def test_sample_train(self):
+        paddle.enable_static()
        self.model_hparams["decoding_strategy"] = "infer_sample"
        agent = SeqPGAgent(
            model_cls=Seq2SeqModel,
@@ -518,6 +531,8 @@ class TestDynamicDecode(unittest.TestCase):
                  (iter_idx, reward.mean(), cost))

    def test_beam_search_infer(self):
+        paddle.set_default_dtype("float32")
+        paddle.enable_static()
        self.model_hparams["decoding_strategy"] = "beam_search"
        main_program = fluid.Program()
        startup_program = fluid.Program()
@@ -542,5 +557,154 @@ class TestDynamicDecode(unittest.TestCase):
                fetch_list=[output])[0]


+class ModuleApiTest(unittest.TestCase):
+    @classmethod
+    def setUpClass(cls):
+        cls._np_rand_state = np.random.get_state()
+        cls._py_rand_state = random.getstate()
+        cls._random_seed = 123
+        np.random.seed(cls._random_seed)
+        random.seed(cls._random_seed)
+
+        cls.model_cls = type(cls.__name__ + "Model", (Layer, ), {
+            "__init__": cls.model_init_wrapper(cls.model_init),
+            "forward": cls.model_forward
+        })
+
+    @classmethod
+    def tearDownClass(cls):
+        np.random.set_state(cls._np_rand_state)
+        random.setstate(cls._py_rand_state)
+
+    @staticmethod
+    def model_init_wrapper(func):
+        def __impl__(self, *args, **kwargs):
+            Layer.__init__(self)
+            func(self, *args, **kwargs)
+
+        return __impl__
+
+    @staticmethod
+    def model_init(model, *args, **kwargs):
+        raise NotImplementedError(
+            "model_init acts as `Model.__init__`, thus must implement it")
+
+    @staticmethod
+    def model_forward(model, *args, **kwargs):
+        return model.module(*args, **kwargs)
+
+    def make_inputs(self):
+        # TODO(guosheng): add default from `self.inputs`
+        raise NotImplementedError(
+            "model_inputs makes inputs for model, thus must implement it")
+
+    def setUp(self):
+        """
+        For the model which wraps the module to be tested:
+            Set input data by `self.inputs` list
+            Set init argument values by `self.attrs` list/dict
+            Set model parameter values by `self.param_states` dict
+            Set expected output data by `self.outputs` list
+        We can create a model instance and run once with these.
+        """
+        self.inputs = []
+        self.attrs = {}
+        self.param_states = {}
+        self.outputs = []
+
+    def _calc_output(self, place, mode="test", dygraph=True):
+        if dygraph:
+            fluid.enable_dygraph(place)
+        else:
+            fluid.disable_dygraph()
+        gen = paddle.manual_seed(self._random_seed)
+        gen._is_init_py = False
+        paddle.framework.random._manual_program_seed(self._random_seed)
+        scope = fluid.core.Scope()
+        with fluid.scope_guard(scope):
+            layer = self.model_cls(**self.attrs) if isinstance(
+                self.attrs, dict) else self.model_cls(*self.attrs)
+            model = Model(layer, inputs=self.make_inputs())
+            model.prepare()
+            if self.param_states:
+                model.load(self.param_states, optim_state=None)
+            return model.test_batch(self.inputs)
+
+    def check_output_with_place(self, place, mode="test"):
+        dygraph_output = self._calc_output(place, mode, dygraph=True)
+        stgraph_output = self._calc_output(place, mode, dygraph=False)
+        expect_output = getattr(self, "outputs", None)
+        for actual_t, expect_t in zip(dygraph_output, stgraph_output):
+            self.assertTrue(np.allclose(actual_t, expect_t, rtol=1e-5, atol=0))
+        if expect_output:
+            for actual_t, expect_t in zip(dygraph_output, expect_output):
+                self.assertTrue(
+                    np.allclose(
+                        actual_t, expect_t, rtol=1e-5, atol=0))
+
+    def check_output(self):
+        devices = ["CPU", "GPU"] if fluid.is_compiled_with_cuda() else ["CPU"]
+        for device in devices:
+            place = set_device(device)
+            self.check_output_with_place(place)
+
+
+class TestBeamSearch(ModuleApiTest):
+    def setUp(self):
+        paddle.set_default_dtype("float64")
+        shape = (8, 32)
+        self.inputs = [
+            np.random.random(shape).astype("float64"),
+            np.random.random(shape).astype("float64")
+        ]
+        self.outputs = None
+        self.attrs = {
+            "vocab_size": 100,
+            "embed_dim": 32,
+            "hidden_size": 32,
+        }
+        self.param_states = {}
+
+    @staticmethod
+    def model_init(self,
+                   vocab_size,
+                   embed_dim,
+                   hidden_size,
+                   bos_id=0,
+                   eos_id=1,
+                   beam_size=2,
+                   max_step_num=2):
+        embedder = paddle.fluid.dygraph.Embedding(
+            size=[vocab_size, embed_dim], dtype="float64")
+        output_layer = nn.Linear(hidden_size, vocab_size)
+        cell = nn.LSTMCell(embed_dim, hidden_size)
+        self.max_step_num = max_step_num
+        self.beam_search_decoder = BeamSearchDecoder(
+            cell,
+            start_token=bos_id,
+            end_token=eos_id,
+            beam_size=beam_size,
+            embedding_fn=embedder,
+            output_fn=output_layer)
+
+    @staticmethod
+    def model_forward(model, init_hidden, init_cell):
+        return dynamic_decode(
+            model.beam_search_decoder, [init_hidden, init_cell],
+            max_step_num=model.max_step_num,
+            impute_finished=True,
+            is_test=True)[0]
+
+    def make_inputs(self):
+        inputs = [
+            Input([None, self.inputs[0].shape[-1]], "float64", "init_hidden"),
+            Input([None, self.inputs[1].shape[-1]], "float64", "init_cell"),
+        ]
+        return inputs
+
+    def test_check_output(self):
+        self.check_output()
+
+
 if __name__ == '__main__':
    unittest.main()
--- a/python/paddle/nn/__init__.py
+++ b/python/paddle/nn/__init__.py
@@ -42,14 +42,11 @@ from .clip import clip_by_norm  #DEFINE_ALIAS
 # from .control_flow import StaticRNN        #DEFINE_ALIAS
 # from .control_flow import while_loop  #DEFINE_ALIAS
 # from .control_flow import rnn        #DEFINE_ALIAS
-# from .decode import BeamSearchDecoder        #DEFINE_ALIAS
+from .decode import BeamSearchDecoder  #DEFINE_ALIAS
+from .decode import dynamic_decode  #DEFINE_ALIAS
 # from .decode import Decoder        #DEFINE_ALIAS
-# from .decode import beam_search  #DEFINE_ALIAS
-# from .decode import beam_search_decode  #DEFINE_ALIAS
 # from .decode import crf_decoding        #DEFINE_ALIAS
 # from .decode import ctc_greedy_decoder        #DEFINE_ALIAS
-# from .decode import dynamic_decode        #DEFINE_ALIAS
-# from .decode import gather_tree  #DEFINE_ALIAS
 # from .input import Input        #DEFINE_ALIAS
 from .layer.activation import ELU  #DEFINE_ALIAS
 from .layer.activation import GELU  #DEFINE_ALIAS

--- a/python/paddle/nn/decode.py
+++ b/python/paddle/nn/decode.py
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from ..fluid.layers import BeamSearchDecoder  #DEFINE_ALIAS
+from ..fluid.layers import dynamic_decode  #DEFINE_ALIAS
+
+__all__ = [
+    'BeamSearchDecoder',
+    'dynamic_decode',
+]
--- a/python/paddle/nn/functional/__init__.py
+++ b/python/paddle/nn/functional/__init__.py
@@ -216,3 +216,4 @@ from .vision import pixel_shuffle  #DEFINE_ALIAS
 # from .vision import yolov3_loss  #DEFINE_ALIAS
 from .input import one_hot  #DEFINE_ALIAS
 from .input import embedding  #DEFINE_ALIAS
+from ...fluid.layers import gather_tree
--- a/python/paddle/tests/test_text.py
+++ b/python/paddle/tests/test_text.py
--- a/python/paddle/text/__init__.py
+++ b/python/paddle/text/__init__.py
@@ -12,11 +12,7 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.

-from . import text
-from .text import *
-
 from . import datasets
 from .datasets import *

-__all__ = text.__all__ \
-        + datasets.__all__
+__all__ = datasets.__all__
--- a/python/paddle/text/text.py
+++ b/python/paddle/text/text.py