未验证 提交 01335815 编写于 作者: G Guo Sheng 提交者: GitHub

Clean text.py and decode.py for API 2.0 (#26853)

* Make dynamic_decode support dygraph and expose to API 2.0
test=develop

* update info about BeamSearchDecoder and dynamic_decode

* remove all APIs in paddle.text, expose BeamSearchDecoder and dynamic_decode

* update example code

* delete test_text.py, decode.py, update some doc, fix example code float64

* delete decode import from paddle.nn

* fix unittest bugs

* use dygraph.Embedding instead of nn.Embedding, add paddle.enable_static()

* update, correct doc

* move dynamic_decode, BeamSearchDecoder API to paddle.nn

* fix code style

* update unittest param, delete import of text.py

* set dtype of beamsearchtest float64

* update example code of BeamSearchDecoder, dynamic_decode
Co-authored-by: NLiuChiaChi <709153940@qq.com>
上级 832458d5
...@@ -17,6 +17,7 @@ from __future__ import print_function ...@@ -17,6 +17,7 @@ from __future__ import print_function
import sys import sys
from functools import partial, reduce from functools import partial, reduce
import paddle
from . import nn from . import nn
from . import tensor from . import tensor
from . import control_flow from . import control_flow
...@@ -507,6 +508,9 @@ class ArrayWrapper(object): ...@@ -507,6 +508,9 @@ class ArrayWrapper(object):
self.array.append(x) self.array.append(x)
return self return self
def __getitem__(self, item):
    """Return ``self.array[item]`` by delegating to the wrapped array's own ``__getitem__``."""
    return self.array.__getitem__(item)
def _maybe_copy(state, new_state, step_mask): def _maybe_copy(state, new_state, step_mask):
"""update rnn state or just pass the old state through""" """update rnn state or just pass the old state through"""
...@@ -859,8 +863,6 @@ class Decoder(object): ...@@ -859,8 +863,6 @@ class Decoder(object):
class BeamSearchDecoder(Decoder): class BeamSearchDecoder(Decoder):
""" """
:api_attr: Static Graph
Decoder with beam search decoding strategy. It wraps a cell to get probabilities, Decoder with beam search decoding strategy. It wraps a cell to get probabilities,
and follows a beam search step to calculate scores and select candidate and follows a beam search step to calculate scores and select candidate
token ids for each decoding step. token ids for each decoding step.
...@@ -881,24 +883,20 @@ class BeamSearchDecoder(Decoder): ...@@ -881,24 +883,20 @@ class BeamSearchDecoder(Decoder):
.. code-block:: python .. code-block:: python
import paddle.fluid as fluid import numpy as np
from paddle.fluid.layers import GRUCell, BeamSearchDecoder import paddle
from paddle.nn import BeamSearchDecoder, dynamic_decode
trg_embeder = lambda x: fluid.embedding( from paddle.nn import GRUCell, Linear, Embedding
x, size=[10000, 128], param_attr=fluid.ParamAttr(name="trg_embedding")) trg_embeder = Embedding(100, 32)
output_layer = lambda x: layers.fc(x, output_layer = Linear(32, 32)
size=10000, decoder_cell = GRUCell(input_size=32, hidden_size=32)
num_flatten_dims=len(x.shape) - 1,
param_attr=fluid.ParamAttr(name=
"output_w"),
bias_attr=False)
decoder_cell = GRUCell(hidden_size=128)
decoder = BeamSearchDecoder(decoder_cell, decoder = BeamSearchDecoder(decoder_cell,
start_token=0, start_token=0,
end_token=1, end_token=1,
beam_size=4, beam_size=4,
embedding_fn=trg_embeder, embedding_fn=trg_embeder,
output_fn=output_layer) output_fn=output_layer)
""" """
def __init__(self, def __init__(self,
...@@ -912,16 +910,13 @@ class BeamSearchDecoder(Decoder): ...@@ -912,16 +910,13 @@ class BeamSearchDecoder(Decoder):
Constructor of BeamSearchDecoder. Constructor of BeamSearchDecoder.
Parameters: Parameters:
cell(RNNCell): An instance of `RNNCell` or object with the same interface. cell(RNNCellBase): An instance of `RNNCellBase` or object with the same interface.
start_token(int): The start token id. start_token(int): The start token id.
end_token(int): The end token id. end_token(int): The end token id.
beam_size(int): The beam width used in beam search. beam_size(int): The beam width used in beam search.
embedding_fn(optional): A callable to apply to selected candidate ids. embedding_fn(optional): A callable to apply to selected candidate ids.
Mostly it is an embedding layer to transform ids to embeddings, Mostly it is an embedding layer to transform ids to embeddings,
and the returned value acts as the `input` argument for `cell.call`. and the returned value acts as the `input` argument for `cell.call`.
**Note that fluid.embedding should be used here rather than
fluid.layers.embedding, since shape of ids is [batch_size, beam_size].
when using fluid.layers.embedding, must unsqueeze in embedding_fn.**
If not provided, the id to embedding transformation must be built into If not provided, the id to embedding transformation must be built into
`cell.call`. Default None. `cell.call`. Default None.
output_fn(optional): A callable to apply to the cell's output prior to output_fn(optional): A callable to apply to the cell's output prior to
...@@ -1150,6 +1145,8 @@ class BeamSearchDecoder(Decoder): ...@@ -1150,6 +1145,8 @@ class BeamSearchDecoder(Decoder):
np.array( np.array(
[[0.] + [-self.kinf] * (self.beam_size - 1)], [[0.] + [-self.kinf] * (self.beam_size - 1)],
dtype="float32")), [self.batch_size, 1]) dtype="float32")), [self.batch_size, 1])
if paddle.get_default_dtype() == "float64":
log_probs = tensor.cast(log_probs, "float64")
# TODO: remove the restriction of force_cpu # TODO: remove the restriction of force_cpu
init_finished = tensor.fill_constant_batch_size_like( init_finished = tensor.fill_constant_batch_size_like(
input=state, input=state,
...@@ -1197,7 +1194,11 @@ class BeamSearchDecoder(Decoder): ...@@ -1197,7 +1194,11 @@ class BeamSearchDecoder(Decoder):
shape=[1], dtype="int64", value=self.vocab_size) shape=[1], dtype="int64", value=self.vocab_size)
noend_array = [-self.kinf] * self.vocab_size noend_array = [-self.kinf] * self.vocab_size
noend_array[self.end_token] = 0 noend_array[self.end_token] = 0
self.noend_mask_tensor = tensor.assign(np.array(noend_array, "float32")) self.noend_mask_tensor = tensor.assign(np.array(noend_array, "float32"))
if paddle.get_default_dtype() == "float64":
self.noend_mask_tensor = tensor.cast(self.noend_mask_tensor,
"float64")
step_log_probs = nn.log(nn.softmax(logits)) step_log_probs = nn.log(nn.softmax(logits))
step_log_probs = self._mask_probs(step_log_probs, beam_state.finished) step_log_probs = self._mask_probs(step_log_probs, beam_state.finished)
...@@ -1328,98 +1329,103 @@ class BeamSearchDecoder(Decoder): ...@@ -1328,98 +1329,103 @@ class BeamSearchDecoder(Decoder):
return True return True
def dynamic_decode(decoder, def _dynamic_decode_imperative(decoder,
inits=None, inits=None,
max_step_num=None, max_step_num=None,
output_time_major=False, output_time_major=False,
impute_finished=False, impute_finished=False,
is_test=False, is_test=False,
return_length=False, return_length=False,
**kwargs): **kwargs):
""" def _maybe_copy(state, new_state, step_mask):
:api_attr: Static Graph # TODO: use where_op
state_dtype = state.dtype
if convert_dtype(state_dtype) in ["bool"]:
state = tensor.cast(state, dtype="float32")
new_state = tensor.cast(new_state, dtype="float32")
if step_mask.dtype != state.dtype:
step_mask = tensor.cast(step_mask, dtype=state.dtype)
# otherwise, renamed bool gradients of would be summed up leading
# to sum(bool) error.
step_mask.stop_gradient = True
new_state = nn.elementwise_mul(
state, step_mask, axis=0) - nn.elementwise_mul(
new_state, (step_mask - 1), axis=0)
if convert_dtype(state_dtype) in ["bool"]:
new_state = tensor.cast(new_state, dtype=state_dtype)
return new_state
Dynamic decoding performs :code:`decoder.step()` repeatedly until the returned initial_inputs, initial_states, initial_finished = decoder.initialize(inits)
Tensor indicating finished status contains all True values or the number of inputs, states, finished = (initial_inputs, initial_states,
decoding step reaches to :attr:`max_step_num`. initial_finished)
cond = control_flow.logical_not((nn.reduce_all(initial_finished)))
sequence_lengths = tensor.cast(tensor.zeros_like(initial_finished), "int64")
outputs = None
step_idx = 0
step_idx_tensor = tensor.fill_constant(
shape=[1], dtype="int64", value=step_idx)
while cond.numpy():
(step_outputs, next_states, next_inputs, next_finished) = decoder.step(
step_idx_tensor, inputs, states, **kwargs)
if not decoder.tracks_own_finished:
# BeamSearchDecoder would track it own finished, since
# beams would be reordered and the finished status of each
# entry might change. Otherwise, perform logical OR which
# would not change the already finished.
next_finished = control_flow.logical_or(next_finished, finished)
# To confirm states.finished/finished be consistent with
# next_finished.
tensor.assign(next_finished, finished)
next_sequence_lengths = nn.elementwise_add(
sequence_lengths,
tensor.cast(
control_flow.logical_not(finished), sequence_lengths.dtype))
:code:`decoder.initialize()` would be called once before the decoding loop. if impute_finished: # rectify the states for the finished.
If the `decoder` has implemented `finalize` method, :code:`decoder.finalize()` next_states = map_structure(
would be called once after the decoding loop. lambda x, y: _maybe_copy(x, y, finished), states, next_states)
outputs = map_structure(
lambda x: ArrayWrapper(x),
step_outputs) if step_idx == 0 else map_structure(
lambda x, x_array: x_array.append(x), step_outputs, outputs)
inputs, states, finished, sequence_lengths = (
next_inputs, next_states, next_finished, next_sequence_lengths)
Parameters: control_flow.increment(x=step_idx_tensor, value=1.0, in_place=True)
decoder(Decoder): An instance of `Decoder`. step_idx += 1
inits(object, optional): Argument passed to `decoder.initialize`.
Default `None`.
max_step_num(int, optional): The maximum number of steps. If not provided,
decode until the decoder is fully done, or in other words, the returned
Tensor by :code:`decoder.step()` indicating finished status contains
all True. Default `None`.
output_time_major(bool, optional): Indicate the data layout of Tensor included
in the final outputs(the first returned value of this method). If
attr:`False`, the data layout would be batch major with shape
`[batch_size, seq_len, ...]`. If attr:`True`, the data layout would
be time major with shape `[seq_len, batch_size, ...]`. Default: `False`.
impute_finished(bool, optional): If `True`, then states get copied through
for batch entries which are marked as finished, which differs with the
unfinished using the new states returned by :code:`decoder.step()` and
ensures that the final states have the correct values. Otherwise, states
wouldn't be copied through when finished. If the returned `final_states`
is needed, it should be set as True, which causes some slowdown.
Default `False`.
is_test(bool, optional): A flag indicating whether to use test mode. In
test mode, it is more memory saving. Default `False`.
return_length(bool, optional): A flag indicating whether to return an
extra Tensor variable in the output tuple, which stores the actual
lengths of all decoded sequences. Default `False`.
**kwargs: Additional keyword arguments. Arguments passed to `decoder.step`.
Returns: control_flow.logical_not(nn.reduce_all(finished), cond)
tuple: A tuple( :code:`(final_outputs, final_states, sequence_lengths)` ) \ if max_step_num is not None and step_idx > max_step_num:
when `return_length` is True, otherwise a tuple( :code:`(final_outputs, final_states)` ). \ break
The final outputs and states, both are Tensor or nested structure of Tensor. \
`final_outputs` has the same structure and data types as the :code:`outputs` \
returned by :code:`decoder.step()` , and each Tenser in `final_outputs` \
is the stacked of all decoding steps' outputs, which might be revised \
by :code:`decoder.finalize()` if the decoder has implemented `finalize`. \
`final_states` is the counterpart at last time step of initial states \
returned by :code:`decoder.initialize()` , thus has the same structure \
with it and has tensors with same shapes and data types. `sequence_lengths` \
is an `int64` tensor with the same shape as `finished` returned \
by :code:`decoder.initialize()` , and it stores the actual lengths of \
all decoded sequences.
Examples: final_outputs = map_structure(lambda x: nn.stack(x.array, axis=0), outputs)
final_states = states
.. code-block:: python try:
final_outputs, final_states = decoder.finalize(
import paddle.fluid as fluid final_outputs, final_states, sequence_lengths)
import paddle.fluid.layers as layers except NotImplementedError:
from paddle.fluid.layers import GRUCell, BeamSearchDecoder, dynamic_decode pass
encoder_output = fluid.data(name="encoder_output", if not output_time_major:
shape=[-1, 32, 128], final_outputs = map_structure(
dtype="float32") lambda x: nn.transpose(x, [1, 0] + list(range(2, len(x.shape)))),
trg_embeder = lambda x: fluid.embedding( final_outputs)
x, size=[10000, 128], param_attr=fluid.ParamAttr(name="trg_embedding"))
output_layer = lambda x: layers.fc(x,
size=10000,
num_flatten_dims=len(x.shape) - 1,
param_attr=fluid.ParamAttr(name=
"output_w"),
bias_attr=False)
decoder_cell = GRUCell(hidden_size=128)
decoder = BeamSearchDecoder(decoder_cell,
start_token=0,
end_token=1,
beam_size=4,
embedding_fn=trg_embeder,
output_fn=output_layer)
outputs = dynamic_decode( return (final_outputs, final_states,
decoder=decoder, inits=decoder_cell.get_initial_states(encoder_output)) sequence_lengths) if return_length else (final_outputs,
""" final_states)
def _dynamic_decode_declarative(decoder,
inits=None,
max_step_num=None,
output_time_major=False,
impute_finished=False,
is_test=False,
return_length=False,
**kwargs):
initial_inputs, initial_states, initial_finished = decoder.initialize(inits) initial_inputs, initial_states, initial_finished = decoder.initialize(inits)
global_inputs, global_states, global_finished = ( global_inputs, global_states, global_finished = (
initial_inputs, initial_states, initial_finished) initial_inputs, initial_states, initial_finished)
...@@ -1558,6 +1564,98 @@ def dynamic_decode(decoder, ...@@ -1558,6 +1564,98 @@ def dynamic_decode(decoder,
final_states) final_states)
def dynamic_decode(decoder,
                   inits=None,
                   max_step_num=None,
                   output_time_major=False,
                   impute_finished=False,
                   is_test=False,
                   return_length=False,
                   **kwargs):
    """
    Run a decoding loop that calls :code:`decoder.step()` repeatedly. The loop
    ends when the Tensor indicating finished status contains only True values,
    or when the number of decoding steps reaches :attr:`max_step_num`.

    :code:`decoder.initialize()` is called once before the decoding loop.
    If the `decoder` implements a `finalize` method, :code:`decoder.finalize()`
    is called once after the decoding loop.

    Parameters:
        decoder(Decoder): An instance of `Decoder`.
        inits(object, optional): Argument forwarded to `decoder.initialize`.
            Default `None`.
        max_step_num(int, optional): The maximum number of steps. When it is
            not given, decoding continues until the decoder is fully done,
            that is, until the finished-status Tensor returned by
            :code:`decoder.step()` contains all True. Default `None`.
        output_time_major(bool, optional): Selects the data layout of the
            Tensors in the final outputs (the first returned value). With
            `False` the layout is batch major, with shape
            `[batch_size, seq_len, ...]`. With `True` the layout is time
            major, with shape `[seq_len, batch_size, ...]`. Default: `False`.
        impute_finished(bool, optional): If `True`, states of batch entries
            already marked as finished are copied through unchanged, while
            unfinished entries take the new states returned by
            :code:`decoder.step()`; this ensures the final states hold the
            correct values. Otherwise states are not copied through when
            finished. Set it to `True` when the returned `final_states` is
            needed, at the cost of some slowdown. Default `False`.
        is_test(bool, optional): A flag indicating whether to use test mode,
            which saves more memory. Default `False`.
        return_length(bool, optional): A flag indicating whether to append an
            extra Tensor to the output tuple that stores the actual lengths
            of all decoded sequences. Default `False`.
        **kwargs: Additional keyword arguments, passed to `decoder.step`.

    Returns:
        tuple: A tuple( :code:`(final_outputs, final_states, sequence_lengths)` ) \
            when `return_length` is True, otherwise a tuple( :code:`(final_outputs, final_states)` ). \
            Both the final outputs and the final states are a Tensor or a nested \
            structure of Tensor. `final_outputs` has the same structure and data \
            types as the :code:`outputs` returned by :code:`decoder.step()` , and \
            each Tensor in `final_outputs` is the stack of the outputs of all \
            decoding steps, which might be revised by :code:`decoder.finalize()` \
            if the decoder has implemented `finalize`. `final_states` is the \
            counterpart at the last time step of the initial states returned by \
            :code:`decoder.initialize()` , so it has the same structure and its \
            tensors have the same shapes and data types. `sequence_lengths` is an \
            `int64` tensor with the same shape as `finished` returned by \
            :code:`decoder.initialize()` , and it stores the actual lengths of \
            all decoded sequences.

    Examples:

        .. code-block:: python

            import numpy as np
            import paddle
            from paddle.nn import BeamSearchDecoder, dynamic_decode
            from paddle.nn import GRUCell, Linear, Embedding
            trg_embeder = Embedding(100, 32)
            output_layer = Linear(32, 32)
            decoder_cell = GRUCell(input_size=32, hidden_size=32)
            decoder = BeamSearchDecoder(decoder_cell,
                                        start_token=0,
                                        end_token=1,
                                        beam_size=4,
                                        embedding_fn=trg_embeder,
                                        output_fn=output_layer)
            encoder_output = paddle.ones((4, 8, 32), dtype=paddle.get_default_dtype())
            outputs = dynamic_decode(decoder=decoder,
                                     inits=decoder_cell.get_initial_states(encoder_output),
                                     max_step_num=10)
    """
    # Choose the implementation that matches the current execution mode; both
    # share this function's signature, so the arguments are forwarded as-is.
    decode_impl = (_dynamic_decode_imperative
                   if in_dygraph_mode() else _dynamic_decode_declarative)
    return decode_impl(decoder, inits, max_step_num, output_time_major,
                       impute_finished, is_test, return_length, **kwargs)
class DecodeHelper(object): class DecodeHelper(object):
""" """
DecodeHelper is the base class for any helper instance used in `BasicDecoder`. DecodeHelper is the base class for any helper instance used in `BasicDecoder`.
......
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. # Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
# #
# Licensed under the Apache License, Version 2.0 (the "License"); # Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License. # you may not use this file except in compliance with the License.
...@@ -14,9 +14,17 @@ ...@@ -14,9 +14,17 @@
from __future__ import print_function from __future__ import print_function
import random
import unittest import unittest
import numpy as np import numpy as np
import paddle
import paddle.nn as nn
from paddle import Model, set_device
from paddle.static import InputSpec as Input
from paddle.fluid.dygraph import Layer
from paddle.nn import BeamSearchDecoder, dynamic_decode
import paddle.fluid as fluid import paddle.fluid as fluid
import paddle.fluid.layers as layers import paddle.fluid.layers as layers
import paddle.fluid.core as core import paddle.fluid.core as core
...@@ -24,6 +32,8 @@ import paddle.fluid.core as core ...@@ -24,6 +32,8 @@ import paddle.fluid.core as core
from paddle.fluid.executor import Executor from paddle.fluid.executor import Executor
from paddle.fluid import framework from paddle.fluid import framework
paddle.enable_static()
class EncoderCell(layers.RNNCell): class EncoderCell(layers.RNNCell):
def __init__(self, num_layers, hidden_size, dropout_prob=0.): def __init__(self, num_layers, hidden_size, dropout_prob=0.):
...@@ -436,6 +446,7 @@ class TestDynamicDecode(unittest.TestCase): ...@@ -436,6 +446,7 @@ class TestDynamicDecode(unittest.TestCase):
self.exe = Executor(place) self.exe = Executor(place)
def test_mle_train(self): def test_mle_train(self):
paddle.enable_static()
self.model_hparams["decoding_strategy"] = "train_greedy" self.model_hparams["decoding_strategy"] = "train_greedy"
agent = SeqPGAgent( agent = SeqPGAgent(
model_cls=Seq2SeqModel, model_cls=Seq2SeqModel,
...@@ -468,6 +479,7 @@ class TestDynamicDecode(unittest.TestCase): ...@@ -468,6 +479,7 @@ class TestDynamicDecode(unittest.TestCase):
(iter_idx, reward.mean(), cost)) (iter_idx, reward.mean(), cost))
def test_greedy_train(self): def test_greedy_train(self):
paddle.enable_static()
self.model_hparams["decoding_strategy"] = "infer_greedy" self.model_hparams["decoding_strategy"] = "infer_greedy"
agent = SeqPGAgent( agent = SeqPGAgent(
model_cls=Seq2SeqModel, model_cls=Seq2SeqModel,
...@@ -493,6 +505,7 @@ class TestDynamicDecode(unittest.TestCase): ...@@ -493,6 +505,7 @@ class TestDynamicDecode(unittest.TestCase):
(iter_idx, reward.mean(), cost)) (iter_idx, reward.mean(), cost))
def test_sample_train(self): def test_sample_train(self):
paddle.enable_static()
self.model_hparams["decoding_strategy"] = "infer_sample" self.model_hparams["decoding_strategy"] = "infer_sample"
agent = SeqPGAgent( agent = SeqPGAgent(
model_cls=Seq2SeqModel, model_cls=Seq2SeqModel,
...@@ -518,6 +531,8 @@ class TestDynamicDecode(unittest.TestCase): ...@@ -518,6 +531,8 @@ class TestDynamicDecode(unittest.TestCase):
(iter_idx, reward.mean(), cost)) (iter_idx, reward.mean(), cost))
def test_beam_search_infer(self): def test_beam_search_infer(self):
paddle.set_default_dtype("float32")
paddle.enable_static()
self.model_hparams["decoding_strategy"] = "beam_search" self.model_hparams["decoding_strategy"] = "beam_search"
main_program = fluid.Program() main_program = fluid.Program()
startup_program = fluid.Program() startup_program = fluid.Program()
...@@ -542,5 +557,154 @@ class TestDynamicDecode(unittest.TestCase): ...@@ -542,5 +557,154 @@ class TestDynamicDecode(unittest.TestCase):
fetch_list=[output])[0] fetch_list=[output])[0]
class ModuleApiTest(unittest.TestCase):
    """
    Base harness that tests a module through the high-level `Model` API.

    A subclass provides `model_init`, `model_forward` and `make_inputs`. The
    harness assembles a `Layer` subclass from these hooks, runs it once in
    dygraph mode and once in static graph mode, and asserts that both runs
    (and, if given, `self.outputs`) agree.
    """

    @classmethod
    def setUpClass(cls):
        # Keep the global RNG states so tearDownClass can put them back.
        cls._np_rand_state = np.random.get_state()
        cls._py_rand_state = random.getstate()

        cls._random_seed = 123
        np.random.seed(cls._random_seed)
        random.seed(cls._random_seed)

        # Build the model class on the fly from the hooks of the test class.
        model_name = cls.__name__ + "Model"
        model_members = {
            "__init__": cls.model_init_wrapper(cls.model_init),
            "forward": cls.model_forward
        }
        cls.model_cls = type(model_name, (Layer, ), model_members)

    @classmethod
    def tearDownClass(cls):
        # Restore the RNG states captured in setUpClass.
        np.random.set_state(cls._np_rand_state)
        random.setstate(cls._py_rand_state)

    @staticmethod
    def model_init_wrapper(func):
        """Wrap `func` so that `Layer.__init__` runs before it."""

        def init_with_layer_base(self, *args, **kwargs):
            Layer.__init__(self)
            func(self, *args, **kwargs)

        return init_with_layer_base

    @staticmethod
    def model_init(model, *args, **kwargs):
        """Hook used as `__init__` of the generated model; must be overridden."""
        raise NotImplementedError(
            "model_init acts as `Model.__init__`, thus must implement it")

    @staticmethod
    def model_forward(model, *args, **kwargs):
        """Hook used as `forward` of the generated model; calls `model.module`."""
        return model.module(*args, **kwargs)

    def make_inputs(self):
        """Return the input specs handed to `Model`; must be overridden."""
        # TODO(guosheng): add default from `self.inputs`
        raise NotImplementedError(
            "model_inputs makes inputs for model, thus must implement it")

    def setUp(self):
        """
        Reset the per-test configuration of the model wrapping the module
        under test:

            `self.inputs`: list of input data
            `self.attrs`: list/dict of init argument values
            `self.param_states`: dict of model parameter values
            `self.outputs`: list of expected output data

        With these a model instance can be created and run once.
        """
        self.inputs = []
        self.attrs = {}
        self.param_states = {}
        self.outputs = []

    def _calc_output(self, place, mode="test", dygraph=True):
        """Build the model in the requested graph mode and run one test batch."""
        if not dygraph:
            fluid.disable_dygraph()
        else:
            fluid.enable_dygraph(place)

        seed = self._random_seed
        generator = paddle.manual_seed(seed)
        # NOTE(review): private generator flag; presumably forces the
        # generator to re-initialize with the seed above -- confirm.
        generator._is_init_py = False
        paddle.framework.random._manual_program_seed(seed)

        scope = fluid.core.Scope()
        with fluid.scope_guard(scope):
            # `attrs` may hold keyword arguments (dict) or positional ones.
            if isinstance(self.attrs, dict):
                layer = self.model_cls(**self.attrs)
            else:
                layer = self.model_cls(*self.attrs)
            model = Model(layer, inputs=self.make_inputs())
            model.prepare()
            if self.param_states:
                model.load(self.param_states, optim_state=None)
            return model.test_batch(self.inputs)

    def check_output_with_place(self, place, mode="test"):
        """Compare dygraph output with static graph output and `self.outputs`."""
        dygraph_output = self._calc_output(place, mode, dygraph=True)
        stgraph_output = self._calc_output(place, mode, dygraph=False)
        expect_output = getattr(self, "outputs", None)

        # The static graph result is always a reference; the expected output
        # is checked afterwards only when it is provided (truthy).
        references = [stgraph_output]
        if expect_output:
            references.append(expect_output)
        for reference in references:
            for actual_t, expect_t in zip(dygraph_output, reference):
                is_close = np.allclose(actual_t, expect_t, rtol=1e-5, atol=0)
                self.assertTrue(is_close)

    def check_output(self):
        """Run the output check on CPU, and on GPU when compiled with CUDA."""
        devices = ["CPU"]
        if fluid.is_compiled_with_cuda():
            devices.append("GPU")
        for device in devices:
            self.check_output_with_place(set_device(device))
class TestBeamSearch(ModuleApiTest):
    """
    Runs `BeamSearchDecoder` with `dynamic_decode` through the
    `ModuleApiTest` harness, using float64 inputs and parameters.
    """

    def setUp(self):
        # The default dtype is process-wide state. Register its restoration
        # before changing it so the float64 setting cannot leak into test
        # cases that run after this one.
        self.addCleanup(paddle.set_default_dtype, paddle.get_default_dtype())
        paddle.set_default_dtype("float64")

        shape = (8, 32)
        # Two random arrays, fed as the initial hidden and cell states.
        self.inputs = [
            np.random.random(shape).astype("float64"),
            np.random.random(shape).astype("float64")
        ]
        # No fixed expected output: only dygraph vs. static graph is compared.
        self.outputs = None
        self.attrs = {
            "vocab_size": 100,
            "embed_dim": 32,
            "hidden_size": 32,
        }
        self.param_states = {}

    @staticmethod
    def model_init(self,
                   vocab_size,
                   embed_dim,
                   hidden_size,
                   bos_id=0,
                   eos_id=1,
                   beam_size=2,
                   max_step_num=2):
        """
        Serve as `__init__` of the generated model: build the embedding,
        the output projection and the LSTM cell, and wrap them in a
        `BeamSearchDecoder`. `self` is the model instance being built.
        """
        embedder = paddle.fluid.dygraph.Embedding(
            size=[vocab_size, embed_dim], dtype="float64")
        output_layer = nn.Linear(hidden_size, vocab_size)
        cell = nn.LSTMCell(embed_dim, hidden_size)
        self.max_step_num = max_step_num
        self.beam_search_decoder = BeamSearchDecoder(
            cell,
            start_token=bos_id,
            end_token=eos_id,
            beam_size=beam_size,
            embedding_fn=embedder,
            output_fn=output_layer)

    @staticmethod
    def model_forward(model, init_hidden, init_cell):
        """Decode from the given initial states and return the final outputs."""
        # Index 0 selects `final_outputs` from the tuple that
        # `dynamic_decode` returns.
        return dynamic_decode(
            model.beam_search_decoder, [init_hidden, init_cell],
            max_step_num=model.max_step_num,
            impute_finished=True,
            is_test=True)[0]

    def make_inputs(self):
        """Describe the two float64 state inputs; the first dimension is left open."""
        inputs = [
            Input([None, self.inputs[0].shape[-1]], "float64", "init_hidden"),
            Input([None, self.inputs[1].shape[-1]], "float64", "init_cell"),
        ]
        return inputs

    def test_check_output(self):
        self.check_output()
if __name__ == '__main__': if __name__ == '__main__':
unittest.main() unittest.main()
...@@ -42,14 +42,11 @@ from .clip import clip_by_norm #DEFINE_ALIAS ...@@ -42,14 +42,11 @@ from .clip import clip_by_norm #DEFINE_ALIAS
# from .control_flow import StaticRNN #DEFINE_ALIAS # from .control_flow import StaticRNN #DEFINE_ALIAS
# from .control_flow import while_loop #DEFINE_ALIAS # from .control_flow import while_loop #DEFINE_ALIAS
# from .control_flow import rnn #DEFINE_ALIAS # from .control_flow import rnn #DEFINE_ALIAS
# from .decode import BeamSearchDecoder #DEFINE_ALIAS from .decode import BeamSearchDecoder #DEFINE_ALIAS
from .decode import dynamic_decode #DEFINE_ALIAS
# from .decode import Decoder #DEFINE_ALIAS # from .decode import Decoder #DEFINE_ALIAS
# from .decode import beam_search #DEFINE_ALIAS
# from .decode import beam_search_decode #DEFINE_ALIAS
# from .decode import crf_decoding #DEFINE_ALIAS # from .decode import crf_decoding #DEFINE_ALIAS
# from .decode import ctc_greedy_decoder #DEFINE_ALIAS # from .decode import ctc_greedy_decoder #DEFINE_ALIAS
# from .decode import dynamic_decode #DEFINE_ALIAS
# from .decode import gather_tree #DEFINE_ALIAS
# from .input import Input #DEFINE_ALIAS # from .input import Input #DEFINE_ALIAS
from .layer.activation import ELU #DEFINE_ALIAS from .layer.activation import ELU #DEFINE_ALIAS
from .layer.activation import GELU #DEFINE_ALIAS from .layer.activation import GELU #DEFINE_ALIAS
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from ..fluid.layers import BeamSearchDecoder #DEFINE_ALIAS
from ..fluid.layers import dynamic_decode #DEFINE_ALIAS
__all__ = [
'BeamSearchDecoder',
'dynamic_decode',
]
...@@ -216,3 +216,4 @@ from .vision import pixel_shuffle #DEFINE_ALIAS ...@@ -216,3 +216,4 @@ from .vision import pixel_shuffle #DEFINE_ALIAS
# from .vision import yolov3_loss #DEFINE_ALIAS # from .vision import yolov3_loss #DEFINE_ALIAS
from .input import one_hot #DEFINE_ALIAS from .input import one_hot #DEFINE_ALIAS
from .input import embedding #DEFINE_ALIAS from .input import embedding #DEFINE_ALIAS
from ...fluid.layers import gather_tree
此差异已折叠。
...@@ -12,11 +12,7 @@ ...@@ -12,11 +12,7 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
from . import text
from .text import *
from . import datasets from . import datasets
from .datasets import * from .datasets import *
__all__ = text.__all__ \ __all__ = datasets.__all__
+ datasets.__all__
此差异已折叠。
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册