From ac11c4e4ffd99cf5560622e56288fa07d0d7bdb9 Mon Sep 17 00:00:00 2001 From: Qingsheng Li Date: Wed, 18 Jul 2018 22:15:42 +0800 Subject: [PATCH] Beam search decoder API (#11687) * Added beam search API to fluid/contrib * Change as review comments * API.spec change * Doc updated --- paddle/fluid/API.spec | 20 + python/paddle/fluid/__init__.py | 2 + python/paddle/fluid/contrib/__init__.py | 18 + .../paddle/fluid/contrib/decoder/__init__.py | 18 + .../contrib/decoder/beam_search_decoder.py | 838 ++++++++++++++++++ .../fluid/tests/test_beam_search_decoder.py | 265 ++++++ python/setup.py.in | 2 + 7 files changed, 1163 insertions(+) create mode 100644 python/paddle/fluid/contrib/__init__.py create mode 100644 python/paddle/fluid/contrib/decoder/__init__.py create mode 100644 python/paddle/fluid/contrib/decoder/beam_search_decoder.py create mode 100644 python/paddle/fluid/tests/test_beam_search_decoder.py diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec index 5c17ec4fc3..8c0d87db7a 100644 --- a/paddle/fluid/API.spec +++ b/paddle/fluid/API.spec @@ -341,6 +341,26 @@ paddle.fluid.layers.polynomial_decay ArgSpec(args=['learning_rate', 'decay_steps paddle.fluid.layers.piecewise_decay ArgSpec(args=['boundaries', 'values'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.noam_decay ArgSpec(args=['d_model', 'warmup_steps'], varargs=None, keywords=None, defaults=None) paddle.fluid.layers.append_LARS ArgSpec(args=['params_grads', 'learning_rate', 'weight_decay'], varargs=None, keywords=None, defaults=None) +paddle.fluid.contrib.InitState.__init__ ArgSpec(args=['self', 'init', 'shape', 'value', 'init_boot', 'need_reorder', 'dtype'], varargs=None, keywords=None, defaults=(None, None, 0.0, None, False, 'float32')) +paddle.fluid.contrib.StateCell.__init__ ArgSpec(args=['self', 'inputs', 'states', 'out_state', 'name'], varargs=None, keywords=None, defaults=(None,)) +paddle.fluid.contrib.StateCell.compute_state ArgSpec(args=['self', 'inputs'], varargs=None, keywords=None, defaults=None) +paddle.fluid.contrib.StateCell.get_input ArgSpec(args=['self', 'input_name'], varargs=None, keywords=None, defaults=None) +paddle.fluid.contrib.StateCell.get_state ArgSpec(args=['self', 'state_name'], varargs=None, keywords=None, defaults=None) +paddle.fluid.contrib.StateCell.out_state ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) +paddle.fluid.contrib.StateCell.set_state ArgSpec(args=['self', 'state_name', 'state_value'], varargs=None, keywords=None, defaults=None) +paddle.fluid.contrib.StateCell.state_updater ArgSpec(args=['self', 'updater'], varargs=None, keywords=None, defaults=None) +paddle.fluid.contrib.StateCell.update_states ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) +paddle.fluid.contrib.TrainingDecoder.__init__ ArgSpec(args=['self', 'state_cell', 'name'], varargs=None, keywords=None, defaults=(None,)) +paddle.fluid.contrib.TrainingDecoder.block ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None) +paddle.fluid.contrib.TrainingDecoder.output ArgSpec(args=['self'], varargs='outputs', keywords=None, defaults=None) +paddle.fluid.contrib.TrainingDecoder.static_input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None) +paddle.fluid.contrib.TrainingDecoder.step_input ArgSpec(args=['self', 'x'], varargs=None, keywords=None, defaults=None) +paddle.fluid.contrib.BeamSearchDecoder.__init__ ArgSpec(args=['self', 'state_cell', 'init_ids', 'init_scores', 'target_dict_dim', 'word_dim', 'input_var_dict', 'topk_size', 'sparse_emb', 'max_len', 'beam_size', 'end_id', 'name'], varargs=None, keywords=None, defaults=({}, 50, True, 100, 1, 1, None)) +paddle.fluid.contrib.BeamSearchDecoder.block ArgSpec(args=[], varargs='args', keywords='kwds', defaults=None) +paddle.fluid.contrib.BeamSearchDecoder.decode ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) +paddle.fluid.contrib.BeamSearchDecoder.early_stop ArgSpec(args=['self'], varargs=None, keywords=None, defaults=None) +paddle.fluid.contrib.BeamSearchDecoder.read_array ArgSpec(args=['self', 'init', 'is_ids', 'is_scores'], varargs=None, keywords=None, defaults=(False, False)) +paddle.fluid.contrib.BeamSearchDecoder.update_array ArgSpec(args=['self', 'array', 'value'], varargs=None, keywords=None, defaults=None) paddle.fluid.transpiler.DistributeTranspiler.__init__ ArgSpec(args=['self', 'config'], varargs=None, keywords=None, defaults=(None,)) paddle.fluid.transpiler.DistributeTranspiler.create_splited_vars ArgSpec(args=['self', 'source_var', 'block', 'tag'], varargs=None, keywords=None, defaults=None) paddle.fluid.transpiler.DistributeTranspiler.get_pserver_program ArgSpec(args=['self', 'endpoint'], varargs=None, keywords=None, defaults=None) diff --git a/python/paddle/fluid/__init__.py b/python/paddle/fluid/__init__.py index 329d3c5ace..9903047f74 100644 --- a/python/paddle/fluid/__init__.py +++ b/python/paddle/fluid/__init__.py @@ -35,6 +35,7 @@ import io import evaluator import initializer import layers +import contrib import nets import optimizer import backward @@ -66,6 +67,7 @@ __all__ = framework.__all__ + executor.__all__ + concurrency.__all__ + \ 'io', 'initializer', 'layers', + 'contrib', 'transpiler', 'nets', 'optimizer', diff --git a/python/paddle/fluid/contrib/__init__.py b/python/paddle/fluid/contrib/__init__.py new file mode 100644 index 0000000000..12cd5d918e --- /dev/null +++ b/python/paddle/fluid/contrib/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import decoder +from decoder import * + +__all__ = decoder.__all__ diff --git a/python/paddle/fluid/contrib/decoder/__init__.py b/python/paddle/fluid/contrib/decoder/__init__.py new file mode 100644 index 0000000000..22cfe69269 --- /dev/null +++ b/python/paddle/fluid/contrib/decoder/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import beam_search_decoder +from beam_search_decoder import * + +__all__ = beam_search_decoder.__all__ diff --git a/python/paddle/fluid/contrib/decoder/beam_search_decoder.py b/python/paddle/fluid/contrib/decoder/beam_search_decoder.py new file mode 100644 index 0000000000..ba6e138782 --- /dev/null +++ b/python/paddle/fluid/contrib/decoder/beam_search_decoder.py @@ -0,0 +1,838 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +This module provides a general beam search decoder API for RNN based decoders. +The purpose of this API is to allow users to highly customize the behavior +within their RNN decoder(vanilla RNN, LSTM, attention + LSTM, future etc.), +without using the low level API such as while ops. + +This API is still under active development and may change drastically. +""" + +import contextlib +import numpy as np + +from ... import layers +from ...framework import Variable +from ... import core +from ... import framework, unique_name +from ...layer_helper import LayerHelper + +__all__ = ['InitState', 'StateCell', 'TrainingDecoder', 'BeamSearchDecoder'] + + +class _DecoderType: + TRAINING = 1 + BEAM_SEARCH = 2 + + +class InitState(object): + """ + The initial hidden state object. The state objects holds a variable, and may + use it to initialize the hidden state cell of RNN. Usually used as input to + `StateCell` class. + + Args: + init (Variable): The initial variable of the hidden state. If set None, + the variable will be created as a tensor with constant value based + on `shape` and `value` param. + shape (tuple|list): If `init` is None, new Variable's shape. Default + None. + value (float): If `init` is None, new Variable's value. Default None. + init_boot (Variable): If provided, the initial variable will be created + with the same shape as this variable. + need_reorder (bool): If set true, the init will be sorted by its lod + rank within its batches. This should be used if `batch_size > 1`. + dtype (np.dtype|core.VarDesc.VarType|str): Data type of the initial + variable. + + Returns: + An initialized state object. + + Examples: + See `StateCell`. + """ + + def __init__(self, + init=None, + shape=None, + value=0.0, + init_boot=None, + need_reorder=False, + dtype='float32'): + if init is not None: + self._init = init + elif init_boot is None: + raise ValueError( + 'init_boot must be provided to infer the shape of InitState .\n') + else: + self._init = layers.fill_constant_batch_size_like( + input=init_boot, value=value, shape=shape, dtype=dtype) + + self._shape = shape + self._value = value + self._need_reorder = need_reorder + self._dtype = dtype + + @property + def value(self): + return self._init + + @property + def need_reorder(self): + return self._need_reorder + + +class _MemoryState(object): + def __init__(self, state_name, rnn_obj, init_state): + self._state_name = state_name # each is a rnn.memory + self._rnn_obj = rnn_obj + self._state_mem = self._rnn_obj.memory( + init=init_state.value, need_reorder=init_state.need_reorder) + + def get_state(self): + return self._state_mem + + def update_state(self, state): + self._rnn_obj.update_memory(self._state_mem, state) + + +class _ArrayState(object): + def __init__(self, state_name, block, init_state): + self._state_name = state_name + self._block = block + + self._state_array = self._block.create_var( + name=unique_name.generate('array_state_array'), + type=core.VarDesc.VarType.LOD_TENSOR_ARRAY, + dtype=init_state.value.dtype) + + self._counter = self._block.create_var( + name=unique_name.generate('array_state_counter'), + type=core.VarDesc.VarType.LOD_TENSOR, + dtype='int64') + + # initialize counter + self._block.append_op( + type='fill_constant', + inputs={}, + outputs={'Out': [self._counter]}, + attrs={ + 'shape': [1], + 'dtype': self._counter.dtype, + 'value': float(0.0), + 'force_cpu': True + }) + + self._counter.stop_gradient = True + + # write initial state + block.append_op( + type='write_to_array', + inputs={'X': init_state.value, + 'I': self._counter}, + outputs={'Out': self._state_array}) + + def get_state(self): + state = layers.array_read(array=self._state_array, i=self._counter) + return state + + def update_state(self, state): + layers.increment(x=self._counter, value=1, in_place=True) + layers.array_write(state, array=self._state_array, i=self._counter) + + +class StateCell(object): + """ + The state cell class stores the hidden state of the RNN cell. A typical RNN + cell has one or more hidden states, and one or more step inputs. This class + allows you to defines the name of hidden states as well as step inputs, and + their associated variables. + + Args: + inputs (dict): A feeding dict of {name(str) : Variable}. It specifies + the names of step inputs for RNN cell, and the associated variables. + The variable could initially be None and set manually during each + RNN step. + states (dict): A feeding dict of {name(str) : InitState object}. It + specifies the names of hidden states and their initialized state. + out_state (str): A string that specifies the name of hidden state that + will be used to compute the score in beam search process. + name (str): The name of the RNN cell. Default None. + + Raises: + `ValueError`: If the initial state is not an instance of InitState, or + the out_state is not in the dict of states. + + Returns: + StateCell: The initialized StateCell object. + + Examples: + .. code-block:: python + hidden_state = InitState(init=encoder_out, need_reorder=True) + state_cell = StateCell( + inputs={'current_word': None}, + states={'h': hidden_state}, + out_state='h') + """ + + def __init__(self, inputs, states, out_state, name=None): + self._helper = LayerHelper('state_cell', name=name) + self._cur_states = {} + self._state_names = [] + for state_name, state in states.items(): + if not isinstance(state, InitState): + raise ValueError('state must be an InitState object.') + self._cur_states[state_name] = state + self._state_names.append(state_name) + self._inputs = inputs # inputs is place holder here + self._cur_decoder_obj = None + self._in_decoder = False + self._states_holder = {} + self._switched_decoder = False + self._state_updater = None + self._out_state = out_state + if self._out_state not in self._cur_states: + raise ValueError('out_state must be one state in states') + + def _enter_decoder(self, decoder_obj): + if self._in_decoder == True or self._cur_decoder_obj is not None: + raise ValueError('StateCell has already entered a decoder.') + self._in_decoder = True + self._cur_decoder_obj = decoder_obj + self._switched_decoder = False + + def _leave_decoder(self, decoder_obj): + if not self._in_decoder: + raise ValueError('StateCell not in decoder, ' + 'invalid leaving operation.') + + if self._cur_decoder_obj != decoder_obj: + raise ValueError('Inconsistent decoder object in StateCell.') + + self._in_decoder = False + self._cur_decoder_obj = None + self._switched_decoder = False + + def _switch_decoder(self): # lazy switch + if not self._in_decoder: + raise ValueError('StateCell must be enter a decoder.') + + if self._switched_decoder: + raise ValueError('StateCell already done switching.') + + for state_name in self._state_names: + if state_name not in self._states_holder: + state = self._cur_states[state_name] + + if not isinstance(state, InitState): + raise ValueError('Current type of state is %s, should be ' + 'an InitState object.' % type(state)) + + self._states_holder[state_name] = {} + + if self._cur_decoder_obj.type == _DecoderType.TRAINING: + self._states_holder[state_name][id(self._cur_decoder_obj)] \ + = _MemoryState(state_name, + self._cur_decoder_obj.dynamic_rnn, + state) + elif self._cur_decoder_obj.type == _DecoderType.BEAM_SEARCH: + self._states_holder[state_name][id(self._cur_decoder_obj)] \ + = _ArrayState(state_name, + self._cur_decoder_obj._parent_block(), + state) + else: + raise ValueError('Unknown decoder type, only support ' + '[TRAINING, BEAM_SEARCH]') + + # Read back, since current state should be LoDTensor + self._cur_states[state_name] = \ + self._states_holder[state_name][ + id(self._cur_decoder_obj)].get_state() + + self._switched_decoder = True + + def get_state(self, state_name): + """ + The getter of state object. Find the state variable by its name. + + Args: + state_name (str): A string of the state's name. + + Returns: + The associated state object. + """ + if self._in_decoder and not self._switched_decoder: + self._switch_decoder() + + if state_name not in self._cur_states: + raise ValueError( + 'Unknown state %s. Please make sure _switch_decoder() ' + 'invoked.' % state_name) + + return self._cur_states[state_name] + + def get_input(self, input_name): + """ + The getter of input variable. Find the input variable by its name. + + Args: + input_name (str): The string of the input's name. + + Returns: + The associated input variable. + """ + if input_name not in self._inputs or self._inputs[input_name] is None: + raise ValueError('Invalid input %s.' % input_name) + return self._inputs[input_name] + + def set_state(self, state_name, state_value): + """ + The setter of the state variable. Change the variable of the given + `state_name`. + + Args: + state_name (str): The name of the state to change. + state_value (Var): The variable of the new state. + """ + self._cur_states[state_name] = state_value + + def state_updater(self, updater): + """ + Set up the updater to update the hidden state every RNN step. The + behavior of updater could be customized by users. The updater should be + a function that takes a `StateCell` object as input and update the + hidden state within it. The hidden state could be accessed through + `get_state` method. + + Args: + updater (func): the updater to update the state cell. + """ + self._state_updater = updater + + def _decorator(state_cell): + if state_cell == self: + raise TypeError('Updater should only accept a StateCell object ' + 'as argument.') + updater(state_cell) + + return _decorator + + def compute_state(self, inputs): + """ + Provide the step input of RNN cell, and compute the new hidden state + with updater and give step input. + + Args: + inputs (dict): A feed dict, {name(str): Variable}. name should be + the names of step inputs for this RNN cell, and Variable should be + the associated variables. + + Examples: + .. code-block:: python + state_cell.compute_state(inputs={'x': current_word}) + """ + if self._in_decoder and not self._switched_decoder: + self._switch_decoder() + + for input_name, input_value in inputs.items(): + if input_name not in self._inputs: + raise ValueError('Unknown input %s. ' + 'Please make sure %s in input ' + 'place holder.' % (input_name, input_name)) + self._inputs[input_name] = input_value + self._state_updater(self) + + def update_states(self): + """ + Update and record state information after each RNN step. + """ + if self._in_decoder and not self._switched_decoder: + self._switched_decoder() + + for state_name, decoder_state in self._states_holder.items(): + if id(self._cur_decoder_obj) not in decoder_state: + raise ValueError('Unknown decoder object, please make sure ' + 'switch_decoder been invoked.') + decoder_state[id(self._cur_decoder_obj)].update_state( + self._cur_states[state_name]) + + def out_state(self): + """ + Get the output state variable. This must be called after update_states. + + Returns: + The output variable of the RNN cell. + """ + return self._cur_states[self._out_state] + + +class TrainingDecoder(object): + """ + A decoder that can only be used for training. The decoder could be + initialized with a `StateCell` object. The computation within the RNN cell + could be defined with decoder's block. + + Args: + state_cell (StateCell): A StateCell object that handles the input and + state variables. + name (str): The name of this decoder. Default None. + + Returns: + TrainingDecoder: The initialized TrainingDecoder object. + + Examples: + .. code-block:: python + decoder = TrainingDecoder(state_cell) + with decoder.block(): + current_word = decoder.step_input(trg_embedding) + decoder.state_cell.compute_state(inputs={'x': current_word}) + current_score = layers.fc(input=decoder.state_cell.get_state('h'), + size=32, + act='softmax') + decoder.state_cell.update_states() + decoder.output(current_score) + """ + BEFORE_DECODER = 0 + IN_DECODER = 1 + AFTER_DECODER = 2 + + def __init__(self, state_cell, name=None): + self._helper = LayerHelper('training_decoder', name=name) + self._status = TrainingDecoder.BEFORE_DECODER + self._dynamic_rnn = layers.DynamicRNN() + self._type = _DecoderType.TRAINING + self._state_cell = state_cell + self._state_cell._enter_decoder(self) + + @contextlib.contextmanager + def block(self): + """ + Define the behavior of the decoder for each RNN time step. + """ + if self._status != TrainingDecoder.BEFORE_DECODER: + raise ValueError('decoder.block() can only be invoked once') + self._status = TrainingDecoder.IN_DECODER + with self._dynamic_rnn.block(): + yield + self._status = TrainingDecoder.AFTER_DECODER + self._state_cell._leave_decoder(self) + + @property + def state_cell(self): + self._assert_in_decoder_block('state_cell') + return self._state_cell + + @property + def dynamic_rnn(self): + return self._dynamic_rnn + + @property + def type(self): + return self._type + + def step_input(self, x): + """ + Set the input variable as a step input to the RNN cell. For example, + in machine translation, each time step we read one word from the target + sentences, then the target sentence is a step input to the RNN cell. + + Args: + x (Variable): the variable to be used as step input. + + Returns: + Variable: The variable as input of current step. + + Examples: + .. code-block:: python + current_word = decoder.step_input(trg_embedding) + """ + self._assert_in_decoder_block('step_input') + return self._dynamic_rnn.step_input(x) + + def static_input(self, x): + """ + Set the input variable as a static input of RNN cell. In contrast to + step input, this variable will be used as a whole within the RNN decode + loop and will not be scattered into time steps. + + Args: + x (Variable): the variable to be used as static input. + + Returns: + Variable: The variable as input of current step. + + Examples: + .. code-block:: python + encoder_vec = decoder.static_input(encoded_vector) + """ + self._assert_in_decoder_block('static_input') + return self._dynamic_rnn.static_input(x) + + def __call__(self, *args, **kwargs): + """ + Get the output of RNN. This API should only be invoked after RNN.block() + + Returns: + Variable: The specified output of the RNN cell. + """ + if self._status != TrainingDecoder.AFTER_DECODER: + raise ValueError('Output of training decoder can only be visited ' + 'outside the block.') + return self._dynamic_rnn(*args, **kwargs) + + def output(self, *outputs): + """ + Set the output variable of the RNN cell. + + Args: + *outputs (Variables): a series of variables that treated as output + of the RNN cell. + + Examples: + .. code-block:: python + out = fluid.layers.fc(input=h, + size=32, + bias_attr=True, + act='softmax') + decoder.output(out) + """ + self._assert_in_decoder_block('output') + self._dynamic_rnn.output(*outputs) + + def _assert_in_decoder_block(self, method): + if self._status != TrainingDecoder.IN_DECODER: + raise ValueError('%s should be invoked inside block of ' + 'TrainingDecoder object.' % method) + + +class BeamSearchDecoder(object): + """ + A beam search decoder that can be used for inference. The decoder should be + initialized with a `StateCell` object. The decode process can be defined + within its block. + + Args: + state_cell (StateCell): A StateCell object that handles the input and + state variables. + init_ids (Variable): The init beam search token ids. + init_scores (Variable): The associated score of each id. + target_dict_dim (int): Size of dictionary. + word_dim (int): Word embedding dimension. + input_var_dict (dict): A feeding dict to feed the required input + variables to the state cell. It will be used by state_cell 's + compute method. Default empty. + topk_size (int): The topk size used for beam search. Default 50. + max_len (int): The maximum allowed length of the generated sentence. + Default 100. + beam_size (int): The beam width of beam search decode. Default 1. + end_id (int): The id of end token within beam search. + name (str): The name of this decoder. Default None. + + Returns: + BeamSearchDecoder: A initialized BeamSearchDecoder object. + + Examples: + .. code-block:: python + decoder = BeamSearchDecoder( + state_cell=state_cell, + init_ids=init_ids, + init_scores=init_scores, + target_dict_dim=target_dict_dim, + word_dim=word_dim, + init_var_dict={}, + topk_size=topk_size, + sparse_emb=IS_SPARSE, + max_len=max_length, + beam_size=beam_size, + end_id=1, + name=None + ) + decoder.decode() + translation_ids, translation_scores = decoder() + """ + BEFORE_BEAM_SEARCH_DECODER = 0 + IN_BEAM_SEARCH_DECODER = 1 + AFTER_BEAM_SEARCH_DECODER = 2 + + def __init__(self, + state_cell, + init_ids, + init_scores, + target_dict_dim, + word_dim, + input_var_dict={}, + topk_size=50, + sparse_emb=True, + max_len=100, + beam_size=1, + end_id=1, + name=None): + self._helper = LayerHelper('beam_search_decoder', name=name) + self._counter = layers.zeros(shape=[1], dtype='int64') + self._counter.stop_gradient = True + self._type = _DecoderType.BEAM_SEARCH + self._max_len = layers.fill_constant( + shape=[1], dtype='int64', value=max_len) + self._cond = layers.less_than( + x=self._counter, + y=layers.fill_constant( + shape=[1], dtype='int64', value=max_len)) + self._while_op = layers.While(self._cond) + self._state_cell = state_cell + self._state_cell._enter_decoder(self) + self._status = BeamSearchDecoder.BEFORE_BEAM_SEARCH_DECODER + self._zero_idx = layers.fill_constant( + shape=[1], value=0, dtype='int64', force_cpu=True) + self._array_dict = {} + self._array_link = [] + self._ids_array = None + self._scores_array = None + self._beam_size = beam_size + self._end_id = end_id + + self._init_ids = init_ids + self._init_scores = init_scores + self._target_dict_dim = target_dict_dim + self._topk_size = topk_size + self._sparse_emb = sparse_emb + self._word_dim = word_dim + self._input_var_dict = input_var_dict + + @contextlib.contextmanager + def block(self): + """ + Define the behavior of the decoder for each RNN time step. + """ + if self._status != BeamSearchDecoder.BEFORE_BEAM_SEARCH_DECODER: + raise ValueError('block() can only be invoke once.') + + self._status = BeamSearchDecoder.IN_BEAM_SEARCH_DECODER + + with self._while_op.block(): + yield + with layers.Switch() as switch: + with switch.case(self._cond): + layers.increment(x=self._counter, value=1.0, in_place=True) + + for value, array in self._array_link: + layers.array_write( + x=value, i=self._counter, array=array) + + layers.less_than( + x=self._counter, y=self._max_len, cond=self._cond) + + self._status = BeamSearchDecoder.AFTER_BEAM_SEARCH_DECODER + self._state_cell._leave_decoder(self) + + @property + def type(self): + return self._type + + def early_stop(self): + """ + Stop the generation process in advance. Could be used as "break". + """ + layers.fill_constant( + shape=[1], value=0, dtype='bool', force_cpu=True, out=self._cond) + + def decode(self): + """ + Set up the computation within the decoder. Then you could call the + decoder to get the result of beam search decode. If you want to define + a more specific decoder, you could override this function. + + Examples: + .. code-block:: python + decoder.decode() + translation_ids, translation_scores = decoder() + """ + with self.block(): + prev_ids = self.read_array(init=self._init_ids, is_ids=True) + prev_scores = self.read_array( + init=self._init_scores, is_scores=True) + prev_ids_embedding = layers.embedding( + input=prev_ids, + size=[self._target_dict_dim, self._word_dim], + dtype='float32', + is_sparse=self._sparse_emb) + + feed_dict = {} + update_dict = {} + + for init_var_name, init_var in self._input_var_dict.items(): + if init_var_name not in self.state_cell._inputs: + raise ValueError('Variable ' + init_var_name + + ' not found in StateCell!\n') + + read_var = self.read_array(init=init_var) + update_dict[init_var_name] = read_var + feed_var_expanded = layers.sequence_expand(read_var, + prev_scores) + feed_dict[init_var_name] = feed_var_expanded + + for state_str in self._state_cell._state_names: + prev_state = self.state_cell.get_state(state_str) + prev_state_expanded = layers.sequence_expand(prev_state, + prev_scores) + self.state_cell.set_state(state_str, prev_state_expanded) + + for i, input_name in enumerate(self._state_cell._inputs): + if input_name not in feed_dict: + feed_dict[input_name] = prev_ids_embedding + + self.state_cell.compute_state(inputs=feed_dict) + current_state = self.state_cell.out_state() + current_state_with_lod = layers.lod_reset( + x=current_state, y=prev_scores) + scores = layers.fc(input=current_state_with_lod, + size=self._target_dict_dim, + act='softmax') + topk_scores, topk_indices = layers.topk(scores, k=self._topk_size) + accu_scores = layers.elementwise_add( + x=layers.log(x=topk_scores), + y=layers.reshape( + prev_scores, shape=[-1]), + axis=0) + selected_ids, selected_scores = layers.beam_search( + prev_ids, + prev_scores, + topk_indices, + accu_scores, + self._beam_size, + end_id=1, + level=0) + + with layers.Switch() as switch: + with switch.case(layers.is_empty(selected_ids)): + self.early_stop() + with switch.default(): + self.state_cell.update_states() + self.update_array(prev_ids, selected_ids) + self.update_array(prev_scores, selected_scores) + for update_name, var_to_update in update_dict.items(): + self.update_array(var_to_update, feed_dict[update_name]) + + def read_array(self, init, is_ids=False, is_scores=False): + """ + Read an array to get the decoded ids and scores generated by previous + RNN step. At the first step of RNN, the init variable mut be used to + initialize the array. + + Args: + init (Variable): The initial variable for first step usage. init + must be provided. + is_ids (bool): Specify whether the variable is an id. + is_scores (bool): Specify whether the variable is a score. + + Returns: + The associated variable generated during previous RNN steps. + + Examples: + .. code-block:: python + prev_ids = decoder.read_array(init=init_ids, is_ids=True) + prev_scores = decoder.read_array(init=init_scores, is_scores=True) + """ + self._assert_in_decoder_block('read_array') + + if is_ids and is_scores: + raise ValueError('Shouldn\'t mark current array be ids array and' + 'scores array at the same time.') + + if not isinstance(init, Variable): + raise TypeError('The input argument `init` must be a Variable.') + + parent_block = self._parent_block() + array = parent_block.create_var( + name=unique_name.generate('beam_search_decoder_array'), + type=core.VarDesc.VarType.LOD_TENSOR_ARRAY, + dtype=init.dtype) + parent_block.append_op( + type='write_to_array', + inputs={'X': init, + 'I': self._zero_idx}, + outputs={'Out': array}) + + if is_ids: + self._ids_array = array + elif is_scores: + self._scores_array = array + + read_value = layers.array_read(array=array, i=self._counter) + self._array_dict[read_value.name] = array + return read_value + + def update_array(self, array, value): + """ + Store the value generated in current step in an array for each RNN step. + This array could be accessed by read_array method. + + Args: + array (Variable): The array to append the new variable to. + value (Variable): The newly generated value to be stored. + """ + self._assert_in_decoder_block('update_array') + + if not isinstance(array, Variable): + raise TypeError( + 'The input argument `array` of must be a Variable.') + if not isinstance(value, Variable): + raise TypeError('The input argument `value` of must be a Variable.') + + array = self._array_dict.get(array.name, None) + if array is None: + raise ValueError('Please invoke read_array before update_array.') + self._array_link.append((value, array)) + + def __call__(self): + """ + Run the decode process and return the final decode result. + + Returns: + A tuple of decoded (id, score) pairs. id is a Variable that holds + the generated tokens, and score is a Variable with the same shape + as id, holds the score for each generated token. + """ + if self._status != BeamSearchDecoder.AFTER_BEAM_SEARCH_DECODER: + raise ValueError('Output of BeamSearchDecoder object can ' + 'only be visited outside the block.') + return layers.beam_search_decode( + ids=self._ids_array, + scores=self._scores_array, + beam_size=self._beam_size, + end_id=self._end_id) + + @property + def state_cell(self): + self._assert_in_decoder_block('state_cell') + return self._state_cell + + def _parent_block(self): + """ + Getter of parent block. + + Returns: + The parent block of decoder. + """ + program = self._helper.main_program + parent_block_idx = program.current_block().parent_idx + if parent_block_idx < 0: + raise ValueError('Invalid block with index %d.' % parent_block_idx) + parent_block = program.block(parent_block_idx) + return parent_block + + def _assert_in_decoder_block(self, method): + if self._status != BeamSearchDecoder.IN_BEAM_SEARCH_DECODER: + raise ValueError('%s should be invoked inside block of ' + 'BeamSearchDecoder object.' % method) diff --git a/python/paddle/fluid/tests/test_beam_search_decoder.py b/python/paddle/fluid/tests/test_beam_search_decoder.py new file mode 100644 index 0000000000..7a2502fa2f --- /dev/null +++ b/python/paddle/fluid/tests/test_beam_search_decoder.py @@ -0,0 +1,265 @@ +# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserve. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. +""" +A simple machine translation demo using beam search decoder. +""" + +import contextlib +import numpy as np +import paddle +import paddle.fluid as fluid +import paddle.fluid.core as core +import paddle.fluid.framework as framework +import paddle.fluid.layers as layers +from paddle.fluid.executor import Executor +from paddle.fluid.contrib.decoder.beam_search_decoder import * +import unittest +import os + +dict_size = 30000 +source_dict_dim = target_dict_dim = dict_size +src_dict, trg_dict = paddle.dataset.wmt14.get_dict(dict_size) +hidden_dim = 32 +word_dim = 32 +decoder_size = hidden_dim +IS_SPARSE = True +batch_size = 2 +max_length = 8 +topk_size = 50 +trg_dic_size = 10000 +beam_size = 2 + + +def encoder(): + # encoder + src_word = layers.data( + name="src_word", shape=[1], dtype='int64', lod_level=1) + src_embedding = layers.embedding( + input=src_word, + size=[dict_size, word_dim], + dtype='float32', + is_sparse=IS_SPARSE) + + fc1 = layers.fc(input=src_embedding, size=hidden_dim * 4, act='tanh') + lstm_hidden0, lstm_0 = layers.dynamic_lstm(input=fc1, size=hidden_dim * 4) + encoder_out = layers.sequence_last_step(input=lstm_hidden0) + return encoder_out + + +def decoder_state_cell(context): + h = InitState(init=context, need_reorder=True) + state_cell = StateCell(inputs={'x': None}, states={'h': h}, out_state='h') + + @state_cell.state_updater + def updater(state_cell): + current_word = state_cell.get_input('x') + prev_h = state_cell.get_state('h') + # make sure lod of h heritted from prev_h + h = layers.fc(input=[prev_h, current_word], + size=decoder_size, + act='tanh') + state_cell.set_state('h', h) + + return state_cell + + +def decoder_train(state_cell): + # decoder + trg_language_word = layers.data( + name="target_word", shape=[1], dtype='int64', lod_level=1) + trg_embedding = layers.embedding( + input=trg_language_word, + size=[dict_size, word_dim], + dtype='float32', + is_sparse=IS_SPARSE) + + decoder = TrainingDecoder(state_cell) + + with decoder.block(): + current_word = decoder.step_input(trg_embedding) + decoder.state_cell.compute_state(inputs={'x': current_word}) + current_score = layers.fc(input=decoder.state_cell.get_state('h'), + size=target_dict_dim, + act='softmax') + decoder.state_cell.update_states() + decoder.output(current_score) + + return decoder() + + +def decoder_decode(state_cell): + init_ids = layers.data( + name="init_ids", shape=[1], dtype="int64", lod_level=2) + init_scores = layers.data( + name="init_scores", shape=[1], dtype="float32", lod_level=2) + + decoder = BeamSearchDecoder( + state_cell=state_cell, + init_ids=init_ids, + init_scores=init_scores, + target_dict_dim=target_dict_dim, + word_dim=word_dim, + input_var_dict={}, + topk_size=topk_size, + sparse_emb=IS_SPARSE, + max_len=max_length, + beam_size=beam_size, + end_id=1, + name=None) + decoder.decode() + translation_ids, translation_scores = decoder() + + return translation_ids, translation_scores + + +def train_main(use_cuda): + if use_cuda and not fluid.core.is_compiled_with_cuda(): + return + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + + context = encoder() + state_cell = decoder_state_cell(context) + rnn_out = decoder_train(state_cell) + label = layers.data( + name="target_next_word", shape=[1], dtype='int64', lod_level=1) + cost = layers.cross_entropy(input=rnn_out, label=label) + avg_cost = layers.mean(x=cost) + + optimizer = fluid.optimizer.Adagrad(learning_rate=1e-3) + optimizer.minimize(avg_cost) + + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.wmt14.train(dict_size), buf_size=1000), + batch_size=batch_size) + feed_order = ['src_word', 'target_word', 'target_next_word'] + + exe = Executor(place) + + def train_loop(main_program): + exe.run(framework.default_startup_program()) + + feed_list = [ + main_program.global_block().var(var_name) for var_name in feed_order + ] + feeder = fluid.DataFeeder(feed_list, place) + + for pass_id in xrange(1): + for batch_id, data in enumerate(train_reader()): + outs = exe.run(main_program, + feed=feeder.feed(data), + fetch_list=[avg_cost]) + avg_cost_val = np.array(outs[0]) + print('pass_id=' + str(pass_id) + ' batch=' + str(batch_id) + + " avg_cost=" + str(avg_cost_val)) + if batch_id > 3: + break + + train_loop(framework.default_main_program()) + + +def decode_main(use_cuda): + if use_cuda and not fluid.core.is_compiled_with_cuda(): + return + place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() + + context = encoder() + state_cell = decoder_state_cell(context) + translation_ids, translation_scores = decoder_decode(state_cell) + + exe = Executor(place) + exe.run(framework.default_startup_program()) + + init_ids_data = np.array([0 for _ in range(batch_size)], dtype='int64') + init_scores_data = np.array( + [1. for _ in range(batch_size)], dtype='float32') + init_ids_data = init_ids_data.reshape((batch_size, 1)) + init_scores_data = init_scores_data.reshape((batch_size, 1)) + init_lod = [1] * batch_size + init_lod = [init_lod, init_lod] + + init_ids = fluid.create_lod_tensor(init_ids_data, init_lod, place) + init_scores = fluid.create_lod_tensor(init_scores_data, init_lod, place) + + train_reader = paddle.batch( + paddle.reader.shuffle( + paddle.dataset.wmt14.train(dict_size), buf_size=1000), + batch_size=batch_size) + + feed_order = ['src_word'] + feed_list = [ + framework.default_main_program().global_block().var(var_name) + for var_name in feed_order + ] + feeder = fluid.DataFeeder(feed_list, place) + + data = train_reader().next() + feed_dict = feeder.feed(map(lambda x: [x[0]], data)) + feed_dict['init_ids'] = init_ids + feed_dict['init_scores'] = init_scores + + result_ids, result_scores = exe.run( + framework.default_main_program(), + feed=feed_dict, + fetch_list=[translation_ids, translation_scores], + return_numpy=False) + print result_ids.lod() + + +class TestBeamSearchDecoder(unittest.TestCase): + pass + + +@contextlib.contextmanager +def scope_prog_guard(): + prog = fluid.Program() + startup_prog = fluid.Program() + scope = fluid.core.Scope() + with fluid.scope_guard(scope): + with fluid.program_guard(prog, startup_prog): + yield + + +def inject_test_train(use_cuda): + f_name = 'test_{0}_train'.format('cuda' if use_cuda else 'cpu') + + def f(*args): + with scope_prog_guard(): + train_main(use_cuda) + + setattr(TestBeamSearchDecoder, f_name, f) + + +def inject_test_decode(use_cuda, decorator=None): + f_name = 'test_{0}_decode'.format('cuda' if use_cuda else 'cpu', 'sparse') + + def f(*args): + with scope_prog_guard(): + decode_main(use_cuda) + + if decorator is not None: + f = decorator(f) + + setattr(TestBeamSearchDecoder, f_name, f) + + +for _use_cuda_ in (False, True): + inject_test_train(_use_cuda_) + +for _use_cuda_ in (False, True): + _decorator_ = None + inject_test_decode(use_cuda=_use_cuda_, decorator=_decorator_) + +if __name__ == '__main__': + unittest.main() diff --git a/python/setup.py.in b/python/setup.py.in index a81cd19e10..4a6cddbbea 100644 --- a/python/setup.py.in +++ b/python/setup.py.in @@ -104,6 +104,8 @@ packages=['paddle', 'paddle.fluid.proto', 'paddle.fluid.proto.profiler', 'paddle.fluid.layers', + 'paddle.fluid.contrib', + 'paddle.fluid.contrib.decoder', 'paddle.fluid.transpiler', 'paddle.fluid.transpiler.details'] -- GitLab