Commit 4615c517 authored by luotao1, committed by LCY-Seso

beam search api and unittest in hierarchical rnn (#122)

Parent baaaa0b0
@@ -171,12 +171,13 @@ def gru_encoder_decoder(data_conf,
     beam_gen = beam_search(name=decoder_group_name,
                            step=gru_decoder_with_attention,
                            input=group_inputs,
-                           id_input=data_layer(name="sent_id",
-                                               size=1),
-                           dict_file=trg_dict_path,
                            bos_id=0,
                            eos_id=1,
                            beam_size=beam_size,
-                           max_length=max_length,
-                           result_file=gen_trans_file)
+                           max_length=max_length)
+
+    seqtext_printer_evaluator(input=beam_gen,
+                              id_input=data_layer(name="sent_id", size=1),
+                              dict_file=trg_dict_path,
+                              result_file=gen_trans_file)
     outputs(beam_gen)
@@ -202,14 +202,17 @@ After training the model, we can use it to generate sequences. A common practice
 * use :code:`GeneratedInput` for trg_embedding. :code:`GeneratedInput` computes the embedding of the generated token at the last time step for the input at the current time step.
 * use :code:`beam_search` function. This function needs to set:
-  - :code:`id_input`: the integer ID of the data, used to identify the corresponding output in the generated files.
-  - :code:`dict_file`: the dictionary file for converting word id to word.
   - :code:`bos_id`: the start token. Every sentence starts with the start token.
   - :code:`eos_id`: the end token. Every sentence ends with the end token.
   - :code:`beam_size`: the beam size used in beam search.
   - :code:`max_length`: the maximum length of the generated sentences.
-  - :code:`result_file`: the path of the generation result file.
+* use :code:`seqtext_printer_evaluator` to print text according to the index matrix and the dictionary. This function needs to set:
+  - :code:`id_input`: the integer ID of the data, used to identify the corresponding output in the generated files.
+  - :code:`dict_file`: the dictionary file for converting word id to word.
+  - :code:`result_file`: the path of the generation result file.
 
 The code is listed below:
 
 .. code-block:: python
@@ -230,14 +233,15 @@ The code is listed below:
     beam_gen = beam_search(name=decoder_group_name,
                            step=gru_decoder_with_attention,
                            input=group_inputs,
-                           id_input=data_layer(name="sent_id",
-                                               size=1),
-                           dict_file=trg_dict_path,
                            bos_id=0,  # Beginning token.
                            eos_id=1,  # End of sentence token.
                            beam_size=beam_size,
-                           max_length=max_length,
-                           result_file=gen_trans_file)
+                           max_length=max_length)
+
+    seqtext_printer_evaluator(input=beam_gen,
+                              id_input=data_layer(name="sent_id", size=1),
+                              dict_file=trg_dict_path,
+                              result_file=gen_trans_file)
 
     outputs(beam_gen)
......
0 1 2 3 4
1 2 3 4
1 2 3 4
1 2 3 4
1 2 3 4
1 2 3 4
1 2 3 4
1 2 3 4
1 2 3 4
1 2 3 4
1 2 3 4
1 2 3 4
1 2 3 4
1 2 3 4
1 2 3 4
#edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
settings(batch_size=15, learning_rate=0)
num_words = 5
beam_flag = get_config_arg('beam_search', bool, False)
sent_id = data_layer(name="sent_id", size=1)
# This layer has no actual use, but only to decide batch_size in generation.
# When generating, at least one Memory in RecurrentLayer MUST have a boot layer.
dummy_data = data_layer(name="dummy_data_input", size=2)
def outer_step(dummy_data):
    gen_inputs = [StaticInput(input=dummy_data, size=2, is_seq=True),
                  GeneratedInput(size=num_words,
                                 embedding_name="wordvec",
                                 embedding_size=num_words)]

    def inner_step(dummy_memory, predict_word):
        # simplified RNN for testing
        with mixed_layer(size=num_words) as layer:
            layer += full_matrix_projection(input=predict_word,
                                            param_attr=ParamAttr(name="transtable"))

        with mixed_layer(size=num_words, act=ExpActivation()) as out:
            out += trans_full_matrix_projection(input=layer,
                                                param_attr=ParamAttr(name="wordvec"))

        return out

    beam_gen = beam_search(name="rnn_gen",
                           step=inner_step,
                           input=gen_inputs,
                           bos_id=0,
                           eos_id=num_words-1,
                           beam_size=2 if beam_flag else 1,
                           num_results_per_sample=2 if beam_flag else 1,
                           max_length=10)
    return beam_gen

beam_gen_concat = recurrent_group(name="rnn_gen_concat",
                                  step=outer_step,
                                  input=[SubsequenceInput(dummy_data)])

seqtext_printer_evaluator(input=beam_gen_concat,
                          id_input=sent_id,
                          dict_file="./trainer/tests/test_gen_dict.txt",
                          result_file="./trainer/tests/dump_text.test")
#outputs(beam_gen_concat)

# In this config, dummy_data_input has no effect on beam_gen (dummy_memory is a
# read-only memory and is not used by any other layer of step), so we declare
# the Inputs and Outputs explicitly. Note that "__beam_search_predict__" is the
# default output name of beam_search.
Inputs("sent_id", "dummy_data_input")
Outputs("__beam_search_predict__")
@@ -48,15 +48,16 @@ def step(dummy_memory, predict_word):
 beam_gen = beam_search(name="rnn_gen",
                        step=step,
                        input=gen_inputs,
-                       id_input=sent_id,
-                       dict_file="./trainer/tests/test_gen_dict.txt",
-                       result_file="./trainer/tests/dump_text.test",
                        bos_id=0,
                        eos_id=num_words-1,
                        beam_size=2 if beam_flag else 1,
                        num_results_per_sample=2 if beam_flag else 1,
                        max_length=10)
+
+seqtext_printer_evaluator(input=beam_gen,
+                          id_input=sent_id,
+                          dict_file="./trainer/tests/test_gen_dict.txt",
+                          result_file="./trainer/tests/dump_text.test")
 #outputs(beam_gen)
 
 # In this config, as dummy_data_input doesn't work on beam_gen (we can find dummy_memory
 # is read-only memory, and isn't used by other layers of step), we show the Inputs and Outputs
......
@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include <fstream>
 #include <paddle/utils/PythonUtil.h>
@@ -24,6 +23,8 @@ using namespace paddle;  // NOLINT
 using namespace std;  // NOLINT
 
 static const string& CONFIG_FILE = "trainer/tests/sample_trainer_rnn_gen.conf";
+static const string& NEST_CONFIG_FILE =
+    "trainer/tests/sample_trainer_nest_rnn_gen.conf";
 static const string& OUTPUT_DIR = "trainer/tests/dump_text.test";
 static string modelDir = "trainer/tests/rnn_gen_test_model_dir/t1";  // NOLINT
 static string expectFile =  // NOLINT
@@ -50,32 +51,52 @@ void checkOutput(const string& expRetFile) {
   }
 }
 
-void prepareInArgs(vector<Argument>& inArgs,
-                   const size_t batchSize, bool useGpu) {
+void prepareInArgs(vector<Argument>& inArgs, const size_t batchSize,
+                   bool useGpu, bool hasSubseq) {
   inArgs.clear();
   // sentence id
   Argument sentId;
   sentId.value = nullptr;
-  IVector::resizeOrCreate(sentId.ids, batchSize, useGpu);
-  for (size_t i = 0; i < batchSize; ++i) sentId.ids->setElement(i, i);
+  if (hasSubseq) {
+    // as there is only one sequence, there is only one label.
+    IVector::resizeOrCreate(sentId.ids, 1, useGpu);
+    sentId.ids->setElement(0, 0);
+  } else {
+    // as there are batchSize words, there are batchSize labels.
+    IVector::resizeOrCreate(sentId.ids, batchSize, useGpu);
+    for (size_t i = 0; i < batchSize; ++i) sentId.ids->setElement(i, i);
+  }
   inArgs.emplace_back(sentId);
 
   // a dummy layer to decide batch size
   Argument dummyInput;
   dummyInput.value = Matrix::create(batchSize, 2, false, useGpu);
   dummyInput.value->randomizeUniform();
+  if (hasSubseq) {
+    // generate one sequence with batchSize subsequences,
+    // where each subsequence has only one word.
+    dummyInput.sequenceStartPositions = ICpuGpuVector::create(2, false);
+    int* buf = dummyInput.sequenceStartPositions->getMutableData(false);
+    dummyInput.subSequenceStartPositions =
+        ICpuGpuVector::create(batchSize + 1, false);
+    int* subBuf = dummyInput.subSequenceStartPositions->getMutableData(false);
+    buf[0] = 0;
+    buf[1] = batchSize;
+    for (size_t i = 0; i < batchSize + 1; i++) subBuf[i] = i;
+  }
   inArgs.emplace_back(dummyInput);
 }
 
-void testGeneration(bool useGpu, const string& expRetFile) {
+void testGeneration(const string& configFile, bool useGpu, bool hasSubseq,
+                    const string& expRetFile) {
   FLAGS_use_gpu = useGpu;
-  auto config = std::make_shared<TrainerConfigHelper>(CONFIG_FILE);
+  auto config = std::make_shared<TrainerConfigHelper>(configFile);
   unique_ptr<GradientMachine> gradientMachine(GradientMachine::create(*config));
   gradientMachine->loadParameters(modelDir);
   vector<Argument> inArgs(2);
   const size_t batchSize = 15;
-  prepareInArgs(inArgs, batchSize, useGpu);
+  prepareInArgs(inArgs, batchSize, useGpu, hasSubseq);
   vector<Argument> outArgs;
   unique_ptr<Evaluator> testEvaluator(gradientMachine->makeEvaluator());
   testEvaluator->start();
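To make the new hasSubseq layout concrete, here is a small illustration (added for this write-up; it is not part of the commit) of the start-position arrays that prepareInArgs builds for the batchSize of 15 used in testGeneration: one outer sequence that contains fifteen one-word subsequences.

# Illustration only: the nested-sequence layout prepared for the test input.
batch_size = 15

# One outer sequence covering all rows of the dummy input matrix.
sequence_start_positions = [0, batch_size]                  # [0, 15]

# batch_size subsequences, each exactly one word long.
sub_sequence_start_positions = list(range(batch_size + 1))  # [0, 1, ..., 15]

# Subsequence i spans word positions [i, i + 1), i.e. a single word.
assert all(end - start == 1
           for start, end in zip(sub_sequence_start_positions[:-1],
                                 sub_sequence_start_positions[1:]))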
@@ -93,16 +114,21 @@ TEST(RecurrentGradientMachine, test_generation) {
 #else
   const auto useGpuConfs = {true, false};
 #endif
-  FLAGS_config_args = "beam_search=0";  // no beam search
-  string expectRetFileNoBeam = expectFile + ".nobeam";
-  for (auto useGpu : useGpuConfs) {
-    testGeneration(useGpu, expectRetFileNoBeam);
-  }
-  FLAGS_config_args = "beam_search=1";  // beam search
-  string expectRetFileBeam = expectFile + ".beam";
-  for (auto useGpu : useGpuConfs) {
-    testGeneration(useGpu, expectRetFileBeam);
-  }
+  auto testGen = [&](const string& configFile, bool hasSubseq,
+                     const string& expRetFile, bool beam_search) {
+    FLAGS_config_args = beam_search ? "beam_search=1" : "beam_search=0";
+    for (auto useGpu : useGpuConfs) {
+      testGeneration(configFile, useGpu, hasSubseq, expRetFile);
+    }
+  };
+  testGen(CONFIG_FILE, false, expectFile + ".nobeam", false);  // no beam search
+  testGen(CONFIG_FILE, false, expectFile + ".beam", true);     // beam search
+  // In the hierarchical RNN, beam search and one-way search happen only in
+  // the inner RNN; the outer RNN concatenates the generated inner results
+  // (the first one in the case of beam search). Thus, both yield the same
+  // outer results.
+  testGen(NEST_CONFIG_FILE, true, expectFile + ".nest",
          false);  // no beam search
+  testGen(NEST_CONFIG_FILE, true, expectFile + ".nest", true);  // beam search
 }
 #endif
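The shared ".nest" expect file follows from the comment above: the outer RNN keeps only the first (best) inner result per subsequence, so for this test model beam search and one-way search produce identical outer results. A toy sketch of that concatenation rule (the candidate strings below are invented for illustration):

# Toy illustration: per subsequence, the inner RNN yields ranked candidates;
# the outer RNN concatenates only the top-ranked candidate of each.
inner_results = [["1 2 3 4", "1 2 4 4"],   # subsequence 0, beam_size = 2
                 ["1 2 3 4", "1 3 3 4"]]   # subsequence 1, beam_size = 2

outer_result = [candidates[0] for candidates in inner_results]
print(outer_result)  # ['1 2 3 4', '1 2 3 4'] -- same as one-way search here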
......
@@ -559,6 +559,7 @@ def maxframe_printer_evaluator(
 def seqtext_printer_evaluator(
         input,
         result_file,
+        id_input=None,
         dict_file=None,
         delimited=None,
         name=None,
@@ -567,11 +568,10 @@ def seqtext_printer_evaluator(
     Sequence text printer will print text according to index matrix and a
     dictionary. There can be multiple input to this layer:
 
-    1. If there is only one input, the input must be a matrix containing
+    1. If there is no id_input, the input must be a matrix containing
        the sequence of indices;
-    2. If there are more than one input, the first input should be ids,
-       and are interpreted as sample ids.
+    2. If there is id_input, it should be ids, and interpreted as sample ids.
 
     The output format will be:
@@ -602,26 +602,43 @@ def seqtext_printer_evaluator(
     .. code-block:: python
 
-       eval = seqtext_printer_evaluator(input,
+       eval = seqtext_printer_evaluator(input=maxid_layer,
+                                        id_input=sample_id,
                                         dict_file=dict_file,
                                         result_file=result_file)
 
     :param input: Input Layer name.
     :type input: LayerOutput|list
-    :param dict_file: The input dictionary which contains a list of tokens.
-    :type dict_file: basestring
-    :param result_file: The file is to save the results.
+    :param result_file: Path of the file to store the generated results.
     :type result_file: basestring
+    :param id_input: Index of the input sequence, and the specified index will
+                     be printed in the generated results. This is an optional
+                     parameter.
+    :type id_input: LayerOutput
+    :param dict_file: Path of dictionary. This is an optional parameter.
+                      Every line is a word in the dictionary with
+                      (line number - 1) as the word index.
+                      If this parameter is set to None or to an empty string,
+                      only word indices are printed in the generated results.
+    :type dict_file: basestring
     :param delimited: Whether to use space to separate output tokens.
                       Default is True. No space is added if set to False.
     :type delimited: bool
     :param name: Evaluator name.
     :type name: None|basestring
+    :return: The seq_text_printer that prints the generated sequence to a file.
+    :rtype: evaluator
     """
     assert isinstance(result_file, basestring)
+    if id_input is None:
+        inputs = [input]
+    else:
+        inputs = [id_input, input]
+        input.parents.append(id_input)
+
     evaluator_base(name=name,
                    type="seq_text_printer",
-                   input=input,
+                   input=inputs,
                    dict_file=dict_file,
                    result_file=result_file,
                    delimited=delimited)
......
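The net effect on seqtext_printer_evaluator is that it now takes the sample ids directly: when id_input is given, it becomes the first input internally (inputs = [id_input, input]). A minimal sketch of the two calling modes, assuming gen is the LayerOutput returned by beam_search and sample_id is a data_layer, as in the examples above:

# Mode 1: without id_input -- the evaluator receives only the index matrix
# and prints the generated word sequences.
seqtext_printer_evaluator(input=gen,
                          dict_file=trg_dict_path,
                          result_file=gen_trans_file)

# Mode 2: with id_input -- the sample-id layer is prepended to the inputs,
# so each generated result is written together with its sample id.
seqtext_printer_evaluator(input=gen,
                          id_input=sample_id,
                          dict_file=trg_dict_path,
                          result_file=gen_trans_file)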
@@ -2608,7 +2608,6 @@ def eos_layer(input, eos_id, name=None, layer_attr=None):
 
 @wrap_name_default()
 def beam_search(step, input, bos_id, eos_id, beam_size,
-                result_file, dict_file="", id_input=None,
                 max_length=500, name=None,
                 num_results_per_sample=None):
     """
@@ -2632,8 +2631,7 @@ def beam_search(step, input, bos_id, eos_id, beam_size,
                            input=[StaticInput(encoder_last)],
                            bos_id=0,
                            eos_id=1,
-                           beam_size=5,
-                           result_file="./generated_sequences.txt")
+                           beam_size=5)
 
     Please see the following demo for more details:
@@ -2671,24 +2669,12 @@ def beam_search(step, input, bos_id, eos_id, beam_size,
                       of the most promising next words. The greater the beam
                       size, the fewer candidate words are pruned.
     :type beam_size: int
-    :param result_file: Path of the file to store the generated results.
-    :type result_file: basestring
-    :param dict_file: Path of dictionary. This is an optional parameter.
-                      Every line is a word in the dictionary with
-                      (line number - 1) as the word index.
-                      If this parameter is set to None, or to an empty string,
-                      only word index are printed in the generated results.
-    :type dict_file: basestring
     :param num_results_per_sample: Number of the generated results per input
                                    sequence. This number must always be less than
                                    beam size.
     :type num_results_per_sample: int
-    :param id_input: Index of the input sequence, and the specified index will
-                     be prited in the gereated results. This an optional
-                     parameter.
-    :type id_input: LayerOutput
-    :return: The seq_text_printer that prints the generated sequence to a file.
-    :rtype: evaluator
+    :return: The generated word index.
+    :rtype: LayerOutput
     """
 
     if num_results_per_sample is None:
@@ -2704,7 +2690,6 @@ def beam_search(step, input, bos_id, eos_id, beam_size,
 
     real_input = []
     for i, each_input in enumerate(input):
-        # print type(each_input)
         assert isinstance(each_input, StaticInput) or isinstance(
             each_input, BaseGeneratedInput)
         if isinstance(each_input, BaseGeneratedInput):
@@ -2740,20 +2725,7 @@ def beam_search(step, input, bos_id, eos_id, beam_size,
     tmp = recurrent_group(step=__real_step__, input=real_input, reverse=False,
                           name=name)
 
-    if id_input is None:
-        inputs = [tmp.name]
-    else:
-        assert isinstance(id_input, LayerOutput)
-        inputs = [id_input.name, tmp.name]
-        tmp.parents.append(id_input)
-
-    Evaluator(name='target_printer',
-              type='seq_text_printer',
-              dict_file=dict_file,
-              result_file=result_file,
-              inputs=inputs
-              )
-
     return tmp
......
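Putting it all together, configurations written against the old API migrate as sketched below (assembled from the hunks above, using the names from the seqToseq demo config): beam_search now only generates and returns the word indices as a LayerOutput, while printing moves to seqtext_printer_evaluator.

# Old API: beam_search generated and printed in a single call.
beam_gen = beam_search(name=decoder_group_name,
                       step=gru_decoder_with_attention,
                       input=group_inputs,
                       id_input=data_layer(name="sent_id", size=1),
                       dict_file=trg_dict_path,
                       bos_id=0,
                       eos_id=1,
                       beam_size=beam_size,
                       max_length=max_length,
                       result_file=gen_trans_file)

# New API: generation and printing are decoupled, so the generated ids can
# also feed further layers (e.g. an outer recurrent_group, as in the new
# nested test config).
beam_gen = beam_search(name=decoder_group_name,
                       step=gru_decoder_with_attention,
                       input=group_inputs,
                       bos_id=0,
                       eos_id=1,
                       beam_size=beam_size,
                       max_length=max_length)
seqtext_printer_evaluator(input=beam_gen,
                          id_input=data_layer(name="sent_id", size=1),
                          dict_file=trg_dict_path,
                          result_file=gen_trans_file)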