Commit 4615c517 authored by luotao1, committed by LCY-Seso

beam search api and unitest in hierarchical rnn (#122)

Parent baaaa0b0
......@@ -171,12 +171,13 @@ def gru_encoder_decoder(data_conf,
    beam_gen = beam_search(name=decoder_group_name,
                           step=gru_decoder_with_attention,
                           input=group_inputs,
                           id_input=data_layer(name="sent_id",
                                               size=1),
                           dict_file=trg_dict_path,
                           bos_id=0,
                           eos_id=1,
                           beam_size=beam_size,
                           max_length=max_length,
                           result_file=gen_trans_file)
                           max_length=max_length)

    seqtext_printer_evaluator(input=beam_gen,
                              id_input=data_layer(name="sent_id", size=1),
                              dict_file=trg_dict_path,
                              result_file=gen_trans_file)

    outputs(beam_gen)
......@@ -202,14 +202,17 @@ After training the model, we can use it to generate sequences. A common practice
* use :code:`GeneratedInput` for trg_embedding. :code:`GeneratedInput` computes the embedding of the generated token at the last time step for the input at the current time step.
* use the :code:`beam_search` function. This function needs to set:
- :code:`id_input`: the integer ID of the data, used to identify the corresponding output in the generated files.
- :code:`dict_file`: the dictionary file for converting word id to word.
- :code:`bos_id`: the start token. Every sentence starts with the start token.
- :code:`eos_id`: the end token. Every sentence ends with the end token.
- :code:`beam_size`: the beam size used in beam search.
- :code:`max_length`: the maximum length of the generated sentences.
- :code:`result_file`: the path of the generation result file.
* use :code:`seqtext_printer_evaluator` to print text according to the index matrix and the dictionary. This function needs to set:
- :code:`id_input`: the integer ID of the data, used to identify the corresponding output in the generated files.
- :code:`dict_file`: the dictionary file for converting word id to word.
- :code:`result_file`: the path of the generation result file.
The code is listed below:
.. code-block:: python
......@@ -230,14 +233,15 @@ The code is listed below:
    beam_gen = beam_search(name=decoder_group_name,
                           step=gru_decoder_with_attention,
                           input=group_inputs,
                           id_input=data_layer(name="sent_id",
                                               size=1),
                           dict_file=trg_dict_path,
                           bos_id=0,  # Beginning token.
                           eos_id=1,  # End of sentence token.
                           beam_size=beam_size,
                           max_length=max_length,
                           result_file=gen_trans_file)
                           max_length=max_length)

    seqtext_printer_evaluator(input=beam_gen,
                              id_input=data_layer(name="sent_id", size=1),
                              dict_file=trg_dict_path,
                              result_file=gen_trans_file)

    outputs(beam_gen)
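To make the roles of :code:`bos_id`, :code:`eos_id`, :code:`beam_size`, :code:`max_length`, and :code:`num_results_per_sample` concrete, here is a minimal, framework-free sketch of the search loop these parameters control; :code:`step_fn`, which returns log-probabilities over the vocabulary for a given prefix, is a hypothetical stand-in for the decoder step above.

.. code-block:: python

    def beam_search_sketch(step_fn, bos_id, eos_id, beam_size, max_length):
        # Each beam entry is (accumulated log-probability, token sequence).
        beams = [(0.0, [bos_id])]
        finished = []
        for _ in range(max_length):
            candidates = []
            for score, seq in beams:
                # step_fn is a hypothetical stand-in for one decoder step.
                for word_id, log_p in enumerate(step_fn(seq)):
                    candidates.append((score + log_p, seq + [word_id]))
            # Prune: keep only the beam_size highest-scoring candidates.
            candidates.sort(key=lambda c: -c[0])
            beams = []
            for score, seq in candidates[:beam_size]:
                # A candidate ending in eos_id leaves the beam as a result.
                (finished if seq[-1] == eos_id else beams).append((score, seq))
            if not beams:
                break
        finished.extend(beams)  # sequences cut off at max_length
        finished.sort(key=lambda c: -c[0])
        return finished  # take the first num_results_per_sample entries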
......
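New expected-result file added by this commit (presumably the ``.nest`` file checked by the C++ test below); each line appears to be the indices generated for one of the 15 single-word subsequences, with the sample id 0 prefixed to the first line: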
0 1 2 3 4
1 2 3 4
1 2 3 4
1 2 3 4
1 2 3 4
1 2 3 4
1 2 3 4
1 2 3 4
1 2 3 4
1 2 3 4
1 2 3 4
1 2 3 4
1 2 3 4
1 2 3 4
1 2 3 4
#edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from paddle.trainer_config_helpers import *
settings(batch_size=15, learning_rate=0)
num_words = 5
beam_flag = get_config_arg('beam_search', bool, False)
sent_id = data_layer(name="sent_id", size=1)
# This layer is not actually used; it only determines the batch size in generation.
# When generating, at least one Memory in RecurrentLayer MUST have a boot layer.
dummy_data = data_layer(name="dummy_data_input", size=2)
def outer_step(dummy_data):
    gen_inputs = [StaticInput(input=dummy_data, size=2, is_seq=True),
                  GeneratedInput(size=num_words,
                                 embedding_name="wordvec",
                                 embedding_size=num_words)]

    def inner_step(dummy_memory, predict_word):
        # simplified RNN for testing
        with mixed_layer(size=num_words) as layer:
            layer += full_matrix_projection(input=predict_word,
                                            param_attr=ParamAttr(name="transtable"))
        with mixed_layer(size=num_words, act=ExpActivation()) as out:
            out += trans_full_matrix_projection(input=layer,
                                                param_attr=ParamAttr(name="wordvec"))
        return out

    beam_gen = beam_search(name="rnn_gen",
                           step=inner_step,
                           input=gen_inputs,
                           bos_id=0,
                           eos_id=num_words-1,
                           beam_size=2 if beam_flag else 1,
                           num_results_per_sample=2 if beam_flag else 1,
                           max_length=10)
    return beam_gen
beam_gen_concat = recurrent_group(name="rnn_gen_concat",
                                  step=outer_step,
                                  input=[SubsequenceInput(dummy_data)])

seqtext_printer_evaluator(input=beam_gen_concat,
                          id_input=sent_id,
                          dict_file="./trainer/tests/test_gen_dict.txt",
                          result_file="./trainer/tests/dump_text.test")
#outputs(beam_gen_concat)

# In this config, dummy_data_input has no effect on beam_gen (dummy_memory is
# a read-only memory that is not used by other layers of step), so we specify
# the Inputs and Outputs explicitly, as follows. Note that
# "__beam_search_predict__" is the default output name of beam_search.
Inputs("sent_id", "dummy_data_input")
Outputs("__beam_search_predict__")
......@@ -48,15 +48,16 @@ def step(dummy_memory, predict_word):
beam_gen = beam_search(name="rnn_gen",
                       step=step,
                       input=gen_inputs,
                       id_input=sent_id,
                       dict_file="./trainer/tests/test_gen_dict.txt",
                       result_file="./trainer/tests/dump_text.test",
                       bos_id=0,
                       eos_id=num_words-1,
                       beam_size=2 if beam_flag else 1,
                       num_results_per_sample=2 if beam_flag else 1,
                       max_length=10)

seqtext_printer_evaluator(input=beam_gen,
                          id_input=sent_id,
                          dict_file="./trainer/tests/test_gen_dict.txt",
                          result_file="./trainer/tests/dump_text.test")
#outputs(beam_gen)
# In this config, dummy_data_input has no effect on beam_gen (dummy_memory is
# a read-only memory that is not used by other layers of step), so we specify
# the Inputs and Outputs
......
......@@ -12,7 +12,6 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <fstream>
#include <paddle/utils/PythonUtil.h>
......@@ -24,6 +23,8 @@ using namespace paddle; // NOLINT
using namespace std; // NOLINT
static const string& CONFIG_FILE = "trainer/tests/sample_trainer_rnn_gen.conf";
static const string& NEST_CONFIG_FILE =
"trainer/tests/sample_trainer_nest_rnn_gen.conf";
static const string& OUTPUT_DIR = "trainer/tests/dump_text.test";
static string modelDir = "trainer/tests/rnn_gen_test_model_dir/t1"; // NOLINT
static string expectFile = // NOLINT
......@@ -50,32 +51,52 @@ void checkOutput(const string& expRetFile) {
}
}
void prepareInArgs(vector<Argument>& inArgs,
                   const size_t batchSize, bool useGpu) {
void prepareInArgs(vector<Argument>& inArgs, const size_t batchSize,
                   bool useGpu, bool hasSubseq) {
  inArgs.clear();
  // sentence id
  Argument sentId;
  sentId.value = nullptr;
  IVector::resizeOrCreate(sentId.ids, batchSize, useGpu);
  for (size_t i = 0; i < batchSize; ++i) sentId.ids->setElement(i, i);
  if (hasSubseq) {
    // As there is only one sequence, there is only one label.
    IVector::resizeOrCreate(sentId.ids, 1, useGpu);
    sentId.ids->setElement(0, 0);
  } else {
    // As there are batchSize words, there are batchSize labels.
    IVector::resizeOrCreate(sentId.ids, batchSize, useGpu);
    for (size_t i = 0; i < batchSize; ++i) sentId.ids->setElement(i, i);
  }
  inArgs.emplace_back(sentId);

  // a dummy layer to decide batch size
  Argument dummyInput;
  dummyInput.value = Matrix::create(batchSize, 2, false, useGpu);
  dummyInput.value->randomizeUniform();
  if (hasSubseq) {
    // Generate one sequence with batchSize subsequences,
    // each of which contains exactly one word.
    dummyInput.sequenceStartPositions = ICpuGpuVector::create(2, false);
    int* buf = dummyInput.sequenceStartPositions->getMutableData(false);
    dummyInput.subSequenceStartPositions =
        ICpuGpuVector::create(batchSize + 1, false);
    int* subBuf = dummyInput.subSequenceStartPositions->getMutableData(false);
    buf[0] = 0;
    buf[1] = batchSize;
    for (size_t i = 0; i < batchSize + 1; i++) subBuf[i] = i;
  }
  inArgs.emplace_back(dummyInput);
}
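The hierarchical layout built above is easiest to see with concrete numbers: for a batchSize of 4 there is a single outer sequence covering all words, and one single-word subsequence per word. A small plain-Python sketch of the two position buffers filled in :code:`prepareInArgs`:

.. code-block:: python

    def nested_positions(batch_size):
        # One outer sequence spanning all words: [0, batch_size].
        seq_starts = [0, batch_size]
        # batch_size one-word subsequences: [0, 1, ..., batch_size].
        sub_seq_starts = list(range(batch_size + 1))
        return seq_starts, sub_seq_starts

    print(nested_positions(4))  # ([0, 4], [0, 1, 2, 3, 4])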
void testGeneration(bool useGpu, const string& expRetFile) {
void testGeneration(const string& configFile, bool useGpu, bool hasSubseq,
                    const string& expRetFile) {
  FLAGS_use_gpu = useGpu;
  auto config = std::make_shared<TrainerConfigHelper>(CONFIG_FILE);
  auto config = std::make_shared<TrainerConfigHelper>(configFile);
  unique_ptr<GradientMachine> gradientMachine(GradientMachine::create(*config));
  gradientMachine->loadParameters(modelDir);
  vector<Argument> inArgs(2);
  const size_t batchSize = 15;
  prepareInArgs(inArgs, batchSize, useGpu);
  prepareInArgs(inArgs, batchSize, useGpu, hasSubseq);
  vector<Argument> outArgs;
  unique_ptr<Evaluator> testEvaluator(gradientMachine->makeEvaluator());
  testEvaluator->start();
......@@ -93,16 +114,21 @@ TEST(RecurrentGradientMachine, test_generation) {
#else
const auto useGpuConfs = {true, false};
#endif
FLAGS_config_args = "beam_search=0"; // no beam search
string expectRetFileNoBeam = expectFile + ".nobeam";
for (auto useGpu : useGpuConfs) {
testGeneration(useGpu, expectRetFileNoBeam);
}
FLAGS_config_args = "beam_search=1"; // no beam search
string expectRetFileBeam = expectFile + ".beam";
for (auto useGpu : useGpuConfs) {
testGeneration(useGpu, expectRetFileBeam);
}
auto testGen = [&](const string& configFile, bool hasSubseq,
const string& expRetFile, bool beam_search) {
FLAGS_config_args = beam_search ? "beam_search=1" : "beam_search=0";
for (auto useGpu : useGpuConfs) {
testGeneration(configFile, useGpu, hasSubseq, expRetFile);
}
};
testGen(CONFIG_FILE, false, expectFile + ".nobeam", false); // no beam search
testGen(CONFIG_FILE, false, expectFile + ".beam", true); // beam search
// In hierarchical RNN, beam search and one way search are only in inner-RNN,
// outer-RNN will concat the generated inner-results (first for beam search)
// from inner-RNN. Thus, they have the same outer-results.
testGen(NEST_CONFIG_FILE, true, expectFile + ".nest",
false); // no beam search
testGen(NEST_CONFIG_FILE, true, expectFile + ".nest", true); // beam search
}
#endif
......
......@@ -559,6 +559,7 @@ def maxframe_printer_evaluator(
def seqtext_printer_evaluator(
input,
result_file,
id_input=None,
dict_file=None,
delimited=None,
name=None,
......@@ -567,11 +568,10 @@ def seqtext_printer_evaluator(
    Sequence text printer will print text according to index matrix and a
    dictionary. There can be multiple inputs to this layer:

    1. If there is only one input, the input must be a matrix containing
    1. If there is no id_input, the input must be a matrix containing
       the sequence of indices;
    2. If there are more than one input, the first input should be ids,
       and are interpreted as sample ids.
    2. If there is id_input, it should be ids and is interpreted as sample ids.
The output format will be:
......@@ -602,26 +602,43 @@ def seqtext_printer_evaluator(
.. code-block:: python
       eval = seqtext_printer_evaluator(input,
       eval = seqtext_printer_evaluator(input=maxid_layer,
                                        id_input=sample_id,
                                        dict_file=dict_file,
                                        result_file=result_file)
    :param input: Input Layer name.
    :type input: LayerOutput|list
    :param dict_file: The input dictionary which contains a list of tokens.
    :type dict_file: basestring
    :param result_file: The file is to save the results.
    :param result_file: Path of the file to store the generated results.
    :type result_file: basestring
    :param id_input: Index of the input sequence. The specified index will be
                     printed in the generated results. This is an optional
                     parameter.
    :type id_input: LayerOutput
    :param dict_file: Path of the dictionary. This is an optional parameter.
                      Every line is a word in the dictionary with
                      (line number - 1) as the word index.
                      If this parameter is set to None or to an empty string,
                      only word indices are printed in the generated results.
    :type dict_file: basestring
    :param delimited: Whether to use a space to separate output tokens.
                      Default is True. No space is added if set to False.
    :type delimited: bool
    :param name: Evaluator name.
    :type name: None|basestring
    :return: The seq_text_printer that prints the generated sequence to a file.
    :rtype: evaluator
"""
    assert isinstance(result_file, basestring)
    if id_input is None:
        inputs = [input]
    else:
        inputs = [id_input, input]
        input.parents.append(id_input)

    evaluator_base(name=name,
                   type="seq_text_printer",
                   input=input,
                   input=inputs,
                   dict_file=dict_file,
                   result_file=result_file,
                   delimited=delimited)
......
......@@ -2608,7 +2608,6 @@ def eos_layer(input, eos_id, name=None, layer_attr=None):
@wrap_name_default()
def beam_search(step, input, bos_id, eos_id, beam_size,
                result_file, dict_file="", id_input=None,
                max_length=500, name=None,
                num_results_per_sample=None):
"""
......@@ -2632,8 +2631,7 @@ def beam_search(step, input, bos_id, eos_id, beam_size,
                           input=[StaticInput(encoder_last)],
                           bos_id=0,
                           eos_id=1,
                           beam_size=5,
                           result_file="./generated_sequences.txt")
                           beam_size=5)
Please see the following demo for more details:
......@@ -2671,24 +2669,12 @@ def beam_search(step, input, bos_id, eos_id, beam_size,
                      of the most promising next words. The greater the beam
                      size, the fewer candidate words are pruned.
    :type beam_size: int
    :param result_file: Path of the file to store the generated results.
    :type result_file: basestring
    :param dict_file: Path of the dictionary. This is an optional parameter.
                      Every line is a word in the dictionary with
                      (line number - 1) as the word index.
                      If this parameter is set to None or to an empty string,
                      only word indices are printed in the generated results.
    :type dict_file: basestring
    :param num_results_per_sample: Number of the generated results per input
                                   sequence. This number must always be less
                                   than beam size.
    :type num_results_per_sample: int
    :param id_input: Index of the input sequence. The specified index will be
                     printed in the generated results. This is an optional
                     parameter.
    :type id_input: LayerOutput
    :return: The seq_text_printer that prints the generated sequence to a file.
    :rtype: evaluator
    :return: The generated word index.
    :rtype: LayerOutput
"""
    if num_results_per_sample is None:
......@@ -2704,7 +2690,6 @@ def beam_search(step, input, bos_id, eos_id, beam_size,
    real_input = []
    for i, each_input in enumerate(input):
        # print type(each_input)
        assert isinstance(each_input, StaticInput) or isinstance(
            each_input, BaseGeneratedInput)
        if isinstance(each_input, BaseGeneratedInput):
......@@ -2740,20 +2725,7 @@ def beam_search(step, input, bos_id, eos_id, beam_size,
    tmp = recurrent_group(step=__real_step__, input=real_input, reverse=False,
                          name=name)

    if id_input is None:
        inputs = [tmp.name]
    else:
        assert isinstance(id_input, LayerOutput)
        inputs = [id_input.name, tmp.name]
        tmp.parents.append(id_input)

    Evaluator(name='target_printer',
              type='seq_text_printer',
              dict_file=dict_file,
              result_file=result_file,
              inputs=inputs)

    return tmp
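The net effect of this change on user configs is to split generation from printing. A minimal migration sketch (:code:`decoder_step`, :code:`gen_inputs`, :code:`sent_id`, :code:`dict_path`, and :code:`result_path` are placeholder names, not identifiers from this commit):

.. code-block:: python

    # Before this commit, beam_search both generated and printed:
    #   beam_gen = beam_search(step=decoder_step, input=gen_inputs,
    #                          bos_id=0, eos_id=1, beam_size=5,
    #                          id_input=sent_id, dict_file=dict_path,
    #                          result_file=result_path)

    # After this commit, beam_search only generates; printing is a separate
    # evaluator that can be attached to any layer producing word indices:
    beam_gen = beam_search(step=decoder_step, input=gen_inputs,
                           bos_id=0, eos_id=1, beam_size=5)
    seqtext_printer_evaluator(input=beam_gen,
                              id_input=sent_id,
                              dict_file=dict_path,
                              result_file=result_path)
    outputs(beam_gen)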
......