sample_trainer_rnn_gen.conf 2.7 KB
Newer Older
Z
zhangjinchao01 已提交
1
#edit-mode: -*- python -*-
2
# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved
Z
zhangjinchao01 已提交
3 4 5 6 7 8 9 10 11 12 13 14 15 16
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.


17
from paddle.trainer_config_helpers import *
Z
zhangjinchao01 已提交
18

19
# Global trainer settings. The learning rate is zero — presumably because this
# config is only used to exercise generation, not to train (TODO confirm).
settings(batch_size=15, learning_rate=0)

# Size of the toy vocabulary. It is reused below as the generated-word input
# size, the embedding size, the hidden layer size, and (minus one) the eos id.
num_words = 5
# Read from the 'beam_search' config argument (defaults to False); selects
# between a beam size / result count of 2 and of 1 in the beam_search call.
beam_flag = get_config_arg('beam_search', bool, False)

# Per-sample id, passed to the sequence-text printer evaluator as `id_input`.
sent_id = data_layer(name="sent_id", size=1)
Z
zhangjinchao01 已提交
25 26 27

# This layer carries no useful data; it exists only to determine the batch
# size during generation. When generating, at least one Memory in the
# RecurrentLayer MUST have a boot layer.
dummy_data = data_layer(name="dummy_data_input", size=2)

# Inputs handed to the generation step, in the order of `step`'s parameters:
# the static dummy input first, then the generated word, whose embedding uses
# the parameter named "wordvec".
gen_inputs = [
    StaticInput(input=dummy_data, size=2),
    GeneratedInput(size=num_words,
                   embedding_name="wordvec",
                   embedding_size=num_words),
]

def step(dummy_memory, predict_word):
    """Single generation step of a simplified RNN used for testing.

    Args:
        dummy_memory: the static dummy input from `gen_inputs`; it is not
            referenced anywhere in this function body.
        predict_word: the generated-word input from `gen_inputs` (presumably
            the embedding of the previously generated word — TODO confirm).

    Returns:
        A mixed layer of size `num_words` with exponential activation.
    """
    # simplified RNN for testing
    # Hidden layer: full-matrix projection of the word input through the
    # parameter named "transtable".
    with mixed_layer(size=num_words) as layer:
        layer += full_matrix_projection(input=predict_word,
                                        param_attr=ParamAttr(name="transtable"))

    # Output layer: transposed full-matrix projection through the parameter
    # named "wordvec" — the same name used as the GeneratedInput embedding.
    with mixed_layer(size=num_words, act=ExpActivation()) as out:
        out += trans_full_matrix_projection(input=layer,
                                            param_attr=ParamAttr(name="wordvec"))

    return out
X
xuwei06 已提交
47

48 49 50 51 52 53 54
# Build the generator. Sequences start from word id 0 and end at the last
# vocabulary id (num_words - 1), with at most 10 generated steps. With the
# 'beam_search' config argument set, the beam size and the number of results
# per sample are both 2; otherwise both are 1.
beam_gen = beam_search(name="rnn_gen",
                       step=step,
                       input=gen_inputs,
                       bos_id=0,
                       eos_id=num_words - 1,
                       beam_size=2 if beam_flag else 1,
                       num_results_per_sample=2 if beam_flag else 1,
                       max_length=10)
56

57 58
# Print the generated sequences as text: `sent_id` supplies the per-sample id,
# `dict_file` is the word dictionary, and `result_file` is where the text is
# written. Both paths are relative to the working directory.
seqtext_printer_evaluator(input=beam_gen,
                          id_input=sent_id,
                          dict_file="./legacy/trainer/tests/test_gen_dict.txt",
                          result_file="./legacy/trainer/tests/dump_text.test")
61 62 63 64 65 66
#outputs(beam_gen)
# The shorthand above is intentionally not used in this config.
# dummy_data_input has no effect on beam_gen: dummy_memory is a read-only
# memory that no other layer in step uses. The network inputs and outputs are
# therefore declared explicitly below. "__beam_search_predict__" is the default
# output name of beam_search.
Inputs(
    "sent_id",
    "dummy_data_input",
)
Outputs(
    "__beam_search_predict__",
)