sample_trainer_rnn_gen.conf 3.8 KB
Newer Older
Z
zhangjinchao01 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108
#edit-mode: -*- python -*-
# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

#Todo(luotao02) This config is only used for unit tests. It is out of date now and will be updated later.

import math

# Flag obtained through get_config_arg('beam_search', bool, False); it selects
# which Generator arguments the decoding layer group is opened with.
beam_search = get_config_arg('beam_search', bool, False)

model_type("recurrent_nn")

# Note that learning_rate is 0 in this config.
Settings(
    learning_rate=0,
    batch_size=15,
    algorithm='sgd')

Inputs("sent_id", "dummy_data_input")
Outputs("predict_word")

# Reused as the size of the memories/layers of the recurrent group and to
# derive the eos id (num_words - 1).
num_words = 5

DataLayer(name="sent_id", size=1)

# This layer carries no real data; it exists only to decide batch_size in
# generation. When generating, at least one Memory in RecurrentLayer MUST
# have a boot layer, and "dummy_data_input" serves as that boot layer.
DataLayer(name="dummy_data_input", size=2)

# Open the "decoding_layer_group" recurrent layer group. It has no in_links
# and exposes "predict_word" as its only out_link. Both variants pass
# max_num_frames=10 to the Generator; when beam_search is set, the Generator
# additionally receives beam_size=2 and num_results_per_sample=2.
RecurrentLayerGroupBegin(
    "decoding_layer_group",
    in_links=[],
    out_links=["predict_word"],
    generator=(Generator(max_num_frames=10,
                         beam_size=2,
                         num_results_per_sample=2)
               if beam_search else Generator(max_num_frames=10)))
# Memory "dummy_memory" is booted from the "dummy_data_input" data layer and
# is fed back into the same-named MixedLayer through an identity projection.
dummy_memory = Memory(
    name="dummy_memory", size=2, boot_layer="dummy_data_input")
MixedLayer(
    name="dummy_memory",
    size=2,
    bias=False,
    inputs=[IdentityProjection(dummy_memory)])

# Memory named after the "state" layer. The boot options boot_bias=True and
# boot_bias_active_type = "tanh" were commented out in this config and stay
# disabled.
state_memory = Memory(name="state", size=num_words)

# Memory named after the "predict_word" layer, booted with constant id 0.
predict_word_memory = Memory(
    name="predict_word", size=num_words, boot_with_const_id=0)

# "word_embedding": table projection of the "predict_word" memory using the
# parameter named "wordvec" (initial_std=1, learning_rate=0).
# The word embedding dim is the same as num_words in this test.
MixedLayer(
    name="word_embedding",
    size=num_words,
    bias=False,
    inputs=TableProjection(
        predict_word_memory,
        initial_std=1,
        learning_rate=0,
        parameter_name="wordvec"))

# "state": simplified RNN for testing -- a single full-matrix projection of
# "word_embedding" using the parameter named "transtable".
Layer(
    name="state",
    type="mixed",
    size=num_words,
    bias=False,
    inputs=[
        FullMatrixProjection("word_embedding", parameter_name="transtable"),
    ])

# "output": transposed full-matrix projection of "state" with exponential
# activation. It references the same parameter name ("wordvec") as the
# table projection of "word_embedding" -- presumably so that both use the
# same parameter; confirm against the config parser.
Layer(
    name="output",
    type="mixed",
    size=num_words,
    active_type="exponential",
    bias=False,
    inputs=TransposedFullMatrixProjection(
        "state",
        initial_std=1,
        learning_rate=0,
        parameter_name="wordvec"))

# "predict_word": layer of type "maxid" over "output"; this is the layer the
# recurrent group exposes as its out_link.
Layer(name="predict_word", type="maxid", inputs=["output"])

# "eos_check": layer of type "eos_id" over "predict_word", with the eos id
# set to the last word id (num_words - 1).
Layer(
    name="eos_check",
    type="eos_id",
    eos_id=num_words - 1,
    inputs=["predict_word"])

# Close the recurrent layer group named "decoding_layer_group".
RecurrentLayerGroupEnd("decoding_layer_group")

# Evaluator "answer_printer" of type "seq_text_printer" over the "sent_id"
# and "predict_word" layers.
# NOTE(review): presumably maps predicted ids to text via dict_file and writes
# the generated sequences to result_file -- confirm against the evaluator
# implementation. Both paths are relative to the working directory.
Evaluator(
    name="answer_printer",
    type="seq_text_printer",
    dict_file="./trainer/tests/test_gen_dict.txt",
    result_file="./trainer/tests/dump_text.test",
    inputs=["sent_id", "predict_word"])