Commit d2e1b46f authored by Luo Tao

update beam_search and seqToseq config, and add ExpActivation api

Parent 425e5b0b
@@ -128,12 +128,16 @@ def gru_encoder_decoder(data_conf,
         return out
 
     decoder_group_name = "decoder_group"
+    group_inputs = [StaticInput(input=encoded_vector, is_seq=True),
+                    StaticInput(input=encoded_proj, is_seq=True)]
+
     if not is_generating:
         trg_embedding = embedding_layer(
             input=data_layer(name='target_language_word',
                              size=target_dict_dim),
             size=word_vector_dim,
             param_attr=ParamAttr(name='_target_language_embedding'))
+        group_inputs.append(trg_embedding)
 
         # For decoder equipped with attention mechanism, in training,
         # target embedding (the ground truth) is the data input,
@@ -142,22 +146,13 @@ def gru_encoder_decoder(data_conf,
         # for the recurrent_group.
         decoder = recurrent_group(name=decoder_group_name,
                                   step=gru_decoder_with_attention,
-                                  input=[StaticInput(input=encoded_vector,
-                                                     is_seq=True),
-                                         StaticInput(input=encoded_proj,
-                                                     is_seq=True),
-                                         trg_embedding])
+                                  input=group_inputs)
 
         lbl = data_layer(name='target_language_next_word',
                          size=target_dict_dim)
-        cost = classification_cost(input=decoder, label=lbl, )
+        cost = classification_cost(input=decoder, label=lbl)
         outputs(cost)
     else:
-        gen_inputs = [StaticInput(input=encoded_vector,
-                                  is_seq=True),
-                      StaticInput(input=encoded_proj,
-                                  is_seq=True), ]
-
         # In generation, the decoder predicts a next target word based on
         # the encoded source sequence and the last generated target word.
@@ -171,10 +166,11 @@ def gru_encoder_decoder(data_conf,
                                       size=target_dict_dim,
                                       embedding_name='_target_language_embedding',
                                       embedding_size=word_vector_dim)
-        gen_inputs.append(trg_embedding)
+        group_inputs.append(trg_embedding)
 
         beam_gen = beam_search(name=decoder_group_name,
                                step=gru_decoder_with_attention,
-                               input=gen_inputs,
+                               input=group_inputs,
                                id_input=data_layer(name="sent_id",
                                                    size=1),
                                dict_file=trg_dict_path,
...
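Taken together, these hunks let the training and generating branches share one input list instead of each building its own: group_inputs holds the two encoder StaticInputs, and each branch appends its own target-word source before handing the list to recurrent_group or beam_search. A condensed sketch of the resulting pattern (the encoder layers, gru_decoder_with_attention, and the dimension variables are assumed to be defined earlier in the config, as in the full seqToseq demo):

    # Sketch only: encoded_vector, encoded_proj, gru_decoder_with_attention,
    # target_dict_dim, word_vector_dim and trg_dict_path come from the
    # surrounding seqToseq config and are not defined here.
    group_inputs = [StaticInput(input=encoded_vector, is_seq=True),
                    StaticInput(input=encoded_proj, is_seq=True)]

    if not is_generating:
        # Training: the ground-truth target embedding is an ordinary input.
        group_inputs.append(trg_embedding)
        decoder = recurrent_group(name=decoder_group_name,
                                  step=gru_decoder_with_attention,
                                  input=group_inputs)
    else:
        # Generation: GeneratedInput feeds the embedding of the previously
        # predicted word back into the same step function.
        group_inputs.append(GeneratedInput(
            size=target_dict_dim,
            embedding_name='_target_language_embedding',
            embedding_size=word_vector_dim))
        beam_gen = beam_search(name=decoder_group_name,
                               step=gru_decoder_with_attention,
                               input=group_inputs,
                               id_input=data_layer(name="sent_id", size=1),
                               dict_file=trg_dict_path)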
@@ -12,6 +12,13 @@ AbsActivation
     :members: AbsActivation
     :noindex:
 
+ExpActivation
+===============
+
+..  automodule:: paddle.trainer_config_helpers.activations
+    :members: ExpActivation
+    :noindex:
+
 IdentityActivation
 ==================
...
@@ -13,96 +13,53 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
-#Todo(luotao02) This config is only used for unitest. It is out of date now, and will be updated later.
 
-import math
+from paddle.trainer_config_helpers import *
 
-beam_search = get_config_arg('beam_search', bool, False)
-
-model_type("recurrent_nn")
-
-Settings(learning_rate=0, batch_size=15, algorithm='sgd')
-
-Inputs("sent_id", "dummy_data_input")
-Outputs("predict_word")
+settings(batch_size=15, learning_rate=0)
 
 num_words = 5
 
-DataLayer(name="sent_id", size=1, )
+beam_flag = get_config_arg('beam_search', bool, False)
+
+sent_id = data_layer(name="sent_id", size=1)
 
 # This layer has no actual use, but only to decide batch_size in generation.
 # When generating, at least one Memory in RecurrentLayer MUST have a boot layer.
-DataLayer(name="dummy_data_input", size=2, )
-
-if beam_search:
-    RecurrentLayerGroupBegin("decoding_layer_group",
-                             in_links=[],
-                             out_links=["predict_word"],
-                             generator=Generator(max_num_frames=10,
-                                                 beam_size=2,
-                                                 num_results_per_sample=2, ))
-else:
-    RecurrentLayerGroupBegin("decoding_layer_group",
-                             in_links=[],
-                             out_links=["predict_word"],
-                             generator=Generator(max_num_frames=10, ))
-
-dummy_memory = Memory(name="dummy_memory",
-                      size=2,
-                      boot_layer="dummy_data_input")
-MixedLayer(name="dummy_memory",
-           size=2,
-           bias=False,
-           inputs=[IdentityProjection(dummy_memory)], )
-
-state_memory = Memory(name="state",
-                      size=num_words,
-                      #boot_bias=True,
-                      #boot_bias_active_type = "tanh",
-                      )
-
-predict_word_memory = Memory(name="predict_word",
-                             size=num_words,
-                             boot_with_const_id=0, )
-
-MixedLayer(
-    name="word_embedding",
-    size=num_words,  # word embedding dim is the same as num_words in this test.
-    bias=False,
-    inputs=TableProjection(predict_word_memory,
-                           initial_std=1,
-                           learning_rate=0,
-                           parameter_name="wordvec"))
-
-Layer(  # simplified RNN for testing
-    name="state",
-    type="mixed",
-    size=num_words,
-    bias=False,
-    inputs=[FullMatrixProjection("word_embedding",
-                                 parameter_name="transtable")])
-
-Layer(name="output",
-      type="mixed",
-      size=num_words,
-      active_type="exponential",
-      bias=False,
-      inputs=TransposedFullMatrixProjection("state",
-                                            initial_std=1,
-                                            learning_rate=0,
-                                            parameter_name="wordvec"), )
-
-Layer(name="predict_word", type="maxid", inputs=["output"], )
-
-Layer(name="eos_check",
-      type="eos_id",
-      eos_id=num_words - 1,
-      inputs=["predict_word"], )
-RecurrentLayerGroupEnd("decoding_layer_group")
-
-Evaluator(name="answer_printer",
-          type="seq_text_printer",
-          dict_file="./trainer/tests/test_gen_dict.txt",
-          result_file="./trainer/tests/dump_text.test",
-          inputs=["sent_id", "predict_word", ], )
+dummy_data = data_layer(name="dummy_data_input", size=2)
+
+gen_inputs = [StaticInput(input=dummy_data, size=2),
+              GeneratedInput(size=num_words,
+                             embedding_name="wordvec",
+                             embedding_size=num_words)]
+
+def step(dummy_memory, predict_word):
+    # simplified RNN for testing
+    with mixed_layer(size=num_words) as layer:
+        layer += full_matrix_projection(input=predict_word,
+                                        param_attr=ParamAttr(name="transtable"))
+
+    with mixed_layer(size=num_words, act=ExpActivation()) as out:
+        out += trans_full_matrix_projection(input=layer,
+                                            param_attr=ParamAttr(name="wordvec"))
+
+    return out
+
+beam_gen = beam_search(name="rnn_gen",
+                       step=step,
+                       input=gen_inputs,
+                       id_input=sent_id,
+                       dict_file="./trainer/tests/test_gen_dict.txt",
+                       result_file="./trainer/tests/dump_text.test",
+                       bos_id=0,
+                       eos_id=num_words - 1,
+                       beam_size=2 if beam_flag else 1,
+                       num_results_per_sample=2 if beam_flag else 1,
+                       max_length=10)
+
+#outputs(beam_gen)
+# In this config, dummy_data_input has no effect on beam_gen (dummy_memory is a
+# read-only memory and is not used by other layers of step), so the Inputs and
+# Outputs are declared explicitly. Note that "__beam_search_predict__" is the
+# default output name of beam_search.
+Inputs("sent_id", "dummy_data_input")
+Outputs("__beam_search_predict__")
@@ -14,7 +14,7 @@
 __all__ = ["TanhActivation", "SigmoidActivation",
            "SoftmaxActivation", "IdentityActivation", "LinearActivation",
-           'SequenceSoftmaxActivation',
+           'SequenceSoftmaxActivation', 'ExpActivation',
            "ReluActivation", "BReluActivation", "SoftReluActivation", "STanhActivation",
            "AbsActivation", "SquareActivation", "BaseActivation"]
@@ -185,3 +185,12 @@ class SquareActivation(BaseActivation):
     """
 
     def __init__(self): BaseActivation.__init__(self, 'square', False)
+
+
+class ExpActivation(BaseActivation):
+    """
+    Exponential Activation.
+
+    ..  math::
+        f(z) = e^z
+    """
+    def __init__(self): BaseActivation.__init__(self, 'exponential', False)
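Once exported through __all__, ExpActivation is used like any other activation object from paddle.trainer_config_helpers, as the rewritten test config above does with mixed_layer(..., act=ExpActivation()). A minimal sketch (the layer names and sizes here are illustrative, not from this commit):

    from paddle.trainer_config_helpers import *

    scores_in = data_layer(name="scores_in", size=10)
    # Applies f(z) = e^z elementwise to the layer's output.
    scores = fc_layer(input=scores_in, size=10, act=ExpActivation())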