提交 a4bd4147 编写于 作者: J jacquesqiao 提交者: GitHub

Merge pull request #1555 from jacquesqiao/refine-import

optimize import of seqToseq_net_v2 for book
...@@ -72,31 +72,35 @@ def main(): ...@@ -72,31 +72,35 @@ def main():
# define network topology # define network topology
cost = seqToseq_net_v2(source_dict_dim, target_dict_dim) cost = seqToseq_net_v2(source_dict_dim, target_dict_dim)
parameters = paddle.parameters.create(cost) parameters = paddle.parameters.create(cost)
optimizer = paddle.optimizer.Adam(learning_rate=1e-4)
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 10 == 0:
print "Pass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics)
# define optimize method and trainer
optimizer = paddle.optimizer.Adam(learning_rate=1e-4)
trainer = paddle.trainer.SGD(cost=cost, trainer = paddle.trainer.SGD(cost=cost,
parameters=parameters, parameters=parameters,
update_equation=optimizer) update_equation=optimizer)
# define data reader
reader_dict = { reader_dict = {
'source_language_word': 0, 'source_language_word': 0,
'target_language_word': 1, 'target_language_word': 1,
'target_language_next_word': 2 'target_language_next_word': 2
} }
trn_reader = paddle.reader.batched( wmt14_reader = paddle.reader.batched(
paddle.reader.shuffle( paddle.reader.shuffle(
train_reader("data/pre-wmt14/train/train"), buf_size=8192), train_reader("data/pre-wmt14/train/train"), buf_size=8192),
batch_size=5) batch_size=5)
# define event_handler callback
def event_handler(event):
if isinstance(event, paddle.event.EndIteration):
if event.batch_id % 10 == 0:
print "Pass %d, Batch %d, Cost %f, %s" % (
event.pass_id, event.batch_id, event.cost, event.metrics)
# start to train
trainer.train( trainer.train(
reader=trn_reader, reader=wmt14_reader,
event_handler=event_handler, event_handler=event_handler,
num_passes=10000, num_passes=10000,
reader_dict=reader_dict) reader_dict=reader_dict)
......
import paddle.v2.activation as activation import paddle.v2 as paddle
import paddle.v2.attr as attr
import paddle.v2.data_type as data_type
import paddle.v2.layer as layer
import paddle.v2.networks as networks
def seqToseq_net_v2(source_dict_dim, target_dict_dim): def seqToseq_net_v2(source_dict_dim, target_dict_dim):
...@@ -12,64 +8,70 @@ def seqToseq_net_v2(source_dict_dim, target_dict_dim): ...@@ -12,64 +8,70 @@ def seqToseq_net_v2(source_dict_dim, target_dict_dim):
encoder_size = 512 # dimension of hidden unit in GRU Encoder network encoder_size = 512 # dimension of hidden unit in GRU Encoder network
#### Encoder #### Encoder
src_word_id = layer.data( src_word_id = paddle.layer.data(
name='source_language_word', name='source_language_word',
type=data_type.integer_value_sequence(source_dict_dim)) type=paddle.data_type.integer_value_sequence(source_dict_dim))
src_embedding = layer.embedding( src_embedding = paddle.layer.embedding(
input=src_word_id, input=src_word_id,
size=word_vector_dim, size=word_vector_dim,
param_attr=attr.ParamAttr(name='_source_language_embedding')) param_attr=paddle.attr.ParamAttr(name='_source_language_embedding'))
src_forward = networks.simple_gru(input=src_embedding, size=encoder_size) src_forward = paddle.networks.simple_gru(
src_backward = networks.simple_gru( input=src_embedding, size=encoder_size)
src_backward = paddle.networks.simple_gru(
input=src_embedding, size=encoder_size, reverse=True) input=src_embedding, size=encoder_size, reverse=True)
encoded_vector = layer.concat(input=[src_forward, src_backward]) encoded_vector = paddle.layer.concat(input=[src_forward, src_backward])
#### Decoder #### Decoder
with layer.mixed(size=decoder_size) as encoded_proj: with paddle.layer.mixed(size=decoder_size) as encoded_proj:
encoded_proj += layer.full_matrix_projection(input=encoded_vector) encoded_proj += paddle.layer.full_matrix_projection(
input=encoded_vector)
backward_first = layer.first_seq(input=src_backward) backward_first = paddle.layer.first_seq(input=src_backward)
with layer.mixed(size=decoder_size, act=activation.Tanh()) as decoder_boot: with paddle.layer.mixed(
decoder_boot += layer.full_matrix_projection(input=backward_first) size=decoder_size, act=paddle.activation.Tanh()) as decoder_boot:
decoder_boot += paddle.layer.full_matrix_projection(
input=backward_first)
def gru_decoder_with_attention(enc_vec, enc_proj, current_word): def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
decoder_mem = layer.memory( decoder_mem = paddle.layer.memory(
name='gru_decoder', size=decoder_size, boot_layer=decoder_boot) name='gru_decoder', size=decoder_size, boot_layer=decoder_boot)
context = networks.simple_attention( context = paddle.networks.simple_attention(
encoded_sequence=enc_vec, encoded_sequence=enc_vec,
encoded_proj=enc_proj, encoded_proj=enc_proj,
decoder_state=decoder_mem) decoder_state=decoder_mem)
with layer.mixed(size=decoder_size * 3) as decoder_inputs: with paddle.layer.mixed(size=decoder_size * 3) as decoder_inputs:
decoder_inputs += layer.full_matrix_projection(input=context) decoder_inputs += paddle.layer.full_matrix_projection(input=context)
decoder_inputs += layer.full_matrix_projection(input=current_word) decoder_inputs += paddle.layer.full_matrix_projection(
input=current_word)
gru_step = layer.gru_step( gru_step = paddle.layer.gru_step(
name='gru_decoder', name='gru_decoder',
input=decoder_inputs, input=decoder_inputs,
output_mem=decoder_mem, output_mem=decoder_mem,
size=decoder_size) size=decoder_size)
with layer.mixed( with paddle.layer.mixed(
size=target_dict_dim, bias_attr=True, size=target_dict_dim,
act=activation.Softmax()) as out: bias_attr=True,
out += layer.full_matrix_projection(input=gru_step) act=paddle.activation.Softmax()) as out:
out += paddle.layer.full_matrix_projection(input=gru_step)
return out return out
decoder_group_name = "decoder_group" decoder_group_name = "decoder_group"
group_input1 = layer.StaticInputV2(input=encoded_vector, is_seq=True) group_input1 = paddle.layer.StaticInputV2(input=encoded_vector, is_seq=True)
group_input2 = layer.StaticInputV2(input=encoded_proj, is_seq=True) group_input2 = paddle.layer.StaticInputV2(input=encoded_proj, is_seq=True)
group_inputs = [group_input1, group_input2] group_inputs = [group_input1, group_input2]
trg_embedding = layer.embedding( trg_embedding = paddle.layer.embedding(
input=layer.data( input=paddle.layer.data(
name='target_language_word', name='target_language_word',
type=data_type.integer_value_sequence(target_dict_dim)), type=paddle.data_type.integer_value_sequence(target_dict_dim)),
size=word_vector_dim, size=word_vector_dim,
param_attr=attr.ParamAttr(name='_target_language_embedding')) param_attr=paddle.attr.ParamAttr(name='_target_language_embedding'))
group_inputs.append(trg_embedding) group_inputs.append(trg_embedding)
# For decoder equipped with attention mechanism, in training, # For decoder equipped with attention mechanism, in training,
...@@ -77,14 +79,14 @@ def seqToseq_net_v2(source_dict_dim, target_dict_dim): ...@@ -77,14 +79,14 @@ def seqToseq_net_v2(source_dict_dim, target_dict_dim):
# while encoded source sequence is accessed as an unbounded memory. # while encoded source sequence is accessed as an unbounded memory.
# Here, the StaticInput defines a read-only memory # Here, the StaticInput defines a read-only memory
# for the recurrent_group. # for the recurrent_group.
decoder = layer.recurrent_group( decoder = paddle.layer.recurrent_group(
name=decoder_group_name, name=decoder_group_name,
step=gru_decoder_with_attention, step=gru_decoder_with_attention,
input=group_inputs) input=group_inputs)
lbl = layer.data( lbl = paddle.layer.data(
name='target_language_next_word', name='target_language_next_word',
type=data_type.integer_value_sequence(target_dict_dim)) type=paddle.data_type.integer_value_sequence(target_dict_dim))
cost = layer.classification_cost(input=decoder, label=lbl) cost = paddle.layer.classification_cost(input=decoder, label=lbl)
return cost return cost
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册