diff --git a/demo/seqToseq/api_train_v2.py b/demo/seqToseq/api_train_v2.py
index 74ae1cf9ec81b00b55202d7c1316c7cb074cfd43..a5f59ec379738eb5bed3e7559739cae38582ed06 100644
--- a/demo/seqToseq/api_train_v2.py
+++ b/demo/seqToseq/api_train_v2.py
@@ -72,31 +72,35 @@ def main():
     # define network topology
     cost = seqToseq_net_v2(source_dict_dim, target_dict_dim)
     parameters = paddle.parameters.create(cost)
-    optimizer = paddle.optimizer.Adam(learning_rate=1e-4)
-
-    def event_handler(event):
-        if isinstance(event, paddle.event.EndIteration):
-            if event.batch_id % 10 == 0:
-                print "Pass %d, Batch %d, Cost %f, %s" % (
-                    event.pass_id, event.batch_id, event.cost, event.metrics)
 
+    # define optimize method and trainer
+    optimizer = paddle.optimizer.Adam(learning_rate=1e-4)
     trainer = paddle.trainer.SGD(cost=cost,
                                  parameters=parameters,
                                  update_equation=optimizer)
 
+    # define data reader
     reader_dict = {
         'source_language_word': 0,
         'target_language_word': 1,
         'target_language_next_word': 2
     }
-    trn_reader = paddle.reader.batched(
+    wmt14_reader = paddle.reader.batched(
         paddle.reader.shuffle(
             train_reader("data/pre-wmt14/train/train"), buf_size=8192),
         batch_size=5)
 
+    # define event_handler callback
+    def event_handler(event):
+        if isinstance(event, paddle.event.EndIteration):
+            if event.batch_id % 10 == 0:
+                print "Pass %d, Batch %d, Cost %f, %s" % (
+                    event.pass_id, event.batch_id, event.cost, event.metrics)
+
+    # start to train
     trainer.train(
-        reader=trn_reader,
+        reader=wmt14_reader,
         event_handler=event_handler,
         num_passes=10000,
         reader_dict=reader_dict)
diff --git a/demo/seqToseq/seqToseq_net_v2.py b/demo/seqToseq/seqToseq_net_v2.py
index 1ac95686b43526732ca0bc9bf8f4e07589e24807..058a6789d7094c71492ed9772ed5594c4c0c8f84 100644
--- a/demo/seqToseq/seqToseq_net_v2.py
+++ b/demo/seqToseq/seqToseq_net_v2.py
@@ -1,8 +1,4 @@
-import paddle.v2.activation as activation
-import paddle.v2.attr as attr
-import paddle.v2.data_type as data_type
-import paddle.v2.layer as layer
-import paddle.v2.networks as networks
+import paddle.v2 as paddle
 
 
 def seqToseq_net_v2(source_dict_dim, target_dict_dim):
@@ -12,64 +8,70 @@ def seqToseq_net_v2(source_dict_dim, target_dict_dim):
     encoder_size = 512  # dimension of hidden unit in GRU Encoder network
 
     #### Encoder
-    src_word_id = layer.data(
+    src_word_id = paddle.layer.data(
         name='source_language_word',
-        type=data_type.integer_value_sequence(source_dict_dim))
-    src_embedding = layer.embedding(
+        type=paddle.data_type.integer_value_sequence(source_dict_dim))
+    src_embedding = paddle.layer.embedding(
         input=src_word_id,
         size=word_vector_dim,
-        param_attr=attr.ParamAttr(name='_source_language_embedding'))
-    src_forward = networks.simple_gru(input=src_embedding, size=encoder_size)
-    src_backward = networks.simple_gru(
+        param_attr=paddle.attr.ParamAttr(name='_source_language_embedding'))
+    src_forward = paddle.networks.simple_gru(
+        input=src_embedding, size=encoder_size)
+    src_backward = paddle.networks.simple_gru(
         input=src_embedding, size=encoder_size, reverse=True)
-    encoded_vector = layer.concat(input=[src_forward, src_backward])
+    encoded_vector = paddle.layer.concat(input=[src_forward, src_backward])
 
     #### Decoder
-    with layer.mixed(size=decoder_size) as encoded_proj:
-        encoded_proj += layer.full_matrix_projection(input=encoded_vector)
+    with paddle.layer.mixed(size=decoder_size) as encoded_proj:
+        encoded_proj += paddle.layer.full_matrix_projection(
+            input=encoded_vector)
 
-    backward_first = layer.first_seq(input=src_backward)
+    backward_first = paddle.layer.first_seq(input=src_backward)
 
-    with layer.mixed(size=decoder_size, act=activation.Tanh()) as decoder_boot:
-        decoder_boot += layer.full_matrix_projection(input=backward_first)
+    with paddle.layer.mixed(
+            size=decoder_size, act=paddle.activation.Tanh()) as decoder_boot:
+        decoder_boot += paddle.layer.full_matrix_projection(
+            input=backward_first)
 
     def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
-        decoder_mem = layer.memory(
+        decoder_mem = paddle.layer.memory(
             name='gru_decoder', size=decoder_size, boot_layer=decoder_boot)
 
-        context = networks.simple_attention(
+        context = paddle.networks.simple_attention(
             encoded_sequence=enc_vec,
             encoded_proj=enc_proj,
             decoder_state=decoder_mem)
 
-        with layer.mixed(size=decoder_size * 3) as decoder_inputs:
-            decoder_inputs += layer.full_matrix_projection(input=context)
-            decoder_inputs += layer.full_matrix_projection(input=current_word)
+        with paddle.layer.mixed(size=decoder_size * 3) as decoder_inputs:
+            decoder_inputs += paddle.layer.full_matrix_projection(input=context)
+            decoder_inputs += paddle.layer.full_matrix_projection(
+                input=current_word)
 
-        gru_step = layer.gru_step(
+        gru_step = paddle.layer.gru_step(
             name='gru_decoder',
             input=decoder_inputs,
             output_mem=decoder_mem,
             size=decoder_size)
 
-        with layer.mixed(
-                size=target_dict_dim, bias_attr=True,
-                act=activation.Softmax()) as out:
-            out += layer.full_matrix_projection(input=gru_step)
+        with paddle.layer.mixed(
+                size=target_dict_dim,
+                bias_attr=True,
+                act=paddle.activation.Softmax()) as out:
+            out += paddle.layer.full_matrix_projection(input=gru_step)
         return out
 
     decoder_group_name = "decoder_group"
-    group_input1 = layer.StaticInputV2(input=encoded_vector, is_seq=True)
-    group_input2 = layer.StaticInputV2(input=encoded_proj, is_seq=True)
+    group_input1 = paddle.layer.StaticInputV2(input=encoded_vector, is_seq=True)
+    group_input2 = paddle.layer.StaticInputV2(input=encoded_proj, is_seq=True)
     group_inputs = [group_input1, group_input2]
 
-    trg_embedding = layer.embedding(
-        input=layer.data(
+    trg_embedding = paddle.layer.embedding(
+        input=paddle.layer.data(
             name='target_language_word',
-            type=data_type.integer_value_sequence(target_dict_dim)),
+            type=paddle.data_type.integer_value_sequence(target_dict_dim)),
         size=word_vector_dim,
-        param_attr=attr.ParamAttr(name='_target_language_embedding'))
+        param_attr=paddle.attr.ParamAttr(name='_target_language_embedding'))
     group_inputs.append(trg_embedding)
 
     # For decoder equipped with attention mechanism, in training,
@@ -77,14 +79,14 @@ def seqToseq_net_v2(source_dict_dim, target_dict_dim):
     # while encoded source sequence is accessed to as an unbounded memory.
     # Here, the StaticInput defines a read-only memory
    # for the recurrent_group.
-    decoder = layer.recurrent_group(
+    decoder = paddle.layer.recurrent_group(
         name=decoder_group_name,
         step=gru_decoder_with_attention,
         input=group_inputs)
 
-    lbl = layer.data(
+    lbl = paddle.layer.data(
         name='target_language_next_word',
-        type=data_type.integer_value_sequence(target_dict_dim))
-    cost = layer.classification_cost(input=decoder, label=lbl)
+        type=paddle.data_type.integer_value_sequence(target_dict_dim))
+    cost = paddle.layer.classification_cost(input=decoder, label=lbl)
 
     return cost
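
For orientation, a minimal sketch of how the two refactored files are expected to fit together under the single `import paddle.v2 as paddle` style. It only uses calls that appear in the patch plus `paddle.init`; the vocabulary sizes and init arguments are illustrative assumptions, not part of the patch.

# Sketch only: wiring seqToseq_net_v2 into the v2 training API.
# source_dict_dim / target_dict_dim values and paddle.init() arguments
# below are assumptions for illustration.
import paddle.v2 as paddle
from seqToseq_net_v2 import seqToseq_net_v2

paddle.init(use_gpu=False, trainer_count=1)

source_dict_dim = 30000  # assumed vocabulary size
target_dict_dim = 30000  # assumed vocabulary size

# build the cost layer from the network definition, then create
# parameters, optimizer and trainer exactly as api_train_v2.py does
cost = seqToseq_net_v2(source_dict_dim, target_dict_dim)
parameters = paddle.parameters.create(cost)
optimizer = paddle.optimizer.Adam(learning_rate=1e-4)
trainer = paddle.trainer.SGD(cost=cost,
                             parameters=parameters,
                             update_equation=optimizer)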