diff --git a/machine_translation/README.md b/machine_translation/README.md
index f1801492eacdb73a631a4ff8bfdc34962adba20a..f06753a8180d266789140b5b03d730dc90a88df8 100644
--- a/machine_translation/README.md
+++ b/machine_translation/README.md
@@ -216,6 +216,16 @@ cd data

 ## Training Process

+### Initialize PaddlePaddle
+
+```python
+# Load the paddle python package
+import paddle.v2 as paddle
+
+# Configure to run on CPU only, training with a single CPU
+paddle.init(use_gpu=False, trainer_count=1)
+```
+
 ### Data Definition

 First, define the dictionary size, which both the data generator and the network configuration need. Then obtain the dataset reader for wmt14.
diff --git a/machine_translation/api_train.py b/machine_translation/api_train.py
index 107759cd25bdb8c627638904d3cebb8a501571b1..6efd254e7a48703a69c9f09dd35d41ba7ac5689a 100644
--- a/machine_translation/api_train.py
+++ b/machine_translation/api_train.py
@@ -9,50 +9,50 @@ def seqToseq_net(source_dict_dim, target_dict_dim):

     #### Encoder
     src_word_id = paddle.layer.data(
-            name='source_language_word',
-            type=paddle.data_type.integer_value_sequence(source_dict_dim))
+        name='source_language_word',
+        type=paddle.data_type.integer_value_sequence(source_dict_dim))
     src_embedding = paddle.layer.embedding(
-            input=src_word_id,
-            size=word_vector_dim,
-            param_attr=paddle.attr.ParamAttr(name='_source_language_embedding'))
+        input=src_word_id,
+        size=word_vector_dim,
+        param_attr=paddle.attr.ParamAttr(name='_source_language_embedding'))
     src_forward = paddle.networks.simple_gru(
-            input=src_embedding, size=encoder_size)
+        input=src_embedding, size=encoder_size)
     src_backward = paddle.networks.simple_gru(
-            input=src_embedding, size=encoder_size, reverse=True)
+        input=src_embedding, size=encoder_size, reverse=True)
     encoded_vector = paddle.layer.concat(input=[src_forward, src_backward])

     #### Decoder
     with paddle.layer.mixed(size=decoder_size) as encoded_proj:
         encoded_proj += paddle.layer.full_matrix_projection(
-                input=encoded_vector)
+            input=encoded_vector)

     backward_first = paddle.layer.first_seq(input=src_backward)
     with paddle.layer.mixed(
             size=decoder_size, act=paddle.activation.Tanh()) as decoder_boot:
         decoder_boot += paddle.layer.full_matrix_projection(
-                input=backward_first)
+            input=backward_first)

     def gru_decoder_with_attention(enc_vec, enc_proj, current_word):
         decoder_mem = paddle.layer.memory(
-                name='gru_decoder', size=decoder_size, boot_layer=decoder_boot)
+            name='gru_decoder', size=decoder_size, boot_layer=decoder_boot)

         context = paddle.networks.simple_attention(
-                encoded_sequence=enc_vec,
-                encoded_proj=enc_proj,
-                decoder_state=decoder_mem)
+            encoded_sequence=enc_vec,
+            encoded_proj=enc_proj,
+            decoder_state=decoder_mem)

         with paddle.layer.mixed(size=decoder_size * 3) as decoder_inputs:
             decoder_inputs += paddle.layer.full_matrix_projection(input=context)
             decoder_inputs += paddle.layer.full_matrix_projection(
-                    input=current_word)
+                input=current_word)

         gru_step = paddle.layer.gru_step(
-                name='gru_decoder',
-                input=decoder_inputs,
-                output_mem=decoder_mem,
-                size=decoder_size)
+            name='gru_decoder',
+            input=decoder_inputs,
+            output_mem=decoder_mem,
+            size=decoder_size)

         with paddle.layer.mixed(
                 size=target_dict_dim,
@@ -67,11 +67,11 @@ def seqToseq_net(source_dict_dim, target_dict_dim):

     group_inputs = [group_input1, group_input2]
     trg_embedding = paddle.layer.embedding(
-            input=paddle.layer.data(
-                name='target_language_word',
-                type=paddle.data_type.integer_value_sequence(target_dict_dim)),
-            size=word_vector_dim,
-            param_attr=paddle.attr.ParamAttr(name='_target_language_embedding'))
+        input=paddle.layer.data(
+            name='target_language_word',
+            type=paddle.data_type.integer_value_sequence(target_dict_dim)),
+        size=word_vector_dim,
+        param_attr=paddle.attr.ParamAttr(name='_target_language_embedding'))
     group_inputs.append(trg_embedding)

     # For decoder equipped with attention mechanism, in training,
@@ -80,13 +80,13 @@ def seqToseq_net(source_dict_dim, target_dict_dim):
     # Here, the StaticInput defines a read-only memory
     # for the recurrent_group.
     decoder = paddle.layer.recurrent_group(
-            name=decoder_group_name,
-            step=gru_decoder_with_attention,
-            input=group_inputs)
+        name=decoder_group_name,
+        step=gru_decoder_with_attention,
+        input=group_inputs)

     lbl = paddle.layer.data(
-            name='target_language_next_word',
-            type=paddle.data_type.integer_value_sequence(target_dict_dim))
+        name='target_language_next_word',
+        type=paddle.data_type.integer_value_sequence(target_dict_dim))

     cost = paddle.layer.classification_cost(input=decoder, label=lbl)
     return cost
@@ -110,15 +110,16 @@ def main():
         update_equation=optimizer)

     # define data reader
-    reader_dict = {
+    feeding = {
         'source_language_word': 0,
         'target_language_word': 1,
         'target_language_next_word': 2
     }
+
     wmt14_reader = paddle.batch(
-            paddle.reader.shuffle(
-                paddle.dataset.wmt14.train(dict_size=dict_size), buf_size=8192),
-            batch_size=5)
+        paddle.reader.shuffle(
+            paddle.dataset.wmt14.train(dict_size=dict_size), buf_size=8192),
+        batch_size=5)

     # define event_handler callback
     def event_handler(event):
@@ -129,10 +130,10 @@ def main():

     # start to train
     trainer.train(
-            reader=wmt14_reader,
-            event_handler=event_handler,
-            num_passes=10000,
-            reader_dict=reader_dict)
+        reader=wmt14_reader,
+        event_handler=event_handler,
+        num_passes=10000,
+        feeding=feeding)


 if __name__ == '__main__':
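
Note for reviewers: the only behavioral change in `api_train.py` is renaming `trainer.train`'s keyword argument `reader_dict` to `feeding` (plus one added blank line); every other hunk is whitespace-only re-indentation. For context, below is a minimal sketch of how the renamed `feeding` argument wires a reader into training under the paddle.v2 API shown in this patch. The dictionary size, optimizer settings, and no-op event handler are illustrative placeholders, not part of the patch; `seqToseq_net` is the function defined in the file above.

```python
import paddle.v2 as paddle

def main():
    paddle.init(use_gpu=False, trainer_count=1)
    dict_size = 30000  # assumed value; the real script defines its own

    cost = seqToseq_net(dict_size, dict_size)
    parameters = paddle.parameters.create(cost)
    optimizer = paddle.optimizer.Adam(learning_rate=5e-4)  # placeholder settings
    trainer = paddle.trainer.SGD(
        cost=cost, parameters=parameters, update_equation=optimizer)

    # Each sample yielded by the wmt14 reader is a tuple; `feeding` maps
    # each data layer's name to its position in that tuple. This mapping
    # is exactly what the old `reader_dict` keyword used to carry.
    feeding = {
        'source_language_word': 0,
        'target_language_word': 1,
        'target_language_next_word': 2
    }

    wmt14_reader = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.wmt14.train(dict_size=dict_size), buf_size=8192),
        batch_size=5)

    trainer.train(
        reader=wmt14_reader,
        event_handler=lambda event: None,  # plug in real logging here
        num_passes=10000,
        feeding=feeding)  # renamed from reader_dict in this patch

if __name__ == '__main__':
    main()
```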