Commit 99a08541 authored by peterzhang2029

update the settings

Parent 3fd720bd
config.py:

source_dict_dim = 10000
target_dict_dim = 10000


class TrainConfig(object):
    source_dict_dim = source_dict_dim
    target_dict_dim = target_dict_dim
    use_gpu = False
    infer_only = False
    parallel = False
    batch_size = 16
    pass_num = 2
    pass_num = 5
    learning_rate = 0.0002
    buf_size = 100000


class ModelConfig(object):
    dict_size = 10000
    embedding_dim = 512
    encoder_size = 512
    decoder_size = 512
    source_dict_dim = source_dict_dim
    target_dict_dim = target_dict_dim
    source_dict_dim = 10000
    target_dict_dim = 10000
    is_generating = False
    beam_size = 3
    max_length = 250
model.py:

@@ -5,8 +5,6 @@ from __future__ import print_function
import paddle.fluid as fluid
from config import ModelConfig as model_conf


def lstm_step(x_t, hidden_t_prev, cell_t_prev, size):
    def linear(inputs):
@@ -29,16 +27,10 @@ def lstm_step(x_t, hidden_t_prev, cell_t_prev, size):
    return hidden_t, cell_t
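The body of lstm_step is collapsed in the hunk above. For readers following the diff, a standard Fluid-style LSTM step consistent with this signature looks roughly like the sketch below; it is an illustrative assumption, not necessarily the exact collapsed code.

def lstm_step_sketch(x_t, hidden_t_prev, cell_t_prev, size):
    # Shared affine transform used by every gate (sketch of the collapsed linear()).
    def linear(inputs):
        return fluid.layers.fc(input=inputs, size=size, bias_attr=True)

    # Input, forget and output gates: sigmoid over [h_{t-1}, x_t].
    forget_gate = fluid.layers.sigmoid(x=linear([hidden_t_prev, x_t]))
    input_gate = fluid.layers.sigmoid(x=linear([hidden_t_prev, x_t]))
    output_gate = fluid.layers.sigmoid(x=linear([hidden_t_prev, x_t]))
    # Candidate cell state.
    cell_tilde = fluid.layers.tanh(x=linear([hidden_t_prev, x_t]))

    # c_t = forget_gate * c_{t-1} + input_gate * cell_tilde
    cell_t = fluid.layers.sums(input=[
        fluid.layers.elementwise_mul(x=forget_gate, y=cell_t_prev),
        fluid.layers.elementwise_mul(x=input_gate, y=cell_tilde)
    ])
    # h_t = output_gate * tanh(c_t)
    hidden_t = fluid.layers.elementwise_mul(
        x=output_gate, y=fluid.layers.tanh(x=cell_t))
    return hidden_t, cell_t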
def seq_to_seq_net(src_word_idx, trg_word_idx, label):
def seq_to_seq_net(src_word_idx, trg_word_idx, label, embedding_dim,
                   encoder_size, decoder_size, source_dict_dim, target_dict_dim,
                   is_generating, beam_size, max_length):
    """Construct a seq2seq network."""
    embedding_dim = model_conf.embedding_dim
    encoder_size = model_conf.encoder_size
    decoder_size = model_conf.decoder_size
    source_dict_dim = model_conf.source_dict_dim
    target_dict_dim = model_conf.target_dict_dim
    is_generating = model_conf.is_generating
    beam_size = model_conf.beam_size
    max_length = model_conf.max_length

    def bi_lstm_encoder(input_seq, gate_size):
        # Linear transformation part for input gate, output gate, forget gate
@@ -93,13 +85,12 @@ def seq_to_seq_net(src_word_idx, trg_word_idx, label):
        decoder_state_expand = fluid.layers.sequence_expand(
            x=decoder_state_proj, y=encoder_proj)
        concated = fluid.layers.concat(
            input=[decoder_state_expand, encoder_proj], axis=1)
            input=[encoder_proj, decoder_state_expand], axis=1)
        attention_weights = fluid.layers.fc(input=concated,
                                            size=1,
                                            act='tanh',
                                            bias_attr=False)
        attention_weights = fluid.layers.sequence_softmax(
            x=attention_weights)
        attention_weights = fluid.layers.sequence_softmax(attention_weights)
        weigths_reshape = fluid.layers.reshape(
            x=attention_weights, shape=[-1])
        scaled = fluid.layers.elementwise_mul(
......
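The tail of this attention block is collapsed above. In this style of attention the weighted encoder states are usually summed over the sequence to form a single context vector; a one-line sketch under that assumption (not shown in the diff):

# Assumed closing step: pool the weighted encoder states into the attention context.
context = fluid.layers.sequence_pool(input=scaled, pool_type='sum')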
Training script:

@@ -15,6 +15,7 @@ from paddle.fluid.executor import Executor
from model import seq_to_seq_net
from config import TrainConfig as train_conf
from config import ModelConfig as model_conf
def to_lodtensor(data, place):
@@ -54,15 +55,37 @@ def train():
            src_word_idx_ = pd.read_input(src_word_idx)
            trg_word_idx_ = pd.read_input(trg_word_idx)
            label_ = pd.read_input(label)
            avg_cost = seq_to_seq_net(src_word_idx_, trg_word_idx_, label_)
            avg_cost = seq_to_seq_net(
                src_word_idx_,
                trg_word_idx_,
                label_,
                embedding_dim=model_conf.embedding_dim,
                encoder_size=model_conf.encoder_size,
                decoder_size=model_conf.decoder_size,
                source_dict_dim=model_conf.source_dict_dim,
                target_dict_dim=model_conf.target_dict_dim,
                is_generating=model_conf.is_generating,
                beam_size=model_conf.beam_size,
                max_length=model_conf.max_length)
            pd.write_output(avg_cost)

        avg_cost = pd()
        avg_cost = fluid.layers.mean(x=avg_cost)
    else:
        avg_cost = seq_to_seq_net(src_word_idx, trg_word_idx, label)
        avg_cost = seq_to_seq_net(
            src_word_idx,
            trg_word_idx,
            label,
            embedding_dim=model_conf.embedding_dim,
            encoder_size=model_conf.encoder_size,
            decoder_size=model_conf.decoder_size,
            source_dict_dim=model_conf.source_dict_dim,
            target_dict_dim=model_conf.target_dict_dim,
            is_generating=model_conf.is_generating,
            beam_size=model_conf.beam_size,
            max_length=model_conf.max_length)

    feeding_list = ["source_sequence", "target_sequence", "label_sequence"]

    # clone from default main program
    inference_program = fluid.default_main_program().clone()

    optimizer = fluid.optimizer.Adam(learning_rate=train_conf.learning_rate)
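The lines collapsed by the next hunk presumably attach this optimizer to the loss; in Fluid that is a single call. A sketch of the assumed step, not shown in this diff:

# Assumed: register the backward pass and parameter updates for avg_cost.
optimizer.minimize(avg_cost)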
@@ -70,16 +93,16 @@ def train():
    train_batch_generator = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.wmt16.train(train_conf.source_dict_dim,
                                       train_conf.target_dict_dim),
            buf_size=1000),
            paddle.dataset.wmt16.train(model_conf.source_dict_dim,
                                       model_conf.target_dict_dim),
            buf_size=train_conf.buf_size),
        batch_size=train_conf.batch_size)

    test_batch_generator = paddle.batch(
        paddle.reader.shuffle(
            paddle.dataset.wmt16.test(train_conf.source_dict_dim,
                                      train_conf.target_dict_dim),
            buf_size=1000),
            paddle.dataset.wmt16.test(model_conf.source_dict_dim,
                                      model_conf.target_dict_dim),
            buf_size=train_conf.buf_size),
        batch_size=train_conf.batch_size)
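For context, the three names in feeding_list line up with the three fields of every wmt16 sample. A small sketch of what the batched reader yields, assuming the standard paddle.dataset.wmt16 format (illustrative, not part of this diff):

import paddle

# Each sample is a tuple of word-id lists:
#   source ids                      -> fed as "source_sequence"
#   target ids (decoder input)      -> fed as "target_sequence"
#   next-word ids (shifted target)  -> fed as "label_sequence"
sample_reader = paddle.batch(
    paddle.dataset.wmt16.train(10000, 10000), batch_size=2)
src_ids, trg_ids, lbl_ids = next(sample_reader())[0]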
    place = core.CUDAPlace(0) if train_conf.use_gpu else core.CPUPlace()
......
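A note on the refactor: because seq_to_seq_net now receives every hyperparameter explicitly instead of reading ModelConfig inside the function, the network can also be built standalone, e.g. for a quick smoke test. A hypothetical sketch (data-layer names follow feeding_list; the small sizes are illustrative, not the benchmark settings):

src = fluid.layers.data(
    name="source_sequence", shape=[1], dtype="int64", lod_level=1)
trg = fluid.layers.data(
    name="target_sequence", shape=[1], dtype="int64", lod_level=1)
lbl = fluid.layers.data(
    name="label_sequence", shape=[1], dtype="int64", lod_level=1)

# Illustrative call with tiny sizes; in the benchmark these come from ModelConfig.
cost = seq_to_seq_net(
    src, trg, lbl,
    embedding_dim=32,
    encoder_size=32,
    decoder_size=32,
    source_dict_dim=1000,
    target_dict_dim=1000,
    is_generating=False,
    beam_size=3,
    max_length=250)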