Commit 39d477e2 authored by Guo Sheng, committed by pkpk

Add seq2seq_rl (#3923)

Parent 74b7049e
# -*- coding: utf-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import argparse
import distutils.util
def parse_args():
parser = argparse.ArgumentParser(description=__doc__)
parser.add_argument(
"--train_data_prefix", type=str, help="file prefix for train data")
parser.add_argument(
"--eval_data_prefix", type=str, help="file prefix for eval data")
parser.add_argument(
"--test_data_prefix", type=str, help="file prefix for test data")
parser.add_argument(
"--vocab_prefix", type=str, help="file prefix for vocab")
parser.add_argument("--src_lang", type=str, help="source language suffix")
parser.add_argument("--tar_lang", type=str, help="target language suffix")
parser.add_argument(
"--attention",
type=eval,
default=False,
help="Whether use attention model")
parser.add_argument(
"--optimizer",
type=str,
default='adam',
help="optimizer to use, only supprt[sgd|adam]")
parser.add_argument(
"--learning_rate",
type=float,
default=0.001,
help="learning rate for optimizer")
parser.add_argument(
"--num_layers",
type=int,
default=1,
help="layers number of encoder and decoder")
parser.add_argument(
"--hidden_size",
type=int,
default=100,
help="hidden size of encoder and decoder")
parser.add_argument("--src_vocab_size", type=int, help="source vocab size")
parser.add_argument("--tar_vocab_size", type=int, help="target vocab size")
parser.add_argument(
"--batch_size", type=int, help="batch size of each step")
parser.add_argument(
"--max_epoch", type=int, default=12, help="max epoch for the training")
parser.add_argument(
"--max_len",
type=int,
default=50,
help="max length for source and target sentence")
parser.add_argument(
"--dropout", type=float, default=0.0, help="drop probability")
parser.add_argument(
"--init_scale",
type=float,
default=0.0,
help="init scale for parameter")
parser.add_argument(
"--max_grad_norm",
type=float,
default=5.0,
help="max grad norm for global norm clip")
parser.add_argument(
"--model_path",
type=str,
default='model',
help="model path for model to save")
parser.add_argument(
"--reload_model", type=str, help="reload model to inference")
parser.add_argument(
"--infer_file", type=str, help="file name for inference")
parser.add_argument(
"--infer_output_file",
type=str,
default='infer_output',
help="file name for inference output")
parser.add_argument(
"--beam_size", type=int, default=10, help="file name for inference")
parser.add_argument(
'--use_gpu',
type=eval,
default=False,
        help='Whether to use GPU [True|False]')
parser.add_argument(
"--enable_ce",
action='store_true',
help="The flag indicating whether to run the task "
"for continuous evaluation.")
parser.add_argument(
"--profile", action='store_true', help="Whether enable the profile.")
args = parser.parse_args()
return args
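# A hypothetical invocation of the training entry that consumes these flags
# (the script name, data paths and vocab sizes below are placeholders, not
# taken from this commit; adjust them to your own dataset):
#   python train.py --src_lang en --tar_lang vi \
#       --vocab_prefix data/vocab --train_data_prefix data/train \
#       --eval_data_prefix data/valid --test_data_prefix data/test \
#       --src_vocab_size 10000 --tar_vocab_size 10000 \
#       --batch_size 128 --use_gpu True --reload_model mle_model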
import numpy as np
import paddle.fluid as fluid
from pg_agent import SeqPGAgent
from model import Seq2SeqModel, PolicyGradient, reward_func
import reader
from args import parse_args
def main():
args = parse_args()
num_layers = args.num_layers
hidden_size = args.hidden_size
dropout_prob = args.dropout
src_vocab_size = args.src_vocab_size
trg_vocab_size = args.tar_vocab_size
batch_size = args.batch_size
lr = args.learning_rate
train_data_prefix = args.train_data_prefix
eval_data_prefix = args.eval_data_prefix
test_data_prefix = args.test_data_prefix
vocab_prefix = args.vocab_prefix
src_lang = args.src_lang
tar_lang = args.tar_lang
print("begin to load data")
raw_data = reader.raw_data(src_lang, tar_lang, vocab_prefix,
train_data_prefix, eval_data_prefix,
test_data_prefix, args.max_len)
print("finished load data")
train_data, valid_data, test_data, _ = raw_data
def prepare_input(batch, epoch_id=0, teacher_forcing=False):
src_ids, src_mask, tar_ids, tar_mask = batch
res = {}
src_ids = src_ids.reshape((src_ids.shape[0], src_ids.shape[1]))
in_tar = tar_ids[:, :-1]
label_tar = tar_ids[:, 1:]
in_tar = in_tar.reshape((in_tar.shape[0], in_tar.shape[1]))
label_tar = label_tar.reshape(
(label_tar.shape[0], label_tar.shape[1], 1))
res['src'] = src_ids
res['src_sequence_length'] = src_mask
if teacher_forcing:
res['tar'] = in_tar
res['tar_sequence_length'] = tar_mask
res['label'] = label_tar
return res, np.sum(tar_mask)
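    # prepare_input builds the feed dict: "src" and "src_sequence_length" are
    # always present, while "tar", "tar_sequence_length" and "label" are only
    # fed under teacher forcing; the second return value (sum of the target
    # mask) is the target token count, usable for per-token normalization.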
place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
exe = fluid.Executor(place)
### TODO: pretrain the model with teacher-forcing MLE
### fine-tune with policy gradient
agent = SeqPGAgent(model_cls=Seq2SeqModel,
alg_cls=PolicyGradient,
reward_func=reward_func,
model_hparams={
"num_layers": num_layers,
"hidden_size": hidden_size,
"dropout_prob": dropout_prob,
"src_vocab_size": src_vocab_size,
"trg_vocab_size": trg_vocab_size,
"bos_token": 1,
"eos_token": 2,
},
alg_hparams={"lr": lr},
executor=exe)
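    # SeqPGAgent (see pg_agent.py) wires everything into a single program: the
    # Seq2SeqModel samples translations, reward_func scores them through a
    # py_func op, and PolicyGradient applies the update, all executed by exe.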
exe.run(fluid.default_startup_program())
if args.reload_model: # load MLE pre-trained model
fluid.io.load_params(exe,
dirname=args.reload_model,
main_program=agent.full_program)
max_epoch = args.max_epoch
for epoch_id in range(max_epoch):
if args.enable_ce:
train_data_iter = reader.get_data_iter(
train_data, batch_size, enable_ce=True)
else:
train_data_iter = reader.get_data_iter(train_data, batch_size)
for batch_id, batch in enumerate(train_data_iter):
input_data_feed, word_num = prepare_input(batch, epoch_id=epoch_id)
reward, cost = agent.learn(input_data_feed)
print("epoch_id: %d, batch_id: %d, reward: %f, cost: %f" %
(epoch_id, batch_id, reward.mean(), cost))
if __name__ == '__main__':
main()
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as layers
class EncoderCell(layers.RNNCell):
def __init__(self, num_layers, hidden_size, dropout_prob=0.):
self.num_layers = num_layers
self.hidden_size = hidden_size
self.dropout_prob = dropout_prob
self.lstm_cells = [
layers.LSTMCell(hidden_size) for i in range(num_layers)
]
def call(self, step_input, states):
new_states = []
for i in range(self.num_layers):
out, new_state = self.lstm_cells[i](step_input, states[i])
step_input = layers.dropout(
out, self.dropout_prob) if self.dropout_prob > 0 else out
new_states.append(new_state)
return step_input, new_states
@property
def state_shape(self):
return [cell.state_shape for cell in self.lstm_cells]
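# EncoderCell stacks num_layers LSTMCells and applies dropout to each layer's
# output; state_shape exposes the per-layer LSTM state shapes so layers.rnn
# can build matching initial states.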
class DecoderCell(layers.RNNCell):
def __init__(self, num_layers, hidden_size, dropout_prob=0.):
self.num_layers = num_layers
self.hidden_size = hidden_size
self.dropout_prob = dropout_prob
self.lstm_cells = [
layers.LSTMCell(hidden_size) for i in range(num_layers)
]
def attention(self, hidden, encoder_output, encoder_padding_mask):
query = layers.fc(hidden,
size=encoder_output.shape[-1],
bias_attr=False)
attn_scores = layers.matmul(layers.unsqueeze(query, [1]),
encoder_output,
transpose_y=True)
if encoder_padding_mask is not None:
attn_scores = layers.elementwise_add(attn_scores,
encoder_padding_mask)
attn_scores = layers.softmax(attn_scores)
attn_out = layers.squeeze(layers.matmul(attn_scores, encoder_output),
[1])
attn_out = layers.concat([attn_out, hidden], 1)
attn_out = layers.fc(attn_out, size=self.hidden_size, bias_attr=False)
return attn_out
def call(self,
step_input,
states,
encoder_output,
encoder_padding_mask=None):
lstm_states, input_feed = states
new_lstm_states = []
step_input = layers.concat([step_input, input_feed], 1)
for i in range(self.num_layers):
out, new_lstm_state = self.lstm_cells[i](step_input, lstm_states[i])
step_input = layers.dropout(
out, self.dropout_prob) if self.dropout_prob > 0 else out
new_lstm_states.append(new_lstm_state)
out = self.attention(step_input, encoder_output, encoder_padding_mask)
return out, [new_lstm_states, out]
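# DecoderCell adds input feeding and Luong-style (general) attention on top of
# the stacked LSTM: the previous attention output (input_feed) is concatenated
# with the current step input, and attention() scores a projected query against
# the encoder outputs with a masked dot product, returning the attentional
# hidden state that serves as both the cell output and the next input_feed.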
class Encoder(object):
    def __init__(self, num_layers, hidden_size, dropout_prob=0.):
self.encoder_cell = EncoderCell(num_layers, hidden_size, dropout_prob)
def __call__(self, src_emb, src_sequence_length):
encoder_output, encoder_final_state = layers.rnn(
cell=self.encoder_cell,
inputs=src_emb,
sequence_length=src_sequence_length,
is_reverse=False)
return encoder_output, encoder_final_state
class Decoder(object):
def __init__(self,
num_layers,
hidden_size,
dropout_prob,
decoding_strategy="infer_sample",
max_decoding_length=100):
self.decoder_cell = DecoderCell(num_layers, hidden_size, dropout_prob)
self.decoding_strategy = decoding_strategy
self.max_decoding_length = None if (
self.decoding_strategy == "train_greedy") else max_decoding_length
def __call__(self, decoder_initial_states, encoder_output,
encoder_padding_mask, **kwargs):
output_layer = kwargs.pop("output_layer", None)
if self.decoding_strategy == "train_greedy":
            # for teacher-forcing MLE pre-training
helper = layers.TrainingHelper(**kwargs)
elif self.decoding_strategy == "infer_sample":
helper = layers.SampleEmbeddingHelper(**kwargs)
elif self.decoding_strategy == "infer_greedy":
helper = layers.GreedyEmbeddingHelper(**kwargs)
else:
# TODO: Add beam_search training support.
raise ValueError("Unknown decoding strategy: {}".format(
self.decoding_strategy))
decoder = layers.BasicDecoder(self.decoder_cell,
helper,
output_fn=output_layer)
(decoder_output, decoder_final_state,
dec_seq_lengths) = layers.dynamic_decode(
decoder,
inits=decoder_initial_states,
max_step_num=self.max_decoding_length,
encoder_output=encoder_output,
encoder_padding_mask=encoder_padding_mask)
return decoder_output, decoder_final_state, dec_seq_lengths
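# Decoder picks a decoding helper by decoding_strategy: "train_greedy" feeds
# ground-truth embeddings (TrainingHelper, i.e. teacher forcing),
# "infer_sample" draws the next token from the softmax distribution
# (SampleEmbeddingHelper), and "infer_greedy" takes the argmax
# (GreedyEmbeddingHelper); layers.dynamic_decode then unrolls the cell until
# the end token or max_decoding_length is reached.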
class Seq2SeqModel(object):
def __init__(self,
num_layers,
hidden_size,
dropout_prob,
src_vocab_size,
trg_vocab_size,
bos_token,
eos_token,
decoding_strategy="infer_sample",
max_decoding_length=100):
self.bos_token, self.eos_token = bos_token, eos_token
self.src_embeder = lambda x: fluid.embedding(
input=x,
size=[src_vocab_size, hidden_size],
dtype="float32",
param_attr=fluid.ParamAttr(name="source_embedding"))
self.trg_embeder = lambda x: fluid.embedding(
input=x,
size=[trg_vocab_size, hidden_size],
dtype="float32",
param_attr=fluid.ParamAttr(name="target_embedding"))
self.encoder = Encoder(num_layers, hidden_size, dropout_prob)
self.decoder = Decoder(num_layers, hidden_size, dropout_prob,
decoding_strategy, max_decoding_length)
self.output_layer = lambda x: layers.fc(
x,
size=trg_vocab_size,
num_flatten_dims=len(x.shape) - 1,
param_attr=fluid.ParamAttr(name="output_w"),
bias_attr=False)
def __call__(self, src, src_length, trg=None, trg_length=None):
# encoder
encoder_output, encoder_final_state = self.encoder(
self.src_embeder(src), src_length)
decoder_initial_states = [
encoder_final_state,
self.decoder.decoder_cell.get_initial_states(
batch_ref=encoder_output, shape=[encoder_output.shape[-1]])
]
src_mask = layers.sequence_mask(src_length,
maxlen=layers.shape(src)[1],
dtype="float32")
encoder_padding_mask = (src_mask - 1.0) * 1e9
encoder_padding_mask = layers.unsqueeze(encoder_padding_mask, [1])
# decoder
decoder_kwargs = {
"inputs": self.trg_embeder(trg),
"sequence_length": trg_length,
} if self.decoder.decoding_strategy == "train_greedy" else {
"embedding_fn":
self.trg_embeder,
"start_tokens":
layers.fill_constant_batch_size_like(input=encoder_output,
shape=[-1],
dtype=src.dtype,
value=self.bos_token),
"end_token":
self.eos_token
}
decoder_kwargs["output_layer"] = self.output_layer
(decoder_output, decoder_final_state,
dec_seq_lengths) = self.decoder(decoder_initial_states, encoder_output,
encoder_padding_mask, **decoder_kwargs)
logits, samples, sample_length = (decoder_output.cell_outputs,
decoder_output.sample_ids,
dec_seq_lengths)
probs = layers.softmax(logits)
return probs, samples, sample_length
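# Seq2SeqModel returns per-step token probabilities, the decoded/sampled token
# ids and the decoded lengths; these map directly onto the act_prob, action
# and length arguments of PolicyGradient.learn below.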
class PolicyGradient(object):
def __init__(self, model, lr=None):
self.model = model
self.lr = lr
def predict(self, src, src_length):
return self.model(src, src_length)
def learn(self, act_prob, action, reward, length=None):
"""
update policy model self.model with policy gradient algorithm
"""
neg_log_prob = layers.cross_entropy(act_prob, action)
cost = neg_log_prob * reward
cost = (layers.reduce_sum(cost) / layers.reduce_sum(length)
) if length is not None else layers.reduce_mean(cost)
optimizer = fluid.optimizer.Adam(self.lr)
optimizer.minimize(cost)
return cost
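# The loss above is the REINFORCE objective: cross_entropy(act_prob, action)
# equals -log p(a_t), so
#     cost = sum_t( -log p(a_t) * R_t ) / num_tokens
# when length is given (a plain mean over all positions otherwise); minimizing
# it raises the probability of actions that earned positive reward.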
def reward_func(samples, sample_length):
samples = np.array(samples)
sample_length = np.array(sample_length)
reward = (10 - np.abs(sample_length - 10)).astype("float32")
return discount_reward(reward, sample_length, discount=1.).astype("float32")
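# This toy reward depends only on the sample length: it is maximal (10) when
# the decoded sequence has exactly 10 tokens and drops by 1 per token of
# deviation, so training pushes the policy toward length-10 outputs regardless
# of content; a sequence-level metric such as BLEU would be a more realistic
# reward.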
def discount_reward(reward, sequence_length, discount=1.):
return discount_reward_1d(reward, sequence_length, discount)
def discount_reward_1d(reward, sequence_length, discount=1., dtype=None):
if sequence_length is None:
raise ValueError('sequence_length must not be `None` for 1D reward.')
reward = np.array(reward)
sequence_length = np.array(sequence_length)
batch_size = reward.shape[0]
max_seq_length = np.max(sequence_length)
dtype = dtype or reward.dtype
if discount == 1.:
dmat = np.ones([batch_size, max_seq_length], dtype=dtype)
else:
steps = np.tile(np.arange(max_seq_length), [batch_size, 1])
mask = np.asarray(steps < (sequence_length - 1)[:, None], dtype=dtype)
# Make each row = [discount, ..., discount, 1, ..., 1]
dmat = mask * discount + (1 - mask)
dmat = np.cumprod(dmat[:, ::-1], axis=1)[:, ::-1]
disc_reward = dmat * reward[:, None]
disc_reward = mask_sequences(disc_reward, sequence_length, dtype=dtype)
return disc_reward
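# A small worked example (illustrative values, not from the original code):
# with reward=[4.], sequence_length=[3] and discount=0.5, mask is [1, 1, 0],
# dmat starts as [0.5, 0.5, 1.0] and becomes [0.25, 0.5, 1.0] after the
# reversed cumprod, so the per-step rewards are [1.0, 2.0, 4.0]; with
# discount=1. every valid step simply receives the full sequence reward.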
def mask_sequences(sequence, sequence_length, dtype=None, time_major=False):
sequence = np.array(sequence)
sequence_length = np.array(sequence_length)
rank = sequence.ndim
if rank < 2:
raise ValueError("`sequence` must be 2D or higher order.")
batch_size = sequence.shape[0]
max_time = sequence.shape[1]
dtype = dtype or sequence.dtype
if time_major:
sequence = np.transpose(sequence, axes=[1, 0, 2])
steps = np.tile(np.arange(max_time), [batch_size, 1])
mask = np.asarray(steps < sequence_length[:, None], dtype=dtype)
for _ in range(2, rank):
mask = np.expand_dims(mask, -1)
sequence = sequence * mask
if time_major:
sequence = np.transpose(sequence, axes=[1, 0, 2])
return sequence
import paddle.fluid as fluid
import paddle.fluid.layers as layers
from model import PolicyGradient
class SeqPGAgent(object):
def __init__(self,
model_cls,
reward_func,
alg_cls=PolicyGradient,
model_hparams={},
alg_hparams={},
executor=None):
self.build_program(model_cls, reward_func, alg_cls, model_hparams,
alg_hparams)
self.executor = executor
def build_program(self, model_cls, reward_func, alg_cls, model_hparams,
alg_hparams):
self.full_program = fluid.Program()
with fluid.program_guard(self.full_program,
fluid.default_startup_program()):
source = fluid.data(name="src", shape=[None, None], dtype="int64")
source_length = fluid.data(name="src_sequence_length",
shape=[None],
dtype="int64")
self.alg = alg_cls(model=model_cls(**model_hparams), **alg_hparams)
self.probs, self.samples, self.sample_length = self.alg.predict(
source, source_length)
self.samples.stop_gradient = True
reward = fluid.layers.create_global_var(
name="reward",
shape=[-1, -1], # batch_size, seq_len
value="0",
dtype=self.probs.dtype)
self.reward = fluid.layers.py_func(
func=reward_func,
x=[self.samples, self.sample_length],
out=reward)
self.cost = self.alg.learn(self.probs, self.samples, self.reward,
self.sample_length)
# to define the same parameters between different programs
self.pred_program = self.full_program._prune_with_input(
[source.name, source_length.name],
[self.probs, self.samples, self.sample_length])
def predict(self, feed_dict):
samples, sample_length = self.executor.run(
self.pred_program,
feed=feed_dict,
fetch_list=[self.samples, self.sample_length])
return samples, sample_length
def learn(self, feed_dict):
reward, cost = self.executor.run(self.full_program,
feed=feed_dict,
fetch_list=[self.reward, self.cost])
return reward, cost
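# A minimal usage sketch (hyper-parameter values are illustrative; assumes
# Seq2SeqModel and reward_func are imported from model.py):
#   exe = fluid.Executor(fluid.CPUPlace())
#   agent = SeqPGAgent(model_cls=Seq2SeqModel, reward_func=reward_func,
#                      model_hparams={"num_layers": 1, "hidden_size": 100,
#                                     "dropout_prob": 0.0,
#                                     "src_vocab_size": 10000,
#                                     "trg_vocab_size": 10000,
#                                     "bos_token": 1, "eos_token": 2},
#                      alg_hparams={"lr": 0.001}, executor=exe)
#   exe.run(fluid.default_startup_program())
#   # feed int64 arrays shaped [batch, src_len] and [batch]
#   reward, cost = agent.learn({"src": src_ids, "src_sequence_length": src_len})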
# -*- coding: utf-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities for parsing PTB text files."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function
import collections
import os
import io
import sys
import numpy as np
Py3 = sys.version_info[0] == 3
UNK_ID = 0
def _read_words(filename):
data = []
with io.open(filename, "r", encoding='utf-8') as f:
if Py3:
return f.read().replace("\n", "<eos>").split()
else:
return f.read().decode("utf-8").replace(u"\n", u"<eos>").split()
def read_all_line(filename):
    data = []
    with io.open(filename, "r", encoding='utf-8') as f:
        for line in f.readlines():
            data.append(line.strip())
    return data
def _build_vocab(filename):
vocab_dict = {}
ids = 0
with io.open(filename, "r", encoding='utf-8') as f:
for line in f.readlines():
vocab_dict[line.strip()] = ids
ids += 1
print("vocab word num", ids)
return vocab_dict
def _para_file_to_ids(src_file, tar_file, src_vocab, tar_vocab):
src_data = []
with io.open(src_file, "r", encoding='utf-8') as f_src:
for line in f_src.readlines():
arra = line.strip().split()
ids = [src_vocab[w] if w in src_vocab else UNK_ID for w in arra]
src_data.append(ids)
tar_data = []
with io.open(tar_file, "r", encoding='utf-8') as f_tar:
for line in f_tar.readlines():
arra = line.strip().split()
ids = [tar_vocab[w] if w in tar_vocab else UNK_ID for w in arra]
ids = [1] + ids + [2]
tar_data.append(ids)
return src_data, tar_data
def filter_len(src, tar, max_sequence_len=50):
new_src = []
new_tar = []
for id1, id2 in zip(src, tar):
if len(id1) > max_sequence_len:
id1 = id1[:max_sequence_len]
if len(id2) > max_sequence_len + 2:
id2 = id2[:max_sequence_len + 2]
new_src.append(id1)
new_tar.append(id2)
return new_src, new_tar
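# filter_len truncates sources to max_sequence_len and targets to
# max_sequence_len + 2, the extra two positions covering the <bos> (id 1) and
# <eos> (id 2) tokens added in _para_file_to_ids.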
def raw_data(src_lang,
tar_lang,
vocab_prefix,
train_prefix,
eval_prefix,
test_prefix,
max_sequence_len=50):
src_vocab_file = vocab_prefix + "." + src_lang
tar_vocab_file = vocab_prefix + "." + tar_lang
src_train_file = train_prefix + "." + src_lang
tar_train_file = train_prefix + "." + tar_lang
src_eval_file = eval_prefix + "." + src_lang
tar_eval_file = eval_prefix + "." + tar_lang
src_test_file = test_prefix + "." + src_lang
tar_test_file = test_prefix + "." + tar_lang
src_vocab = _build_vocab(src_vocab_file)
tar_vocab = _build_vocab(tar_vocab_file)
train_src, train_tar = _para_file_to_ids( src_train_file, tar_train_file, \
src_vocab, tar_vocab )
train_src, train_tar = filter_len(train_src,
train_tar,
max_sequence_len=max_sequence_len)
eval_src, eval_tar = _para_file_to_ids( src_eval_file, tar_eval_file, \
src_vocab, tar_vocab )
test_src, test_tar = _para_file_to_ids( src_test_file, tar_test_file, \
src_vocab, tar_vocab )
return ( train_src, train_tar), (eval_src, eval_tar), (test_src, test_tar),\
(src_vocab, tar_vocab)
def raw_mono_data(vocab_file, file_path):
src_vocab = _build_vocab(vocab_file)
test_src, test_tar = _para_file_to_ids( file_path, file_path, \
src_vocab, src_vocab )
return (test_src, test_tar)
def get_data_iter(raw_data,
batch_size,
mode='train',
enable_ce=False,
cache_num=20):
src_data, tar_data = raw_data
data_len = len(src_data)
index = np.arange(data_len)
if mode == "train" and not enable_ce:
np.random.shuffle(index)
def to_pad_np(data, source=False):
max_len = 0
for ele in data:
if len(ele) > max_len:
max_len = len(ele)
batch_size = len(data)
ids = np.ones((batch_size, max_len), dtype='int64') * 2
mask = np.zeros((batch_size), dtype='int64')
for i, ele in enumerate(data):
ids[i, :len(ele)] = ele
if not source:
mask[i] = len(ele) - 1
else:
mask[i] = len(ele)
return ids, mask
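    # to_pad_np pads each sequence in a batch to the batch's max length with
    # id 2 (<eos>) and returns the padded ids plus a length vector ("mask"):
    # full length for source sequences, length - 1 for target sequences (the
    # shifted input/label pair is one token shorter than the raw target).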
b_src = []
if mode != "train":
cache_num = 1
for j in range(data_len):
if len(b_src) == batch_size * cache_num:
# build batch size
# sort
new_cache = sorted(
b_src, key=lambda k: len(k[0])) if mode == "train" else b_src
for i in range(cache_num):
batch_data = new_cache[i * batch_size:(i + 1) * batch_size]
src_cache = [w[0] for w in batch_data]
tar_cache = [w[1] for w in batch_data]
src_ids, src_mask = to_pad_np(src_cache, source=True)
tar_ids, tar_mask = to_pad_np(tar_cache)
yield (src_ids, src_mask, tar_ids, tar_mask)
b_src = []
b_src.append((src_data[index[j]], tar_data[index[j]]))
new_cache = sorted(b_src,
key=lambda k: len(k[0])) if mode == "train" else b_src
for i in range(cache_num):
batch_data = new_cache[i * batch_size:(i + 1) * batch_size]
if len(batch_data) < batch_size:
if mode == "train" or len(batch_data) == 0:
continue
src_cache = [w[0] for w in batch_data]
tar_cache = [w[1] for w in batch_data]
src_ids, src_mask = to_pad_np(src_cache, source=True)
tar_ids, tar_mask = to_pad_np(tar_cache)
yield (src_ids, src_mask, tar_ids, tar_mask)
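# get_data_iter buckets examples for efficient padding: it accumulates
# batch_size * cache_num examples, sorts them by source length during
# training so each batch needs less padding, yields cache_num batches, and
# repeats; the trailing partial batch is dropped when training but kept (if
# non-empty) for eval/test, where cache_num is forced to 1 so the original
# order is preserved.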