PaddlePaddle / models, commit 39d477e2

Add seq2seq_rl (#3923)

Authored Nov 21, 2019 by Guo Sheng; committed by pkpk, Nov 21, 2019
Parent commit: 74b7049e

Showing 5 changed files with 774 additions and 0 deletions (+774, -0)
PaddleNLP/PaddleTextGEN/seq2seq_rl/args.py      +127  -0
PaddleNLP/PaddleTextGEN/seq2seq_rl/main.py       +92  -0
PaddleNLP/PaddleTextGEN/seq2seq_rl/model.py     +281  -0
PaddleNLP/PaddleTextGEN/seq2seq_rl/pg_agent.py   +58  -0
PaddleNLP/PaddleTextGEN/seq2seq_rl/reader.py    +216  -0
PaddleNLP/PaddleTextGEN/seq2seq_rl/args.py  (new file, mode 100644)
# -*- coding: utf-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import argparse
import distutils.util


def parse_args():
    parser = argparse.ArgumentParser(description=__doc__)
    parser.add_argument(
        "--train_data_prefix", type=str, help="file prefix for train data")
    parser.add_argument(
        "--eval_data_prefix", type=str, help="file prefix for eval data")
    parser.add_argument(
        "--test_data_prefix", type=str, help="file prefix for test data")
    parser.add_argument(
        "--vocab_prefix", type=str, help="file prefix for vocab")
    parser.add_argument("--src_lang", type=str, help="source language suffix")
    parser.add_argument("--tar_lang", type=str, help="target language suffix")

    parser.add_argument(
        "--attention",
        type=eval,
        default=False,
        help="Whether to use the attention model")

    parser.add_argument(
        "--optimizer",
        type=str,
        default='adam',
        help="optimizer to use, only support [sgd|adam]")

    parser.add_argument(
        "--learning_rate",
        type=float,
        default=0.001,
        help="learning rate for optimizer")

    parser.add_argument(
        "--num_layers",
        type=int,
        default=1,
        help="number of layers in encoder and decoder")

    parser.add_argument(
        "--hidden_size",
        type=int,
        default=100,
        help="hidden size of encoder and decoder")

    parser.add_argument("--src_vocab_size", type=int, help="source vocab size")
    parser.add_argument("--tar_vocab_size", type=int, help="target vocab size")

    parser.add_argument(
        "--batch_size", type=int, help="batch size of each step")

    parser.add_argument(
        "--max_epoch",
        type=int,
        default=12,
        help="max number of epochs for training")

    parser.add_argument(
        "--max_len",
        type=int,
        default=50,
        help="max length for source and target sentences")

    parser.add_argument(
        "--dropout", type=float, default=0.0, help="dropout probability")

    parser.add_argument(
        "--init_scale",
        type=float,
        default=0.0,
        help="init scale for parameters")

    parser.add_argument(
        "--max_grad_norm",
        type=float,
        default=5.0,
        help="max grad norm for global norm clip")

    parser.add_argument(
        "--model_path",
        type=str,
        default='model',
        help="path to save the model")

    parser.add_argument(
        "--reload_model", type=str, help="model directory to reload")

    parser.add_argument(
        "--infer_file", type=str, help="file name for inference")

    parser.add_argument(
        "--infer_output_file",
        type=str,
        default='infer_output',
        help="file name for inference output")

    parser.add_argument(
        "--beam_size", type=int, default=10, help="beam size for beam search")

    parser.add_argument(
        '--use_gpu',
        type=eval,
        default=False,
        help='Whether to use gpu [True|False]')

    parser.add_argument(
        "--enable_ce",
        action='store_true',
        help="The flag indicating whether to run the task "
        "for continuous evaluation.")

    parser.add_argument(
        "--profile", action='store_true', help="Whether to enable profiling.")

    args = parser.parse_args()
    return args
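As a reading aid (not part of the committed files), here is a minimal sketch of how these flags might be supplied. It assumes it is run from the seq2seq_rl directory so that args.py is importable, and every path and vocabulary size below is an illustrative placeholder.

import sys

# Hypothetical command line, patched into sys.argv before parse_args() is called.
sys.argv = [
    "main.py",
    "--train_data_prefix", "data/train",
    "--eval_data_prefix", "data/dev",
    "--test_data_prefix", "data/test",
    "--vocab_prefix", "data/vocab",
    "--src_lang", "en",
    "--tar_lang", "vi",
    "--src_vocab_size", "10000",
    "--tar_vocab_size", "10000",
    "--batch_size", "32",
    "--use_gpu", "False",
]

from args import parse_args

args = parse_args()
print(args.src_lang, args.tar_lang, args.batch_size, args.learning_rate)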
PaddleNLP/PaddleTextGEN/seq2seq_rl/main.py  (new file, mode 100644)
import numpy as np
import paddle.fluid as fluid

from pg_agent import SeqPGAgent
from model import Seq2SeqModel, PolicyGradient, reward_func
import reader
from args import parse_args


def main():
    args = parse_args()

    num_layers = args.num_layers
    hidden_size = args.hidden_size
    dropout_prob = args.dropout
    src_vocab_size = args.src_vocab_size
    trg_vocab_size = args.tar_vocab_size
    batch_size = args.batch_size
    lr = args.learning_rate

    train_data_prefix = args.train_data_prefix
    eval_data_prefix = args.eval_data_prefix
    test_data_prefix = args.test_data_prefix
    vocab_prefix = args.vocab_prefix
    src_lang = args.src_lang
    tar_lang = args.tar_lang
    print("begin to load data")
    raw_data = reader.raw_data(src_lang, tar_lang, vocab_prefix,
                               train_data_prefix, eval_data_prefix,
                               test_data_prefix, args.max_len)
    print("finished load data")
    train_data, valid_data, test_data, _ = raw_data

    def prepare_input(batch, epoch_id=0, teacher_forcing=False):
        src_ids, src_mask, tar_ids, tar_mask = batch
        res = {}
        src_ids = src_ids.reshape((src_ids.shape[0], src_ids.shape[1]))
        in_tar = tar_ids[:, :-1]
        label_tar = tar_ids[:, 1:]

        in_tar = in_tar.reshape((in_tar.shape[0], in_tar.shape[1]))
        label_tar = label_tar.reshape(
            (label_tar.shape[0], label_tar.shape[1], 1))

        res['src'] = src_ids
        res['src_sequence_length'] = src_mask
        if teacher_forcing:
            res['tar'] = in_tar
            res['tar_sequence_length'] = tar_mask
            res['label'] = label_tar
        return res, np.sum(tar_mask)

    place = fluid.CUDAPlace(0) if args.use_gpu else fluid.CPUPlace()
    exe = fluid.Executor(place)

    ### TODO: pretrain the model with teacher-forcing MLE

    ### fine-tune with policy gradient
    agent = SeqPGAgent(
        model_cls=Seq2SeqModel,
        alg_cls=PolicyGradient,
        reward_func=reward_func,
        model_hparams={
            "num_layers": num_layers,
            "hidden_size": hidden_size,
            "dropout_prob": dropout_prob,
            "src_vocab_size": src_vocab_size,
            "trg_vocab_size": trg_vocab_size,
            "bos_token": 1,
            "eos_token": 2,
        },
        alg_hparams={"lr": lr},
        executor=exe)

    exe.run(fluid.default_startup_program())
    if args.reload_model:  # load MLE pre-trained model
        fluid.io.load_params(
            exe, dirname=args.reload_model, main_program=agent.full_program)

    max_epoch = args.max_epoch
    for epoch_id in range(max_epoch):
        if args.enable_ce:
            train_data_iter = reader.get_data_iter(
                train_data, batch_size, enable_ce=True)
        else:
            train_data_iter = reader.get_data_iter(train_data, batch_size)

        for batch_id, batch in enumerate(train_data_iter):
            input_data_feed, word_num = prepare_input(batch, epoch_id=epoch_id)
            reward, cost = agent.learn(input_data_feed)
            print("epoch_id: %d, batch_id: %d, reward: %f, cost: %f" %
                  (epoch_id, batch_id, reward.mean(), cost))


if __name__ == '__main__':
    main()
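To make the feed layout concrete, a small self-contained NumPy sketch (not from the commit) of the batches prepare_input consumes: the ids are arbitrary placeholders, padding uses id 2, and the target slicing mirrors the in_tar/label_tar split above.

import numpy as np

# Toy batch in the layout reader.get_data_iter yields: (src_ids, src_mask, tar_ids, tar_mask).
src_ids = np.array([[5, 6, 7, 2], [8, 9, 2, 2]], dtype="int64")
src_mask = np.array([4, 2], dtype="int64")      # true source lengths
tar_ids = np.array([[1, 11, 12, 2], [1, 13, 2, 2]], dtype="int64")
tar_mask = np.array([3, 2], dtype="int64")      # target lengths minus one

in_tar = tar_ids[:, :-1]                        # decoder input: <bos> w1 w2 ...
label_tar = tar_ids[:, 1:][..., None]           # shifted labels with a trailing unit axis

feed = {"src": src_ids, "src_sequence_length": src_mask}
print(in_tar.shape, label_tar.shape, int(np.sum(tar_mask)))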
PaddleNLP/PaddleTextGEN/seq2seq_rl/model.py  (new file, mode 100644)
import numpy as np
import paddle.fluid as fluid
import paddle.fluid.layers as layers


class EncoderCell(layers.RNNCell):
    def __init__(self, num_layers, hidden_size, dropout_prob=0.):
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.dropout_prob = dropout_prob
        self.lstm_cells = [
            layers.LSTMCell(hidden_size) for i in range(num_layers)
        ]

    def call(self, step_input, states):
        new_states = []
        for i in range(self.num_layers):
            out, new_state = self.lstm_cells[i](step_input, states[i])
            step_input = layers.dropout(
                out, self.dropout_prob) if self.dropout_prob > 0 else out
            new_states.append(new_state)
        return step_input, new_states

    @property
    def state_shape(self):
        return [cell.state_shape for cell in self.lstm_cells]
class DecoderCell(layers.RNNCell):
    def __init__(self, num_layers, hidden_size, dropout_prob=0.):
        self.num_layers = num_layers
        self.hidden_size = hidden_size
        self.dropout_prob = dropout_prob
        self.lstm_cells = [
            layers.LSTMCell(hidden_size) for i in range(num_layers)
        ]

    def attention(self, hidden, encoder_output, encoder_padding_mask):
        query = layers.fc(hidden,
                          size=encoder_output.shape[-1],
                          bias_attr=False)
        attn_scores = layers.matmul(
            layers.unsqueeze(query, [1]), encoder_output, transpose_y=True)
        if encoder_padding_mask is not None:
            attn_scores = layers.elementwise_add(attn_scores,
                                                 encoder_padding_mask)
        attn_scores = layers.softmax(attn_scores)
        attn_out = layers.squeeze(
            layers.matmul(attn_scores, encoder_output), [1])
        attn_out = layers.concat([attn_out, hidden], 1)
        attn_out = layers.fc(attn_out, size=self.hidden_size, bias_attr=False)
        return attn_out

    def call(self, step_input, states, encoder_output,
             encoder_padding_mask=None):
        lstm_states, input_feed = states
        new_lstm_states = []
        step_input = layers.concat([step_input, input_feed], 1)
        for i in range(self.num_layers):
            out, new_lstm_state = self.lstm_cells[i](step_input,
                                                     lstm_states[i])
            step_input = layers.dropout(
                out, self.dropout_prob) if self.dropout_prob > 0 else out
            new_lstm_states.append(new_lstm_state)
        out = self.attention(step_input, encoder_output, encoder_padding_mask)
        return out, [new_lstm_states, out]
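# Editor's aside (illustration only, not part of the committed file): the additive
# masking used in DecoderCell.attention can be checked with plain NumPy. Padded
# source positions get a large negative bias before the softmax, so their attention
# weights come out (near) zero. All shapes and values below are toy placeholders.
import numpy as np

np.random.seed(0)
batch, src_len, hidden = 2, 4, 3
query = np.random.rand(batch, 1, hidden)               # one decoder step per example
enc_out = np.random.rand(batch, src_len, hidden)
src_lengths = np.array([4, 2])

src_mask = (np.arange(src_len)[None, :] < src_lengths[:, None]).astype("float32")
padding_bias = (src_mask - 1.0) * 1e9                  # as in Seq2SeqModel.__call__

scores = np.matmul(query, enc_out.transpose(0, 2, 1))  # [batch, 1, src_len]
scores = scores + padding_bias[:, None, :]
weights = np.exp(scores - scores.max(-1, keepdims=True))
weights = weights / weights.sum(-1, keepdims=True)     # softmax over source positions
context = np.matmul(weights, enc_out)                  # [batch, 1, hidden]
print(np.round(weights[1, 0], 3))                      # weights on padded positions ~ 0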
class Encoder(object):
    def __init__(self, num_layers, hidden_size, dropout_prob=0.):
        self.encoder_cell = EncoderCell(num_layers, hidden_size, dropout_prob)

    def __call__(self, src_emb, src_sequence_length):
        encoder_output, encoder_final_state = layers.rnn(
            cell=self.encoder_cell,
            inputs=src_emb,
            sequence_length=src_sequence_length,
            is_reverse=False)
        return encoder_output, encoder_final_state


class Decoder(object):
    def __init__(self,
                 num_layers,
                 hidden_size,
                 dropout_prob,
                 decoding_strategy="infer_sample",
                 max_decoding_length=100):
        self.decoder_cell = DecoderCell(num_layers, hidden_size, dropout_prob)
        self.decoding_strategy = decoding_strategy
        self.max_decoding_length = None if (
            self.decoding_strategy == "train_greedy") else max_decoding_length

    def __call__(self, decoder_initial_states, encoder_output,
                 encoder_padding_mask, **kwargs):
        output_layer = kwargs.pop("output_layer", None)
        if self.decoding_strategy == "train_greedy":
            # for teacher-forcing MLE pre-training
            helper = layers.TrainingHelper(**kwargs)
        elif self.decoding_strategy == "infer_sample":
            helper = layers.SampleEmbeddingHelper(**kwargs)
        elif self.decoding_strategy == "infer_greedy":
            helper = layers.GreedyEmbeddingHelper(**kwargs)
        else:
            # TODO: Add beam_search training support.
            raise ValueError(
                "Unknown decoding strategy: {}".format(self.decoding_strategy))
        decoder = layers.BasicDecoder(
            self.decoder_cell, helper, output_fn=output_layer)
        (decoder_output, decoder_final_state,
         dec_seq_lengths) = layers.dynamic_decode(
             decoder,
             inits=decoder_initial_states,
             max_step_num=self.max_decoding_length,
             encoder_output=encoder_output,
             encoder_padding_mask=encoder_padding_mask)
        return decoder_output, decoder_final_state, dec_seq_lengths
class Seq2SeqModel(object):
    def __init__(self,
                 num_layers,
                 hidden_size,
                 dropout_prob,
                 src_vocab_size,
                 trg_vocab_size,
                 bos_token,
                 eos_token,
                 decoding_strategy="infer_sample",
                 max_decoding_length=100):
        self.bos_token, self.eos_token = bos_token, eos_token
        self.src_embeder = lambda x: fluid.embedding(
            input=x,
            size=[src_vocab_size, hidden_size],
            dtype="float32",
            param_attr=fluid.ParamAttr(name="source_embedding"))
        self.trg_embeder = lambda x: fluid.embedding(
            input=x,
            size=[trg_vocab_size, hidden_size],
            dtype="float32",
            param_attr=fluid.ParamAttr(name="target_embedding"))
        self.encoder = Encoder(num_layers, hidden_size, dropout_prob)
        self.decoder = Decoder(num_layers, hidden_size, dropout_prob,
                               decoding_strategy, max_decoding_length)
        self.output_layer = lambda x: layers.fc(
            x,
            size=trg_vocab_size,
            num_flatten_dims=len(x.shape) - 1,
            param_attr=fluid.ParamAttr(name="output_w"),
            bias_attr=False)

    def __call__(self, src, src_length, trg=None, trg_length=None):
        # encoder
        encoder_output, encoder_final_state = self.encoder(
            self.src_embeder(src), src_length)
        decoder_initial_states = [
            encoder_final_state,
            self.decoder.decoder_cell.get_initial_states(
                batch_ref=encoder_output, shape=[encoder_output.shape[-1]])
        ]
        src_mask = layers.sequence_mask(
            src_length, maxlen=layers.shape(src)[1], dtype="float32")
        encoder_padding_mask = (src_mask - 1.0) * 1e9
        encoder_padding_mask = layers.unsqueeze(encoder_padding_mask, [1])
        # decoder
        decoder_kwargs = {
            "inputs": self.trg_embeder(trg),
            "sequence_length": trg_length,
        } if self.decoder.decoding_strategy == "train_greedy" else {
            "embedding_fn": self.trg_embeder,
            "start_tokens": layers.fill_constant_batch_size_like(
                input=encoder_output,
                shape=[-1],
                dtype=src.dtype,
                value=self.bos_token),
            "end_token": self.eos_token
        }
        decoder_kwargs["output_layer"] = self.output_layer
        (decoder_output, decoder_final_state,
         dec_seq_lengths) = self.decoder(decoder_initial_states,
                                         encoder_output, encoder_padding_mask,
                                         **decoder_kwargs)
        logits, samples, sample_length = (decoder_output.cell_outputs,
                                          decoder_output.sample_ids,
                                          dec_seq_lengths)
        probs = layers.softmax(logits)
        return probs, samples, sample_length
class PolicyGradient(object):
    def __init__(self, model, lr=None):
        self.model = model
        self.lr = lr

    def predict(self, src, src_length):
        return self.model(src, src_length)

    def learn(self, act_prob, action, reward, length=None):
        """
        update policy model self.model with policy gradient algorithm
        """
        neg_log_prob = layers.cross_entropy(act_prob, action)
        cost = neg_log_prob * reward
        cost = (layers.reduce_sum(cost) / layers.reduce_sum(length)
                ) if length is not None else layers.reduce_mean(cost)
        optimizer = fluid.optimizer.Adam(self.lr)
        optimizer.minimize(cost)
        return cost
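# Editor's aside (illustration only, not part of the committed file): learn() above
# builds the REINFORCE surrogate. cross_entropy against the sampled ids is the
# negative log-probability of those samples; weighting it by the (stop-gradient)
# reward and dividing by the total number of generated tokens gives the scalar that
# Adam minimizes. The same arithmetic on toy NumPy values:
import numpy as np

np.random.seed(0)
batch, steps, vocab = 2, 3, 5
act_prob = np.random.dirichlet(np.ones(vocab), size=(batch, steps))  # per-step distributions
action = np.random.randint(vocab, size=(batch, steps))               # sampled token ids
reward = np.array([[1.0, 1.0, 1.0], [0.5, 0.5, 0.0]])                # zero on padded steps
length = np.array([3, 2])

neg_log_prob = -np.log(np.take_along_axis(act_prob, action[..., None], -1)[..., 0])
cost = (neg_log_prob * reward).sum() / length.sum()   # reduce_sum(cost) / reduce_sum(length)
print(cost)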
def reward_func(samples, sample_length):
    samples = np.array(samples)
    sample_length = np.array(sample_length)
    reward = (10 - np.abs(sample_length - 10)).astype("float32")
    return discount_reward(reward, sample_length, discount=1.).astype("float32")


def discount_reward(reward, sequence_length, discount=1.):
    return discount_reward_1d(reward, sequence_length, discount)


def discount_reward_1d(reward, sequence_length, discount=1., dtype=None):
    if sequence_length is None:
        raise ValueError('sequence_length must not be `None` for 1D reward.')

    reward = np.array(reward)
    sequence_length = np.array(sequence_length)
    batch_size = reward.shape[0]
    max_seq_length = np.max(sequence_length)
    dtype = dtype or reward.dtype

    if discount == 1.:
        dmat = np.ones([batch_size, max_seq_length], dtype=dtype)
    else:
        steps = np.tile(np.arange(max_seq_length), [batch_size, 1])
        mask = np.asarray(steps < (sequence_length - 1)[:, None], dtype=dtype)
        # Make each row = [discount, ..., discount, 1, ..., 1]
        dmat = mask * discount + (1 - mask)
        dmat = np.cumprod(dmat[:, ::-1], axis=1)[:, ::-1]

    disc_reward = dmat * reward[:, None]
    disc_reward = mask_sequences(disc_reward, sequence_length, dtype=dtype)
    return disc_reward


def mask_sequences(sequence, sequence_length, dtype=None, time_major=False):
    sequence = np.array(sequence)
    sequence_length = np.array(sequence_length)

    rank = sequence.ndim
    if rank < 2:
        raise ValueError("`sequence` must be 2D or higher order.")
    batch_size = sequence.shape[0]
    max_time = sequence.shape[1]
    dtype = dtype or sequence.dtype

    if time_major:
        sequence = np.transpose(sequence, axes=[1, 0, 2])

    steps = np.tile(np.arange(max_time), [batch_size, 1])
    mask = np.asarray(steps < sequence_length[:, None], dtype=dtype)
    for _ in range(2, rank):
        mask = np.expand_dims(mask, -1)

    sequence = sequence * mask

    if time_major:
        sequence = np.transpose(sequence, axes=[1, 0, 2])

    return sequence
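The reward above is deliberately simple and easy to verify: a sampled sequence earns 10 - |length - 10|, so the policy is pushed toward sequences of length 10, and with discount=1 that scalar is simply broadcast over the valid time steps and zeroed on padding. A NumPy check of that behavior (illustration only, not part of the commit):

import numpy as np

sample_length = np.array([10, 7, 13])
reward = (10 - np.abs(sample_length - 10)).astype("float32")        # [10., 7., 7.]

max_len = sample_length.max()
per_step = np.ones((len(sample_length), max_len), dtype="float32") * reward[:, None]
mask = (np.arange(max_len)[None, :] < sample_length[:, None]).astype("float32")
print(per_step * mask)   # equals discount_reward(reward, sample_length, discount=1.)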
PaddleNLP/PaddleTextGEN/seq2seq_rl/pg_agent.py  (new file, mode 100644)
import paddle.fluid as fluid
import paddle.fluid.layers as layers

from model import PolicyGradient


class SeqPGAgent(object):
    def __init__(self,
                 model_cls,
                 reward_func,
                 alg_cls=PolicyGradient,
                 model_hparams={},
                 alg_hparams={},
                 executor=None):
        self.build_program(model_cls, reward_func, alg_cls, model_hparams,
                           alg_hparams)
        self.executor = executor

    def build_program(self, model_cls, reward_func, alg_cls, model_hparams,
                      alg_hparams):
        self.full_program = fluid.Program()
        with fluid.program_guard(self.full_program,
                                 fluid.default_startup_program()):
            source = fluid.data(name="src", shape=[None, None], dtype="int64")
            source_length = fluid.data(
                name="src_sequence_length", shape=[None], dtype="int64")
            self.alg = alg_cls(model=model_cls(**model_hparams), **alg_hparams)
            self.probs, self.samples, self.sample_length = self.alg.predict(
                source, source_length)
            self.samples.stop_gradient = True
            reward = fluid.layers.create_global_var(
                name="reward",
                shape=[-1, -1],  # batch_size, seq_len
                value="0",
                dtype=self.probs.dtype)
            self.reward = fluid.layers.py_func(
                func=reward_func,
                x=[self.samples, self.sample_length],
                out=reward)
            self.cost = self.alg.learn(self.probs, self.samples, self.reward,
                                       self.sample_length)
        # to define the same parameters between different programs
        self.pred_program = self.full_program._prune_with_input(
            [source.name, source_length.name],
            [self.probs, self.samples, self.sample_length])

    def predict(self, feed_dict):
        samples, sample_length = self.executor.run(
            self.pred_program,
            feed=feed_dict,
            fetch_list=[self.samples, self.sample_length])
        return samples, sample_length

    def learn(self, feed_dict):
        reward, cost = self.executor.run(
            self.full_program,
            feed=feed_dict,
            fetch_list=[self.reward, self.cost])
        return reward, cost
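Both programs built by SeqPGAgent read their inputs from the fluid.data slots named "src" and "src_sequence_length", so the feed dicts passed to predict and learn only need those two int64 arrays; main.py's prepare_input produces exactly that dict when teacher_forcing is off. A placeholder feed (illustration only, no executor run):

import numpy as np

feed_dict = {
    "src": np.array([[4, 5, 6, 2], [7, 8, 2, 2]], dtype="int64"),   # padded token ids
    "src_sequence_length": np.array([4, 2], dtype="int64"),         # true lengths
}
# agent.predict(feed_dict) would only sample translations;
# agent.learn(feed_dict) samples, scores them with reward_func, and takes one PG step.
print({k: v.shape for k, v in feed_dict.items()})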
PaddleNLP/PaddleTextGEN/seq2seq_rl/reader.py  (new file, mode 100644)
# -*- coding: utf-8 -*-
# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Utilities for parsing PTB text files."""
from __future__ import absolute_import
from __future__ import division
from __future__ import print_function

import collections
import os
import io
import sys

import numpy as np

Py3 = sys.version_info[0] == 3

UNK_ID = 0


def _read_words(filename):
    data = []
    with io.open(filename, "r", encoding='utf-8') as f:
        if Py3:
            return f.read().replace("\n", "<eos>").split()
        else:
            return f.read().decode("utf-8").replace(u"\n", u"<eos>").split()


def read_all_line(filename):
    data = []
    with io.open(filename, "r", encoding='utf-8') as f:
        for line in f.readlines():
            data.append(line.strip())
    return data
def _build_vocab(filename):
    vocab_dict = {}
    ids = 0
    with io.open(filename, "r", encoding='utf-8') as f:
        for line in f.readlines():
            vocab_dict[line.strip()] = ids
            ids += 1

    print("vocab word num", ids)

    return vocab_dict


def _para_file_to_ids(src_file, tar_file, src_vocab, tar_vocab):
    src_data = []
    with io.open(src_file, "r", encoding='utf-8') as f_src:
        for line in f_src.readlines():
            arra = line.strip().split()
            ids = [src_vocab[w] if w in src_vocab else UNK_ID for w in arra]
            src_data.append(ids)

    tar_data = []
    with io.open(tar_file, "r", encoding='utf-8') as f_tar:
        for line in f_tar.readlines():
            arra = line.strip().split()
            ids = [tar_vocab[w] if w in tar_vocab else UNK_ID for w in arra]
            ids = [1] + ids + [2]
            tar_data.append(ids)

    return src_data, tar_data


def filter_len(src, tar, max_sequence_len=50):
    new_src = []
    new_tar = []

    for id1, id2 in zip(src, tar):
        if len(id1) > max_sequence_len:
            id1 = id1[:max_sequence_len]
        if len(id2) > max_sequence_len + 2:
            id2 = id2[:max_sequence_len + 2]

        new_src.append(id1)
        new_tar.append(id2)

    return new_src, new_tar


def raw_data(src_lang,
             tar_lang,
             vocab_prefix,
             train_prefix,
             eval_prefix,
             test_prefix,
             max_sequence_len=50):

    src_vocab_file = vocab_prefix + "." + src_lang
    tar_vocab_file = vocab_prefix + "." + tar_lang

    src_train_file = train_prefix + "." + src_lang
    tar_train_file = train_prefix + "." + tar_lang

    src_eval_file = eval_prefix + "." + src_lang
    tar_eval_file = eval_prefix + "." + tar_lang

    src_test_file = test_prefix + "." + src_lang
    tar_test_file = test_prefix + "." + tar_lang

    src_vocab = _build_vocab(src_vocab_file)
    tar_vocab = _build_vocab(tar_vocab_file)

    train_src, train_tar = _para_file_to_ids(src_train_file, tar_train_file,
                                             src_vocab, tar_vocab)
    train_src, train_tar = filter_len(
        train_src, train_tar, max_sequence_len=max_sequence_len)
    eval_src, eval_tar = _para_file_to_ids(src_eval_file, tar_eval_file,
                                           src_vocab, tar_vocab)

    test_src, test_tar = _para_file_to_ids(src_test_file, tar_test_file,
                                           src_vocab, tar_vocab)

    return (train_src, train_tar), (eval_src, eval_tar), (test_src, test_tar), \
        (src_vocab, tar_vocab)
def raw_mono_data(vocab_file, file_path):

    src_vocab = _build_vocab(vocab_file)

    test_src, test_tar = _para_file_to_ids(file_path, file_path,
                                           src_vocab, src_vocab)

    return (test_src, test_tar)


def get_data_iter(raw_data, batch_size, mode='train', enable_ce=False,
                  cache_num=20):

    src_data, tar_data = raw_data

    data_len = len(src_data)

    index = np.arange(data_len)
    if mode == "train" and not enable_ce:
        np.random.shuffle(index)

    def to_pad_np(data, source=False):
        max_len = 0
        for ele in data:
            if len(ele) > max_len:
                max_len = len(ele)

        batch_size = len(data)
        ids = np.ones((batch_size, max_len), dtype='int64') * 2
        mask = np.zeros((batch_size), dtype='int64')

        for i, ele in enumerate(data):
            ids[i, :len(ele)] = ele
            if not source:
                mask[i] = len(ele) - 1
            else:
                mask[i] = len(ele)

        return ids, mask

    b_src = []

    if mode != "train":
        cache_num = 1
    for j in range(data_len):
        if len(b_src) == batch_size * cache_num:
            # build batch size
            # sort
            new_cache = sorted(
                b_src, key=lambda k: len(k[0])) if mode == "train" else b_src

            for i in range(cache_num):
                batch_data = new_cache[i * batch_size:(i + 1) * batch_size]
                src_cache = [w[0] for w in batch_data]
                tar_cache = [w[1] for w in batch_data]
                src_ids, src_mask = to_pad_np(src_cache, source=True)
                tar_ids, tar_mask = to_pad_np(tar_cache)
                yield (src_ids, src_mask, tar_ids, tar_mask)

            b_src = []

        b_src.append((src_data[index[j]], tar_data[index[j]]))

    new_cache = sorted(
        b_src, key=lambda k: len(k[0])) if mode == "train" else b_src
    for i in range(cache_num):
        batch_data = new_cache[i * batch_size:(i + 1) * batch_size]
        if len(batch_data) < batch_size:
            if mode == "train" or len(batch_data) == 0:
                continue
        src_cache = [w[0] for w in batch_data]
        tar_cache = [w[1] for w in batch_data]
        src_ids, src_mask = to_pad_np(src_cache, source=True)
        tar_ids, tar_mask = to_pad_np(tar_cache)
        yield (src_ids, src_mask, tar_ids, tar_mask)
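Finally, the padding scheme in to_pad_np is worth spelling out: every sentence in a batch is right-padded with id 2 (the <eos> id) and a per-sentence length is recorded, minus one on the target side, matching the shifted in_tar/label_tar split in main.py. A standalone NumPy illustration with toy ids (not from the commit):

import numpy as np

batch = [[5, 6, 7], [8, 9]]                                # toy token-id sentences
max_len = max(len(sent) for sent in batch)

ids = np.ones((len(batch), max_len), dtype="int64") * 2    # pad with id 2 (<eos>)
mask = np.zeros((len(batch),), dtype="int64")
for i, sent in enumerate(batch):
    ids[i, :len(sent)] = sent
    mask[i] = len(sent)                                    # len(sent) - 1 on the target side

print(ids)    # [[5 6 7] [8 9 2]]
print(mask)   # [3 2]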