Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
hapi
提交
16a3a921
H
hapi
项目概览
PaddlePaddle
/
hapi
通知
11
Star
2
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
4
列表
看板
标记
里程碑
合并请求
7
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
H
hapi
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
4
Issue
4
列表
看板
标记
里程碑
合并请求
7
合并请求
7
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
16a3a921
编写于
9月 16, 2020
作者:
G
guosheng
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Update Transformer, seq2seq, sequence_tagging to adapt to 2.0-beta hapi apis.
上级
2898c3fa
变更
15
隐藏空白更改
内联
并排
Showing
15 changed file
with
138 addition
and
141 deletion
+138
-141
bert/readme.md
bert/readme.md
+2
-0
bert_leveldb/readme.md
bert_leveldb/readme.md
+2
-0
sentiment_classification/README.md
sentiment_classification/README.md
+2
-1
seq2seq/predict.py
seq2seq/predict.py
+17
-14
seq2seq/seq2seq_attn.py
seq2seq/seq2seq_attn.py
+2
-5
seq2seq/seq2seq_base.py
seq2seq/seq2seq_base.py
+4
-8
seq2seq/train.py
seq2seq/train.py
+10
-13
seq2seq/utility.py
seq2seq/utility.py
+5
-6
sequence_tagging/eval.py
sequence_tagging/eval.py
+9
-8
sequence_tagging/predict.py
sequence_tagging/predict.py
+8
-4
sequence_tagging/sequence_tagging.py
sequence_tagging/sequence_tagging.py
+8
-9
sequence_tagging/train.py
sequence_tagging/train.py
+9
-10
transformer/predict.py
transformer/predict.py
+27
-25
transformer/train.py
transformer/train.py
+29
-31
transformer/transformer.py
transformer/transformer.py
+4
-7
未找到文件。
bert/readme.md
浏览文件 @
16a3a921
**仍在开发中,待完成**
1.
download data: wget https://paddle-hapi.bj.bcebos.com/data/bert_data.tar.gz
2.
unzip data: tar -zvxf bert_data.tar.gz
...
...
bert_leveldb/readme.md
浏览文件 @
16a3a921
**仍在开发中,待完成**
0.
python3.7 -m pip install leveldb
1.
download data: wget https://paddle-hapi.bj.bcebos.com/data/bert_data.tar.gz
...
...
sentiment_classification/README.md
浏览文件 @
16a3a921
## 简介
**仍在开发中,待完成**
## 简介
情感是人类的一种高级智能行为,为了识别文本的情感倾向,需要深入的语义建模。另外,不同领域(如餐饮、体育)在情感的表达各不相同,因而需要有大规模覆盖各个领域的数据进行模型训练。为此,我们通过基于深度学习的语义模型和大规模数据挖掘解决上述两个问题。效果上,我们基于开源情感倾向分类数据集ChnSentiCorp进行评测。具体数据如下所示:
...
...
seq2seq/predict.py
浏览文件 @
16a3a921
...
...
@@ -19,11 +19,12 @@ import random
from
functools
import
partial
import
numpy
as
np
import
paddle
import
paddle.fluid
as
fluid
from
paddle.fluid.layers.utils
import
flatten
from
paddle.fluid.io
import
DataLoader
from
paddle.static
import
InputSpec
as
Input
from
paddle.incubate.hapi.model
import
Input
,
set_device
from
args
import
parse_args
from
seq2seq_base
import
BaseInferModel
from
seq2seq_attn
import
AttentionInferModel
...
...
@@ -48,7 +49,7 @@ def post_process_seq(seq, bos_idx, eos_idx, output_bos=False,
def
do_predict
(
args
):
device
=
set_device
(
"gpu"
if
args
.
use_gpu
else
"cpu"
)
device
=
paddle
.
set_device
(
"gpu"
if
args
.
use_gpu
else
"cpu"
)
fluid
.
enable_dygraph
(
device
)
if
args
.
eager_run
else
None
# define model
...
...
@@ -84,19 +85,21 @@ def do_predict(args):
return_list
=
True
)
model_maker
=
AttentionInferModel
if
args
.
attention
else
BaseInferModel
model
=
model_maker
(
args
.
src_vocab_size
,
args
.
tar_vocab_size
,
args
.
hidden_size
,
args
.
hidden_size
,
args
.
num_layers
,
args
.
dropout
,
bos_id
=
bos_id
,
eos_id
=
eos_id
,
beam_size
=
args
.
beam_size
,
max_out_len
=
256
)
model
=
paddle
.
Model
(
model_maker
(
args
.
src_vocab_size
,
args
.
tar_vocab_size
,
args
.
hidden_size
,
args
.
hidden_size
,
args
.
num_layers
,
args
.
dropout
,
bos_id
=
bos_id
,
eos_id
=
eos_id
,
beam_size
=
args
.
beam_size
,
max_out_len
=
256
),
inputs
=
inputs
)
model
.
prepare
(
inputs
=
inputs
,
device
=
device
)
model
.
prepare
()
# load the trained model
assert
args
.
reload_model
,
(
...
...
seq2seq/seq2seq_attn.py
浏览文件 @
16a3a921
...
...
@@ -18,10 +18,7 @@ from paddle.fluid import ParamAttr
from
paddle.fluid.initializer
import
UniformInitializer
from
paddle.fluid.dygraph
import
Embedding
,
Linear
,
Layer
from
paddle.fluid.layers
import
BeamSearchDecoder
from
paddle.incubate.hapi.model
import
Model
from
paddle.incubate.hapi.loss
import
Loss
from
paddle.incubate.hapi.text
import
DynamicDecode
,
RNN
,
BasicLSTMCell
,
RNNCell
from
paddle.text
import
DynamicDecode
,
RNN
,
BasicLSTMCell
,
RNNCell
from
seq2seq_base
import
Encoder
...
...
@@ -138,7 +135,7 @@ class Decoder(Layer):
return
predict
class
AttentionModel
(
Model
):
class
AttentionModel
(
Layer
):
def
__init__
(
self
,
src_vocab_size
,
trg_vocab_size
,
...
...
seq2seq/seq2seq_base.py
浏览文件 @
16a3a921
...
...
@@ -18,18 +18,14 @@ from paddle.fluid import ParamAttr
from
paddle.fluid.initializer
import
UniformInitializer
from
paddle.fluid.dygraph
import
Embedding
,
Linear
,
Layer
from
paddle.fluid.layers
import
BeamSearchDecoder
from
paddle.text
import
DynamicDecode
,
RNN
,
BasicLSTMCell
,
RNNCell
from
paddle.incubate.hapi.model
import
Model
from
paddle.incubate.hapi.loss
import
Loss
from
paddle.incubate.hapi.text
import
DynamicDecode
,
RNN
,
BasicLSTMCell
,
RNNCell
class
CrossEntropyCriterion
(
Loss
):
class
CrossEntropyCriterion
(
Layer
):
def
__init__
(
self
):
super
(
CrossEntropyCriterion
,
self
).
__init__
()
def
forward
(
self
,
outputs
,
labels
):
predict
,
(
trg_length
,
label
)
=
outputs
[
0
],
labels
def
forward
(
self
,
predict
,
trg_length
,
label
):
# for target padding mask
mask
=
layers
.
sequence_mask
(
trg_length
,
maxlen
=
layers
.
shape
(
predict
)[
1
],
dtype
=
predict
.
dtype
)
...
...
@@ -140,7 +136,7 @@ class Decoder(Layer):
return
predict
class
BaseModel
(
Model
):
class
BaseModel
(
Layer
):
def
__init__
(
self
,
src_vocab_size
,
trg_vocab_size
,
...
...
seq2seq/train.py
浏览文件 @
16a3a921
...
...
@@ -15,14 +15,15 @@
import
logging
import
os
import
random
from
args
import
parse_args
from
functools
import
partial
import
numpy
as
np
import
paddle
import
paddle.fluid
as
fluid
from
paddle.fluid.io
import
DataLoader
from
paddle.static
import
InputSpec
as
Input
from
paddle.incubate.hapi.model
import
Input
,
set_device
from
args
import
parse_args
from
seq2seq_base
import
BaseModel
,
CrossEntropyCriterion
from
seq2seq_attn
import
AttentionModel
from
reader
import
create_data_loader
...
...
@@ -30,7 +31,7 @@ from utility import PPL, TrainCallback, get_model_cls
def
do_train
(
args
):
device
=
set_device
(
"gpu"
if
args
.
use_gpu
else
"cpu"
)
device
=
paddle
.
set_device
(
"gpu"
if
args
.
use_gpu
else
"cpu"
)
fluid
.
enable_dygraph
(
device
)
if
args
.
eager_run
else
None
if
args
.
enable_ce
:
...
...
@@ -58,9 +59,11 @@ def do_train(args):
model_maker
=
get_model_cls
(
AttentionModel
)
if
args
.
attention
else
get_model_cls
(
BaseModel
)
model
=
model_maker
(
args
.
src_vocab_size
,
args
.
tar_vocab_size
,
args
.
hidden_size
,
args
.
hidden_size
,
args
.
num_layers
,
args
.
dropout
)
model
=
paddle
.
Model
(
model_maker
(
args
.
src_vocab_size
,
args
.
tar_vocab_size
,
args
.
hidden_size
,
args
.
hidden_size
,
args
.
num_layers
,
args
.
dropout
),
inputs
=
inputs
,
labels
=
labels
)
grad_clip
=
fluid
.
clip
.
GradientClipByGlobalNorm
(
clip_norm
=
args
.
max_grad_norm
)
optimizer
=
fluid
.
optimizer
.
Adam
(
...
...
@@ -69,13 +72,7 @@ def do_train(args):
grad_clip
=
grad_clip
)
ppl_metric
=
PPL
(
reset_freq
=
100
)
# ppl for every 100 batches
model
.
prepare
(
optimizer
,
CrossEntropyCriterion
(),
ppl_metric
,
inputs
=
inputs
,
labels
=
labels
,
device
=
device
)
model
.
prepare
(
optimizer
,
CrossEntropyCriterion
(),
ppl_metric
)
model
.
fit
(
train_data
=
train_loader
,
eval_data
=
eval_loader
,
epochs
=
args
.
max_epoch
,
...
...
seq2seq/utility.py
浏览文件 @
16a3a921
...
...
@@ -15,14 +15,13 @@
import
math
import
functools
import
paddle
import
paddle.fluid
as
fluid
from
paddle.metric
import
Metric
from
paddle.text
import
BasicLSTMCell
from
paddle.incubate.hapi.metrics
import
Metric
from
paddle.incubate.hapi.callbacks
import
ProgBarLogger
from
paddle.incubate.hapi.text
import
BasicLSTMCell
class
TrainCallback
(
ProgBarLogger
):
class
TrainCallback
(
paddle
.
callbacks
.
ProgBarLogger
):
def
__init__
(
self
,
ppl
,
log_freq
,
verbose
=
2
):
super
(
TrainCallback
,
self
).
__init__
(
log_freq
,
verbose
)
self
.
ppl
=
ppl
...
...
@@ -58,7 +57,7 @@ class PPL(Metric):
self
.
reset_freq
=
reset_freq
self
.
reset
()
def
add_metric_op
(
self
,
pred
,
seq_length
,
label
):
def
compute
(
self
,
pred
,
seq_length
,
label
):
word_num
=
fluid
.
layers
.
reduce_sum
(
seq_length
)
return
word_num
...
...
sequence_tagging/eval.py
浏览文件 @
16a3a921
...
...
@@ -18,9 +18,10 @@ SequenceTagging eval structure
from
__future__
import
division
from
__future__
import
print_function
import
paddle
import
paddle.fluid
as
fluid
from
paddle.fluid.layers.utils
import
flatten
from
paddle.
incubate.hapi.model
import
Input
,
set_device
from
paddle.
static
import
InputSpec
as
Input
from
sequence_tagging
import
SeqTagging
,
LacLoss
,
ChunkEval
from
reader
import
LacDataset
,
LacDataLoader
...
...
@@ -29,7 +30,7 @@ from utils.configure import PDConfig
def
main
(
args
):
place
=
set_device
(
args
.
device
)
place
=
paddle
.
set_device
(
args
.
device
)
fluid
.
enable_dygraph
(
place
)
if
args
.
dynamic
else
None
inputs
=
[
...
...
@@ -45,14 +46,14 @@ def main(args):
vocab_size
=
dataset
.
vocab_size
num_labels
=
dataset
.
num_labels
model
=
SeqTagging
(
args
,
vocab_size
,
num_labels
,
mode
=
"test"
)
model
=
paddle
.
Model
(
SeqTagging
(
args
,
vocab_size
,
num_labels
,
mode
=
"test"
),
inputs
=
inputs
,
labels
=
labels
)
model
.
mode
=
"test"
model
.
prepare
(
metrics
=
ChunkEval
(
num_labels
),
inputs
=
inputs
,
labels
=
labels
,
device
=
place
)
model
.
prepare
(
metrics
=
ChunkEval
(
num_labels
))
model
.
load
(
args
.
init_from_checkpoint
,
skip_mismatch
=
True
)
eval_result
=
model
.
evaluate
(
...
...
sequence_tagging/predict.py
浏览文件 @
16a3a921
...
...
@@ -20,9 +20,10 @@ from __future__ import print_function
import
six
import
paddle
import
paddle.fluid
as
fluid
from
paddle.fluid.layers.utils
import
flatten
from
paddle.
incubate.hapi.model
import
Input
,
set_device
from
paddle.
static
import
InputSpec
as
Input
from
sequence_tagging
import
SeqTagging
,
LacLoss
,
ChunkEval
from
reader
import
LacDataset
,
LacDataLoader
...
...
@@ -31,7 +32,7 @@ from utils.configure import PDConfig
def
main
(
args
):
place
=
set_device
(
args
.
device
)
place
=
paddle
.
set_device
(
args
.
device
)
fluid
.
enable_dygraph
(
place
)
if
args
.
dynamic
else
None
inputs
=
[
...
...
@@ -46,10 +47,13 @@ def main(args):
vocab_size
=
dataset
.
vocab_size
num_labels
=
dataset
.
num_labels
model
=
SeqTagging
(
args
,
vocab_size
,
num_labels
,
mode
=
"predict"
)
model
=
paddle
.
Model
(
SeqTagging
(
args
,
vocab_size
,
num_labels
,
mode
=
"predict"
),
inputs
=
inputs
)
model
.
mode
=
"test"
model
.
prepare
(
inputs
=
inputs
)
model
.
prepare
()
model
.
load
(
args
.
init_from_checkpoint
,
skip_mismatch
=
True
)
...
...
sequence_tagging/sequence_tagging.py
浏览文件 @
16a3a921
...
...
@@ -25,17 +25,16 @@ import math
import
argparse
import
numpy
as
np
import
paddle
import
paddle.fluid
as
fluid
from
paddle.incubate.hapi.metrics
import
Metric
from
paddle.incubate.hapi.model
import
Model
from
paddle.incubate.hapi.loss
import
Loss
from
paddle.incubate.hapi.text
import
SequenceTagging
from
paddle.metric
import
Metric
from
paddle.text
import
SequenceTagging
from
utils.check
import
check_gpu
,
check_version
from
utils.configure
import
PDConfig
class
SeqTagging
(
Model
):
class
SeqTagging
(
fluid
.
dygraph
.
Layer
):
def
__init__
(
self
,
args
,
vocab_size
,
num_labels
,
length
=
None
,
mode
=
"train"
):
super
(
SeqTagging
,
self
).
__init__
()
...
...
@@ -131,13 +130,13 @@ class Chunk_eval(fluid.dygraph.Layer):
return
(
num_infer_chunks
,
num_label_chunks
,
num_correct_chunks
)
class
LacLoss
(
Loss
):
class
LacLoss
(
fluid
.
dygraph
.
Layer
):
def
__init__
(
self
):
super
(
LacLoss
,
self
).
__init__
()
pass
def
forward
(
self
,
outputs
,
label
s
):
avg_cost
=
output
s
[
1
]
def
forward
(
self
,
*
arg
s
):
avg_cost
=
arg
s
[
1
]
return
avg_cost
...
...
@@ -149,7 +148,7 @@ class ChunkEval(Metric):
int
(
math
.
ceil
((
num_labels
-
1
)
/
2.0
)),
"IOB"
)
self
.
reset
()
def
add_metric_op
(
self
,
*
args
):
def
compute
(
self
,
*
args
):
crf_decode
=
args
[
0
]
lengths
=
args
[
2
]
label
=
args
[
3
]
...
...
sequence_tagging/train.py
浏览文件 @
16a3a921
...
...
@@ -18,9 +18,10 @@ SequenceTagging network structure
from
__future__
import
division
from
__future__
import
print_function
import
paddle
import
paddle.fluid
as
fluid
from
paddle.fluid.optimizer
import
AdamOptimizer
from
paddle.
incubate.hapi.model
import
Input
,
set_device
from
paddle.
static
import
InputSpec
as
Input
from
sequence_tagging
import
SeqTagging
,
LacLoss
,
ChunkEval
from
reader
import
LacDataset
,
LacDataLoader
...
...
@@ -29,7 +30,7 @@ from utils.configure import PDConfig
def
main
(
args
):
place
=
set_device
(
args
.
device
)
place
=
paddle
.
set_device
(
args
.
device
)
fluid
.
enable_dygraph
(
place
)
if
args
.
dynamic
else
None
inputs
=
[
...
...
@@ -48,19 +49,17 @@ def main(args):
vocab_size
=
dataset
.
vocab_size
num_labels
=
dataset
.
num_labels
model
=
SeqTagging
(
args
,
vocab_size
,
num_labels
,
mode
=
"train"
)
model
=
paddle
.
Model
(
SeqTagging
(
args
,
vocab_size
,
num_labels
,
mode
=
"train"
),
inputs
=
inputs
,
labels
=
labels
)
optim
=
AdamOptimizer
(
learning_rate
=
args
.
base_learning_rate
,
parameter_list
=
model
.
parameters
())
model
.
prepare
(
optim
,
LacLoss
(),
ChunkEval
(
num_labels
),
inputs
=
inputs
,
labels
=
labels
,
device
=
args
.
device
)
model
.
prepare
(
optim
,
LacLoss
(),
ChunkEval
(
num_labels
))
if
args
.
init_from_checkpoint
:
model
.
load
(
args
.
init_from_checkpoint
)
...
...
transformer/predict.py
浏览文件 @
16a3a921
...
...
@@ -21,11 +21,11 @@ import paddle
import
paddle.fluid
as
fluid
from
paddle.io
import
DataLoader
from
paddle.fluid.layers.utils
import
flatten
from
paddle.static
import
InputSpec
as
Input
from
utils.configure
import
PDConfig
from
utils.check
import
check_gpu
,
check_version
from
paddle.incubate.hapi.model
import
Input
,
set_device
from
reader
import
prepare_infer_input
,
Seq2SeqDataset
,
Seq2SeqBatchSampler
from
transformer
import
InferTransformer
...
...
@@ -48,7 +48,7 @@ def post_process_seq(seq, bos_idx, eos_idx, output_bos=False,
def
do_predict
(
args
):
device
=
set_device
(
"gpu"
if
args
.
use_cuda
else
"cpu"
)
device
=
paddle
.
set_device
(
"gpu"
if
args
.
use_cuda
else
"cpu"
)
fluid
.
enable_dygraph
(
device
)
if
args
.
eager_run
else
None
inputs
=
[
...
...
@@ -99,37 +99,39 @@ def do_predict(args):
return_list
=
True
)
# define model
transformer
=
InferTransformer
(
args
.
src_vocab_size
,
args
.
trg_vocab_size
,
args
.
max_length
+
1
,
args
.
n_layer
,
args
.
n_head
,
args
.
d_key
,
args
.
d_value
,
args
.
d_model
,
args
.
d_inner_hid
,
args
.
prepostprocess_dropout
,
args
.
attention_dropout
,
args
.
relu_dropout
,
args
.
preprocess_cmd
,
args
.
postprocess_cmd
,
args
.
weight_sharing
,
args
.
bos_idx
,
args
.
eos_idx
,
beam_size
=
args
.
beam_size
,
max_out_len
=
args
.
max_out_len
)
transformer
.
prepare
(
inputs
=
inputs
,
device
=
device
)
model
=
paddle
.
Model
(
InferTransformer
(
args
.
src_vocab_size
,
args
.
trg_vocab_size
,
args
.
max_length
+
1
,
args
.
n_layer
,
args
.
n_head
,
args
.
d_key
,
args
.
d_value
,
args
.
d_model
,
args
.
d_inner_hid
,
args
.
prepostprocess_dropout
,
args
.
attention_dropout
,
args
.
relu_dropout
,
args
.
preprocess_cmd
,
args
.
postprocess_cmd
,
args
.
weight_sharing
,
args
.
bos_idx
,
args
.
eos_idx
,
beam_size
=
args
.
beam_size
,
max_out_len
=
args
.
max_out_len
),
inputs
)
model
.
prepare
()
# load the trained model
assert
args
.
init_from_params
,
(
"Please set init_from_params to load the infer model."
)
transformer
.
load
(
args
.
init_from_params
)
model
.
load
(
args
.
init_from_params
)
# TODO: use model.predict when support variant length
f
=
open
(
args
.
output_file
,
"wb"
)
for
data
in
data_loader
():
finished_seq
=
transformer
.
test_batch
(
inputs
=
flatten
(
data
))[
0
]
finished_seq
=
model
.
test_batch
(
inputs
=
flatten
(
data
))[
0
]
finished_seq
=
np
.
transpose
(
finished_seq
,
[
0
,
2
,
1
])
for
ins
in
finished_seq
:
for
beam_idx
,
beam
in
enumerate
(
ins
):
...
...
transformer/train.py
浏览文件 @
16a3a921
...
...
@@ -19,17 +19,16 @@ import numpy as np
import
paddle
import
paddle.fluid
as
fluid
from
paddle.io
import
DataLoader
from
paddle.static
import
InputSpec
as
Input
from
utils.configure
import
PDConfig
from
utils.check
import
check_gpu
,
check_version
from
paddle.incubate.hapi.model
import
Input
,
set_device
from
paddle.incubate.hapi.callbacks
import
ProgBarLogger
from
reader
import
create_data_loader
from
transformer
import
Transformer
,
CrossEntropyCriterion
class
TrainCallback
(
ProgBarLogger
):
class
TrainCallback
(
paddle
.
callbacks
.
ProgBarLogger
):
def
__init__
(
self
,
args
,
verbose
=
2
,
...
...
@@ -75,7 +74,7 @@ class TrainCallback(ProgBarLogger):
def
do_train
(
args
):
device
=
set_device
(
"gpu"
if
args
.
use_cuda
else
"cpu"
)
device
=
paddle
.
set_device
(
"gpu"
if
args
.
use_cuda
else
"cpu"
)
fluid
.
enable_dygraph
(
device
)
if
args
.
eager_run
else
None
# set seed for CE
...
...
@@ -119,14 +118,16 @@ def do_train(args):
eval_loader
,
eval_steps_fn
)
=
create_data_loader
(
args
,
device
)
# define model
transformer
=
Transformer
(
args
.
src_vocab_size
,
args
.
trg_vocab_size
,
args
.
max_length
+
1
,
args
.
n_layer
,
args
.
n_head
,
args
.
d_key
,
args
.
d_value
,
args
.
d_model
,
args
.
d_inner_hid
,
args
.
prepostprocess_dropout
,
args
.
attention_dropout
,
args
.
relu_dropout
,
args
.
preprocess_cmd
,
args
.
postprocess_cmd
,
args
.
weight_sharing
,
args
.
bos_idx
,
args
.
eos_idx
)
transformer
.
prepare
(
model
=
paddle
.
Model
(
Transformer
(
args
.
src_vocab_size
,
args
.
trg_vocab_size
,
args
.
max_length
+
1
,
args
.
n_layer
,
args
.
n_head
,
args
.
d_key
,
args
.
d_value
,
args
.
d_model
,
args
.
d_inner_hid
,
args
.
prepostprocess_dropout
,
args
.
attention_dropout
,
args
.
relu_dropout
,
args
.
preprocess_cmd
,
args
.
postprocess_cmd
,
args
.
weight_sharing
,
args
.
bos_idx
,
args
.
eos_idx
),
inputs
,
labels
)
model
.
prepare
(
fluid
.
optimizer
.
Adam
(
learning_rate
=
fluid
.
layers
.
noam_decay
(
args
.
d_model
,
...
...
@@ -135,32 +136,29 @@ def do_train(args):
beta1
=
args
.
beta1
,
beta2
=
args
.
beta2
,
epsilon
=
float
(
args
.
eps
),
parameter_list
=
transformer
.
parameters
()),
CrossEntropyCriterion
(
args
.
label_smooth_eps
),
inputs
=
inputs
,
labels
=
labels
,
device
=
device
)
parameter_list
=
model
.
parameters
()),
CrossEntropyCriterion
(
args
.
label_smooth_eps
))
## init from some checkpoint, to resume the previous training
if
args
.
init_from_checkpoint
:
transformer
.
load
(
args
.
init_from_checkpoint
)
model
.
load
(
args
.
init_from_checkpoint
)
## init from some pretrain models, to better solve the current task
if
args
.
init_from_pretrain_model
:
transformer
.
load
(
args
.
init_from_pretrain_model
,
reset_optimizer
=
True
)
model
.
load
(
args
.
init_from_pretrain_model
,
reset_optimizer
=
True
)
# model train
transformer
.
fit
(
train_data
=
train_loader
,
eval_data
=
eval_loader
,
epochs
=
args
.
epoch
,
eval_freq
=
1
,
save_freq
=
1
,
save_dir
=
args
.
save_model
,
callbacks
=
[
TrainCallback
(
args
,
train_steps_fn
=
train_steps_fn
,
eval_steps_fn
=
eval_steps_fn
)
])
model
.
fit
(
train_data
=
train_loader
,
eval_data
=
eval_loader
,
epochs
=
args
.
epoch
,
eval_freq
=
1
,
save_freq
=
1
,
save_dir
=
args
.
save_model
,
callbacks
=
[
TrainCallback
(
args
,
train_steps_fn
=
train_steps_fn
,
eval_steps_fn
=
eval_steps_fn
)
])
if
__name__
==
"__main__"
:
...
...
transformer/transformer.py
浏览文件 @
16a3a921
...
...
@@ -19,9 +19,7 @@ import numpy as np
import
paddle.fluid
as
fluid
import
paddle.fluid.layers
as
layers
from
paddle.fluid.dygraph
import
Embedding
,
LayerNorm
,
Linear
,
Layer
from
paddle.incubate.hapi.model
import
Model
from
paddle.incubate.hapi.loss
import
Loss
from
paddle.incubate.hapi.text
import
TransformerBeamSearchDecoder
,
DynamicDecode
from
paddle.text
import
TransformerBeamSearchDecoder
,
DynamicDecode
def
position_encoding_init
(
n_position
,
d_pos_vec
):
...
...
@@ -498,13 +496,12 @@ class WrapDecoder(Layer):
return
logits
class
CrossEntropyCriterion
(
L
oss
):
class
CrossEntropyCriterion
(
L
ayer
):
def
__init__
(
self
,
label_smooth_eps
):
super
(
CrossEntropyCriterion
,
self
).
__init__
()
self
.
label_smooth_eps
=
label_smooth_eps
def
forward
(
self
,
outputs
,
labels
):
predict
,
(
label
,
weights
)
=
outputs
[
0
],
labels
def
forward
(
self
,
predict
,
label
,
weights
):
if
self
.
label_smooth_eps
:
label
=
layers
.
label_smooth
(
label
=
layers
.
one_hot
(
...
...
@@ -523,7 +520,7 @@ class CrossEntropyCriterion(Loss):
return
avg_cost
class
Transformer
(
Model
):
class
Transformer
(
Layer
):
"""
model
"""
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录