Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
hapi
提交
666339f5
H
hapi
项目概览
PaddlePaddle
/
hapi
通知
11
Star
2
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
4
列表
看板
标记
里程碑
合并请求
7
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
H
hapi
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
4
Issue
4
列表
看板
标记
里程碑
合并请求
7
合并请求
7
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
666339f5
编写于
4月 23, 2020
作者:
G
guosheng
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Refine transformer batch length and move transformer to examples.
Refine len for data_loader in model.py.
上级
ecb6d64c
变更
14
隐藏空白更改
内联
并排
Showing
14 changed file
with
77 addition
and
47 deletion
+77
-47
examples/transformer/README.md
examples/transformer/README.md
+1
-1
examples/transformer/gen_data.sh
examples/transformer/gen_data.sh
+0
-0
examples/transformer/images/multi_head_attention.png
examples/transformer/images/multi_head_attention.png
+0
-0
examples/transformer/images/transformer_network.png
examples/transformer/images/transformer_network.png
+0
-0
examples/transformer/predict.py
examples/transformer/predict.py
+3
-6
examples/transformer/reader.py
examples/transformer/reader.py
+8
-5
examples/transformer/train.py
examples/transformer/train.py
+25
-8
examples/transformer/transformer.py
examples/transformer/transformer.py
+2
-2
examples/transformer/transformer.yaml
examples/transformer/transformer.yaml
+0
-0
examples/transformer/utils/__init__.py
examples/transformer/utils/__init__.py
+0
-0
examples/transformer/utils/check.py
examples/transformer/utils/check.py
+0
-0
examples/transformer/utils/configure.py
examples/transformer/utils/configure.py
+11
-5
hapi/callbacks.py
hapi/callbacks.py
+6
-6
hapi/model.py
hapi/model.py
+21
-14
未找到文件。
transformer/README.md
→
examples/
transformer/README.md
浏览文件 @
666339f5
...
...
@@ -201,7 +201,7 @@ python -u predict.py \
--special_token
'<s>'
'<e>'
'<unk>'
\
--predict_file
gen_data/wmt16_ende_data_bpe/newstest2014.tok.bpe.32000.en-de
\
--batch_size
32
\
--init_from_params
b
ase
_model_dygraph/step_100000/transformer
\
--init_from_params
b
ig
_model_dygraph/step_100000/transformer
\
--beam_size
5
\
--max_out_len
255
\
--output_file
predict.txt
\
...
...
transformer/gen_data.sh
→
examples/
transformer/gen_data.sh
浏览文件 @
666339f5
文件已移动
transformer/images/multi_head_attention.png
→
examples/
transformer/images/multi_head_attention.png
浏览文件 @
666339f5
文件已移动
transformer/images/transformer_network.png
→
examples/
transformer/images/transformer_network.png
浏览文件 @
666339f5
文件已移动
transformer/predict.py
→
examples/
transformer/predict.py
浏览文件 @
666339f5
...
...
@@ -14,9 +14,6 @@
import
logging
import
os
import
six
import
sys
sys
.
path
.
append
(
os
.
path
.
dirname
(
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))))
from
functools
import
partial
import
numpy
as
np
...
...
@@ -28,9 +25,9 @@ from paddle.fluid.layers.utils import flatten
from
utils.configure
import
PDConfig
from
utils.check
import
check_gpu
,
check_version
from
model
import
Input
,
set_device
from
hapi.
model
import
Input
,
set_device
from
reader
import
prepare_infer_input
,
Seq2SeqDataset
,
Seq2SeqBatchSampler
from
transformer
import
InferTransformer
,
position_encoding_init
from
transformer
import
InferTransformer
def
post_process_seq
(
seq
,
bos_idx
,
eos_idx
,
output_bos
=
False
,
...
...
@@ -132,7 +129,7 @@ def do_predict(args):
# TODO: use model.predict when support variant length
f
=
open
(
args
.
output_file
,
"wb"
)
for
data
in
data_loader
():
finished_seq
=
transformer
.
test
(
inputs
=
flatten
(
data
))[
0
]
finished_seq
=
transformer
.
test
_batch
(
inputs
=
flatten
(
data
))[
0
]
finished_seq
=
np
.
transpose
(
finished_seq
,
[
0
,
2
,
1
])
for
ins
in
finished_seq
:
for
beam_idx
,
beam
in
enumerate
(
ins
):
...
...
transformer/reader.py
→
examples/
transformer/reader.py
浏览文件 @
666339f5
...
...
@@ -13,7 +13,7 @@
# limitations under the License.
import
glob
import
s
ix
import
s
ys
import
os
import
io
import
itertools
...
...
@@ -26,7 +26,7 @@ from paddle.io import BatchSampler, DataLoader, Dataset
def
create_data_loader
(
args
,
device
):
data_loaders
=
[
None
,
None
]
data_loaders
=
[
(
None
,
None
)]
*
2
data_files
=
[
args
.
training_file
,
args
.
validation_file
]
if
args
.
validation_file
else
[
args
.
training_file
]
for
i
,
data_file
in
enumerate
(
data_files
):
...
...
@@ -65,7 +65,7 @@ def create_data_loader(args, device):
n_head
=
args
.
n_head
),
num_workers
=
0
,
# TODO: use multi-process
return_list
=
True
)
data_loaders
[
i
]
=
data_loader
data_loaders
[
i
]
=
(
data_loader
,
batch_sampler
.
__len__
)
return
data_loaders
...
...
@@ -476,6 +476,7 @@ class Seq2SeqBatchSampler(BatchSampler):
for
i
in
range
(
self
.
_nranks
)
]
for
batch
in
batches
]
batches
=
list
(
itertools
.
chain
.
from_iterable
(
batches
))
self
.
batch_number
=
(
len
(
batches
)
+
self
.
_nranks
-
1
)
//
self
.
_nranks
# for multi-device
for
batch_id
,
batch
in
enumerate
(
batches
):
...
...
@@ -489,11 +490,13 @@ class Seq2SeqBatchSampler(BatchSampler):
yield
batch_indices
def
__len__
(
self
):
if
hasattr
(
self
,
"batch_number"
):
#
return
self
.
batch_number
if
not
self
.
_use_token_batch
:
batch_number
=
(
len
(
self
.
_dataset
)
+
self
.
_batch_size
*
self
.
_nranks
-
1
)
//
(
self
.
_batch_size
*
self
.
_nranks
)
else
:
#
TODO(guosheng): fix the uncertain length
batch_number
=
1
#
for uncertain batch number, the actual value is self.batch_number
batch_number
=
sys
.
maxsize
return
batch_number
transformer/train.py
→
examples/
transformer/train.py
浏览文件 @
666339f5
...
...
@@ -14,9 +14,6 @@
import
logging
import
os
import
six
import
sys
sys
.
path
.
append
(
os
.
path
.
dirname
(
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))))
import
numpy
as
np
import
paddle
...
...
@@ -26,14 +23,18 @@ from paddle.io import DataLoader
from
utils.configure
import
PDConfig
from
utils.check
import
check_gpu
,
check_version
from
model
import
Input
,
set_device
from
callbacks
import
ProgBarLogger
from
hapi.
model
import
Input
,
set_device
from
hapi.
callbacks
import
ProgBarLogger
from
reader
import
create_data_loader
from
transformer
import
Transformer
,
CrossEntropyCriterion
class
TrainCallback
(
ProgBarLogger
):
def
__init__
(
self
,
args
,
verbose
=
2
):
def
__init__
(
self
,
args
,
verbose
=
2
,
train_steps_fn
=
None
,
eval_steps_fn
=
None
):
# TODO(guosheng): save according to step
super
(
TrainCallback
,
self
).
__init__
(
args
.
print_step
,
verbose
)
# the best cross-entropy value with label smoothing
...
...
@@ -42,11 +43,17 @@ class TrainCallback(ProgBarLogger):
(
1.
-
args
.
label_smooth_eps
))
+
args
.
label_smooth_eps
*
np
.
log
(
args
.
label_smooth_eps
/
(
args
.
trg_vocab_size
-
1
)
+
1e-20
))
self
.
loss_normalizer
=
loss_normalizer
self
.
train_steps_fn
=
train_steps_fn
self
.
eval_steps_fn
=
eval_steps_fn
def
on_train_begin
(
self
,
logs
=
None
):
super
(
TrainCallback
,
self
).
on_train_begin
(
logs
)
self
.
train_metrics
+=
[
"normalized loss"
,
"ppl"
]
def
on_train_batch_begin
(
self
,
step
,
logs
=
None
):
if
step
==
0
and
self
.
train_steps_fn
:
self
.
train_progbar
.
_num
=
self
.
train_steps_fn
()
def
on_train_batch_end
(
self
,
step
,
logs
=
None
):
logs
[
"normalized loss"
]
=
logs
[
"loss"
][
0
]
-
self
.
loss_normalizer
logs
[
"ppl"
]
=
np
.
exp
(
min
(
logs
[
"loss"
][
0
],
100
))
...
...
@@ -57,6 +64,10 @@ class TrainCallback(ProgBarLogger):
self
.
eval_metrics
=
list
(
self
.
eval_metrics
)
+
[
"normalized loss"
,
"ppl"
]
def
on_eval_batch_begin
(
self
,
step
,
logs
=
None
):
if
step
==
0
and
self
.
eval_steps_fn
:
self
.
eval_progbar
.
_num
=
self
.
eval_steps_fn
()
def
on_eval_batch_end
(
self
,
step
,
logs
=
None
):
logs
[
"normalized loss"
]
=
logs
[
"loss"
][
0
]
-
self
.
loss_normalizer
logs
[
"ppl"
]
=
np
.
exp
(
min
(
logs
[
"loss"
][
0
],
100
))
...
...
@@ -104,7 +115,8 @@ def do_train(args):
]
# def dataloader
train_loader
,
eval_loader
=
create_data_loader
(
args
,
device
)
(
train_loader
,
train_steps_fn
),
(
eval_loader
,
eval_steps_fn
)
=
create_data_loader
(
args
,
device
)
# define model
transformer
=
Transformer
(
...
...
@@ -142,7 +154,12 @@ def do_train(args):
eval_freq
=
1
,
save_freq
=
1
,
save_dir
=
args
.
save_model
,
callbacks
=
[
TrainCallback
(
args
)])
callbacks
=
[
TrainCallback
(
args
,
train_steps_fn
=
train_steps_fn
,
eval_steps_fn
=
eval_steps_fn
)
])
if
__name__
==
"__main__"
:
...
...
transformer/transformer.py
→
examples/
transformer/transformer.py
浏览文件 @
666339f5
...
...
@@ -20,8 +20,8 @@ import paddle.fluid as fluid
import
paddle.fluid.layers
as
layers
from
paddle.fluid.dygraph
import
Embedding
,
LayerNorm
,
Linear
,
Layer
,
to_variable
from
paddle.fluid.dygraph.learning_rate_scheduler
import
LearningRateDecay
from
model
import
Model
,
CrossEntropy
,
Loss
from
text
import
TransformerBeamSearchDecoder
,
DynamicDecode
from
hapi.
model
import
Model
,
CrossEntropy
,
Loss
from
hapi.
text
import
TransformerBeamSearchDecoder
,
DynamicDecode
def
position_encoding_init
(
n_position
,
d_pos_vec
):
...
...
transformer/transformer.yaml
→
examples/
transformer/transformer.yaml
浏览文件 @
666339f5
文件已移动
transformer/utils/__init__.py
→
examples/
transformer/utils/__init__.py
浏览文件 @
666339f5
文件已移动
transformer/utils/check.py
→
examples/
transformer/utils/check.py
浏览文件 @
666339f5
文件已移动
transformer/utils/configure.py
→
examples/
transformer/utils/configure.py
浏览文件 @
666339f5
...
...
@@ -195,13 +195,19 @@ class PDConfig(object):
"Whether to perform predicting."
)
self
.
default_g
.
add_arg
(
"do_eval"
,
bool
,
False
,
"Whether to perform evaluating."
)
self
.
default_g
.
add_arg
(
"do_save_inference_model"
,
bool
,
False
,
"Whether to perform model saving for inference."
)
self
.
default_g
.
add_arg
(
"do_save_inference_model"
,
bool
,
False
,
"Whether to perform model saving for inference."
)
# NOTE: args for profiler
self
.
default_g
.
add_arg
(
"is_profiler"
,
int
,
0
,
"the switch of profiler tools. (used for benchmark)"
)
self
.
default_g
.
add_arg
(
"profiler_path"
,
str
,
'./'
,
"the profiler output file path. (used for benchmark)"
)
self
.
default_g
.
add_arg
(
"max_iter"
,
int
,
0
,
"the max train batch num.(used for benchmark)"
)
self
.
default_g
.
add_arg
(
"is_profiler"
,
int
,
0
,
"the switch of profiler tools. (used for benchmark)"
)
self
.
default_g
.
add_arg
(
"profiler_path"
,
str
,
'./'
,
"the profiler output file path. (used for benchmark)"
)
self
.
default_g
.
add_arg
(
"max_iter"
,
int
,
0
,
"the max train batch num.(used for benchmark)"
)
self
.
parser
=
parser
...
...
hapi/callbacks.py
浏览文件 @
666339f5
...
...
@@ -215,13 +215,13 @@ class ProgBarLogger(Callback):
if
self
.
train_step
%
self
.
log_freq
==
0
and
self
.
verbose
and
ParallelEnv
(
).
local_rank
==
0
:
# if steps is not None, last step will update in on_epoch_end
if
self
.
steps
and
self
.
train_step
<
self
.
steps
:
if
self
.
steps
is
None
or
self
.
train_step
<
self
.
steps
:
self
.
_updates
(
logs
,
'train'
)
def
on_epoch_end
(
self
,
epoch
,
logs
=
None
):
logs
=
logs
or
{}
if
self
.
verbose
and
ParallelEnv
().
local_rank
==
0
:
if
self
.
train_step
%
self
.
log_freq
!=
0
and
self
.
verbose
and
ParallelEnv
(
).
local_rank
==
0
:
self
.
_updates
(
logs
,
'train'
)
def
on_eval_begin
(
self
,
logs
=
None
):
...
...
@@ -242,14 +242,14 @@ class ProgBarLogger(Callback):
if
self
.
eval_step
%
self
.
log_freq
==
0
and
self
.
verbose
and
ParallelEnv
(
).
local_rank
==
0
:
# if steps is not None, last step will update in on_epoch_end
if
self
.
eval_steps
and
self
.
eval_step
<
self
.
eval_steps
:
if
self
.
eval_steps
is
None
or
self
.
eval_step
<
self
.
eval_steps
:
self
.
_updates
(
logs
,
'eval'
)
def
on_eval_end
(
self
,
logs
=
None
):
logs
=
logs
or
{}
if
self
.
verbose
and
ParallelEnv
().
local_rank
==
0
:
self
.
_updates
(
logs
,
'eval'
)
if
self
.
eval_step
%
self
.
log_freq
!=
0
:
self
.
_updates
(
logs
,
'eval'
)
print
(
'Eval samples: %d'
%
(
self
.
evaled_samples
))
...
...
hapi/model.py
浏览文件 @
666339f5
...
...
@@ -576,14 +576,15 @@ class DynamicGraphAdapter(object):
if
labels
is
not
None
:
labels
=
[
to_variable
(
l
)
for
l
in
to_list
(
labels
)]
if
self
.
_nranks
>
1
:
outputs
=
self
.
ddp_model
.
forward
(
*
[
to_variable
(
x
)
for
x
in
inputs
])
outputs
=
self
.
ddp_model
.
forward
(
*
[
to_variable
(
x
)
for
x
in
inputs
])
losses
=
self
.
model
.
_loss_function
(
outputs
,
labels
)
final_loss
=
fluid
.
layers
.
sum
(
losses
)
final_loss
=
self
.
ddp_model
.
scale_loss
(
final_loss
)
final_loss
.
backward
()
self
.
ddp_model
.
apply_collective_grads
()
else
:
outputs
=
self
.
model
.
forward
(
*
[
to_variable
(
x
)
for
x
in
inputs
])
outputs
=
self
.
model
.
forward
(
*
[
to_variable
(
x
)
for
x
in
inputs
])
losses
=
self
.
model
.
_loss_function
(
outputs
,
labels
)
final_loss
=
fluid
.
layers
.
sum
(
losses
)
final_loss
.
backward
()
...
...
@@ -592,9 +593,9 @@ class DynamicGraphAdapter(object):
self
.
model
.
clear_gradients
()
metrics
=
[]
for
metric
in
self
.
model
.
_metrics
:
metric_outs
=
metric
.
add_metric_op
(
*
(
to_list
(
outputs
)
+
to_list
(
labels
)))
m
=
metric
.
update
(
*
[
to_numpy
(
m
)
for
m
in
to_list
(
metric_outs
)])
metric_outs
=
metric
.
add_metric_op
(
*
(
to_list
(
outputs
)
+
to_list
(
labels
)))
m
=
metric
.
update
(
*
[
to_numpy
(
m
)
for
m
in
to_list
(
metric_outs
)])
metrics
.
append
(
m
)
return
([
to_numpy
(
l
)
for
l
in
losses
],
metrics
)
\
...
...
@@ -606,7 +607,7 @@ class DynamicGraphAdapter(object):
inputs
=
to_list
(
inputs
)
if
labels
is
not
None
:
labels
=
[
to_variable
(
l
)
for
l
in
to_list
(
labels
)]
outputs
=
self
.
model
.
forward
(
*
[
to_variable
(
x
)
for
x
in
inputs
])
outputs
=
self
.
model
.
forward
(
*
[
to_variable
(
x
)
for
x
in
inputs
])
if
self
.
model
.
_loss_function
:
losses
=
self
.
model
.
_loss_function
(
outputs
,
labels
)
else
:
...
...
@@ -632,9 +633,9 @@ class DynamicGraphAdapter(object):
self
.
_merge_count
[
self
.
mode
+
'_total'
]
+=
samples
self
.
_merge_count
[
self
.
mode
+
'_batch'
]
=
samples
metric_outs
=
metric
.
add_metric_op
(
*
(
to_list
(
outputs
)
+
to_list
(
labels
)))
m
=
metric
.
update
(
*
[
to_numpy
(
m
)
for
m
in
to_list
(
metric_outs
)])
metric_outs
=
metric
.
add_metric_op
(
*
(
to_list
(
outputs
)
+
to_list
(
labels
)))
m
=
metric
.
update
(
*
[
to_numpy
(
m
)
for
m
in
to_list
(
metric_outs
)])
metrics
.
append
(
m
)
# To be consistent with static graph
...
...
@@ -1009,7 +1010,7 @@ class Model(fluid.dygraph.Layer):
do_eval
=
eval_loader
is
not
None
self
.
_test_dataloader
=
eval_loader
metrics_name
=
self
.
_metrics_name
()
steps
=
len
(
train_loader
)
if
hasattr
(
train_loader
,
'__len__'
)
else
None
steps
=
self
.
_len_data_loader
(
train_loader
)
cbks
=
config_callbacks
(
callbacks
,
model
=
self
,
...
...
@@ -1037,8 +1038,7 @@ class Model(fluid.dygraph.Layer):
if
not
isinstance
(
eval_loader
,
Iterable
):
loader
=
eval_loader
()
eval_steps
=
len
(
loader
)
if
hasattr
(
loader
,
'__len__'
)
else
None
eval_steps
=
self
.
_len_data_loader
(
loader
)
cbks
.
on_begin
(
'eval'
,
{
'steps'
:
eval_steps
,
'metrics_name'
:
metrics_name
...
...
@@ -1114,7 +1114,7 @@ class Model(fluid.dygraph.Layer):
if
not
isinstance
(
eval_loader
,
Iterable
):
loader
=
eval_loader
()
eval_steps
=
len
(
loader
)
if
hasattr
(
loader
,
'__len__'
)
else
None
eval_steps
=
self
.
_len_data_loader
(
loader
)
cbks
.
on_begin
(
'eval'
,
{
'steps'
:
eval_steps
,
'metrics_name'
:
metrics_name
})
...
...
@@ -1205,7 +1205,7 @@ class Model(fluid.dygraph.Layer):
mode
,
metrics_name
,
epoch
=
None
):
size
=
len
(
data_loader
)
if
hasattr
(
data_loader
,
'__len__'
)
else
None
size
=
self
.
_len_data_loader
(
data_loader
)
logs
=
{
'steps'
:
size
,
'metrics_name'
:
metrics_name
,
...
...
@@ -1280,3 +1280,10 @@ class Model(fluid.dygraph.Layer):
for
m
in
self
.
_metrics
:
metrics_name
.
extend
(
to_list
(
m
.
name
()))
return
metrics_name
def
_len_data_loader
(
self
,
data_loader
):
try
:
steps
=
len
(
data_loader
)
except
Exception
:
steps
=
None
return
steps
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录