PaddlePaddle/models
Commit f69a0b5e (unverified)
Authored on Feb 11, 2020 by Yiqun Liu
Committed by GitHub on Feb 11, 2020
Add an argument to enable the use of experimental feature, fusion_group. (#4252)
test=develop
Parent: 1fbb0875
Showing 3 changed files with 50 additions and 60 deletions.
PaddleNLP/language_model/args.py  +12 -2
PaddleNLP/language_model/train.py  +35 -26
PaddleNLP/models/language_model/lm_model.py  +3 -32
PaddleNLP/language_model/args.py
@@ -59,6 +59,12 @@ def parse_args():
         type=str2bool,
         default=False,
         help='Whether profiling the trainning [True|False]')
+    parser.add_argument(
+        '--enable_auto_fusion',
+        type=str2bool,
+        default=False,
+        help='Whether enable fusion_group [True|False]. It is a experimental feature.'
+    )
     parser.add_argument(
         '--use_dataloader',
         type=str2bool,
@@ -80,8 +86,12 @@ def parse_args():
     parser.add_argument('--enable_ce', action='store_true')
     parser.add_argument('--batch_size', type=int, default=0, help='batch size')
     parser.add_argument('--max_epoch', type=int, default=0, help='max epoch')
     # NOTE: args for profiler, used for benchmark
     parser.add_argument(
         '--profiler_path',
         type=str,
         default='/tmp/paddingrnn.profile',
         help='the profiler output file path. used for benchmark')
     args = parser.parse_args()
     return args
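The new flag is parsed with the same str2bool conversion as the other boolean options, so it accepts the usual True/False spellings on the command line. Below is a minimal, self-contained sketch of that behaviour; the str2bool here is an assumed stand-in, not the helper actually defined in args.py.

import argparse

def str2bool(v):
    # Assumed stand-in for the repository's str2bool helper in args.py.
    return str(v).lower() in ('true', 't', '1', 'yes', 'y')

parser = argparse.ArgumentParser()
parser.add_argument(
    '--enable_auto_fusion',
    type=str2bool,
    default=False,
    help='Whether enable fusion_group [True|False]. It is a experimental feature.')

# Simulates, e.g., "python train.py --enable_auto_fusion True".
args = parser.parse_args(['--enable_auto_fusion', 'True'])
print(args.enable_auto_fusion)  # True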
PaddleNLP/language_model/train.py
@@ -191,6 +191,12 @@ def main():
     build_strategy = fluid.BuildStrategy()
     build_strategy.fuse_all_optimizer_ops = True
+    try:
+        fluid.require_version(min_version='1.7.0')
+        build_strategy.enable_auto_fusion = args.enable_auto_fusion
+    except Exception as e:
+        logger.info("PaddlePaddle version 1.7.0 or higher is "
+                    "required when you want to enable fusion_group.")

     if args.parallel:
         train_program = fluid.compiler.CompiledProgram(
@@ -438,32 +444,35 @@ def main():
         print("ptblm\tlstm_language_model_%s_loss_card%d\t%s" %
               (args.rnn_model, device_count, train_ppl[0]))

-        # NOTE(zjl): sometimes we have not enough data for eval if batch_size is large, i.e., 2100
-        # Just skip to avoid error
-        def is_valid_data(data, batch_size, num_steps):
-            data_len = len(data)
-            batch_len = data_len // batch_size
-            epoch_size = (batch_len - 1) // num_steps
-            return epoch_size >= 1
-
-        valid_data_valid = is_valid_data(valid_data, config.batch_size,
-                                         config.num_steps)
-        if valid_data_valid:
-            valid_ppl = eval(valid_data)
-            print("Valid ppl: %.5f" % valid_ppl[0])
-        else:
-            print('WARNING: length of valid_data is {}, which is not enough for batch_size {} and num_steps {}'.
-                  format(len(valid_data), config.batch_size, config.num_steps))
-
-        save_model_dir = os.path.join(args.save_model_dir, str(epoch_id))
-        if not os.path.exists(save_model_dir):
-            mkpath(save_model_dir)
-        save_model_dir = os.path.join(save_model_dir, 'params')
-        fluid.save(main_program, save_model_dir)
-        print("Saved model to: %s.\n" % save_model_dir)
+        if not args.profile:
+            # NOTE(zjl): sometimes we have not enough data for eval if batch_size is large, i.e., 2100
+            # Just skip to avoid error
+            def is_valid_data(data, batch_size, num_steps):
+                data_len = len(data)
+                batch_len = data_len // batch_size
+                epoch_size = (batch_len - 1) // num_steps
+                return epoch_size >= 1
+
+            valid_data_valid = is_valid_data(valid_data, config.batch_size,
+                                             config.num_steps)
+            if valid_data_valid:
+                valid_ppl = eval(valid_data)
+                print("Valid ppl: %.5f" % valid_ppl[0])
+            else:
+                print('WARNING: length of valid_data is {}, which is not enough for batch_size {} and num_steps {}'.
+                      format(len(valid_data), config.batch_size, config.num_steps))
+
+            save_model_dir = os.path.join(args.save_model_dir, str(epoch_id))
+            if not os.path.exists(save_model_dir):
+                mkpath(save_model_dir)
+            save_model_dir = os.path.join(save_model_dir, 'params')
+            fluid.save(main_program, save_model_dir)
+            print("Saved model to: %s.\n" % save_model_dir)

     with profile_context(args.profile, args.profiler_path):
         train()
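Setting build_strategy.enable_auto_fusion (first hunk above) only records the request; the fusion passes take effect when the program is compiled for parallel execution. A minimal sketch of that wiring under the usual Paddle 1.x pattern follows; the tiny network and variable names are illustrative assumptions, not the model built by train.py.

import paddle.fluid as fluid

# Illustrative toy program so the sketch is self-contained.
x = fluid.data(name='x', shape=[None, 4], dtype='float32')
y = fluid.data(name='y', shape=[None, 1], dtype='float32')
pred = fluid.layers.fc(input=x, size=1)
loss = fluid.layers.mean(fluid.layers.square_error_cost(input=pred, label=y))
fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)

build_strategy = fluid.BuildStrategy()
build_strategy.fuse_all_optimizer_ops = True
try:
    # enable_auto_fusion needs PaddlePaddle >= 1.7.0; require_version raises
    # on older installations, so the flag is simply skipped there.
    fluid.require_version(min_version='1.7.0')
    build_strategy.enable_auto_fusion = True  # what --enable_auto_fusion True requests
except Exception:
    pass

# The strategy is consumed when the program is compiled; the fusion passes
# run when this compiled program is first executed.
compiled_prog = fluid.compiler.CompiledProgram(
    fluid.default_main_program()).with_data_parallel(
        loss_name=loss.name, build_strategy=build_strategy)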
PaddleNLP/models/language_model/lm_model.py
@@ -190,38 +190,9 @@ def lm_model(hidden_size,
             gate_input = layers.elementwise_add(gate_input, bias)
             i, j, f, o = layers.split(gate_input, num_or_sections=4, dim=-1)

-            try:
-                from paddle.fluid.contrib.layers import fused_elemwise_activation
-                # fluid.contrib.layers.fused_elemwise_activation can do a fused
-                # operation, like:
-                # 1) x + sigmoid(y); x + tanh(y)
-                # 2) tanh(x + y)
-                # Now the unary operation supported in this fused op is limit, and
-                # we will extent this operation to support more unary operations and
-                # do this kind of fusion automitically in future version of paddle.fluid.
-                # layers.sigmoid(i) * layers.tanh(j)
-                tmp0 = fused_elemwise_activation(
-                    x=layers.tanh(j),
-                    y=i,
-                    functor_list=['elementwise_mul', 'sigmoid'],
-                    save_intermediate_out=False)
-                # pre_cell * layers.sigmoid(f)
-                tmp1 = fused_elemwise_activation(
-                    x=pre_cell,
-                    y=f,
-                    functor_list=['elementwise_mul', 'sigmoid'],
-                    save_intermediate_out=False)
-                c = tmp0 + tmp1
-                # layers.tanh(c) * layers.sigmoid(o)
-                m = fused_elemwise_activation(
-                    x=layers.tanh(c),
-                    y=o,
-                    functor_list=['elementwise_mul', 'sigmoid'],
-                    save_intermediate_out=False)
-            except ImportError:
-                c = pre_cell * layers.sigmoid(f) + layers.sigmoid(i) * layers.tanh(j)
-                m = layers.tanh(c) * layers.sigmoid(o)
+            c = pre_cell * layers.sigmoid(f) + layers.sigmoid(i) * layers.tanh(j)
+            m = layers.tanh(c) * layers.sigmoid(o)

             hidden_array[k] = m
             cell_array[k] = c
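Both the removed fused_elemwise_activation branch and the two lines kept above compute the same standard LSTM cell update, so the math is unchanged; this is consistent with leaving such elementwise fusion to fusion_group (via the new --enable_auto_fusion flag) rather than hand-writing it in the model. For reference, a small NumPy sketch of the update (shapes and values are arbitrary):

import numpy as np

def sigmoid(x):
    return 1.0 / (1.0 + np.exp(-x))

rng = np.random.default_rng(0)
# Stand-ins for the four gate pre-activations produced by layers.split(...)
# and for the previous cell state.
i, j, f, o, pre_cell = (rng.standard_normal((2, 4)) for _ in range(5))

c = pre_cell * sigmoid(f) + sigmoid(i) * np.tanh(j)  # new cell state
m = np.tanh(c) * sigmoid(o)                          # new hidden state
print(c.shape, m.shape)  # (2, 4) (2, 4)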