吃玉米的猫 / models
Forked from PaddlePaddle / models (in sync with the fork source)
Unverified commit 6c87d487
Authored Apr 17, 2020 by Zhou Wei; committed via GitHub on Apr 17, 2020

fix grad_clip in dygraph mode, grad_clip strategy has been upgraded since Paddle2.0 (#4541)
Parent: 53723856

Showing 4 changed files with 17 additions and 12 deletions (+17 −12):
PaddleRec/gru4rec/dy_graph/gru4rec_dy.py   +4 −3
dygraph/ocr_recognition/train.py           +4 −2
dygraph/ptb_lm/ptb_dy.py                   +6 −4
dygraph/seq2seq/train.py                   +3 −3
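Since Paddle 2.0, the gradient-clipping strategy is bound to the optimizer when it is constructed, rather than passed to `minimize()` on every step; each diff below makes that same two-part change. As a reference, here is a minimal sketch of the new-style call pattern, assuming the 1.8/2.0-era `fluid` dygraph API this commit targets (the toy `Linear` model, shapes, learning rate, and `clip_norm=5.0` are illustrative assumptions, not values from this repo):

```python
import numpy as np
import paddle.fluid as fluid

with fluid.dygraph.guard():
    model = fluid.dygraph.Linear(4, 1)   # toy model, illustrative only

    # New style: attach the clip strategy at optimizer construction time.
    grad_clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=5.0)
    sgd = fluid.optimizer.SGD(learning_rate=0.01,
                              parameter_list=model.parameters(),
                              grad_clip=grad_clip)

    x = fluid.dygraph.to_variable(np.random.rand(8, 4).astype("float32"))
    loss = fluid.layers.reduce_mean(model(x))
    loss.backward()
    sgd.minimize(loss)   # old style passed grad_clip=... here; removed since 2.0
    model.clear_gradients()
```

Because the strategy now lives on the optimizer, every `minimize()` call applies the same clipping, which is why the `minimize()` call sites in the diffs below shrink to a single argument.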
PaddleRec/gru4rec/dy_graph/gru4rec_dy.py

```diff
@@ -361,10 +361,12 @@ def train_ptb_lm():
             max(i + 1 - epoch_start_decay, 0.0))
         lr_arr.append(new_lr)
 
+    grad_clip = fluid.clip.GradientClipByGlobalNorm(max_grad_norm)
     sgd = AdagradOptimizer(parameter_list=ptb_model.parameters(),
                            learning_rate=fluid.layers.piecewise_decay(
-                               boundaries=bd, values=lr_arr))
+                               boundaries=bd, values=lr_arr),
+                           grad_clip=grad_clip)
 
     print("parameters:--------------------------------")
     for para in ptb_model.parameters():
@@ -408,7 +410,6 @@ def train_ptb_lm():
     if args.ce:
         print("kpis\ttest_ppl\t%0.3f" % ppl[0])
-    grad_clip = fluid.clip.GradientClipByGlobalNorm(max_grad_norm)
     for epoch_id in range(max_epoch):
         ptb_model.train()
         total_loss = 0.0
@@ -434,7 +435,7 @@ def train_ptb_lm():
             init_hidden = last_hidden
             dy_loss.backward()
-            sgd.minimize(dy_loss, grad_clip=grad_clip)
+            sgd.minimize(dy_loss)
             ptb_model.clear_gradients()
             total_loss += out_loss
             iters += num_steps
```
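All four files use the same strategy, `GradientClipByGlobalNorm`: when the joint L2 norm of all parameter gradients exceeds the threshold, every gradient is rescaled by one common factor so the joint norm equals the threshold. A standalone numpy sketch of that documented behavior (a simplification for illustration, not Paddle's actual implementation):

```python
import numpy as np

def clip_by_global_norm(grads, clip_norm):
    """Rescale a list of gradient arrays so their joint L2 norm is <= clip_norm."""
    global_norm = np.sqrt(sum(np.sum(g ** 2) for g in grads))
    if global_norm <= clip_norm:
        return grads                    # under the threshold: leave untouched
    scale = clip_norm / global_norm     # one common shrink factor for all tensors
    return [g * scale for g in grads]

# Example: gradients [3.0] and [4.0] have global norm 5.0; clipping to 1.0
# yields [0.6] and [0.8].
print(clip_by_global_norm([np.array([3.0]), np.array([4.0])], 1.0))
```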
dygraph/ocr_recognition/train.py

```diff
@@ -73,8 +73,10 @@ def train(args):
     else:
         learning_rate = LR
-    optimizer = fluid.optimizer.Adam(learning_rate=learning_rate, parameter_list=ocr_attention.parameters())
+    grad_clip = fluid.clip.GradientClipByGlobalNorm(args.gradient_clip)
+    optimizer = fluid.optimizer.Adam(learning_rate=learning_rate,
+        parameter_list=ocr_attention.parameters(), grad_clip=grad_clip)
     train_reader = data_reader.data_reader(
         args.batch_size,
@@ -122,7 +124,7 @@ def train(args):
             total_loss += avg_loss.numpy()
             avg_loss.backward()
-            optimizer.minimize(avg_loss, grad_clip=grad_clip)
+            optimizer.minimize(avg_loss)
             ocr_attention.clear_gradients()
             if batch_id > 0 and batch_id % args.log_period == 0:
```
dygraph/ptb_lm/ptb_dy.py

```diff
@@ -332,8 +332,11 @@ def train_ptb_lm():
             max(i + 1 - epoch_start_decay, 0.0))
         lr_arr.append(new_lr)
 
-    sgd = SGDOptimizer(learning_rate=fluid.layers.piecewise_decay(
-        boundaries=bd, values=lr_arr), parameter_list=ptb_model.parameters())
+    grad_clip = fluid.clip.GradientClipByGlobalNorm(max_grad_norm)
+    sgd = SGDOptimizer(learning_rate=fluid.layers.piecewise_decay(
+        boundaries=bd, values=lr_arr),
+        parameter_list=ptb_model.parameters(),
+        grad_clip=grad_clip)
 
     def eval(model, data):
         print("begin to eval")
@@ -371,7 +374,6 @@ def train_ptb_lm():
     ce_time = []
     ce_ppl = []
-    grad_clip = fluid.clip.GradientClipByGlobalNorm(max_grad_norm)
     for epoch_id in range(max_epoch):
         ptb_model.train()
         total_loss = 0.0
@@ -402,7 +404,7 @@ def train_ptb_lm():
             out_loss = dy_loss.numpy()
             dy_loss.backward()
-            sgd.minimize(dy_loss, grad_clip=grad_clip)
+            sgd.minimize(dy_loss)
             ptb_model.clear_gradients()
             total_loss += out_loss
```
dygraph/seq2seq/train.py

```diff
@@ -88,9 +88,9 @@ def main():
     lr = args.learning_rate
     opt_type = args.optimizer
     if opt_type == "sgd":
-        optimizer = fluid.optimizer.SGD(lr, parameter_list=model.parameters())
+        optimizer = fluid.optimizer.SGD(lr, parameter_list=model.parameters(), grad_clip=gloabl_norm_clip)
     elif opt_type == "adam":
-        optimizer = fluid.optimizer.Adam(lr, parameter_list=model.parameters())
+        optimizer = fluid.optimizer.Adam(lr, parameter_list=model.parameters(), grad_clip=gloabl_norm_clip)
     else:
         print("only support [sgd|adam]")
         raise Exception("opt type not support")
@@ -161,7 +161,7 @@ def main():
             loss = model(input_data_feed)
             # print(loss.numpy()[0])
             loss.backward()
-            optimizer.minimize(loss, grad_clip=gloabl_norm_clip)
+            optimizer.minimize(loss)
             model.clear_gradients()
             total_loss += loss * batch_size
             batch_end_time = time.time()
```