Unverified · Commit 923d11fd · Author: Zhou Wei · Committer: GitHub

fix models because the gradient clip strategy has been upgraded (#4515)

* fix models because the gradient clip strategy has been upgraded,test=develop

* fix models because the gradient clip strategy has been upgraded,test=develop
Parent 72209769
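
With the upgraded strategy, clipping is no longer attached to individual parameters through fluid.ParamAttr(gradient_clip=...); a clip object from fluid.clip is handed to the optimizer instead, and the dygraph-only fluid.dygraph_grad_clip classes are replaced by their fluid.clip counterparts. A minimal sketch of the new static-graph usage, assuming the Paddle 1.8 fluid API (the toy network, learning rate, and clip_norm below are illustrative, not taken from the models changed here):

    import paddle.fluid as fluid

    # Toy network; any program with trainable parameters is handled the same way.
    x = fluid.data(name="x", shape=[None, 13], dtype="float32")
    y = fluid.data(name="y", shape=[None, 1], dtype="float32")
    pred = fluid.layers.fc(input=x, size=1)
    loss = fluid.layers.mean(fluid.layers.square_error_cost(input=pred, label=y))

    # Clipping now lives on the optimizer, not on each ParamAttr.
    clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0)
    sgd = fluid.optimizer.SGD(learning_rate=0.01, grad_clip=clip)
    sgd.minimize(loss)
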
@@ -114,7 +114,6 @@ def encoder_net(images,
                 num_classes,
                 rnn_hidden_size=200,
                 regularizer=None,
-                gradient_clip=None,
                 is_test=False,
                 use_cudnn=False):
     conv_features = ocr_convs(
@@ -130,16 +129,13 @@ def encoder_net(images,
     para_attr = fluid.ParamAttr(
         regularizer=regularizer,
-        gradient_clip=gradient_clip,
         initializer=fluid.initializer.Normal(0.0, 0.02))
     bias_attr = fluid.ParamAttr(
         regularizer=regularizer,
-        gradient_clip=gradient_clip,
         initializer=fluid.initializer.Normal(0.0, 0.02),
         learning_rate=2.0)
     bias_attr_nobias = fluid.ParamAttr(
         regularizer=regularizer,
-        gradient_clip=gradient_clip,
         initializer=fluid.initializer.Normal(0.0, 0.02))
     fc_1 = fluid.layers.fc(input=sliced_feature,
......
@@ -33,26 +33,21 @@ def textcnn_net_multi_label(data,
     """
     init_bound = 0.1
     initializer = fluid.initializer.Uniform(low=-init_bound, high=init_bound)
-    #gradient_clip = fluid.clip.GradientClipByNorm(10.0)
-    gradient_clip = None
     regularizer = fluid.regularizer.L2DecayRegularizer(
         regularization_coeff=1e-4)
     seg_param_attrs = fluid.ParamAttr(name="seg_weight",
                                       learning_rate=640.0,
                                       initializer=initializer,
-                                      gradient_clip=gradient_clip,
                                       trainable=True)
     fc_param_attrs_1 = fluid.ParamAttr(name="fc_weight_1",
                                        learning_rate=1.0,
                                        regularizer=regularizer,
                                        initializer=initializer,
-                                       gradient_clip=gradient_clip,
                                        trainable=True)
     fc_param_attrs_2 = fluid.ParamAttr(name="fc_weight_2",
                                        learning_rate=1.0,
                                        regularizer=regularizer,
                                        initializer=initializer,
-                                       gradient_clip=gradient_clip,
                                        trainable=True)
     if win_sizes is None:
......
@@ -408,7 +408,7 @@ def train_ptb_lm():
         if args.ce:
             print("kpis\ttest_ppl\t%0.3f" % ppl[0])
-    grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(max_grad_norm)
+    grad_clip = fluid.clip.GradientClipByGlobalNorm(max_grad_norm)
     for epoch_id in range(max_epoch):
         ptb_model.train()
         total_loss = 0.0
......
@@ -134,7 +134,7 @@ class Optimizer(object):
         param_list = dict()
         clip_norm_thres = 1.0
-        #grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(clip_norm_thres)
+        #grad_clip = fluid.clip.GradientClipByGlobalNorm(clip_norm_thres)
         if use_data_parallel:
             loss = model.scale_loss(loss)
......
@@ -74,7 +74,7 @@ def train(args):
         learning_rate = LR
         optimizer = fluid.optimizer.Adam(learning_rate=learning_rate, parameter_list=ocr_attention.parameters())
-        grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(args.gradient_clip)
+        grad_clip = fluid.clip.GradientClipByGlobalNorm(args.gradient_clip)
         train_reader = data_reader.data_reader(
             args.batch_size,
......
@@ -32,8 +32,8 @@ import time
 from args import *
-#import fluid.dygraph_grad_clip as dygraph_clip
-#from fluid.dygraph_grad_clip import *
+#import fluid.clip as clip
+#from fluid.clip import *
 import sys
 if sys.version[0] == '2':
@@ -371,7 +371,7 @@ def train_ptb_lm():
     ce_time = []
     ce_ppl = []
-    grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(max_grad_norm)
+    grad_clip = fluid.clip.GradientClipByGlobalNorm(max_grad_norm)
     for epoch_id in range(max_epoch):
         ptb_model.train()
         total_loss = 0.0
......
@@ -27,7 +27,7 @@ import contextlib
 import paddle
 import paddle.fluid as fluid
-from paddle.fluid.dygraph_grad_clip import GradClipByGlobalNorm
+from paddle.fluid.clip import GradientClipByGlobalNorm
 import reader
@@ -84,7 +84,7 @@ def main():
         num_layers=num_layers,
         init_scale=init_scale,
         dropout=dropout)
-    gloabl_norm_clip = GradClipByGlobalNorm(max_grad_norm)
+    gloabl_norm_clip = GradientClipByGlobalNorm(max_grad_norm)
     lr = args.learning_rate
     opt_type = args.optimizer
     if opt_type == "sgd":
......
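
For the dygraph scripts above, the renamed fluid.clip.GradientClipByGlobalNorm object is consumed in the same way; a hedged sketch, assuming the Paddle 1.8 dygraph API where the optimizer constructor accepts grad_clip (the Linear stand-in model, learning rate, and clip_norm are illustrative):

    import numpy as np
    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        model = fluid.dygraph.Linear(13, 1)  # stand-in for the real ptb / OCR models
        grad_clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=5.0)
        sgd = fluid.optimizer.SGDOptimizer(
            learning_rate=0.01,
            parameter_list=model.parameters(),
            grad_clip=grad_clip)  # clip object handed to the optimizer

        x = fluid.dygraph.to_variable(np.random.rand(4, 13).astype("float32"))
        loss = fluid.layers.mean(model(x))
        loss.backward()
        sgd.minimize(loss)  # gradients are clipped by global norm before the update
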