diff --git a/PaddleCV/ocr_recognition/crnn_ctc_model.py b/PaddleCV/ocr_recognition/crnn_ctc_model.py
index 55db15caed0bb88e28ef9a82d07e7fa3017f2c03..9c7c041918d47df769eb7fc9b22db90eeb2ded4d 100755
--- a/PaddleCV/ocr_recognition/crnn_ctc_model.py
+++ b/PaddleCV/ocr_recognition/crnn_ctc_model.py
@@ -114,7 +114,6 @@ def encoder_net(images,
                 num_classes,
                 rnn_hidden_size=200,
                 regularizer=None,
-                gradient_clip=None,
                 is_test=False,
                 use_cudnn=False):
     conv_features = ocr_convs(
@@ -130,16 +129,13 @@ def encoder_net(images,
 
     para_attr = fluid.ParamAttr(
         regularizer=regularizer,
-        gradient_clip=gradient_clip,
         initializer=fluid.initializer.Normal(0.0, 0.02))
     bias_attr = fluid.ParamAttr(
         regularizer=regularizer,
-        gradient_clip=gradient_clip,
         initializer=fluid.initializer.Normal(0.0, 0.02),
         learning_rate=2.0)
     bias_attr_nobias = fluid.ParamAttr(
         regularizer=regularizer,
-        gradient_clip=gradient_clip,
         initializer=fluid.initializer.Normal(0.0, 0.02))
 
     fc_1 = fluid.layers.fc(input=sliced_feature,
diff --git a/PaddleNLP/dialogue_domain_classification/nets.py b/PaddleNLP/dialogue_domain_classification/nets.py
index 77912b3b0cda2fcda2f6e264478b909e3472ff77..13a695381d13d9167dfb8c8627fd2aa6a1080c49 100755
--- a/PaddleNLP/dialogue_domain_classification/nets.py
+++ b/PaddleNLP/dialogue_domain_classification/nets.py
@@ -33,26 +33,21 @@ def textcnn_net_multi_label(data,
     """
     init_bound = 0.1
     initializer = fluid.initializer.Uniform(low=-init_bound, high=init_bound)
-    #gradient_clip = fluid.clip.GradientClipByNorm(10.0)
-    gradient_clip = None
     regularizer = fluid.regularizer.L2DecayRegularizer(
         regularization_coeff=1e-4)
     seg_param_attrs = fluid.ParamAttr(name="seg_weight",
                                       learning_rate=640.0,
                                       initializer=initializer,
-                                      gradient_clip=gradient_clip,
                                       trainable=True)
     fc_param_attrs_1 = fluid.ParamAttr(name="fc_weight_1",
                                        learning_rate=1.0,
                                        regularizer=regularizer,
                                        initializer=initializer,
-                                       gradient_clip=gradient_clip,
                                        trainable=True)
     fc_param_attrs_2 = fluid.ParamAttr(name="fc_weight_2",
                                        learning_rate=1.0,
                                        regularizer=regularizer,
                                        initializer=initializer,
-                                       gradient_clip=gradient_clip,
                                        trainable=True)
 
     if win_sizes is None:
diff --git a/PaddleRec/gru4rec/dy_graph/gru4rec_dy.py b/PaddleRec/gru4rec/dy_graph/gru4rec_dy.py
index 2a87e0bf9bfb1308d6c52a804dc125322fea6846..54455a570e508aa30f8d4f335cf69c94b15116d9 100644
--- a/PaddleRec/gru4rec/dy_graph/gru4rec_dy.py
+++ b/PaddleRec/gru4rec/dy_graph/gru4rec_dy.py
@@ -408,7 +408,7 @@ def train_ptb_lm():
             if args.ce:
                 print("kpis\ttest_ppl\t%0.3f" % ppl[0])
 
-        grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(max_grad_norm)
+        grad_clip = fluid.clip.GradientClipByGlobalNorm(max_grad_norm)
         for epoch_id in range(max_epoch):
             ptb_model.train()
             total_loss = 0.0
diff --git a/dygraph/bert/optimization.py b/dygraph/bert/optimization.py
index 5c4c02b74c69e058e5476f95a41195af492d5dc2..7f87fa8c4ea513d98a0a507f4750fc6e96fdeada 100755
--- a/dygraph/bert/optimization.py
+++ b/dygraph/bert/optimization.py
@@ -134,7 +134,7 @@ class Optimizer(object):
 
         param_list = dict()
         clip_norm_thres = 1.0
-        #grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(clip_norm_thres)
+        #grad_clip = fluid.clip.GradientClipByGlobalNorm(clip_norm_thres)
 
         if use_data_parallel:
             loss = model.scale_loss(loss)
diff --git a/dygraph/ocr_recognition/train.py b/dygraph/ocr_recognition/train.py
index 6e5792a109c0e1b47f14b5a993c44a575dcd3c87..e01b01c150c55fd24f9ed48c513469b57996064e 100644
--- a/dygraph/ocr_recognition/train.py
+++ b/dygraph/ocr_recognition/train.py
@@ -74,7 +74,7 @@ def train(args):
         learning_rate = LR
         optimizer = fluid.optimizer.Adam(
             learning_rate=learning_rate, parameter_list=ocr_attention.parameters())
-        grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(args.gradient_clip)
+        grad_clip = fluid.clip.GradientClipByGlobalNorm(args.gradient_clip)
 
         train_reader = data_reader.data_reader(
             args.batch_size,
diff --git a/dygraph/ptb_lm/ptb_dy.py b/dygraph/ptb_lm/ptb_dy.py
index fc468b84fd321a87713bf0346e67914acfe6ab23..de85084b49ea7e440f935c1f95fe0fe96733423b 100644
--- a/dygraph/ptb_lm/ptb_dy.py
+++ b/dygraph/ptb_lm/ptb_dy.py
@@ -32,8 +32,8 @@ import time
 
 from args import *
-#import fluid.dygraph_grad_clip as dygraph_clip
-#from fluid.dygraph_grad_clip import *
+#import fluid.clip as clip
+#from fluid.clip import *
 
 import sys
 
 if sys.version[0] == '2':
@@ -371,7 +371,7 @@ def train_ptb_lm():
         ce_time = []
         ce_ppl = []
 
-        grad_clip = fluid.dygraph_grad_clip.GradClipByGlobalNorm(max_grad_norm)
+        grad_clip = fluid.clip.GradientClipByGlobalNorm(max_grad_norm)
         for epoch_id in range(max_epoch):
             ptb_model.train()
             total_loss = 0.0
diff --git a/dygraph/seq2seq/train.py b/dygraph/seq2seq/train.py
index 89d18168dcd4db2284f1820a2b2db3f462b5bd7e..19b17ae71bfc4edd9fb7a2f8dd7ae76511e8a86a 100755
--- a/dygraph/seq2seq/train.py
+++ b/dygraph/seq2seq/train.py
@@ -27,7 +27,7 @@ import contextlib
 
 import paddle
 import paddle.fluid as fluid
-from paddle.fluid.dygraph_grad_clip import GradClipByGlobalNorm
+from paddle.fluid.clip import GradientClipByGlobalNorm
 
 import reader
 
@@ -84,7 +84,7 @@ def main():
             num_layers=num_layers,
             init_scale=init_scale,
             dropout=dropout)
-        gloabl_norm_clip = GradClipByGlobalNorm(max_grad_norm)
+        gloabl_norm_clip = GradientClipByGlobalNorm(max_grad_norm)
         lr = args.learning_rate
         opt_type = args.optimizer
         if opt_type == "sgd":
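
Note: these diffs track PaddlePaddle's unification of gradient clipping. The per-parameter gradient_clip argument of fluid.ParamAttr is dropped, and the dygraph-only fluid.dygraph_grad_clip.GradClipByGlobalNorm is replaced by the unified fluid.clip.GradientClipByGlobalNorm. The diffs only show the clip objects being constructed, not how they are consumed downstream; the sketch below is a minimal illustration assuming Paddle 1.8-era fluid, where optimizers accept a grad_clip argument. The model and clip threshold are illustrative placeholders, not values taken from the diffs.

    import paddle.fluid as fluid

    with fluid.dygraph.guard():
        model = fluid.dygraph.Linear(8, 2)  # placeholder network
        # Unified clip class: rescales gradients so their global L2 norm
        # does not exceed clip_norm.
        grad_clip = fluid.clip.GradientClipByGlobalNorm(clip_norm=1.0)
        optimizer = fluid.optimizer.Adam(
            learning_rate=1e-3,
            parameter_list=model.parameters(),
            grad_clip=grad_clip)  # clipping configured once, on the optimizer

Configuring the clip on the optimizer replaces both the old per-parameter route (ParamAttr) and the old dygraph-specific route, which is why both disappear in the same change.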