From ea937c1b48ed12dc03b941cedc63d956eb441fb5 Mon Sep 17 00:00:00 2001
From: whs
Date: Wed, 6 Nov 2019 10:37:39 +0800
Subject: [PATCH] Change GradientClipByValue to GradientClipByGlobalNorm in
 ocr attention model. (#3879)

---
 PaddleCV/ocr_recognition/attention_model.py | 2 +-
 PaddleCV/ocr_recognition/run_attention.sh   | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/PaddleCV/ocr_recognition/attention_model.py b/PaddleCV/ocr_recognition/attention_model.py
index 963d2168..4a2dad27 100755
--- a/PaddleCV/ocr_recognition/attention_model.py
+++ b/PaddleCV/ocr_recognition/attention_model.py
@@ -188,7 +188,7 @@ def attention_train_net(args, data_shape, num_classes):
     prediction = gru_decoder_with_attention(trg_embedding, encoded_vector,
                                             encoded_proj, decoder_boot,
                                             decoder_size, num_classes)
-    fluid.clip.set_gradient_clip(fluid.clip.GradientClipByValue(args.gradient_clip))
+    fluid.clip.set_gradient_clip(fluid.clip.GradientClipByGlobalNorm(args.gradient_clip))
     label_out = fluid.layers.cast(x=label_out, dtype='int64')
 
     _, maxid = fluid.layers.topk(input=prediction, k=1)
diff --git a/PaddleCV/ocr_recognition/run_attention.sh b/PaddleCV/ocr_recognition/run_attention.sh
index 50ddba71..beae85cf 100644
--- a/PaddleCV/ocr_recognition/run_attention.sh
+++ b/PaddleCV/ocr_recognition/run_attention.sh
@@ -1,7 +1,7 @@
 export CUDA_VISIBLE_DEVICES=0
 nohup python train.py \
 --lr=1.0 \
---gradient_clip=10 \
+--gradient_clip=5.0 \
 --model="attention" \
 --log_period=10 \
 > attention.log 2>&1 &
-- 
GitLab
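Note: for context, below is a minimal NumPy sketch contrasting the two clipping
behaviors this patch swaps. The helper names are illustrative, not part of
Paddle's fluid API; clip-by-value clamps each gradient element independently
(which can change the update direction), while clip-by-global-norm rescales all
gradients by one factor so their joint L2 norm is at most clip_norm, preserving
direction. This is presumably why the tighter threshold (5.0 instead of 10) is
used in run_attention.sh.

    import numpy as np

    def clip_by_value(grads, clip):
        # GradientClipByValue-style: clamp every element into [-clip, clip].
        return [np.clip(g, -clip, clip) for g in grads]

    def clip_by_global_norm(grads, clip_norm):
        # GradientClipByGlobalNorm-style: compute the joint L2 norm over all
        # gradients, then scale them uniformly if it exceeds clip_norm.
        global_norm = np.sqrt(sum(np.sum(g ** 2) for g in grads))
        scale = clip_norm / max(global_norm, clip_norm)
        return [g * scale for g in grads]

    grads = [np.array([3.0, -4.0]), np.array([12.0])]  # global norm = 13.0
    print(clip_by_value(grads, 5.0))        # [3., -4.] kept, [12.] clamped to [5.]
    print(clip_by_global_norm(grads, 5.0))  # every gradient scaled by 5/13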