diff --git a/PaddleNLP/benchmark/bert/run_pretrain.py b/PaddleNLP/benchmark/bert/run_pretrain.py
index ebbe4e2f0cfba4d943fbc1031a157a5341e5ae98..17e386d237d3a72354a9d576cb6b7d27f4b51582 100644
--- a/PaddleNLP/benchmark/bert/run_pretrain.py
+++ b/PaddleNLP/benchmark/bert/run_pretrain.py
@@ -257,7 +257,7 @@ def do_train(args):
         ])
     if args.use_amp:
         amp_list = paddle.fluid.contrib.mixed_precision.AutoMixedPrecisionLists(
-            custom_white_list=['softmax'])
+            custom_white_list=['softmax', 'layer_norm', 'gelu'])
         optimizer = paddle.fluid.contrib.mixed_precision.decorate(
             optimizer,
             amp_list,
diff --git a/PaddleNLP/benchmark/bert/run_pretrain_single.py b/PaddleNLP/benchmark/bert/run_pretrain_single.py
index 74a57a91156fc3a5efa7387c5bca604113270342..bc1e0ed9cc3d9180de8dd8cfd3e056d5208e5d41 100644
--- a/PaddleNLP/benchmark/bert/run_pretrain_single.py
+++ b/PaddleNLP/benchmark/bert/run_pretrain_single.py
@@ -228,7 +228,7 @@ def do_train(args):
         ])
     if args.use_amp:
         amp_list = paddle.fluid.contrib.mixed_precision.AutoMixedPrecisionLists(
-            custom_white_list=['layer_norm', 'softmax'])
+            custom_white_list=['layer_norm', 'softmax', 'gelu'])
         optimizer = paddle.fluid.contrib.mixed_precision.decorate(
             optimizer,
             amp_list,
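
Both hunks extend the AMP custom white list so that layer_norm and gelu are computed in float16 instead of being kept in float32, which removes the cast ops the framework would otherwise insert around these ops in every BERT layer. Below is a minimal, self-contained sketch of how such a white list plugs into Paddle's static-graph AMP decorator; the toy network, learning rate, and loss-scaling values are illustrative assumptions, not the benchmark's actual configuration.

# Sketch only: shows AutoMixedPrecisionLists + decorate as used in the diff.
# The model below is a stand-in, not the benchmark's BERT pretraining graph.
import paddle
import paddle.fluid as fluid

paddle.enable_static()  # fluid mixed_precision is a static-graph API

main_prog, startup_prog = fluid.Program(), fluid.Program()
with fluid.program_guard(main_prog, startup_prog):
    x = fluid.data(name='x', shape=[None, 128], dtype='float32')
    hidden = fluid.layers.fc(input=x, size=128)
    hidden = fluid.layers.layer_norm(hidden)  # white-listed: runs in fp16
    hidden = fluid.layers.gelu(hidden)        # white-listed: runs in fp16
    loss = fluid.layers.reduce_mean(hidden)

    optimizer = fluid.optimizer.Adam(learning_rate=1e-4)
    # Ops on the white list are executed in fp16; black-listed ops stay fp32;
    # everything else is decided by the AMP pass.
    amp_list = fluid.contrib.mixed_precision.AutoMixedPrecisionLists(
        custom_white_list=['softmax', 'layer_norm', 'gelu'])
    optimizer = fluid.contrib.mixed_precision.decorate(
        optimizer,
        amp_list,
        init_loss_scaling=2.0**15,            # illustrative value
        use_dynamic_loss_scaling=True)
    optimizer.minimize(loss)

Putting softmax, layer_norm, and gelu on the white list is safe here because these ops have fp16 kernels that are numerically stable at BERT's activation ranges, while dynamic loss scaling guards the gradients.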