diff --git a/python/paddle/fluid/tests/unittests/test_adamw_op.py b/python/paddle/fluid/tests/unittests/test_adamw_op.py index 0a60f4cba09bc6fd8314420c19522533e724ec37..dbeb5a430377f7199ca30220114676651cf530a2 100644 --- a/python/paddle/fluid/tests/unittests/test_adamw_op.py +++ b/python/paddle/fluid/tests/unittests/test_adamw_op.py @@ -333,7 +333,7 @@ class TestAdamWOpLayerwiseLR(TestAdamWOp): lr_ratio=simple_lr_fun) loss_ref = np.array( - [4.8383293, 3.0854003, 1.33299, -0.418993, -2.171043]) + [4.8383293, 3.084947, 1.3323904, -0.41943002, -2.1710064]) for i in range(5): a1 = linear1(a) out = linear2(a1) diff --git a/python/paddle/optimizer/adamw.py b/python/paddle/optimizer/adamw.py index 55aaac8dc4852469a167d7ae0dd65a0d4fa22bc0..5fdcc0cd0d27069ea0d8b1c788dc38ca2b739ec1 100644 --- a/python/paddle/optimizer/adamw.py +++ b/python/paddle/optimizer/adamw.py @@ -297,9 +297,8 @@ class AdamW(Adam): moment1, moment2, beta1_pow_acc, beta2_pow_acc, master_weight, 'epsilon', self._epsilon, 'lazy_mode', self._lazy_mode, 'min_row_size_to_use_multithread', 1000, 'beta1', _beta1, - 'beta2', _beta2, 'coeff', self._coeff, 'multi_precision', - find_master, 'lr_ratio', lr_ratio_) - + 'beta2', _beta2, "with_decay", with_decay, 'coeff', self._coeff, + 'multi_precision', find_master, 'lr_ratio', lr_ratio_) return None inputs = {