diff --git a/python/paddle/fluid/tests/unittests/test_adamw_op.py b/python/paddle/fluid/tests/unittests/test_adamw_op.py index 0a60f4cba09bc6fd8314420c19522533e724ec37..dbeb5a430377f7199ca30220114676651cf530a2 100644 --- a/python/paddle/fluid/tests/unittests/test_adamw_op.py +++ b/python/paddle/fluid/tests/unittests/test_adamw_op.py @@ -333,7 +333,7 @@ class TestAdamWOpLayerwiseLR(TestAdamWOp): lr_ratio=simple_lr_fun) loss_ref = np.array( - [4.8383293, 3.0854003, 1.33299, -0.418993, -2.171043]) + [4.8383293, 3.084947, 1.3323904, -0.41943002, -2.1710064]) for i in range(5): a1 = linear1(a) out = linear2(a1) diff --git a/python/paddle/optimizer/adamw.py b/python/paddle/optimizer/adamw.py index f26ee80d0af607ee804c6ecddd0536b2ca853ed5..9f96b8c9ea73c7b0b39235a7ec8b666d0c46a757 100644 --- a/python/paddle/optimizer/adamw.py +++ b/python/paddle/optimizer/adamw.py @@ -304,9 +304,8 @@ class AdamW(Adam): moment1, moment2, beta1_pow_acc, beta2_pow_acc, master_weight, 'epsilon', self._epsilon, 'lazy_mode', self._lazy_mode, 'min_row_size_to_use_multithread', 1000, 'beta1', _beta1, - 'beta2', _beta2, 'coeff', self._coeff, 'multi_precision', - find_master, 'lr_ratio', lr_ratio_) - + 'beta2', _beta2, "with_decay", with_decay, 'coeff', self._coeff, + 'multi_precision', find_master, 'lr_ratio', lr_ratio_) return None inputs = {