From 3135fbcc0ec0e9dc17c801aa862f07b623f3a5fd Mon Sep 17 00:00:00 2001 From: seiriosPlus Date: Fri, 28 Aug 2020 17:05:03 +0800 Subject: [PATCH] fix adam --- .../distributed_ops/lookup_sparse_table_fuse_adam_op.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) diff --git a/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_adam_op.h b/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_adam_op.h index d62254220d5..89b8d54a463 100644 --- a/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_adam_op.h +++ b/paddle/fluid/operators/distributed_ops/lookup_sparse_table_fuse_adam_op.h @@ -115,10 +115,10 @@ class LargeScaleFuseAdamOpKernel "param_row should have the same size with grad_row")); T lr_ = lr[0]; - T beta1_ = beta1_pow->data()[0]; - T beta2_ = beta2_pow->data()[0]; + T beta1_pow_ = beta1_pow->data()[0]; + T beta2_pow_ = beta2_pow->data()[0]; - lr_ *= sqrt(1 - beta1_) / (1 - beta2_); + lr_ *= sqrt(1 - beta2_pow_) / (1 - beta1_pow_); for (size_t i = 0; i < in_rows.size(); i++) { auto &params = values[i][0]; @@ -131,8 +131,8 @@ class LargeScaleFuseAdamOpKernel for (int x = 0; x < grad_width; ++x) { auto g = grad_v.data()[grad_width * i + x]; - m1_data[x] = beta1_ * m1_data[x] + (1 - beta1_) * g; - m2_data[x] = beta2_ * m2_data[x] + (1 - beta2_) * g * g; + m1_data[x] = beta1 * m1_data[x] + (1 - beta1) * g; + m2_data[x] = beta2 * m2_data[x] + (1 - beta2) * g * g; p_data[x] -= lr_ * (m1_data[x] / (sqrt(m2_data[x]) + epsilon)); } } -- GitLab