diff --git a/python/paddle/fluid/tests/unittests/test_adam_op.py b/python/paddle/fluid/tests/unittests/test_adam_op.py
index 14e83fccd655527d8f3012365e4757d23236a445..47bf8f49e39b6451ee480d461e83324b89cacee2 100644
--- a/python/paddle/fluid/tests/unittests/test_adam_op.py
+++ b/python/paddle/fluid/tests/unittests/test_adam_op.py
@@ -448,7 +448,6 @@ class TestAdamOpV2(unittest.TestCase):
 
     def test_adam_op_with_state_dict(self):
-        import paddle
         paddle.disable_static()
         emb = paddle.nn.Embedding(10, 10)
@@ -517,6 +516,20 @@ class TestAdamOpV2(unittest.TestCase):
             adam = paddle.optimizer.Adam(
                 0.1, epsilon=-1, parameters=linear.parameters())
 
+    def test_adam_op_with_sparse_input_and_weight_decay(self):
+
+        paddle.disable_static()
+        x_data = np.arange(0, 10).reshape((10, 1)).astype(np.int64)
+        x = paddle.to_tensor(x_data, stop_gradient=False)
+        emb = paddle.nn.Embedding(10, 10, sparse=True)
+        adam = paddle.optimizer.Adam(
+            0.001, parameters=emb.parameters(), weight_decay=0.01)
+
+        with self.assertRaises(RuntimeError):
+            out = emb(x)
+            out.backward()
+            adam.step()
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/python/paddle/optimizer/adam.py b/python/paddle/optimizer/adam.py
index 3150b8c2d0363274dfb6fd3465110c89339cd4c9..708aaa788f60d56a2adb41c8a571079354b3c192 100644
--- a/python/paddle/optimizer/adam.py
+++ b/python/paddle/optimizer/adam.py
@@ -250,3 +250,47 @@ class Adam(Optimizer):
             stop_gradient=True)
 
         return adam_op
+
+    @framework.dygraph_only
+    def step(self):
+        """
+        Execute the optimizer and update parameters once.
+
+        Returns:
+            None
+
+        Examples:
+            .. code-block:: python
+
+                import paddle
+                import numpy as np
+                paddle.disable_static()
+                value = np.arange(26).reshape(2, 13).astype("float32")
+                a = paddle.to_tensor(value)
+                linear = paddle.nn.Linear(13, 5)
+                # This can be any optimizer supported by dygraph.
+                adam = paddle.optimizer.Adam(learning_rate=0.01,
+                                             parameters=linear.parameters())
+                out = linear(a)
+                out.backward()
+                adam.step()
+                adam.clear_grad()
+        """
+        parameter_list = self._parameter_list
+        self._dtype = None
+        params_grads = []
+        for param in self._parameter_list:
+            if not param.trainable:
+                continue
+            if hasattr(
+                    param, "_is_sparse"
+            ) and param._is_sparse and self.regularization is not None:
+                raise RuntimeError(
+                    "Adam don't support weight_decay with sparse parameters, please set it to None."
+                )
+            if param._grad_ivar() is not None:
+                grad_var = param._grad_ivar()
+                params_grads.append((param, grad_var))
+
+        optimize_ops = self._apply_optimize(
+            loss=None, startup_program=None, params_grads=params_grads)
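
For reference, the behaviour the new guard enforces can be reproduced outside the unittest harness with the same dygraph API the added test relies on (paddle.disable_static, a sparse paddle.nn.Embedding, and paddle.optimizer.Adam with weight_decay). This is a minimal sketch mirroring the added test, not part of the patch itself:

    import numpy as np
    import paddle

    paddle.disable_static()

    # A sparse embedding lookup produces a sparse gradient for its weight parameter.
    x = paddle.to_tensor(np.arange(0, 10).reshape((10, 1)).astype(np.int64))
    emb = paddle.nn.Embedding(10, 10, sparse=True)

    # Combining a sparse parameter with weight_decay is rejected by the new step().
    adam = paddle.optimizer.Adam(
        0.001, parameters=emb.parameters(), weight_decay=0.01)

    out = emb(x)
    out.backward()
    try:
        adam.step()
    except RuntimeError as e:
        print(e)  # message raised by the new guard in Adam.step()

The check lives in step() rather than in the Adam op itself, so static-graph users and dygraph users without weight_decay are unaffected; only the unsupported sparse-gradient-plus-regularization combination fails fast with a clear error instead of producing silently wrong updates.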