From 4b7fe610a6e8d52fc61f53e87796e738fdbb401e Mon Sep 17 00:00:00 2001 From: caozhou <48191911+Caozhou1995@users.noreply.github.com> Date: Wed, 27 Jul 2022 16:18:08 +0800 Subject: [PATCH] add adagrad and rmsprop yaml (#44631) --- paddle/phi/api/yaml/legacy_api.yaml | 22 +++++++++++++++++++ python/paddle/fluid/optimizer.py | 19 ++++++++++++++-- .../fluid/tests/unittests/test_adagrad_op.py | 1 - 3 files changed, 39 insertions(+), 3 deletions(-) diff --git a/paddle/phi/api/yaml/legacy_api.yaml b/paddle/phi/api/yaml/legacy_api.yaml index bd48617037d..77c58816de6 100644 --- a/paddle/phi/api/yaml/legacy_api.yaml +++ b/paddle/phi/api/yaml/legacy_api.yaml @@ -48,6 +48,17 @@ kernel : func : adadelta +- api : adagrad_ + args : (Tensor param, Tensor grad, Tensor moment, Tensor learning_rate, float epsilon) + output : Tensor(param_out), Tensor(moment_out) + infer_meta : + func : AdagradInferMeta + kernel : + func : adagrad {dense, dense, dense, dense -> dense, dense} + adagrad_dense_param_sparse_grad {dense, selected_rows, dense, dense -> dense, dense} + data_type : param + inplace : (param -> param_out), (moment -> moment_out) + - api : adam_ args : (Tensor param, Tensor grad, Tensor learning_rate, Tensor moment1, Tensor moment2, Tensor beta1_pow, Tensor beta2_pow, Tensor master_param, Tensor skip_update, Scalar beta1, Scalar beta2, Scalar epsilon, bool lazy_mode, int64_t min_row_size_to_use_multithread, bool multi_precision, bool use_global_beta_pow) output : Tensor(param_out), Tensor(moment1_out), Tensor(moment2_out), Tensor(beta1_pow_out), Tensor(beta2_pow_out), Tensor(master_param_outs) @@ -1851,6 +1862,17 @@ func : reverse_array backward : reverse_array_grad +- api : rmsprop_ + args : (Tensor param, Tensor mean_square, Tensor grad, Tensor moment, Tensor learning_rate, Tensor mean_grad, float epsilon, float decay, float momentum, bool centered) + output : Tensor(param_out), Tensor(moment_out), Tensor(mean_square_out), Tensor(mean_grad_out) + infer_meta : + func : RmspropInferMeta + kernel : + func : rmsprop {dense, dense, dense, dense, dense, dense -> dense, dense, dense, dense} + rmsprop_dense_param_sparse_grad {dense, dense, selected_rows, dense, dense, dense -> dense, dense, dense, dense} + optional : mean_grad + inplace : (param -> param_out), (moment -> moment_out), (mean_square -> mean_square_out), (mean_grad -> mean_grad_out) + - api : roi_align args : (Tensor x, Tensor boxes, Tensor boxes_num, int pooled_height, int pooled_width, float spatial_scale, int sampling_ratio, bool aligned) output : Tensor diff --git a/python/paddle/fluid/optimizer.py b/python/paddle/fluid/optimizer.py index c97809a069d..a3c68099089 100755 --- a/python/paddle/fluid/optimizer.py +++ b/python/paddle/fluid/optimizer.py @@ -2279,11 +2279,18 @@ class AdagradOptimizer(Optimizer): moment_acc = self._get_accumulator(self._moment_acc_str, param_and_grad[0]) - if framework._non_static_mode(): + if in_dygraph_mode(): + _C_ops.final_state_adagrad_(param_and_grad[0], param_and_grad[1], + moment_acc, + self._create_param_lr(param_and_grad), + self._epsilon) + return None + elif _in_legacy_dygraph(): _C_ops.adagrad(param_and_grad[0], param_and_grad[1], moment_acc, self._create_param_lr(param_and_grad), param_and_grad[0], moment_acc, "epsilon", self._epsilon) + return None else: # Create the adagrad optimizer op adagrad_op = block.append_op( @@ -3374,7 +3381,14 @@ class RMSPropOptimizer(Optimizer): param_and_grad[0]) mean_grad_acc = self._get_accumulator(self._mean_grad_acc_str, param_and_grad[0]) - if framework._non_static_mode(): + if in_dygraph_mode(): + _C_ops.final_state_rmsprop_(param_and_grad[0], mean_square_acc, + param_and_grad[1], momentum_acc, + self._create_param_lr(param_and_grad), + mean_grad_acc, self._epsilon, self._rho, + self._momentum, self._centered) + return None + elif _in_legacy_dygraph(): _C_ops.rmsprop(param_and_grad[0], mean_square_acc, self._create_param_lr(param_and_grad), param_and_grad[1], momentum_acc, param_and_grad[0], @@ -3382,6 +3396,7 @@ class RMSPropOptimizer(Optimizer): "epsilon", self._epsilon, "decay", self._rho, "momentum", self._momentum, "centered", self._centered) + return None else: rmsprop_op = block.append_op( type=self.type, diff --git a/python/paddle/fluid/tests/unittests/test_adagrad_op.py b/python/paddle/fluid/tests/unittests/test_adagrad_op.py index 4f290d4befa..5f5d41ec2c0 100644 --- a/python/paddle/fluid/tests/unittests/test_adagrad_op.py +++ b/python/paddle/fluid/tests/unittests/test_adagrad_op.py @@ -29,7 +29,6 @@ class TestAdagradOp1(OpTest): def setUp(self): self.op_type = "adagrad" - param = np.random.random((123, 321)).astype("float32") grad = np.random.random((123, 321)).astype("float32") moment = np.zeros((123, 321)).astype("float32") -- GitLab