From 8cbf79a3fc4db601b1f7fbdebc70bb2a115d0411 Mon Sep 17 00:00:00 2001
From: chentianyu03
Date: Wed, 13 Apr 2022 10:13:23 +0800
Subject: [PATCH] [Yaml]Add adam yaml (#41561)

* add adam yaml

* add adam final_state api

* add adam_impl
---
 paddle/phi/api/lib/api_custom_impl.cc         | 181 ++++++++++++++++++
 paddle/phi/api/lib/api_custom_impl.h          |  18 ++
 .../fluid/tests/unittests/test_adam_op.py     |  13 ++
 .../fluid/tests/unittests/test_optimizer.py   |   6 +
 python/paddle/optimizer/adam.py               |  18 +-
 python/paddle/utils/code_gen/api.yaml         |   6 +
 6 files changed, 241 insertions(+), 1 deletion(-)

diff --git a/paddle/phi/api/lib/api_custom_impl.cc b/paddle/phi/api/lib/api_custom_impl.cc
index 0f1cbc3f191..d7f148fff81 100644
--- a/paddle/phi/api/lib/api_custom_impl.cc
+++ b/paddle/phi/api/lib/api_custom_impl.cc
@@ -33,6 +33,187 @@ limitations under the License. */
 namespace paddle {
 namespace experimental {
 
+std::tuple<Tensor, Tensor, Tensor, Tensor, Tensor, Tensor> adam_impl(
+    const Tensor& param,
+    const Tensor& grad,
+    const Tensor& learning_rate,
+    const Tensor& moment1,
+    const Tensor& moment2,
+    const Tensor& beta1_pow,
+    const Tensor& beta2_pow,
+    paddle::optional<const Tensor&> master_param,
+    paddle::optional<const Tensor&> skip_update,
+    const Scalar& beta1,
+    const Scalar& beta2,
+    const Scalar& epsilon,
+    bool lazy_mode,
+    int64_t min_row_size_to_use_multithread,
+    bool multi_precision,
+    bool use_global_beta_pow) {
+  Backend kernel_backend = Backend::UNDEFINED;
+  DataLayout kernel_layout = DataLayout::UNDEFINED;
+  DataType kernel_data_type = DataType::UNDEFINED;
+  if (kernel_backend == Backend::UNDEFINED ||
+      kernel_layout == DataLayout::UNDEFINED ||
+      kernel_data_type == DataType::UNDEFINED) {
+    auto kernel_key_set = ParseKernelKeyByInputArgs(param);
+    auto kernel_key = kernel_key_set.GetHighestPriorityKernelKey();
+    if (kernel_backend == Backend::UNDEFINED) {
+      kernel_backend = kernel_key.backend();
+    }
+    if (kernel_layout == DataLayout::UNDEFINED) {
+      kernel_layout = kernel_key.layout();
+    }
+    if (kernel_data_type == DataType::UNDEFINED) {
+      kernel_data_type = kernel_key.dtype();
+    }
+  }
+  std::string kernel_name = "adam";
+  const auto& kernel = phi::KernelFactory::Instance().SelectKernelOrThrowError(
+      kernel_name, {kernel_backend, kernel_layout, kernel_data_type});
+  VLOG(6) << kernel_name << " API kernel key: [" << kernel_backend << ", "
+          << kernel_layout << ", " << kernel_data_type << "]";
+  VLOG(6) << kernel_name << " API kernel: " << kernel;
+
+  auto* dev_ctx = GetDeviceContextByBackend(kernel_backend);
+
+  auto input_param = PrepareData(param, kernel.InputAt(0), {});
+  auto input_grad = PrepareData(grad, kernel.InputAt(1), {});
+  auto input_lr = PrepareData(learning_rate, kernel.InputAt(2), {});
+  auto input_moment1 = PrepareData(moment1, kernel.InputAt(3), {});
+  auto input_moment2 = PrepareData(moment2, kernel.InputAt(4), {});
+  auto input_beta1_pow = PrepareData(beta1_pow, kernel.InputAt(5), {});
+  auto input_beta2_pow = PrepareData(beta2_pow, kernel.InputAt(6), {});
+  paddle::optional<const phi::DenseTensor&> input_master_param(paddle::none);
+  auto input_master_param_ptr =
+      PrepareData(master_param, kernel.InputAt(7), {});
+  paddle::optional<const phi::DenseTensor&> input_skip_update(paddle::none);
+  auto input_skip_update_ptr = PrepareData(skip_update, kernel.InputAt(8), {});
+
+  std::tuple<Tensor, Tensor, Tensor, Tensor, Tensor, Tensor> api_output;
+  auto kernel_out_0 = input_param.get();
+  auto kernel_out_1 = input_moment1.get();
+  auto kernel_out_2 = input_moment2.get();
+  auto kernel_out_3 = input_beta1_pow.get();
+  auto kernel_out_4 = input_beta2_pow.get();
+  phi::DenseTensor* kernel_out_5 = nullptr;
+  if (input_master_param_ptr) {
+    input_master_param =
+        paddle::make_optional<const phi::DenseTensor&>(*input_master_param_ptr);
+    kernel_out_5 =
+        paddle::make_optional<phi::DenseTensor&>(*input_master_param_ptr)
+            .get_ptr();
+  }
+
+  if (input_skip_update_ptr) {
+    input_skip_update =
+        paddle::make_optional<const phi::DenseTensor&>(*input_skip_update_ptr);
+  }
+
+  paddle::optional<const phi::MetaTensor&> input_meta_ref_master_param(
+      paddle::none);
+  phi::DenseTensor dt;
+  phi::MetaTensor input_meta_tmp_master_param(dt);
+  if (input_master_param_ptr) {
+    input_meta_tmp_master_param.set_dtype(input_master_param_ptr->dtype());
+    input_meta_tmp_master_param.set_dims(input_master_param_ptr->dims());
+    input_meta_tmp_master_param.set_layout(input_master_param_ptr->layout());
+    input_meta_ref_master_param = input_meta_tmp_master_param;
+  }
+
+  paddle::optional<const phi::MetaTensor&> input_meta_ref_skip_update(
+      paddle::none);
+  phi::DenseTensor dt1;
+  phi::MetaTensor input_meta_tmp_skip_update(dt1);
+  if (input_skip_update_ptr) {
+    input_meta_tmp_skip_update.set_dtype(input_skip_update_ptr->dtype());
+    input_meta_tmp_skip_update.set_dims(input_skip_update_ptr->dims());
+    input_meta_tmp_skip_update.set_layout(input_skip_update_ptr->layout());
+    input_meta_ref_skip_update = input_meta_tmp_skip_update;
+  }
+
+  phi::MetaTensor meta_out_0(kernel_out_0);
+  phi::MetaTensor meta_out_1(kernel_out_1);
+  phi::MetaTensor meta_out_2(kernel_out_2);
+  phi::MetaTensor meta_out_3(kernel_out_3);
+  phi::MetaTensor meta_out_4(kernel_out_4);
+  phi::MetaTensor meta_out_5(kernel_out_5);
+
+  phi::AdamInferMeta(MakeMetaTensor(*input_param),
+                     MakeMetaTensor(*input_grad),
+                     MakeMetaTensor(*input_lr),
+                     MakeMetaTensor(*input_moment1),
+                     MakeMetaTensor(*input_moment2),
+                     MakeMetaTensor(*input_beta1_pow),
+                     MakeMetaTensor(*input_beta2_pow),
+                     input_meta_ref_master_param,
+                     input_meta_ref_skip_update,
+                     beta1,
+                     beta2,
+                     epsilon,
+                     lazy_mode,
+                     min_row_size_to_use_multithread,
+                     multi_precision,
+                     use_global_beta_pow,
+                     &meta_out_0,
+                     &meta_out_1,
+                     &meta_out_2,
+                     &meta_out_3,
+                     &meta_out_4,
+                     &meta_out_5);
+
+  using kernel_signature = void (*)(const platform::DeviceContext&,
+                                    const phi::DenseTensor&,
+                                    const phi::DenseTensor&,
+                                    const phi::DenseTensor&,
+                                    const phi::DenseTensor&,
+                                    const phi::DenseTensor&,
+                                    const phi::DenseTensor&,
+                                    const phi::DenseTensor&,
+                                    paddle::optional<const phi::DenseTensor&>,
+                                    paddle::optional<const phi::DenseTensor&>,
+                                    const Scalar&,
+                                    const Scalar&,
+                                    const Scalar&,
+                                    bool,
+                                    int64_t,
+                                    bool,
+                                    bool,
+                                    phi::DenseTensor*,
+                                    phi::DenseTensor*,
+                                    phi::DenseTensor*,
+                                    phi::DenseTensor*,
+                                    phi::DenseTensor*,
+                                    phi::DenseTensor*);
+  auto* kernel_fn = kernel.GetVariadicKernelFn<kernel_signature>();
+
+  (*kernel_fn)(*dev_ctx,
+               *input_param,
+               *input_grad,
+               *input_lr,
+               *input_moment1,
+               *input_moment2,
+               *input_beta1_pow,
+               *input_beta2_pow,
+               input_master_param,
+               input_skip_update,
+               beta1,
+               beta2,
+               epsilon,
+               lazy_mode,
+               min_row_size_to_use_multithread,
+               multi_precision,
+               use_global_beta_pow,
+               kernel_out_0,
+               kernel_out_1,
+               kernel_out_2,
+               kernel_out_3,
+               kernel_out_4,
+               kernel_out_5);
+
+  return api_output;
+}
+
 ////////////////// Forward api impls //////////////////////
 
 Tensor conv2d_impl(const Tensor& input,
diff --git a/paddle/phi/api/lib/api_custom_impl.h b/paddle/phi/api/lib/api_custom_impl.h
index 0d1ba3e98e5..5d46ed69181 100644
--- a/paddle/phi/api/lib/api_custom_impl.h
+++ b/paddle/phi/api/lib/api_custom_impl.h
@@ -30,6 +30,24 @@ namespace experimental {
 
 ////////////////// Forward api impls //////////////////////
 
+std::tuple<Tensor, Tensor, Tensor, Tensor, Tensor, Tensor> adam_impl(
+    const Tensor& param,
+    const Tensor& grad,
+    const Tensor& learning_rate,
+    const Tensor& moment1,
+    const Tensor& moment2,
+    const Tensor& beta1_pow,
+    const Tensor& beta2_pow,
+    paddle::optional<const Tensor&> master_param,
+    paddle::optional<const Tensor&> skip_update,
+    const Scalar& beta1,
+    const Scalar& beta2,
+    const Scalar& epsilon,
+    bool lazy_mode,
+    int64_t min_row_size_to_use_multithread,
+    bool multi_precision,
+    bool use_global_beta_pow);
+
 std::tuple<Tensor, Tensor, Tensor, Tensor, Tensor, Tensor> batch_norm_impl(
     const Tensor& x,
     const Tensor& scale,
diff --git a/python/paddle/fluid/tests/unittests/test_adam_op.py b/python/paddle/fluid/tests/unittests/test_adam_op.py
index d05c9a3c313..d254cd286e6 100644
--- a/python/paddle/fluid/tests/unittests/test_adam_op.py
+++ b/python/paddle/fluid/tests/unittests/test_adam_op.py
@@ -21,6 +21,7 @@ from paddle.fluid import core
 from paddle.fluid.op import Operator
 import paddle.fluid as fluid
 import paddle
+from paddle.fluid.framework import _test_eager_guard
 
 
 class TestAdamOp1(OpTest):
@@ -189,6 +190,10 @@ class TestAdamOpMultipleSteps(OpTest):
         self.inputs['Grad'] = np.random.uniform(
             -1, 1, (102, 105)).astype("float32")
 
+    def test_api_eager_dygraph(self):
+        with _test_eager_guard():
+            self.test_check_output()
+
 
 def adam_step(inputs, attributes):
     '''
@@ -732,6 +737,14 @@ class TestAdamOpV2(unittest.TestCase):
             adam.step()
         paddle.enable_static()
 
+    def test_api_eager_dygraph(self):
+        with _test_eager_guard():
+            self.test_adam_op_dygraph()
+            self.test_adam_op_with_state_dict()
+            self.test_adam_with_grad_clip()
+            self.test_adam_op_with_set_lr()
+            self.test_adam_op_with_sparse_input_and_weight_decay()
+
 
 class TestAdamOptimizer(unittest.TestCase):
     def _test(self,
diff --git a/python/paddle/fluid/tests/unittests/test_optimizer.py b/python/paddle/fluid/tests/unittests/test_optimizer.py
index e8820d5a870..ba1e9be815d 100644
--- a/python/paddle/fluid/tests/unittests/test_optimizer.py
+++ b/python/paddle/fluid/tests/unittests/test_optimizer.py
@@ -24,6 +24,7 @@ import paddle.compat as cpt
 import numpy as np
 from paddle.fluid.backward import append_backward
 from paddle.fluid.framework import Program, program_guard, convert_np_dtype_to_dtype_
+from paddle.fluid.framework import _test_eager_guard
 import paddle
 from paddle.io import Dataset
 import numpy
@@ -1114,6 +1115,11 @@ class TestOptimizerDtype(unittest.TestCase):
     def test_float32(self):
         self.check_with_dtype('float32')
 
+    def test_api_eager_dygraph(self):
+        with _test_eager_guard():
+            self.test_float64()
+            self.test_float32()
+
 
 class TestMasterWeightSaveForFP16(unittest.TestCase):
     '''
diff --git a/python/paddle/optimizer/adam.py b/python/paddle/optimizer/adam.py
index aae3d97a795..de09193ac79 100644
--- a/python/paddle/optimizer/adam.py
+++ b/python/paddle/optimizer/adam.py
@@ -336,7 +336,23 @@ class Adam(Optimizer):
 
         lr = self._create_param_lr(param_and_grad)
         # create the adam optimize op
-        if framework._non_static_mode():
+        if framework.in_dygraph_mode():
+            found_inf = self._get_auxiliary_var('found_inf')
+
+            _beta1 = self._beta1 if not isinstance(
+                self._beta1, Variable) else self._beta1.numpy().item(0)
+            _beta2 = self._beta2 if not isinstance(
+                self._beta2, Variable) else self._beta2.numpy().item(0)
+
+            _, _, _, _, _, _ = _C_ops.final_state_adam(
+                param_and_grad[0], param_and_grad[1], lr, moment1, moment2,
+                beta1_pow_acc, beta2_pow_acc, master_weight, found_inf, _beta1,
+                _beta2, self._epsilon, self._lazy_mode, 1000, find_master,
+                False)
+
+            return None
+
+        if framework._in_legacy_dygraph():
             _beta1 = self._beta1 if not isinstance(
                 self._beta1, Variable) else self._beta1.numpy().item(0)
 
diff --git a/python/paddle/utils/code_gen/api.yaml b/python/paddle/utils/code_gen/api.yaml
index 6b58c840613..08028ba1718 100644
--- a/python/paddle/utils/code_gen/api.yaml
+++ b/python/paddle/utils/code_gen/api.yaml
@@ -45,6 +45,12 @@
   kernel :
     func : adadelta
 
+- api : adam
+  args : (Tensor param, Tensor grad, Tensor learning_rate, Tensor moment1, Tensor moment2, Tensor beta1_pow, Tensor beta2_pow, Tensor master_param, Tensor skip_update, Scalar beta1, Scalar beta2, Scalar epsilon, bool lazy_mode, int64_t min_row_size_to_use_multithread, bool multi_precision, bool use_global_beta_pow)
+  output : Tensor(param_out), Tensor(moment1_out), Tensor(moment2_out), Tensor(beta1_pow_out), Tensor(beta2_pow_out), Tensor(master_param_outs)
+  optional : master_param, skip_update
+  invoke : adam_impl(param, grad, learning_rate, moment1, moment2, beta1_pow, beta2_pow, master_param, skip_update, beta1, beta2, epsilon, lazy_mode, min_row_size_to_use_multithread, multi_precision, use_global_beta_pow)
+
 - api : adamax
   args : (Tensor param, Tensor grad, Tensor learning_rate, Tensor moment, Tensor inf_norm, Tensor beta1_pow, float beta1, float beta2, float epsilon)
   output : Tensor(param_out), Tensor(avg_squared_grad_out), Tensor(avg_squared_update_out)
-- 
GitLab
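
For reference, the in_dygraph_mode() branch added to python/paddle/optimizer/adam.py above is reached by an ordinary dygraph training step; the unit tests wrap existing cases in _test_eager_guard so that this eager path is the one exercised. A minimal usage sketch follows (the layer sizes and random input are illustrative only, and dispatch to _C_ops.final_state_adam additionally depends on eager mode being enabled, as the guard in the tests does):

    import paddle

    # Dygraph is the default in Paddle 2.x; with eager mode enabled,
    # Adam.step() takes the framework.in_dygraph_mode() branch above
    # and calls _C_ops.final_state_adam generated from the new yaml entry.
    linear = paddle.nn.Linear(10, 1)
    adam = paddle.optimizer.Adam(learning_rate=0.01,
                                 parameters=linear.parameters())

    x = paddle.rand([4, 10])
    loss = linear(x).mean()
    loss.backward()
    adam.step()        # updates param, moment1/2 and beta1/2 pow accumulators
    adam.clear_grad()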