From 710b664d4726cb168f99176dcff94883e75208f1 Mon Sep 17 00:00:00 2001
From: RedContritio
Date: Thu, 13 Apr 2023 10:15:05 +0800
Subject: [PATCH] support auto generate for op adamax optimizer (#52702)

---
 .../fluid/operators/optimizers/adamax_op.cc   | 112 ------------------
 .../optimizers/unity_build_rule.cmake         |   2 -
 paddle/phi/api/yaml/legacy_ops.yaml           |  11 --
 paddle/phi/api/yaml/op_compat.yaml            |   6 +
 paddle/phi/api/yaml/ops.yaml                  |  11 ++
 paddle/phi/ops/compat/adamax_sig.cc           |  49 --------
 6 files changed, 17 insertions(+), 174 deletions(-)
 delete mode 100644 paddle/fluid/operators/optimizers/adamax_op.cc
 delete mode 100644 paddle/phi/ops/compat/adamax_sig.cc

diff --git a/paddle/fluid/operators/optimizers/adamax_op.cc b/paddle/fluid/operators/optimizers/adamax_op.cc
deleted file mode 100644
index 881b2eee6af..00000000000
--- a/paddle/fluid/operators/optimizers/adamax_op.cc
+++ /dev/null
@@ -1,112 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/framework/infershape_utils.h"
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/phi/core/infermeta_utils.h"
-#include "paddle/phi/infermeta/multiary.h"
-
-namespace paddle {
-namespace operators {
-
-class AdamaxOp : public framework::OperatorWithKernel {
- public:
-  using framework::OperatorWithKernel::OperatorWithKernel;
-
-  phi::KernelKey GetExpectedKernelType(
-      const framework::ExecutionContext &ctx) const override {
-    return phi::KernelKey(OperatorWithKernel::IndicateVarDataType(ctx, "Param"),
-                          ctx.GetPlace());
-  }
-};
-
-class AdamaxOpMaker : public framework::OpProtoAndCheckerMaker {
- public:
-  void Make() override {
-    AddInput("Param", "(Tensor) Input parameter");
-    AddInput("Grad", "(Tensor) Input gradient");
-    AddInput("LearningRate", "(Tensor) Learning rate");
-    AddInput("Moment", "(Tensor) First moment");
-    AddInput("InfNorm",
-             "(Tensor) "
-             "Input exponentially weighted infinity norm");
-    AddInput("Beta1Pow", "(Tensor) Input beta1 power accumulator");
-    AddInput("MasterParam", "FP32 master weight for AMP.").AsDispensable();
-    AddOutput("ParamOut", "(Tensor) Output parameter");
-    AddOutput("MomentOut", "(Tensor) Output first moment");
-    AddOutput("InfNormOut",
-              "(Tensor) "
-              "Output exponentially weighted infinity norm");
-    AddOutput("MasterParamOut",
-              "The updated FP32 master weight for AMP. "
-              "It shared memory with Input(MasterParam).")
-        .AsDispensable();
-
-    AddAttr<float>("beta1",
-                   "(float, default 0.9) "
-                   "Exponential decay rate for the "
-                   "1st moment estimates.")
-        .SetDefault(0.9f);
-    AddAttr<float>("beta2",
-                   "(float, default 0.999) "
-                   "exponential decay rate for the weighted "
-                   "infinity norm estimates.")
-        .SetDefault(0.999f);
-    AddAttr<float>("epsilon",
-                   "(float, default 1.0e-8) "
-                   "Constant for numerical stability")
-        .SetDefault(1.0e-8f);
-    AddAttr<bool>("multi_precision",
-                  "(bool, default false) "
-                  "Whether to use multi-precision during weight updating.")
-        .SetDefault(false);
-    AddComment(R"DOC(
-Adamax Optimizer.
-
-We implement the Adamax optimizer from Section 7 of the Adam
-paper: https://arxiv.org/abs/1412.6980. Adamax is a variant of the
-Adam algorithm based on the infinity norm.
-
-Adamax updates:
-
-$$
-moment\_out = \beta_1 * moment + (1 - \beta_1) * grad \\
-inf\_norm\_out = max(\beta_2 * inf\_norm + \epsilon, |grad|) \\
-learning\_rate = \frac{learning\_rate}{1 - \beta_{1\_pow}} \\
-param\_out = param - learning\_rate * \frac{moment\_out}{inf\_norm\_out}
-$$
-
-The original paper does not have an epsilon attribute.
-However, it is added here for numerical stability to prevent the
-division by 0 error.
-
-)DOC");
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-DECLARE_INFER_SHAPE_FUNCTOR(adamax,
-                            AdamaxInferMetaFunctor,
-                            PD_INFER_META(phi::AdamaxInferMeta));
-
-REGISTER_OPERATOR(
-    adamax,
-    ops::AdamaxOp,
-    ops::AdamaxOpMaker,
-    paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
-    paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>,
-    AdamaxInferMetaFunctor);
diff --git a/paddle/fluid/operators/optimizers/unity_build_rule.cmake b/paddle/fluid/operators/optimizers/unity_build_rule.cmake
index 676d554bc00..6936175d874 100644
--- a/paddle/fluid/operators/optimizers/unity_build_rule.cmake
+++ b/paddle/fluid/operators/optimizers/unity_build_rule.cmake
@@ -10,7 +10,6 @@ register_unity_group(
   lars_momentum_op.cc
   proximal_adagrad_op.cc
   adam_op.cc
-  adamax_op.cc
   dgc_momentum_op.cc
   proximal_gd_op.cc
   decayed_adagrad_op.cc
@@ -26,7 +25,6 @@ register_unity_group(
   proximal_adagrad_op.cu
   adagrad_op.cu
   adam_op.cu
-  adamax_op.cu
   decayed_adagrad_op.cu
   adadelta_op.cu
   lamb_op.cu
diff --git a/paddle/phi/api/yaml/legacy_ops.yaml b/paddle/phi/api/yaml/legacy_ops.yaml
index abd42601a8f..cd499a2d049 100755
--- a/paddle/phi/api/yaml/legacy_ops.yaml
+++ b/paddle/phi/api/yaml/legacy_ops.yaml
@@ -33,17 +33,6 @@
   optional : master_param, skip_update
   inplace : (param -> param_out), (moment1 -> moment1_out), (moment2 -> moment2_out), (beta1_pow -> beta1_pow_out), (beta2_pow -> beta2_pow_out), (master_param -> master_param_outs)
 
-- op : adamax_
-  args : (Tensor param, Tensor grad, Tensor learning_rate, Tensor moment, Tensor inf_norm, Tensor beta1_pow, Tensor master_param, float beta1, float beta2, float epsilon, bool multi_precision)
-  output : Tensor(param_out), Tensor(avg_squared_grad_out), Tensor(avg_squared_update_out), Tensor(master_param_outs)
-  infer_meta :
-    func : AdamaxInferMeta
-  kernel :
-    func : adamax
-    data_type : param
-  optional : master_param
-  inplace : (param -> param_out), (moment -> avg_squared_grad_out), (inf_norm -> avg_squared_update_out), (master_param ->master_param_outs)
-
 - op : adamw_
   args : (Tensor param, Tensor grad, Tensor learning_rate, Tensor moment1, Tensor moment2, Tensor beta1_pow, Tensor beta2_pow, Tensor master_param, Tensor skip_update, Scalar beta1, Scalar beta2, Scalar epsilon, float lr_ratio, float coeff, bool with_decay, bool lazy_mode, int64_t min_row_size_to_use_multithread, bool multi_precision, bool use_global_beta_pow)
   output : Tensor(param_out), Tensor(moment1_out), Tensor(moment2_out), Tensor(beta1_pow_out), Tensor(beta2_pow_out), Tensor(master_param_outs)
diff --git a/paddle/phi/api/yaml/op_compat.yaml b/paddle/phi/api/yaml/op_compat.yaml
index 44f065feb7d..cf5453a3846 100644
--- a/paddle/phi/api/yaml/op_compat.yaml
+++ b/paddle/phi/api/yaml/op_compat.yaml
@@ -50,6 +50,12 @@
   outputs :
     { param_out : ParamOut, moment_out : MomentOut, master_param_out : MasterParamOut }
 
+- op : adamax_
+  inputs :
+    {param : Param, grad: Grad, learning_rate : LearningRate, moment : Moment, inf_norm : InfNorm, beta1_pow : Beta1Pow, master_param : MasterParam}
+  outputs :
+    {param_out : ParamOut, moment_out : MomentOut, inf_norm_out : InfNormOut, master_param_out : MasterParamOut}
+
 - op : add (elementwise_add)
   backward : add_grad (elementwise_add_grad)
   extra :
diff --git a/paddle/phi/api/yaml/ops.yaml b/paddle/phi/api/yaml/ops.yaml
index 20adbd31aca..2ad4d563d9f 100644
--- a/paddle/phi/api/yaml/ops.yaml
+++ b/paddle/phi/api/yaml/ops.yaml
@@ -44,6 +44,17 @@
   optional : master_param, master_param_out
   inplace : (param -> param_out), (moment -> moment_out), (master_param -> master_param_out)
 
+- op : adamax_
+  args : (Tensor param, Tensor grad, Tensor learning_rate, Tensor moment, Tensor inf_norm, Tensor beta1_pow, Tensor master_param, float beta1 = 0.9f, float beta2 = 0.999f, float epsilon = 1.0e-8f, bool multi_precision = false)
+  output : Tensor(param_out), Tensor(moment_out), Tensor(inf_norm_out), Tensor(master_param_out)
+  infer_meta :
+    func : AdamaxInferMeta
+  kernel :
+    func : adamax
+    data_type : param
+  optional : master_param, master_param_out
+  inplace : (param -> param_out), (moment -> moment_out), (inf_norm -> inf_norm_out), (master_param ->master_param_out)
+
 - op : addmm
   args : (Tensor input, Tensor x, Tensor y, float beta=1.0, float alpha=1.0)
   output : Tensor
diff --git a/paddle/phi/ops/compat/adamax_sig.cc b/paddle/phi/ops/compat/adamax_sig.cc
deleted file mode 100644
index 9c012de3771..00000000000
--- a/paddle/phi/ops/compat/adamax_sig.cc
+++ /dev/null
@@ -1,49 +0,0 @@
-// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
-//
-// Licensed under the Apache License, Version 2.0 (the "License");
-// you may not use this file except in compliance with the License.
-// You may obtain a copy of the License at
-//
-//     http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing, software
-// distributed under the License is distributed on an "AS IS" BASIS,
-// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-// See the License for the specific language governing permissions and
-// limitations under the License.
-#include <string>
-
-#include "paddle/phi/core/compat/op_utils.h"
-#include "paddle/utils/small_vector.h"
-
-namespace phi {
-
-KernelSignature AdamaxOpArgumentMapping(const ArgumentMappingContext& ctx) {
-  paddle::small_vector<const char*> in_names = {"Param",
-                                                "Grad",
-                                                "LearningRate",
-                                                "Moment",
-                                                "InfNorm",
-                                                "Beta1Pow",
-                                                "MasterParam"};
-  paddle::small_vector<const char*> out_names = {
-      "ParamOut", "MomentOut", "InfNormOut", "MasterParamOut"};
-  paddle::small_vector<const char*> attr_names;
-  attr_names.emplace_back("beta1");
-  attr_names.emplace_back("beta2");
-  attr_names.emplace_back("epsilon");
-  attr_names.emplace_back("multi_precision");
-
-  if (ctx.IsDenseTensorInput("Grad")) {
-    return KernelSignature("adamax",
-                           std::move(in_names),
-                           std::move(attr_names),
-                           std::move(out_names));
-  } else {
-    return KernelSignature("unregistered", {}, {}, {});
-  }
-}
-
-}  // namespace phi
-
-PD_REGISTER_ARG_MAPPING_FN(adamax, phi::AdamaxOpArgumentMapping);
--
GitLab
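
The update rule documented in the removed DOC block is unchanged by this move;
for reference, a minimal NumPy sketch of a single Adamax step (the helper name
adamax_step and the shapes and values below are illustrative only, not Paddle
API):

    import numpy as np

    def adamax_step(param, grad, moment, inf_norm, lr, beta1_pow,
                    beta1=0.9, beta2=0.999, epsilon=1.0e-8):
        # moment_out = beta1 * moment + (1 - beta1) * grad
        moment_out = beta1 * moment + (1.0 - beta1) * grad
        # inf_norm_out = max(beta2 * inf_norm + epsilon, |grad|)
        inf_norm_out = np.maximum(beta2 * inf_norm + epsilon, np.abs(grad))
        # bias-corrected step size: lr / (1 - beta1_pow)
        lr_t = lr / (1.0 - beta1_pow)
        # param_out = param - lr_t * moment_out / inf_norm_out
        param_out = param - lr_t * moment_out / inf_norm_out
        return param_out, moment_out, inf_norm_out

    param = np.zeros(4, dtype=np.float32)
    grad = np.ones(4, dtype=np.float32)
    moment = np.zeros(4, dtype=np.float32)
    inf_norm = np.zeros(4, dtype=np.float32)
    param, moment, inf_norm = adamax_step(param, grad, moment, inf_norm,
                                          lr=0.001, beta1_pow=0.9)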
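
The Python entry point is also unchanged; a minimal sketch that exercises the
generated adamax_ op end to end, assuming a Paddle build that includes this
change (layer sizes and hyperparameters below are arbitrary):

    import paddle

    linear = paddle.nn.Linear(10, 1)
    opt = paddle.optimizer.Adamax(learning_rate=0.001,
                                  beta1=0.9,
                                  beta2=0.999,
                                  epsilon=1.0e-8,
                                  parameters=linear.parameters())

    x = paddle.rand([4, 10], dtype='float32')
    loss = paddle.mean(linear(x))
    loss.backward()
    opt.step()        # should route through the adamax kernel declared in ops.yaml
    opt.clear_grad()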