From 1ac654a69f3713cf12d47aff0b853b63de354803 Mon Sep 17 00:00:00 2001 From: Kexin Zhao Date: Mon, 2 Oct 2017 13:48:35 -0700 Subject: [PATCH] Implementing the Adagrad optimizer step operator --- paddle/operators/adagrad_op.cc | 85 +++++++++++++++++++ paddle/operators/adagrad_op.cu | 20 +++++ paddle/operators/adagrad_op.h | 53 ++++++++++++ .../v2/framework/tests/test_adagrad_op.py | 32 +++++++ 4 files changed, 190 insertions(+) create mode 100644 paddle/operators/adagrad_op.cc create mode 100644 paddle/operators/adagrad_op.cu create mode 100644 paddle/operators/adagrad_op.h create mode 100644 python/paddle/v2/framework/tests/test_adagrad_op.py diff --git a/paddle/operators/adagrad_op.cc b/paddle/operators/adagrad_op.cc new file mode 100644 index 0000000000..03e22cc600 --- /dev/null +++ b/paddle/operators/adagrad_op.cc @@ -0,0 +1,85 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/
+
+#include "paddle/operators/adagrad_op.h"
+
+namespace paddle {
+namespace operators {
+// Operator definition for one Adagrad step: checks inputs, infers out shapes.
+class AdagradOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+ protected:
+  void InferShape(framework::InferShapeContextBase *ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput("param"),
+                   "Input(param) of AdagradOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("grad"),
+                   "Input(grad) of AdagradOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("moment"),
+                   "Input(moment) of AdagradOp should not be null.");
+
+    PADDLE_ENFORCE(ctx->HasOutput("param_out"),
+                   "Output(param_out) of AdagradOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("moment_out"),
+                   "Output(moment_out) of AdagradOp should not be null.");
+    // grad and moment must match param's shape, which both outputs inherit.
+    auto param_dim = ctx->GetInputDim("param");
+    PADDLE_ENFORCE_EQ(
+        param_dim, ctx->GetInputDim("grad"),
+        "Param and grad input of AdagradOp should have the same dimension.");
+    PADDLE_ENFORCE_EQ(
+        param_dim, ctx->GetInputDim("moment"),
+        "Param and moment input of AdagradOp should have the same dimension.");
+
+    ctx->SetOutputDim("param_out", param_dim);
+    ctx->SetOutputDim("moment_out", param_dim);
+  }
+};
+
+class AdagradOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  AdagradOpMaker(framework::OpProto *proto,
+                 framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("param", "Input parameter");
+    AddInput("grad", "Input gradient");
+    AddInput("moment", "Second moment");
+
+    AddOutput("param_out", "Output parameter");
+    AddOutput("moment_out", "Output second moment");
+    // Scalar float hyper-parameters; no default is registered for either.
+    AddAttr<float>("learning_rate", "Learning rate");
+    AddAttr<float>("epsilon", "Constant for numerical stability");
+    AddComment(R"DOC(
+
+Adaptive Gradient Algorithm (Adagrad).
+
+moment_out = moment + grad * grad
+param_out = param - learning_rate * grad / (sqrt(moment_out) + epsilon)
+
+The original paper(http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
+does not have the epsilon attribute. It is added here for numerical stability
+by avoiding division by zero.
+
+)DOC");
+  }
+};
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OP_WITHOUT_GRADIENT(adagrad, ops::AdagradOp, ops::AdagradOpMaker);
+REGISTER_OP_CPU_KERNEL(adagrad,
+                       ops::AdagradOpKernel<paddle::platform::CPUPlace, float>);
diff --git a/paddle/operators/adagrad_op.cu b/paddle/operators/adagrad_op.cu
new file mode 100644
index 0000000000..be16973c54
--- /dev/null
+++ b/paddle/operators/adagrad_op.cu
@@ -0,0 +1,20 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+#define EIGEN_USE_GPU
+#include "paddle/operators/adagrad_op.h"
+
+namespace ops = paddle::operators;
+REGISTER_OP_GPU_KERNEL(adagrad,
+                       ops::AdagradOpKernel<paddle::platform::GPUPlace, float>);
diff --git a/paddle/operators/adagrad_op.h b/paddle/operators/adagrad_op.h
new file mode 100644
index 0000000000..ca1836c3fa
--- /dev/null
+++ b/paddle/operators/adagrad_op.h
@@ -0,0 +1,53 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+#include "paddle/framework/eigen.h"
+#include "paddle/framework/op_registry.h"
+
+namespace paddle {
+namespace operators {
+
+using Tensor = framework::Tensor;
+template <typename T, int MajorType = Eigen::RowMajor,
+          typename IndexType = Eigen::DenseIndex>
+using EigenVector = framework::EigenVector<T, MajorType, IndexType>;
+
+template <typename Place, typename T>  // Place: device tag, T: element type
+class AdagradOpKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto param_out = ctx.Output<Tensor>("param_out");
+    auto moment_out = ctx.Output<Tensor>("moment_out");
+    // Request T-typed storage for both outputs on the current place.
+    param_out->mutable_data<T>(ctx.GetPlace());
+    moment_out->mutable_data<T>(ctx.GetPlace());
+    // Hyper-parameters are read as float regardless of T.
+    float lr = ctx.Attr<float>("learning_rate");
+    float epsilon = ctx.Attr<float>("epsilon");
+    // Flatten all tensors to 1-D Eigen vectors (the update is elementwise).
+    auto p = EigenVector<T>::Flatten(*ctx.Input<Tensor>("param"));
+    auto g = EigenVector<T>::Flatten(*ctx.Input<Tensor>("grad"));
+    auto m = EigenVector<T>::Flatten(*ctx.Input<Tensor>("moment"));
+    auto p_out = EigenVector<T>::Flatten(*param_out);
+    auto m_out = EigenVector<T>::Flatten(*moment_out);
+    auto place = ctx.GetEigenDevice<Place>();
+    // Accumulate grad^2 first: the parameter step uses the updated moment.
+    m_out.device(place) = m + g * g;
+    p_out.device(place) = p - lr * g / (m_out.sqrt() + epsilon);
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/python/paddle/v2/framework/tests/test_adagrad_op.py b/python/paddle/v2/framework/tests/test_adagrad_op.py
new file mode 100644
index 0000000000..b3f8b812e1
--- /dev/null
+++ b/python/paddle/v2/framework/tests/test_adagrad_op.py
@@ -0,0 +1,32 @@
+import unittest
+import numpy as np
+from op_test import OpTest
+
+
+class TestAdagradOp(OpTest):
+    def setUp(self):
+        self.op_type = "adagrad"
+
+        param = np.random.random((123, 321)).astype("float32")
+        grad = np.random.random((123, 321)).astype("float32")
+        moment = np.zeros((123, 321)).astype("float32")
+
+        learning_rate = 0.01
+        epsilon = 1e-6
+
+        self.inputs = {'param': param, 'grad': grad, 'moment': moment}
+
+        self.attrs = {'learning_rate': learning_rate, 'epsilon': epsilon}
+        # Expected outputs, computed with NumPy from the same update formula.
+        moment_out = moment + grad * grad
+        param_out = param - learning_rate * grad / (np.sqrt(moment_out) +
+                                                    epsilon)
+
+        self.outputs = {'param_out': param_out, 'moment_out': moment_out}
+
+    def test_check_output(self):
+        self.check_output()
+
+
+if __name__ == "__main__":
+    unittest.main()
-- 
GitLab