diff --git a/paddle/operators/rmsprop_op.cc b/paddle/operators/rmsprop_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..dcf3599f4dd7c99da4a12ef2c2016955b2a9de91
--- /dev/null
+++ b/paddle/operators/rmsprop_op.cc
@@ -0,0 +1,87 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/operators/rmsprop_op.h"
+
+namespace paddle {
+namespace operators {
+
+// Operator class registered below under the name `rmsprop`.
+//
+// InferShape checks that the Param, Grad and Moment inputs and the ParamOut
+// and MomentOut outputs are all present, requires Grad and Moment to have the
+// same dimensions as Param, and then gives both outputs Param's dimensions.
+class RmspropOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+ protected:
+  void InferShape(framework::InferShapeContextBase *ctx) const override {
+    // All three inputs must be connected.
+    PADDLE_ENFORCE(ctx->HasInput("Param"),
+                   "Input(param) of RmspropOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("Grad"),
+                   "Input(grad) of RmspropOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("Moment"),
+                   "Input(moment) of RmspropOp should not be null.");
+
+    // Both outputs must be connected.
+    PADDLE_ENFORCE(ctx->HasOutput("ParamOut"),
+                   "Output(param_out) of RmspropOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("MomentOut"),
+                   "Output(moment_out) of RmspropOp should not be null.");
+
+    // Param's dimensions are the reference that Grad and Moment are compared
+    // against.
+    auto param_dim = ctx->GetInputDim("Param");
+    PADDLE_ENFORCE_EQ(
+        param_dim, ctx->GetInputDim("Grad"),
+        "Param and grad input of RmspropOp should have the same dimension.");
+    PADDLE_ENFORCE_EQ(
+        param_dim, ctx->GetInputDim("Moment"),
+        "Param and moment input of RmspropOp should have the same dimension.");
+
+    // Both outputs take Param's dimensions.
+    ctx->SetOutputDim("ParamOut", param_dim);
+    ctx->SetOutputDim("MomentOut", param_dim);
+  }
+};
+
+// Declares the inputs, outputs, attributes and documentation string of the
+// rmsprop operator.
+class RmspropOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  RmspropOpMaker(framework::OpProto *proto,
+                 framework::OpAttrChecker *op_checker)
+      : OpProtoAndCheckerMaker(proto, op_checker) {
+    AddInput("Param", "Input parameter");
+    AddInput("Grad", "Input gradient");
+    AddInput("Moment", "Second moment");
+
+    AddOutput("ParamOut", "Output parameter");
+    AddOutput("MomentOut", "Output second moment");
+
+    // NOTE(review): AddAttr is called here without an explicit attribute
+    // type, while the kernel in rmsprop_op.h reads these three attributes
+    // into float locals. A template argument list may have been lost from
+    // this copy of the patch -- confirm against the repository.
+    AddAttr("learningRate", "Learning rate");
+    AddAttr("epsilon", "Constant for numerical stability");
+    AddAttr("decayRate", "Decay rate for moving average of gradients");
+    AddComment(R"DOC(
+
+RMSprop
+
+MomentOut = decayRate * Moment + (1 - decayRate) * Grad * Grad
+ParamOut = Param - learningRate * Grad / (sqrt(MomentOut) + epsilon)
+
+The original slide(Slide 29 of
+http://www.cs.toronto.edu/~tijmen/csc321/slides/lecture_slides_lec6.pdf)
+does not have the epsilon attribute. It is added here for numerical stability
+to avoid division by zero.
+
+)DOC");
+  }
+};
+}  // namespace operators
+}  // namespace paddle
+
+// Registers the operator (through the WITHOUT_GRADIENT macro) and its CPU
+// kernel under the name `rmsprop`.
+// NOTE(review): `ops::RmspropOpKernel` is named without template arguments
+// although the kernel is declared after a `template` keyword in
+// rmsprop_op.h; the argument list appears to be missing from this copy of the
+// patch -- confirm against the repository.
+namespace ops = paddle::operators;
+REGISTER_OP_WITHOUT_GRADIENT(rmsprop, ops::RmspropOp, ops::RmspropOpMaker);
+REGISTER_OP_CPU_KERNEL(rmsprop,
+                       ops::RmspropOpKernel);
diff --git a/paddle/operators/rmsprop_op.cu b/paddle/operators/rmsprop_op.cu
new file mode 100644
index 0000000000000000000000000000000000000000..52634a54816bcd5ad0ba82a56f1df95110112265
--- /dev/null
+++ b/paddle/operators/rmsprop_op.cu
@@ -0,0 +1,20 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+   Licensed under the Apache License, Version 2.0 (the "License");
+   you may not use this file except in compliance with the License.
+   You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+   Unless required by applicable law or agreed to in writing, software
+   distributed under the License is distributed on an "AS IS" BASIS,
+   WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+   See the License for the specific language governing permissions and
+   limitations under the License. */
+
+// EIGEN_USE_GPU is defined before the kernel header is included.
+#define EIGEN_USE_GPU
+#include "paddle/operators/rmsprop_op.h"
+
+// Registers the GPU kernel for `rmsprop`.
+// NOTE(review): as in rmsprop_op.cc, the template argument list of
+// `ops::RmspropOpKernel` appears to be missing from this copy of the patch --
+// confirm against the repository.
+namespace ops = paddle::operators;
+REGISTER_OP_GPU_KERNEL(rmsprop,
+                       ops::RmspropOpKernel);
diff --git a/paddle/operators/rmsprop_op.h b/paddle/operators/rmsprop_op.h
new file mode 100644
index 0000000000000000000000000000000000000000..c94c24bddd346f08ea87b803baeae9a65ed1c8f7
--- /dev/null
+++ b/paddle/operators/rmsprop_op.h
@@ -0,0 +1,54 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+#include "paddle/framework/eigen.h"
+#include "paddle/framework/op_registry.h"
+
+namespace paddle {
+namespace operators {
+
+// NOTE(review): in this copy of the patch `template` is followed directly by
+// `using` / `class`, and the calls to ctx.Output, ctx.Input, ctx.Attr,
+// mutable_data, GetEigenDevice and EigenVector::Flatten carry no template
+// arguments. The hunk header announces 54 added lines but only 53 are
+// present, so the angle-bracket lists appear to have been stripped in
+// extraction -- confirm against the repository before building.
+using Tensor = framework::Tensor;
+template
+using EigenVector = framework::EigenVector;
+
+// Kernel for the rmsprop operator.
+//
+// Compute reads the Param, Grad and Moment inputs and the learningRate,
+// epsilon and decayRate attributes, then writes
+//   MomentOut = decayRate * Moment + (1 - decayRate) * Grad * Grad
+//   ParamOut  = Param - learningRate * Grad / (sqrt(MomentOut) + epsilon)
+// which is the formula given in the operator's DOC string. Every tensor is
+// viewed through EigenVector::Flatten and the assignments are issued on the
+// device returned by ctx.GetEigenDevice().
+template
+class RmspropOpKernel : public framework::OpKernel {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const override {
+    auto param_out = ctx.Output("ParamOut");
+    auto moment_out = ctx.Output("MomentOut");
+
+    // Request mutable storage for both outputs on the context's place before
+    // they are flattened below.
+    param_out->mutable_data(ctx.GetPlace());
+    moment_out->mutable_data(ctx.GetPlace());
+
+    // Hyper-parameters are held as float locals.
+    float lr = ctx.Attr("learningRate");
+    float epsilon = ctx.Attr("epsilon");
+    float decay = ctx.Attr("decayRate");
+
+    // Flattened views of the inputs and outputs.
+    auto p = EigenVector::Flatten(*ctx.Input("Param"));
+    auto g = EigenVector::Flatten(*ctx.Input("Grad"));
+    auto m = EigenVector::Flatten(*ctx.Input("Moment"));
+    auto p_out = EigenVector::Flatten(*param_out);
+    auto m_out = EigenVector::Flatten(*moment_out);
+    auto place = ctx.GetEigenDevice();
+
+    // Order matters: the moment is updated first because the parameter
+    // update divides by the square root of the *new* moment (m_out).
+    m_out.device(place) = decay * m + (1 - decay) * g * g;
+    p_out.device(place) = p - lr * g / (m_out.sqrt() + epsilon);
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
diff --git a/python/paddle/v2/framework/tests/test_rmsprop_op.py b/python/paddle/v2/framework/tests/test_rmsprop_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..1fc59a0f11542c286c0838ba4a937cbab3d3630e
--- /dev/null
+++ b/python/paddle/v2/framework/tests/test_rmsprop_op.py
@@ -0,0 +1,37 @@
+import unittest
+import numpy as np
+from op_test import OpTest
+
+
+# Output test for the "rmsprop" operator: setUp builds random inputs and the
+# expected outputs with NumPy, using the same update formula as the operator's
+# documentation string.
+class TestRmspropOp(OpTest):
+    def setUp(self):
+        self.op_type = "rmsprop"
+
+        # Random float32 parameter and gradient of shape (123, 321); the
+        # moment starts at zero.
+        param = np.random.random((123, 321)).astype("float32")
+        grad = np.random.random((123, 321)).astype("float32")
+        moment = np.zeros((123, 321)).astype("float32")
+
+        learning_rate = 0.01
+        epsilon = 1e-6
+        decay_rate = 0.9
+
+        self.inputs = {'Param': param, 'Grad': grad, 'Moment': moment}
+
+        # Attribute keys match the names declared by the operator.
+        self.attrs = {
+            'learningRate': learning_rate,
+            'epsilon': epsilon,
+            'decayRate': decay_rate
+        }
+
+        # Reference result: update the moment first, then use the updated
+        # moment in the parameter step.
+        moment_out = decay_rate * moment + (1 - decay_rate) * grad * grad
+        param_out = param - learning_rate * grad / (np.sqrt(moment_out) +
+                                                    epsilon)
+
+        self.outputs = {'ParamOut': param_out, 'MomentOut': moment_out}
+
+    def test_check_output(self):
+        # check_output is inherited from OpTest; presumably it runs the op on
+        # self.inputs / self.attrs and compares against self.outputs --
+        # confirm in op_test.py.
+        self.check_output()
+
+
+if __name__ == "__main__":
+    unittest.main()