From 78f4c803f370f461f9e62e905b7870a65d05b55c Mon Sep 17 00:00:00 2001
From: Kexin Zhao
Date: Thu, 5 Oct 2017 21:47:25 -0700
Subject: [PATCH] change learning rate and fix format

---
 paddle/operators/adagrad_op.cc                    | 67 ++++++++++---------
 paddle/operators/adagrad_op.h                     | 43 ++++++------
 .../v2/framework/tests/test_adagrad_op.py         | 48 +++++++++++--
 3 files changed, 97 insertions(+), 61 deletions(-)

diff --git a/paddle/operators/adagrad_op.cc b/paddle/operators/adagrad_op.cc
index 56a5fbcb86..ea2ff3c503 100644
--- a/paddle/operators/adagrad_op.cc
+++ b/paddle/operators/adagrad_op.cc
@@ -23,33 +23,33 @@ class AdagradOp : public framework::OperatorWithKernel {
 
  protected:
   void InferShape(framework::InferShapeContextBase *ctx) const override {
-    PADDLE_ENFORCE(ctx->HasInput("param"),
-                   "Input(param) of AdagradOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("grad"),
-                   "Input(grad) of AdagradOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("moment"),
-                   "Input(moment) of AdagradOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("learning_rate"),
-                   "Input(learning_rate) of AdagradOp should not be null.");
-
-    PADDLE_ENFORCE(ctx->HasOutput("param_out"),
-                   "Output(param_out) of AdagradOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasOutput("moment_out"),
-                   "Output(moment_out) of AdagradOp should not be null.");
-
-    auto lr_dims = ctx->GetInputDim("learning_rate");
+    PADDLE_ENFORCE(ctx->HasInput("Param"),
+                   "Input(Param) of AdagradOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("Grad"),
+                   "Input(Grad) of AdagradOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("Moment"),
+                   "Input(Moment) of AdagradOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("LearningRate"),
+                   "Input(LearningRate) of AdagradOp should not be null.");
+
+    PADDLE_ENFORCE(ctx->HasOutput("ParamOut"),
+                   "Output(ParamOut) of AdagradOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("MomentOut"),
+                   "Output(MomentOut) of AdagradOp should not be null.");
+
+    auto lr_dims = ctx->GetInputDim("LearningRate");
     PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1,
-                      "learning_rate should have one element");
-    auto param_dim = ctx->GetInputDim("param");
+                      "LearningRate should have one element");
+    auto param_dims = ctx->GetInputDim("Param");
     PADDLE_ENFORCE_EQ(
-        param_dim, ctx->GetInputDim("grad"),
-        "Param and grad input of AdagradOp should have the same dimension.");
+        param_dims, ctx->GetInputDim("Grad"),
+        "Param and Grad input of AdagradOp should have the same dimension.");
     PADDLE_ENFORCE_EQ(
-        param_dim, ctx->GetInputDim("moment"),
-        "Param and moment input of AdagradOp should have the same dimension.");
+        param_dims, ctx->GetInputDim("Moment"),
+        "Param and Moment input of AdagradOp should have the same dimension.");
 
-    ctx->SetOutputDim("param_out", param_dim);
-    ctx->SetOutputDim("moment_out", param_dim);
+    ctx->SetOutputDim("ParamOut", param_dims);
+    ctx->SetOutputDim("MomentOut", param_dims);
   }
 };
 
@@ -58,15 +58,18 @@ class AdagradOpMaker : public framework::OpProtoAndCheckerMaker {
   AdagradOpMaker(framework::OpProto *proto,
                  framework::OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("param", "Input parameter");
-    AddInput("grad", "Input gradient");
-    AddInput("moment", "Second moment");
-    AddInput("learning_rate", "learning rate of adagrad");
-
-    AddOutput("param_out", "Output parameter");
-    AddOutput("moment_out", "Output second moment");
-
-    AddAttr<float>("epsilon", "Constant for numerical stability");
+    AddInput("Param", "(Tensor) Input parameter");
AddInput("Grad", "(Tensor) Input gradient"); + AddInput("Moment", "(Tensor) Second moment"); + AddInput("LearningRate", "(Tensor) Learning rate"); + + AddOutput("ParamOut", "(Tensor) Output parameter"); + AddOutput("MomentOut", "(Tensor) Output second moment"); + + AddAttr("epsilon", + "(float, default 1.0e-6) " + "Constant for numerical stability") + .SetDefault(1.0e-6f); AddComment(R"DOC( Adaptive Gradient Algorithm (Adagrad). diff --git a/paddle/operators/adagrad_op.h b/paddle/operators/adagrad_op.h index 73833d4a3f..c5d8f751d3 100644 --- a/paddle/operators/adagrad_op.h +++ b/paddle/operators/adagrad_op.h @@ -19,40 +19,35 @@ limitations under the License. */ namespace paddle { namespace operators { -using Tensor = framework::Tensor; - -template -using EigenScalar = framework::EigenScalar; - -template -using EigenVector = framework::EigenVector; - template class AdagradOpKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { - auto param_out = ctx.Output("param_out"); - auto moment_out = ctx.Output("moment_out"); + auto param_out_tensor = ctx.Output("ParamOut"); + auto moment_out_tensor = ctx.Output("MomentOut"); - param_out->mutable_data(ctx.GetPlace()); - moment_out->mutable_data(ctx.GetPlace()); + param_out_tensor->mutable_data(ctx.GetPlace()); + moment_out_tensor->mutable_data(ctx.GetPlace()); - float lr = ctx.Input("learning_rate")->data()[0]; float epsilon = ctx.Attr("epsilon"); - auto p = EigenVector::Flatten(*ctx.Input("param")); - auto g = EigenVector::Flatten(*ctx.Input("grad")); - auto m = EigenVector::Flatten(*ctx.Input("moment")); - auto lr = EigenScalar::From(*ctx.Input("learning_rate")); - - auto p_out = EigenVector::Flatten(*param_out); - auto m_out = EigenVector::Flatten(*moment_out); + auto param = framework::EigenVector::Flatten( + *ctx.Input("Param")); + auto grad = framework::EigenVector::Flatten( + *ctx.Input("Grad")); + auto moment = framework::EigenVector::Flatten( + *ctx.Input("Moment")); + auto lr = framework::EigenVector::Flatten( + *ctx.Input("LearningRate")); + + auto param_out = framework::EigenVector::Flatten(*param_out_tensor); + auto moment_out = framework::EigenVector::Flatten(*moment_out_tensor); auto place = ctx.GetEigenDevice(); - m_out.device(place) = m + g * g; - p_out.device(place) = p - lr * g / (m_out.sqrt() + epsilon); + moment_out.device(place) = moment + grad * grad; + Eigen::DSizes m_dsize(moment_out_tensor->numel()); + param_out.device(place) = + param - lr.broadcast(m_dsize) * grad / (moment_out.sqrt() + epsilon); } }; diff --git a/python/paddle/v2/framework/tests/test_adagrad_op.py b/python/paddle/v2/framework/tests/test_adagrad_op.py index 2ee38ea37c..66bad349e5 100644 --- a/python/paddle/v2/framework/tests/test_adagrad_op.py +++ b/python/paddle/v2/framework/tests/test_adagrad_op.py @@ -3,25 +3,63 @@ import numpy as np from op_test import OpTest -class TestAdagradOp(OpTest): +class TestAdagradOp1(OpTest): + ''' Test Adagrad operator with explicit attributes + ''' + def setUp(self): self.op_type = "adagrad" param = np.random.random((123, 321)).astype("float32") grad = np.random.random((123, 321)).astype("float32") moment = np.zeros((123, 321)).astype("float32") + lr = 0.01 + epsilon = 1e-8 + + self.inputs = { + 'Param': param, + 'Grad': grad, + 'Moment': moment, + 'LearningRate': np.array([lr]).astype("float32") + } + + self.attrs = {'epsilon': epsilon} + + moment_out = moment + grad * grad + param_out = param - lr * grad / (np.sqrt(moment_out) + epsilon) + + self.outputs = 
+        self.outputs = {'ParamOut': param_out, 'MomentOut': moment_out}
+
+    def test_check_output(self):
+        self.check_output()
+
+
+class TestAdagradOp2(OpTest):
+    ''' Test Adagrad operator with default attributes
+    '''
 
-        lr = np.array([0.01]).astype("float32")
+    def setUp(self):
+        self.op_type = "adagrad"
+
+        param = np.random.random((123, 321)).astype("float32")
+        grad = np.random.random((123, 321)).astype("float32")
+        moment = np.zeros((123, 321)).astype("float32")
+        lr = 0.01
         epsilon = 1e-6
 
-        self.inputs = {'param': param, 'grad': grad, 'moment': moment}
+        self.inputs = {
+            'Param': param,
+            'Grad': grad,
+            'Moment': moment,
+            'LearningRate': np.array([lr]).astype("float32")
+        }
 
-        self.attrs = {'learning_rate': learning_rate, 'epsilon': epsilon}
+        self.attrs = {'epsilon': epsilon}
 
         moment_out = moment + grad * grad
         param_out = param - lr * grad / (np.sqrt(moment_out) + epsilon)
 
-        self.outputs = {'param_out': param_out, 'moment_out': moment_out}
+        self.outputs = {'ParamOut': param_out, 'MomentOut': moment_out}
 
     def test_check_output(self):
         self.check_output()
-- 
GitLab
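
For reference, a minimal standalone sketch (NumPy only, mirroring the math in
the new kernel and the updated tests) of the update this patch makes the
operator perform. The helper name adagrad_update and its default epsilon are
illustrative, not part of the patch; the one-element learning-rate array is
broadcast over the parameter, playing the role of lr.broadcast(m_dsize) in
adagrad_op.h:

    import numpy as np

    def adagrad_update(param, grad, moment, learning_rate, epsilon=1.0e-6):
        # Accumulate squared gradients, then scale the step by the
        # one-element learning-rate array; NumPy broadcasting stands in for
        # Eigen's lr.broadcast(m_dsize) over all parameter elements.
        moment_out = moment + grad * grad
        param_out = param - learning_rate * grad / (np.sqrt(moment_out) + epsilon)
        return param_out, moment_out

    param = np.random.random((123, 321)).astype("float32")
    grad = np.random.random((123, 321)).astype("float32")
    moment = np.zeros((123, 321)).astype("float32")
    lr = np.array([0.01]).astype("float32")  # shape (1,), like LearningRate
    param_out, moment_out = adagrad_update(param, grad, moment, lr)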