Commit 78f4c803 authored by Kexin Zhao

change learning rate and fix format

Parent d1de7ec6
@@ -23,33 +23,33 @@ class AdagradOp : public framework::OperatorWithKernel {
  protected:
   void InferShape(framework::InferShapeContextBase *ctx) const override {
-    PADDLE_ENFORCE(ctx->HasInput("param"),
-                   "Input(param) of AdagradOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("grad"),
-                   "Input(grad) of AdagradOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("moment"),
-                   "Input(moment) of AdagradOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("learning_rate"),
-                   "Input(learning_rate) of AdagradOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasOutput("param_out"),
-                   "Output(param_out) of AdagradOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasOutput("moment_out"),
-                   "Output(moment_out) of AdagradOp should not be null.");
-    auto lr_dims = ctx->GetInputDim("learning_rate");
+    PADDLE_ENFORCE(ctx->HasInput("Param"),
+                   "Input(Param) of AdagradOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("Grad"),
+                   "Input(Grad) of AdagradOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("Moment"),
+                   "Input(Moment) of AdagradOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("LearningRate"),
+                   "Input(LearningRate) of AdagradOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("ParamOut"),
+                   "Output(ParamOut) of AdagradOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("MomentOut"),
+                   "Output(MomentOut) of AdagradOp should not be null.");
+    auto lr_dims = ctx->GetInputDim("LearningRate");
     PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1,
-                      "learning_rate should have one element");
-    auto param_dim = ctx->GetInputDim("param");
+                      "LearningRate should have one element");
+    auto param_dims = ctx->GetInputDim("Param");
     PADDLE_ENFORCE_EQ(
-        param_dim, ctx->GetInputDim("grad"),
-        "Param and grad input of AdagradOp should have the same dimension.");
+        param_dims, ctx->GetInputDim("Grad"),
+        "Param and Grad input of AdagradOp should have the same dimension.");
     PADDLE_ENFORCE_EQ(
-        param_dim, ctx->GetInputDim("moment"),
-        "Param and moment input of AdagradOp should have the same dimension.");
-    ctx->SetOutputDim("param_out", param_dim);
-    ctx->SetOutputDim("moment_out", param_dim);
+        param_dims, ctx->GetInputDim("Moment"),
+        "Param and Moment input of AdagradOp should have the same dimension.");
+    ctx->SetOutputDim("ParamOut", param_dims);
+    ctx->SetOutputDim("MomentOut", param_dims);
   }
 };
@@ -58,15 +58,18 @@ class AdagradOpMaker : public framework::OpProtoAndCheckerMaker {
   AdagradOpMaker(framework::OpProto *proto,
                  framework::OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("param", "Input parameter");
-    AddInput("grad", "Input gradient");
-    AddInput("moment", "Second moment");
-    AddInput("learning_rate", "learning rate of adagrad");
-    AddOutput("param_out", "Output parameter");
-    AddOutput("moment_out", "Output second moment");
-    AddAttr<float>("epsilon", "Constant for numerical stability");
+    AddInput("Param", "(Tensor) Input parameter");
+    AddInput("Grad", "(Tensor) Input gradient");
+    AddInput("Moment", "(Tensor) Second moment");
+    AddInput("LearningRate", "(Tensor) Learning rate");
+    AddOutput("ParamOut", "(Tensor) Output parameter");
+    AddOutput("MomentOut", "(Tensor) Output second moment");
+    AddAttr<float>("epsilon",
+                   "(float, default 1.0e-6) "
+                   "Constant for numerical stability")
+        .SetDefault(1.0e-6f);
     AddComment(R"DOC(
 Adaptive Gradient Algorithm (Adagrad).
......
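For reference (this note is not part of the commit), the update documented by the DOC comment above and implemented in the kernel below is the standard Adagrad step. Writing Param as \theta, Grad as g, Moment as m and LearningRate as \eta, with all operations element-wise:

\[
\begin{aligned}
m_t &= m_{t-1} + g_t \odot g_t,\\
\theta_t &= \theta_{t-1} - \frac{\eta \, g_t}{\sqrt{m_t} + \epsilon},
\end{aligned}
\]

where \epsilon defaults to 1.0e-6 as set by SetDefault above.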
@@ -19,40 +19,35 @@ limitations under the License. */
 namespace paddle {
 namespace operators {

-using Tensor = framework::Tensor;
-template <typename T, int MajorType = Eigen::RowMajor,
-          typename IndexType = Eigen::DenseIndex>
-using EigenScalar = framework::EigenScalar<T, MajorType, IndexType>;
-template <typename T, int MajorType = Eigen::RowMajor,
-          typename IndexType = Eigen::DenseIndex>
-using EigenVector = framework::EigenVector<T, MajorType, IndexType>;

 template <typename Place, typename T>
 class AdagradOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-    auto param_out = ctx.Output<Tensor>("param_out");
-    auto moment_out = ctx.Output<Tensor>("moment_out");
+    auto param_out_tensor = ctx.Output<framework::Tensor>("ParamOut");
+    auto moment_out_tensor = ctx.Output<framework::Tensor>("MomentOut");

-    param_out->mutable_data<T>(ctx.GetPlace());
-    moment_out->mutable_data<T>(ctx.GetPlace());
+    param_out_tensor->mutable_data<T>(ctx.GetPlace());
+    moment_out_tensor->mutable_data<T>(ctx.GetPlace());

-    float lr = ctx.Input<Tensor>("learning_rate")->data<float>()[0];
     float epsilon = ctx.Attr<float>("epsilon");

-    auto p = EigenVector<T>::Flatten(*ctx.Input<Tensor>("param"));
-    auto g = EigenVector<T>::Flatten(*ctx.Input<Tensor>("grad"));
-    auto m = EigenVector<T>::Flatten(*ctx.Input<Tensor>("moment"));
-    auto lr = EigenScalar<T>::From(*ctx.Input<Tensor>("learning_rate"));
-    auto p_out = EigenVector<T>::Flatten(*param_out);
-    auto m_out = EigenVector<T>::Flatten(*moment_out);
+    auto param = framework::EigenVector<T>::Flatten(
+        *ctx.Input<framework::Tensor>("Param"));
+    auto grad = framework::EigenVector<T>::Flatten(
+        *ctx.Input<framework::Tensor>("Grad"));
+    auto moment = framework::EigenVector<T>::Flatten(
+        *ctx.Input<framework::Tensor>("Moment"));
+    auto lr = framework::EigenVector<T>::Flatten(
+        *ctx.Input<framework::Tensor>("LearningRate"));
+    auto param_out = framework::EigenVector<T>::Flatten(*param_out_tensor);
+    auto moment_out = framework::EigenVector<T>::Flatten(*moment_out_tensor);
     auto place = ctx.GetEigenDevice<Place>();

-    m_out.device(place) = m + g * g;
-    p_out.device(place) = p - lr * g / (m_out.sqrt() + epsilon);
+    moment_out.device(place) = moment + grad * grad;
+    Eigen::DSizes<int, 1> m_dsize(moment_out_tensor->numel());
+    param_out.device(place) =
+        param - lr.broadcast(m_dsize) * grad / (moment_out.sqrt() + epsilon);
   }
 };
......
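The kernel change above reads LearningRate as a 1-element tensor and broadcasts it over the flattened parameter (lr.broadcast(m_dsize)) rather than pulling out a raw float. Below is a minimal NumPy sketch of the same element-wise update; it is not part of the commit, the names are illustrative only, and NumPy broadcasting of a shape-(1,) array plays the role of lr.broadcast:

import numpy as np

def adagrad_step(param, grad, moment, lr, epsilon=1.0e-6):
    # Accumulate squared gradients, then scale the step by the
    # broadcast 1-element learning rate, mirroring the kernel above.
    moment_out = moment + grad * grad
    param_out = param - lr * grad / (np.sqrt(moment_out) + epsilon)
    return param_out, moment_out

# Example with the shapes used by the unit test below.
param = np.random.random((123, 321)).astype("float32")
grad = np.random.random((123, 321)).astype("float32")
moment = np.zeros((123, 321)).astype("float32")
lr = np.array([0.01]).astype("float32")  # 1-element LearningRate tensor
param_out, moment_out = adagrad_step(param, grad, moment, lr)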
@@ -3,25 +3,63 @@ import numpy as np
 from op_test import OpTest


-class TestAdagradOp(OpTest):
+class TestAdagradOp1(OpTest):
+    ''' Test Adagrad operator with explicit attributes
+    '''
+
+    def setUp(self):
+        self.op_type = "adagrad"
+        param = np.random.random((123, 321)).astype("float32")
+        grad = np.random.random((123, 321)).astype("float32")
+        moment = np.zeros((123, 321)).astype("float32")
+        lr = 0.01
+        epsilon = 1e-8
+
+        self.inputs = {
+            'Param': param,
+            'Grad': grad,
+            'Moment': moment,
+            'LearningRate': np.array([lr]).astype("float32")
+        }
+
+        self.attrs = {'epsilon': epsilon}
+
+        moment_out = moment + grad * grad
+        param_out = param - lr * grad / (np.sqrt(moment_out) + epsilon)
+
+        self.outputs = {'ParamOut': param_out, 'MomentOut': moment_out}
+
+    def test_check_output(self):
+        self.check_output()
+
+
+class TestAdagradOp2(OpTest):
+    ''' Test Adagrad operator with default attributes
+    '''
+
-    lr = np.array([0.01]).astype("float32")
     def setUp(self):
         self.op_type = "adagrad"
         param = np.random.random((123, 321)).astype("float32")
         grad = np.random.random((123, 321)).astype("float32")
         moment = np.zeros((123, 321)).astype("float32")
         lr = 0.01
         epsilon = 1e-6
-        self.inputs = {'param': param, 'grad': grad, 'moment': moment}
+
+        self.inputs = {
+            'Param': param,
+            'Grad': grad,
+            'Moment': moment,
+            'LearningRate': np.array([lr]).astype("float32")
+        }

-        self.attrs = {'learning_rate': learning_rate, 'epsilon': epsilon}
+        self.attrs = {'epsilon': epsilon}

         moment_out = moment + grad * grad
         param_out = param - lr * grad / (np.sqrt(moment_out) + epsilon)
-        self.outputs = {'param_out': param_out, 'moment_out': moment_out}
+
+        self.outputs = {'ParamOut': param_out, 'MomentOut': moment_out}

     def test_check_output(self):
         self.check_output()
......