Commit 78f4c803 authored by Kexin Zhao

change learning rate and fix format

Parent d1de7ec6
@@ -23,33 +23,33 @@ class AdagradOp : public framework::OperatorWithKernel {
  protected:
   void InferShape(framework::InferShapeContextBase *ctx) const override {
-    PADDLE_ENFORCE(ctx->HasInput("param"),
-                   "Input(param) of AdagradOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("grad"),
-                   "Input(grad) of AdagradOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("moment"),
-                   "Input(moment) of AdagradOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("learning_rate"),
-                   "Input(learning_rate) of AdagradOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasOutput("param_out"),
-                   "Output(param_out) of AdagradOp should not be null.");
-    PADDLE_ENFORCE(ctx->HasOutput("moment_out"),
-                   "Output(moment_out) of AdagradOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("Param"),
+                   "Input(Param) of AdagradOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("Grad"),
+                   "Input(Grad) of AdagradOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("Moment"),
+                   "Input(Moment) of AdagradOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("LearningRate"),
+                   "Input(LearningRate) of AdagradOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("ParamOut"),
+                   "Output(ParamOut) of AdagradOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("MomentOut"),
+                   "Output(MomentOut) of AdagradOp should not be null.");
 
-    auto lr_dims = ctx->GetInputDim("learning_rate");
+    auto lr_dims = ctx->GetInputDim("LearningRate");
     PADDLE_ENFORCE_EQ(framework::product(lr_dims), 1,
-                      "learning_rate should have one element");
-    auto param_dim = ctx->GetInputDim("param");
+                      "LearningRate should have one element");
+    auto param_dims = ctx->GetInputDim("Param");
     PADDLE_ENFORCE_EQ(
-        param_dim, ctx->GetInputDim("grad"),
-        "Param and grad input of AdagradOp should have the same dimension.");
+        param_dims, ctx->GetInputDim("Grad"),
+        "Param and Grad input of AdagradOp should have the same dimension.");
     PADDLE_ENFORCE_EQ(
-        param_dim, ctx->GetInputDim("moment"),
-        "Param and moment input of AdagradOp should have the same dimension.");
+        param_dims, ctx->GetInputDim("Moment"),
+        "Param and Moment input of AdagradOp should have the same dimension.");
 
-    ctx->SetOutputDim("param_out", param_dim);
-    ctx->SetOutputDim("moment_out", param_dim);
+    ctx->SetOutputDim("ParamOut", param_dims);
+    ctx->SetOutputDim("MomentOut", param_dims);
   }
 };
@@ -58,15 +58,18 @@ class AdagradOpMaker : public framework::OpProtoAndCheckerMaker {
   AdagradOpMaker(framework::OpProto *proto,
                  framework::OpAttrChecker *op_checker)
       : OpProtoAndCheckerMaker(proto, op_checker) {
-    AddInput("param", "Input parameter");
-    AddInput("grad", "Input gradient");
-    AddInput("moment", "Second moment");
-    AddInput("learning_rate", "learning rate of adagrad");
-    AddOutput("param_out", "Output parameter");
-    AddOutput("moment_out", "Output second moment");
-    AddAttr<float>("epsilon", "Constant for numerical stability");
+    AddInput("Param", "(Tensor) Input parameter");
+    AddInput("Grad", "(Tensor) Input gradient");
+    AddInput("Moment", "(Tensor) Second moment");
+    AddInput("LearningRate", "(Tensor) Learning rate");
+    AddOutput("ParamOut", "(Tensor) Output parameter");
+    AddOutput("MomentOut", "(Tensor) Output second moment");
+    AddAttr<float>("epsilon",
+                   "(float, default 1.0e-6) "
+                   "Constant for numerical stability")
+        .SetDefault(1.0e-6f);
     AddComment(R"DOC(
 Adaptive Gradient Algorithm (Adagrad).
...
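For reference, the update this operator performs (and that the kernel below implements with Eigen over the flattened tensors) is the standard Adagrad step. With ⊙ denoting the element-wise product and ε the epsilon attribute:

\[
\begin{aligned}
\text{MomentOut} &= \text{Moment} + \text{Grad} \odot \text{Grad}, \\
\text{ParamOut}  &= \text{Param} - \frac{\text{LearningRate} \cdot \text{Grad}}{\sqrt{\text{MomentOut}} + \epsilon}.
\end{aligned}
\]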
@@ -19,40 +19,35 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
 
-using Tensor = framework::Tensor;
-
-template <typename T, int MajorType = Eigen::RowMajor,
-          typename IndexType = Eigen::DenseIndex>
-using EigenScalar = framework::EigenScalar<T, MajorType, IndexType>;
-
-template <typename T, int MajorType = Eigen::RowMajor,
-          typename IndexType = Eigen::DenseIndex>
-using EigenVector = framework::EigenVector<T, MajorType, IndexType>;
-
 template <typename Place, typename T>
 class AdagradOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-    auto param_out = ctx.Output<Tensor>("param_out");
-    auto moment_out = ctx.Output<Tensor>("moment_out");
+    auto param_out_tensor = ctx.Output<framework::Tensor>("ParamOut");
+    auto moment_out_tensor = ctx.Output<framework::Tensor>("MomentOut");
 
-    param_out->mutable_data<T>(ctx.GetPlace());
-    moment_out->mutable_data<T>(ctx.GetPlace());
+    param_out_tensor->mutable_data<T>(ctx.GetPlace());
+    moment_out_tensor->mutable_data<T>(ctx.GetPlace());
 
-    float lr = ctx.Input<Tensor>("learning_rate")->data<float>()[0];
     float epsilon = ctx.Attr<float>("epsilon");
 
-    auto p = EigenVector<T>::Flatten(*ctx.Input<Tensor>("param"));
-    auto g = EigenVector<T>::Flatten(*ctx.Input<Tensor>("grad"));
-    auto m = EigenVector<T>::Flatten(*ctx.Input<Tensor>("moment"));
-    auto lr = EigenScalar<T>::From(*ctx.Input<Tensor>("learning_rate"));
-
-    auto p_out = EigenVector<T>::Flatten(*param_out);
-    auto m_out = EigenVector<T>::Flatten(*moment_out);
+    auto param = framework::EigenVector<T>::Flatten(
+        *ctx.Input<framework::Tensor>("Param"));
+    auto grad = framework::EigenVector<T>::Flatten(
+        *ctx.Input<framework::Tensor>("Grad"));
+    auto moment = framework::EigenVector<T>::Flatten(
+        *ctx.Input<framework::Tensor>("Moment"));
+    auto lr = framework::EigenVector<T>::Flatten(
+        *ctx.Input<framework::Tensor>("LearningRate"));
+
+    auto param_out = framework::EigenVector<T>::Flatten(*param_out_tensor);
+    auto moment_out = framework::EigenVector<T>::Flatten(*moment_out_tensor);
     auto place = ctx.GetEigenDevice<Place>();
 
-    m_out.device(place) = m + g * g;
-    p_out.device(place) = p - lr * g / (m_out.sqrt() + epsilon);
+    moment_out.device(place) = moment + grad * grad;
+    Eigen::DSizes<int, 1> m_dsize(moment_out_tensor->numel());
+    param_out.device(place) =
+        param - lr.broadcast(m_dsize) * grad / (moment_out.sqrt() + epsilon);
   }
 };
...
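As a sanity check on the kernel above, here is a minimal NumPy sketch of the same update. The helper name adagrad_reference and the example values are illustrative only, not part of the codebase; the rewritten unit tests below compute their expected outputs in essentially this way, with the one-element LearningRate tensor broadcast across the parameter just as lr.broadcast(m_dsize) does in the Eigen code.

import numpy as np

def adagrad_reference(param, grad, moment, learning_rate, epsilon=1.0e-6):
    # Accumulate squared gradients, then take a step scaled by their square root.
    moment_out = moment + grad * grad
    # learning_rate has shape (1,); NumPy broadcasts it over param's shape.
    param_out = param - learning_rate * grad / (np.sqrt(moment_out) + epsilon)
    return param_out, moment_out

# Same shapes and values as the unit tests below.
param = np.random.random((123, 321)).astype("float32")
grad = np.random.random((123, 321)).astype("float32")
moment = np.zeros((123, 321)).astype("float32")
lr = np.array([0.01]).astype("float32")
param_out, moment_out = adagrad_reference(param, grad, moment, lr)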
@@ -3,25 +3,63 @@ import numpy as np
 from op_test import OpTest
 
 
-class TestAdagradOp(OpTest):
+class TestAdagradOp1(OpTest):
+    ''' Test Adagrad operator with explicit attributes
+    '''
+
     def setUp(self):
         self.op_type = "adagrad"
 
         param = np.random.random((123, 321)).astype("float32")
         grad = np.random.random((123, 321)).astype("float32")
         moment = np.zeros((123, 321)).astype("float32")
 
-        lr = np.array([0.01]).astype("float32")
-        epsilon = 1e-6
-
-        self.inputs = {'param': param, 'grad': grad, 'moment': moment}
-
-        self.attrs = {'learning_rate': learning_rate, 'epsilon': epsilon}
+        lr = 0.01
+        epsilon = 1e-8
+
+        self.inputs = {
+            'Param': param,
+            'Grad': grad,
+            'Moment': moment,
+            'LearningRate': np.array([lr]).astype("float32")
+        }
+
+        self.attrs = {'epsilon': epsilon}
 
         moment_out = moment + grad * grad
         param_out = param - lr * grad / (np.sqrt(moment_out) + epsilon)
 
-        self.outputs = {'param_out': param_out, 'moment_out': moment_out}
+        self.outputs = {'ParamOut': param_out, 'MomentOut': moment_out}
 
     def test_check_output(self):
         self.check_output()
+
+
+class TestAdagradOp2(OpTest):
+    ''' Test Adagrad operator with default attributes
+    '''
+
+    def setUp(self):
+        self.op_type = "adagrad"
+
+        param = np.random.random((123, 321)).astype("float32")
+        grad = np.random.random((123, 321)).astype("float32")
+        moment = np.zeros((123, 321)).astype("float32")
+
+        lr = 0.01
+        epsilon = 1e-6
+
+        self.inputs = {
+            'Param': param,
+            'Grad': grad,
+            'Moment': moment,
+            'LearningRate': np.array([lr]).astype("float32")
+        }
+
+        self.attrs = {'epsilon': epsilon}
+
+        moment_out = moment + grad * grad
+        param_out = param - lr * grad / (np.sqrt(moment_out) + epsilon)
+
+        self.outputs = {'ParamOut': param_out, 'MomentOut': moment_out}
+
+    def test_check_output(self):
+        self.check_output()
...