diff --git a/paddle/operators/adagrad_op.h b/paddle/operators/adagrad_op.h index 0d77dbcbacd4efb6c1900e57b5c4ea9e9b136771..66f5b0f449a4f11a3c734c98a6a97833763348a1 100644 --- a/paddle/operators/adagrad_op.h +++ b/paddle/operators/adagrad_op.h @@ -47,8 +47,7 @@ class AdagradOpKernel : public framework::OpKernel { *ctx.Input("Grad")); auto moment = framework::EigenVector::Flatten( *ctx.Input("Moment")); - auto lr = framework::EigenVector::Flatten( - *ctx.Input("LearningRate")); + auto* learning_rate = ctx.Input("LearningRate"); auto param_out = framework::EigenVector::Flatten(*param_out_tensor); auto moment_out = framework::EigenVector::Flatten(*moment_out_tensor); @@ -56,8 +55,16 @@ class AdagradOpKernel : public framework::OpKernel { moment_out.device(*place) = moment + grad * grad; Eigen::DSizes m_dsize(moment_out_tensor->numel()); - param_out.device(*place) = - param - lr.broadcast(m_dsize) * grad / (moment_out.sqrt() + epsilon); + if (platform::is_cpu_place(ctx.GetPlace())) { + auto* lr = learning_rate->data(); + param_out.device(*place) = + param - lr[0] * grad / (moment_out.sqrt() + epsilon); + } else { + auto lr = framework::EigenVector::Flatten(*learning_rate); + param_out.device(*place) = + param - + lr.broadcast(m_dsize) * grad / (moment_out.sqrt() + epsilon); + } } else if (grad_var->IsType()) { auto* param_tensor = ctx.Input("Param"); PADDLE_ENFORCE_EQ(param_tensor, param_out_tensor);