From 39ef5736c2f796079403e2483930236a2be45444 Mon Sep 17 00:00:00 2001
From: Yang Yu
Date: Mon, 25 Dec 2017 12:15:54 +0800
Subject: [PATCH] Use transform to rewrite adam

---
 paddle/operators/adam_op.h | 132 ++++++++++++++++++++++++++-----------
 1 file changed, 93 insertions(+), 39 deletions(-)

diff --git a/paddle/operators/adam_op.h b/paddle/operators/adam_op.h
index 45157842a..887258530 100644
--- a/paddle/operators/adam_op.h
+++ b/paddle/operators/adam_op.h
@@ -13,59 +13,113 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #pragma once
-#include "paddle/framework/eigen.h"
+#include <math.h>  // for sqrt in CPU and CUDA
 #include "paddle/framework/op_registry.h"
+#include "paddle/operators/detail/safe_ref.h"
+#include "paddle/platform/transform.h"
 
 namespace paddle {
 namespace operators {
 
+template <typename T>
+struct AdamFunctor {
+  T beta1_;
+  T beta2_;
+  T epsilon_;
+
+  const T* beta1_pow_;
+  const T* beta2_pow_;
+  const T* moment1_;
+  T* moment1_out_;
+  const T* moment2_;
+  T* moment2_out_;
+  const T* lr_;
+  const T* grad_;
+  const T* param_;
+
+  AdamFunctor(T beta1, T beta2, T epsilon, const T* beta1_pow,
+              const T* beta2_pow, const T* mom1, T* mom1_out, const T* mom2,
+              T* mom2_out, const T* lr, const T* grad, const T* param)
+      : beta1_(beta1),
+        beta2_(beta2),
+        epsilon_(epsilon),
+        beta1_pow_(beta1_pow),
+        beta2_pow_(beta2_pow),
+        moment1_(mom1),
+        moment1_out_(mom1_out),
+        moment2_(mom2),
+        moment2_out_(mom2_out),
+        lr_(lr),
+        grad_(grad),
+        param_(param) {}
+
+  // From param[i] --> param_out[i];
+  inline HOSTDEVICE T operator()(const T& p) const {
+    size_t i = &p - param_;
+    // Merge all memory access together.
+    T g = grad_[i];
+    T mom1 = moment1_[i];
+    T mom2 = moment2_[i];
+    T lr = *lr_;
+    T beta1_pow = *beta1_pow_;
+    T beta2_pow = *beta2_pow_;
+
+    // Calculation
+    lr = lr * sqrt(1 - beta2_pow) / (1 - beta1_pow);
+    mom1 = beta1_ * mom1 + (1 - beta1_) * g;
+    mom2 = beta2_ * mom2 + (1 - beta2_) * g * g;
+    T new_p = p - lr * (mom1 / (sqrt(mom2) + epsilon_));
+
+    // Write back to global memory
+    moment1_out_[i] = mom1;
+    moment2_out_[i] = mom2;
+    return new_p;
+  }
+};
+
 template <typename DeviceContext, typename T>
 class AdamOpKernel : public framework::OpKernel<T> {
  public:
   void Compute(const framework::ExecutionContext& ctx) const override {
-    auto param_out_tensor = ctx.Output<framework::Tensor>("ParamOut");
-    auto moment1_out_tensor = ctx.Output<framework::Tensor>("Moment1Out");
-    auto moment2_out_tensor = ctx.Output<framework::Tensor>("Moment2Out");
-
-    param_out_tensor->mutable_data<T>(ctx.GetPlace());
-    moment1_out_tensor->mutable_data<T>(ctx.GetPlace());
-    moment2_out_tensor->mutable_data<T>(ctx.GetPlace());
+    using paddle::framework::LoDTensor;
+    using paddle::operators::detail::Ref;
 
     T beta1 = static_cast<T>(ctx.Attr<float>("beta1"));
     T beta2 = static_cast<T>(ctx.Attr<float>("beta2"));
     T epsilon = static_cast<T>(ctx.Attr<float>("epsilon"));
+    auto& param = Ref(ctx.Input<LoDTensor>("Param"), "Must set Param");
+    auto& grad = Ref(ctx.Input<LoDTensor>("Grad"), "Must set Grad");
+    auto& mom1 = Ref(ctx.Input<LoDTensor>("Moment1"), "Must set Moment1");
+    auto& mom2 = Ref(ctx.Input<LoDTensor>("Moment2"), "Must set Moment2");
+    auto& lr =
+        Ref(ctx.Input<LoDTensor>("LearningRate"), "Must set LearningRate");
+
+    auto& beta1_pow =
+        Ref(ctx.Input<LoDTensor>("Beta1Pow"), "Must set Beta1Pow");
+    auto& beta2_pow =
+        Ref(ctx.Input<LoDTensor>("Beta2Pow"), "Must set Beta2Pow");
+
+    auto& param_out =
+        Ref(ctx.Output<LoDTensor>("ParamOut"), "Must set ParamOut");
+    auto& mom1_out =
+        Ref(ctx.Output<LoDTensor>("Moment1Out"), "Must set Moment1Out");
+    auto& mom2_out =
+        Ref(ctx.Output<LoDTensor>("Moment2Out"), "Must set Moment2Out");
+
+    AdamFunctor<T> functor(beta1, beta2, epsilon, beta1_pow.template data<T>(),
+                           beta2_pow.template data<T>(),
+                           mom1.template data<T>(),
+                           mom1_out.template mutable_data<T>(ctx.GetPlace()),
+                           mom2.template data<T>(),
+                           mom2_out.template mutable_data<T>(ctx.GetPlace()),
+                           lr.template data<T>(), grad.template data<T>(),
+                           param.template data<T>());
 
-    auto param = framework::EigenVector<T>::Flatten(
-        *ctx.Input<framework::Tensor>("Param"));
-    auto grad = framework::EigenVector<T>::Flatten(
-        *ctx.Input<framework::Tensor>("Grad"));
-    auto moment1 = framework::EigenVector<T>::Flatten(
-        *ctx.Input<framework::Tensor>("Moment1"));
-    auto moment2 = framework::EigenVector<T>::Flatten(
-        *ctx.Input<framework::Tensor>("Moment2"));
-    auto lr = framework::EigenVector<T>::Flatten(
-        *ctx.Input<framework::Tensor>("LearningRate"));
-    auto beta1_pow = framework::EigenVector<T>::Flatten(
-        *ctx.Input<framework::Tensor>("Beta1Pow"));
-    auto beta2_pow = framework::EigenVector<T>::Flatten(
-        *ctx.Input<framework::Tensor>("Beta2Pow"));
-    auto param_out = framework::EigenVector<T>::Flatten(*param_out_tensor);
-    auto moment1_out = framework::EigenVector<T>::Flatten(*moment1_out_tensor);
-    auto moment2_out = framework::EigenVector<T>::Flatten(*moment2_out_tensor);
-    auto* place = ctx.template device_context<DeviceContext>().eigen_device();
-
-    moment1_out.device(*place) = beta1 * moment1 + (1 - beta1) * grad;
-    moment2_out.device(*place) = beta2 * moment2 + (1 - beta2) * grad.square();
-
-    // All of these are tensors of 1 element
-    auto lr_t = lr * (1 - beta2_pow).sqrt() / (1 - beta1_pow);
-    // Eigen does not support automatic broadcast
-    // Get dimensions of moment vector to broadcast lr_t
-    Eigen::DSizes<int, 1> m_dsize(moment1_out_tensor->numel());
-    param_out.device(*place) =
-        param -
-        lr_t.broadcast(m_dsize) *
-            (moment1_out / (moment2_out.sqrt() + epsilon));
+    const T* in_ptr = param.template data<T>();
+    T* out_ptr = param_out.template mutable_data<T>(ctx.GetPlace());
+    platform::Transform<DeviceContext> trans;
+    trans(static_cast<const DeviceContext&>(ctx.device_context()), in_ptr,
+          in_ptr + param_out.numel(), out_ptr, functor);
   }
 };
-- 
GitLab
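
For reference, the functor above applies the standard bias-corrected Adam step per element: lr_t = lr * sqrt(1 - beta2^t) / (1 - beta1^t), m = beta1 * m + (1 - beta1) * g, v = beta2 * v + (1 - beta2) * g^2, and p = p - lr_t * m / (sqrt(v) + epsilon). The sketch below is a minimal standalone illustration of the same pattern, using plain std::transform instead of paddle::platform::Transform; the AdamSketch struct, its member names, and all numeric values are hypothetical and only mirror the structure of AdamFunctor, not the actual Paddle API.

// Standalone sketch of the elementwise Adam update, including the
// index-recovery trick (&p - base pointer) used by the patch.
#include <algorithm>
#include <cmath>
#include <cstddef>
#include <vector>

struct AdamSketch {
  float beta1, beta2, epsilon;
  const float* beta1_pow;  // beta1^t, single element
  const float* beta2_pow;  // beta2^t, single element
  const float* lr;         // learning rate, single element
  const float* grad;
  float* mom1;             // updated in place for brevity
  float* mom2;
  const float* param;      // base pointer used to recover the index

  float operator()(const float& p) const {
    std::size_t i = &p - param;  // same index-recovery trick as AdamFunctor
    float g = grad[i];
    float m1 = beta1 * mom1[i] + (1 - beta1) * g;
    float m2 = beta2 * mom2[i] + (1 - beta2) * g * g;
    float lr_t = *lr * std::sqrt(1 - *beta2_pow) / (1 - *beta1_pow);
    mom1[i] = m1;
    mom2[i] = m2;
    return p - lr_t * (m1 / (std::sqrt(m2) + epsilon));
  }
};

int main() {
  std::vector<float> param = {1.0f, 2.0f}, grad = {0.1f, -0.2f};
  std::vector<float> mom1(2, 0.0f), mom2(2, 0.0f), param_out(2);
  float lr = 0.001f, b1p = 0.9f, b2p = 0.999f;  // beta1^1, beta2^1
  AdamSketch f{0.9f, 0.999f, 1e-8f, &b1p,        &b2p,
               &lr,  grad.data(),   mom1.data(), mom2.data(), param.data()};
  // Apply the update to every parameter, writing the result to param_out.
  std::transform(param.begin(), param.end(), param_out.begin(), f);
  return 0;
}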