From e0b136c0f972813d87e8f03d67e97b7b7c4dfcb3 Mon Sep 17 00:00:00 2001
From: wanghaoshuang
Date: Sun, 18 Mar 2018 22:24:43 +0800
Subject: [PATCH] Refine average accumulates op

1. Rename inputs and outputs
2. Add some comments
---
 .../fluid/operators/average_accumulates_op.cc | 138 +++++++++++-------
 .../fluid/operators/average_accumulates_op.cu |  36 +++--
 .../fluid/operators/average_accumulates_op.h  |  92 ++++++------
 3 files changed, 147 insertions(+), 119 deletions(-)

diff --git a/paddle/fluid/operators/average_accumulates_op.cc b/paddle/fluid/operators/average_accumulates_op.cc
index 808693b61c3..368a1f5612c 100644
--- a/paddle/fluid/operators/average_accumulates_op.cc
+++ b/paddle/fluid/operators/average_accumulates_op.cc
@@ -1,4 +1,4 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
 
 Licensed under the Apache License, Version 2.0 (the "License");
 you may not use this file except in compliance with the License.
@@ -21,9 +21,9 @@ template <>
 void getAccumulators<paddle::platform::CPUDeviceContext>(
     const framework::ExecutionContext& ctx, int64_t& num_updates_,
     int64_t& num_accumulates_, int64_t& old_num_accumulates_) {
-  auto* in_old_num_accumulates = ctx.Input<Tensor>("old_num_accumulates");
-  auto* in_num_accumulates = ctx.Input<Tensor>("num_accumulates");
-  auto* in_num_updates = ctx.Input<Tensor>("num_updates");
+  auto* in_old_num_accumulates = ctx.Input<Tensor>("in_old_num_accumulates");
+  auto* in_num_accumulates = ctx.Input<Tensor>("in_num_accumulates");
+  auto* in_num_updates = ctx.Input<Tensor>("in_num_updates");
 
   old_num_accumulates_ = in_old_num_accumulates->data<int64_t>()[0];
   num_accumulates_ = in_num_accumulates->data<int64_t>()[0];
@@ -34,9 +34,9 @@ template <>
 void setAccumulators<paddle::platform::CPUDeviceContext>(
     const framework::ExecutionContext& ctx, int64_t num_updates_,
     int64_t num_accumulates_, int64_t old_num_accumulates_) {
-  auto* out_old_num_accumulates = ctx.Output<Tensor>("old_num_accumulates");
-  auto* out_num_accumulates = ctx.Output<Tensor>("num_accumulates");
-  auto* out_num_updates = ctx.Output<Tensor>("num_updates");
+  auto* out_old_num_accumulates = ctx.Output<Tensor>("out_old_num_accumulates");
+  auto* out_num_accumulates = ctx.Output<Tensor>("out_num_accumulates");
+  auto* out_num_updates = ctx.Output<Tensor>("out_num_updates");
 
   out_old_num_accumulates->data<int64_t>()[0] = old_num_accumulates_;
   out_num_accumulates->data<int64_t>()[0] = num_accumulates_;
@@ -49,64 +49,62 @@ class AverageAccumulatesOp : public framework::OperatorWithKernel {
 
   void InferShape(framework::InferShapeContext* ctx) const override {
     PADDLE_ENFORCE(
-        ctx->HasInput("Param"),
-        "Input (Param) of average_accumulates op should not be null.");
+        ctx->HasInput("param"),
+        "Input (param) of average_accumulates op should not be null.");
     PADDLE_ENFORCE(
-        ctx->HasInput("Grad"),
-        "Input (Grad) of average_accumulates op should not be null.");
-    PADDLE_ENFORCE(
-        ctx->HasInput("sum_1"),
+        ctx->HasInput("in_sum_1"),
         "Input (sum_1) of average_accumulates op should not be null.");
     PADDLE_ENFORCE(
-        ctx->HasInput("sum_2"),
+        ctx->HasInput("in_sum_2"),
         "Input (sum_2) of average_accumulates op should not be null.");
     PADDLE_ENFORCE(
-        ctx->HasInput("sum_3"),
+        ctx->HasInput("in_sum_3"),
        "Input (sum_3) of average_accumulates op should not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("num_accumulates"),
-                   "Input (num_accumulates) of average_accumulates op should "
-                   "not be null.");
-    PADDLE_ENFORCE(ctx->HasInput("old_num_accumulates"),
+    PADDLE_ENFORCE(
+        ctx->HasInput("in_num_accumulates"),
+        "Input (in_num_accumulates) of average_accumulates op should "
+        "not be null.");
+    PADDLE_ENFORCE(ctx->HasInput("in_old_num_accumulates"),
PADDLE_ENFORCE(ctx->HasInput("in_old_num_accumulates"), "Input (old_num_accumulates) of average_accumulates op " "should not be null."); PADDLE_ENFORCE( - ctx->HasInput("num_updates"), + ctx->HasInput("in_num_updates"), "Input (num_updates) of average_accumulates op should not be null."); PADDLE_ENFORCE( - ctx->HasOutput("sum_1"), + ctx->HasOutput("out_sum_1"), "Output (sum_1) of average_accumulates op should not be null."); PADDLE_ENFORCE( - ctx->HasOutput("sum_2"), + ctx->HasOutput("out_sum_2"), "Output (sum_2) of average_accumulates op should not be null."); PADDLE_ENFORCE( - ctx->HasOutput("sum_3"), + ctx->HasOutput("out_sum_3"), "Output (sum_3) of average_accumulates op should not be null."); - PADDLE_ENFORCE(ctx->HasOutput("num_accumulates"), + PADDLE_ENFORCE(ctx->HasOutput("out_num_accumulates"), "Output (num_accumulates) of average_accumulates op should " "not be null."); - PADDLE_ENFORCE(ctx->HasOutput("old_num_accumulates"), + PADDLE_ENFORCE(ctx->HasOutput("out_old_num_accumulates"), "Output (old_num_accumulates) of average_accumulates op " "should not be null."); PADDLE_ENFORCE( - ctx->HasOutput("num_updates"), + ctx->HasOutput("out_num_updates"), "Output (num_updates) of average_accumulates op should not be null."); - auto in_dim = ctx->GetInputDim("Param"); + auto in_dim = ctx->GetInputDim("param"); - ctx->SetOutputDim("sum_1", in_dim); - ctx->SetOutputDim("sum_2", in_dim); - ctx->SetOutputDim("sum_3", in_dim); - ctx->SetOutputDim("num_accumulates", {1}); - ctx->SetOutputDim("old_num_accumulates", {1}); - ctx->SetOutputDim("num_updates", {1}); + ctx->SetOutputDim("out_sum_1", in_dim); + ctx->SetOutputDim("out_sum_2", in_dim); + ctx->SetOutputDim("out_sum_3", in_dim); + ctx->SetOutputDim("out_num_accumulates", {1}); + ctx->SetOutputDim("out_old_num_accumulates", {1}); + ctx->SetOutputDim("out_num_updates", {1}); } protected: framework::OpKernelType GetExpectedKernelType( const framework::ExecutionContext& ctx) const override { return framework::OpKernelType( - framework::ToDataType(ctx.Input("Param")->type()), + framework::ToDataType(ctx.Input("param")->type()), ctx.GetPlace()); } }; @@ -115,26 +113,60 @@ class AverageAccumulatesOpMaker : public framework::OpProtoAndCheckerMaker { public: AverageAccumulatesOpMaker(OpProto* proto, OpAttrChecker* op_checker) : OpProtoAndCheckerMaker(proto, op_checker) { - AddInput("sum_1", ""); - AddInput("sum_2", ""); - AddInput("sum_3", ""); - AddInput("num_accumulates", ""); - AddInput("old_num_accumulates", ""); - AddInput("num_updates", ""); - - AddOutput("sum_1", ""); - AddOutput("sum_2", ""); - AddOutput("sum_3", ""); - AddOutput("num_accumulates", ""); - AddOutput("old_num_accumulates", ""); - AddOutput("num_updates", ""); - - AddAttr("", "average_window"); - AddAttr("", "max_average_window"); - AddAttr("", "min_average_window"); + AddInput("param", + "Input(Tensor or LoDTensor): The parameter to be accumulated."); + AddInput("in_sum_1", + "Input(Tensor or LoDTensor): A tensor used to store the parameter " + "sums with the same shape as input(param)."); + AddInput("in_sum_2", + "Input(Tensor or LoDTensor): A auxiliary tensor to help " + "accumulating sums of parameter values with the same shape as " + "input(param). 
+             "many sums.");
+    AddInput("in_sum_3",
+             "Input(Tensor or LoDTensor): An auxiliary tensor to help "
+             "accumulating sums of parameter values with the same shape as "
+             "input(param).");
+    AddInput("in_num_accumulates",
+             "Input(Tensor): The accumulating times of the current window, "
+             "with shape [1].");
+    AddInput("in_old_num_accumulates",
+             "Input(Tensor): The accumulating times of the previous window, "
+             "with shape [1].");
+    AddInput("in_num_updates",
+             "Input(Tensor): The total number of batches used for training "
+             "before this batch, with shape [1].");
+
+    AddOutput("out_sum_1",
+              "Output(Tensor or LoDTensor): A tensor used to store the "
+              "parameter sums with the same shape as input(param).");
+    AddOutput("out_sum_2",
+              "Output(Tensor or LoDTensor): An auxiliary tensor to help "
+              "accumulating sums of parameter values with the same shape as "
+              "input(param). It is used to avoid loss of precision due to too "
+              "many sums.");
+    AddOutput("out_sum_3",
+              "Output(Tensor or LoDTensor): An auxiliary tensor to help "
+              "accumulating sums of parameter values with the same shape as "
+              "input(param).");
+    AddOutput("out_num_accumulates",
+              "Output(Tensor): The accumulating times of the current window, "
+              "with shape [1].");
+    AddOutput("out_old_num_accumulates",
+              "Output(Tensor): The accumulating times of the previous window, "
+              "with shape [1].");
+    AddOutput("out_num_updates",
+              "Output(Tensor): The total number of batches used for training "
+              "before this batch, with shape [1].");
+
+    AddAttr<float>("average_window",
+                   "The ratio of average window size to num_updates.");
+    AddAttr<int64_t>("max_average_window", "Maximum size of average window.");
+    AddAttr<int64_t>("min_average_window", "Minimum size of average window.");
 
     AddComment(R"DOC(
 AverageAccumulates Operator.
+Accumulate the sum of a parameter within a sliding window. The size of the sliding window is determined by 'average_window', 'max_average_window' and 'min_average_window'.
 )DOC");
   }
 };
@@ -143,10 +175,10 @@ AverageAccumulates Operator.
 } // namespace paddle
 
 namespace ops = paddle::operators;
-REGISTER_OPERATOR(average_accumulate, ops::AverageAccumulatesOp,
+REGISTER_OPERATOR(average_accumulates, ops::AverageAccumulatesOp,
                   ops::AverageAccumulatesOpMaker,
                   paddle::framework::EmptyGradOpMaker);
 REGISTER_OP_CPU_KERNEL(
-    average_accumulate,
+    average_accumulates,
     ops::AverageAccumulatesKernel<paddle::platform::CPUDeviceContext, float>,
     ops::AverageAccumulatesKernel<paddle::platform::CPUDeviceContext, double>);
diff --git a/paddle/fluid/operators/average_accumulates_op.cu b/paddle/fluid/operators/average_accumulates_op.cu
index 56f2f02fd23..dbaa8ba6c99 100644
--- a/paddle/fluid/operators/average_accumulates_op.cu
+++ b/paddle/fluid/operators/average_accumulates_op.cu
@@ -21,39 +21,43 @@ template <>
 void getAccumulators<paddle::platform::CUDADeviceContext>(
     const framework::ExecutionContext& ctx, int64_t& num_updates_,
     int64_t& num_accumulates_, int64_t& old_num_accumulates_) {
-  auto* in_old_num_accumulates = ctx.Input<Tensor>("old_num_accumulates");
-  auto* in_num_accumulates = ctx.Input<Tensor>("num_accumulates");
-  auto* in_num_updates = ctx.Input<Tensor>("num_updates");
-
+  auto* in_old_num_accumulates = ctx.Input<Tensor>("in_old_num_accumulates");
+  auto* in_num_accumulates = ctx.Input<Tensor>("in_num_accumulates");
+  auto* in_num_updates = ctx.Input<Tensor>("in_num_updates");
+  auto stream = ctx.cuda_device_context().stream();
   memory::Copy(platform::CPUPlace(), &old_num_accumulates_,
                platform::CUDAPlace(), in_old_num_accumulates->data<int64_t>(),
-               sizeof(int64_t));
+               sizeof(int64_t), stream);
   memory::Copy(platform::CPUPlace(), &num_accumulates_, platform::CUDAPlace(),
-               in_old_num_accumulates->data<int64_t>(), sizeof(int64_t));
+               in_num_accumulates->data<int64_t>(), sizeof(int64_t), stream);
   memory::Copy(platform::CPUPlace(), &num_updates_, platform::CUDAPlace(),
-               in_num_updates->data<int64_t>(), sizeof(int64_t));
+               in_num_updates->data<int64_t>(), sizeof(int64_t), stream);
 }
 
 template <>
 void setAccumulators<paddle::platform::CUDADeviceContext>(
     const framework::ExecutionContext& ctx, int64_t num_updates_,
     int64_t num_accumulates_, int64_t old_num_accumulates_) {
-  auto* out_old_num_accumulates = ctx.Output<Tensor>("old_num_accumulates");
-  auto* out_num_accumulates = ctx.Output<Tensor>("num_accumulates");
-  auto* out_num_updates = ctx.Output<Tensor>("num_updates");
+  auto stream = ctx.cuda_device_context().stream();
+  auto* out_old_num_accumulates = ctx.Output<Tensor>("out_old_num_accumulates");
+  auto* out_num_accumulates = ctx.Output<Tensor>("out_num_accumulates");
+  auto* out_num_updates = ctx.Output<Tensor>("out_num_updates");
 
   memory::Copy(platform::CUDAPlace(), out_old_num_accumulates->data<int64_t>(),
-               platform::CPUPlace(), &old_num_accumulates_, sizeof(int64_t));
+               platform::CPUPlace(), &old_num_accumulates_, sizeof(int64_t),
+               stream);
   memory::Copy(platform::CUDAPlace(), out_num_accumulates->data<int64_t>(),
-               platform::CPUPlace(), &num_accumulates_, sizeof(int64_t));
+               platform::CPUPlace(), &num_accumulates_, sizeof(int64_t),
+               stream);
   memory::Copy(platform::CUDAPlace(), out_num_updates->data<int64_t>(),
-               platform::CPUPlace(), &num_updates_, sizeof(int64_t));
-}
-}
+               platform::CPUPlace(), &num_updates_, sizeof(int64_t), stream);
 }
+} // namespace operators
+} // namespace paddle
+
 namespace ops = paddle::operators;
 REGISTER_OP_CUDA_KERNEL(
-    average_accumulate,
+    average_accumulates,
     ops::AverageAccumulatesKernel<paddle::platform::CUDADeviceContext, float>,
     ops::AverageAccumulatesKernel<paddle::platform::CUDADeviceContext, double>);
diff --git a/paddle/fluid/operators/average_accumulates_op.h b/paddle/fluid/operators/average_accumulates_op.h
index 73814dd24b9..d33fd5519ad 100644
--- a/paddle/fluid/operators/average_accumulates_op.h
+++ b/paddle/fluid/operators/average_accumulates_op.h
@@ -29,88 +29,80 @@ using EigenVector = framework::EigenVector<T, MajorType, IndexType>;
 
 template <typename DeviceContext>
 void getAccumulators(const framework::ExecutionContext& ctx,
- int64_t& num_updates_, int64_t& num_accumulates_, - int64_t& old_num_accumulates_); + int64_t& num_updates, int64_t& num_accumulates, + int64_t& old_num_accumulates); template void setAccumulators(const framework::ExecutionContext& ctx, - int64_t num_updates_, int64_t num_accumulates_, - int64_t old_num_accumulates_); + int64_t num_updates, int64_t num_accumulates, + int64_t old_num_accumulates); template class AverageAccumulatesKernel : public framework::OpKernel { public: void Compute(const framework::ExecutionContext& ctx) const override { + // It is used to avoid loss of precision static const int64_t kMaxNumAccumulates = 16384; - // accumulators - int64_t num_updates_ = 0; - int64_t num_accumulates_ = 0; - int64_t old_num_accumulates_ = 0; - // attrs - int64_t min_average_window_; - int64_t max_average_window_; - float average_window_; - - auto* param = ctx.Input("Param"); - auto* in_sum_1 = ctx.Input("sum_1"); - auto* in_sum_2 = ctx.Input("sum_2"); - auto* in_sum_3 = ctx.Input("sum_3"); - - auto* out_sum_1 = ctx.Output("sum_1"); - auto* out_sum_2 = ctx.Output("sum_2"); - auto* out_sum_3 = ctx.Output("sum_3"); - - getAccumulators(ctx, num_updates_, num_accumulates_, - old_num_accumulates_); - average_window_ = ctx.Attr("average_window"); - max_average_window_ = - ctx.Attr("max_average_window"); // default bach number - min_average_window_ = - ctx.Attr("min_average_window"); // default 10000L - min_average_window_ = - std::min(min_average_window_, max_average_window_); - + // Get accumulators from input + int64_t num_updates = 0; + int64_t num_accumulates = 0; + int64_t old_num_accumulates = 0; + getAccumulators(ctx, num_updates, num_accumulates, + old_num_accumulates); + + // Get attrs + float average_window = ctx.Attr("average_window"); + int64_t max_average_window = ctx.Attr("max_average_window"); + int64_t min_average_window = ctx.Attr("min_average_window"); + min_average_window = + std::min(min_average_window, max_average_window); + + // Get inputs + auto* param = ctx.Input("param"); + auto* in_sum_1 = ctx.Input("in_sum_1"); + auto* in_sum_2 = ctx.Input("in_sum_2"); + auto* in_sum_3 = ctx.Input("in_sum_3"); auto param_tensor = EigenVector::Flatten(*param); auto in_sum_1_tensor = EigenVector::Flatten(*in_sum_1); auto in_sum_2_tensor = EigenVector::Flatten(*in_sum_2); auto in_sum_3_tensor = EigenVector::Flatten(*in_sum_3); + + // Get outputs + auto* out_sum_1 = ctx.Output("out_sum_1"); + auto* out_sum_2 = ctx.Output("out_sum_2"); + auto* out_sum_3 = ctx.Output("out_sum_3"); auto out_sum_1_tensor = EigenVector::Flatten(*out_sum_1); auto out_sum_2_tensor = EigenVector::Flatten(*out_sum_2); auto out_sum_3_tensor = EigenVector::Flatten(*out_sum_3); + // Compute auto& place = *ctx.template device_context().eigen_device(); math::SetConstant constant_functor; - // start batch - ++num_updates_; - ++num_accumulates_; - - // update + ++num_updates; + ++num_accumulates; out_sum_1_tensor.device(place) = in_sum_1_tensor + param_tensor; - out_sum_2_tensor.device(place) = in_sum_2_tensor; out_sum_3_tensor.device(place) = in_sum_3_tensor; - // needSpecialTraversal - if (num_updates_ % kMaxNumAccumulates == 0) { + if (num_updates % kMaxNumAccumulates == 0) { out_sum_2_tensor.device(place) = in_sum_2_tensor + in_sum_1_tensor; constant_functor(ctx.template device_context(), out_sum_1, 0.0); } - - if (num_accumulates_ >= min_average_window_ && - num_accumulates_ >= std::min(max_average_window_, - num_updates_ * average_window_)) { + if (num_accumulates >= min_average_window && + num_accumulates >= 
+                                             num_updates * average_window)) {
       out_sum_3_tensor.device(place) = in_sum_1_tensor + in_sum_2_tensor;
       constant_functor(ctx.template device_context<DeviceContext>(), out_sum_1,
                        0.0);
       constant_functor(ctx.template device_context<DeviceContext>(), out_sum_2,
                        0.0);
-
-      // finishBatch
-      old_num_accumulates_ = num_accumulates_;
-      num_accumulates_ = 0;
+      old_num_accumulates = num_accumulates;
+      num_accumulates = 0;
     }
-    setAccumulators<DeviceContext>(ctx, num_updates_, num_accumulates_,
-                                   old_num_accumulates_);
+
+    // Set accumulators to output
+    setAccumulators<DeviceContext>(ctx, num_updates, num_accumulates,
+                                   old_num_accumulates);
   }
 };
-- 
GitLab
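
For reference, the update rule implemented by AverageAccumulatesKernel can be
restated on a single scalar parameter, which makes the role of the three sums
easier to see. The sketch below is illustrative only and is not part of the
patch: the struct, function, and demo values are invented for this summary,
the real kernel operates on whole tensors through Eigen, and the fold step
here includes the current batch (a minor simplification of the tensor code).

#include <algorithm>
#include <cstdint>
#include <iostream>

// Hypothetical scalar mirror of the op's state; in the op these values live
// in the in_*/out_* tensors listed above.
struct Accumulator {
  double sum_1 = 0.0;  // running sum of the newest, partially filled block
  double sum_2 = 0.0;  // sums folded out of sum_1 every kMaxNumAccumulates
  double sum_3 = 0.0;  // total carried over from the last finished window
  int64_t num_updates = 0;          // batches seen since training started
  int64_t num_accumulates = 0;      // batches accumulated in current window
  int64_t old_num_accumulates = 0;  // batches in the previous window
};

// One application of the average_accumulates update to a scalar parameter.
void Accumulate(Accumulator* acc, double param, float average_window,
                int64_t max_average_window, int64_t min_average_window) {
  static const int64_t kMaxNumAccumulates = 16384;
  min_average_window = std::min(min_average_window, max_average_window);

  ++acc->num_updates;
  ++acc->num_accumulates;
  acc->sum_1 += param;

  // Periodically fold sum_1 into sum_2 so no single buffer accumulates too
  // many terms; this is the precision-loss guard mentioned in the comments.
  if (acc->num_updates % kMaxNumAccumulates == 0) {
    acc->sum_2 += acc->sum_1;
    acc->sum_1 = 0.0;
  }

  // Close the window once enough batches have been accumulated. The target
  // size grows as num_updates * average_window, clamped from above by
  // max_average_window and from below by min_average_window.
  if (acc->num_accumulates >= min_average_window &&
      acc->num_accumulates >=
          std::min<int64_t>(max_average_window,
                            static_cast<int64_t>(acc->num_updates *
                                                 average_window))) {
    acc->sum_3 = acc->sum_1 + acc->sum_2;
    acc->sum_1 = 0.0;
    acc->sum_2 = 0.0;
    acc->old_num_accumulates = acc->num_accumulates;
    acc->num_accumulates = 0;
  }
}

int main() {
  Accumulator acc;
  for (int i = 0; i < 100; ++i) {
    Accumulate(&acc, /*param=*/1.0, /*average_window=*/0.5,
               /*max_average_window=*/20, /*min_average_window=*/10);
  }
  // A consumer would typically form the averaged parameter as
  // (sum_1 + sum_2 + sum_3) / (num_accumulates + old_num_accumulates).
  std::cout << acc.sum_3 << " accumulated over " << acc.old_num_accumulates
            << " batches in the last window\n";
  return 0;
}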