diff --git a/paddle/fluid/eager/auto_code_generator/final_state_generator/python_c_gen.py b/paddle/fluid/eager/auto_code_generator/final_state_generator/python_c_gen.py
index f358e4d332d6455c86f1c07d96b4bdd9d2bc80e9..81210afe5d89da148e6893361825bbb727cd62a5 100644
--- a/paddle/fluid/eager/auto_code_generator/final_state_generator/python_c_gen.py
+++ b/paddle/fluid/eager/auto_code_generator/final_state_generator/python_c_gen.py
@@ -57,6 +57,8 @@ no_amp_list = [
     'adam',
     'adamw_',
     'adamw',
+    'average_accumulates',
+    'average_accumulates_',
     'decayed_adagrad_',
     'decayed_adagrad',
     'dgc_momentum_',
diff --git a/paddle/fluid/operators/average_accumulates_op.cc b/paddle/fluid/operators/average_accumulates_op.cc
index 856a703fd2b0687ce72f7927181d0ed2fbf42542..9f8f295c249353e7645f0ed9dd3daf2aa6510662 100644
--- a/paddle/fluid/operators/average_accumulates_op.cc
+++ b/paddle/fluid/operators/average_accumulates_op.cc
@@ -12,99 +12,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "paddle/fluid/operators/average_accumulates_op.h"
+#include "paddle/fluid/framework/eigen.h"
+#include "paddle/fluid/framework/op_registry.h"
+
+#include "paddle/fluid/framework/infershape_utils.h"
+#include "paddle/phi/infermeta/multiary.h"
 
 namespace paddle {
 namespace operators {
 
-template <>
-void GetAccumulators<phi::CPUContext>(const framework::ExecutionContext& ctx,
-                                      int64_t* num_updates,
-                                      int64_t* num_accumulates,
-                                      int64_t* old_num_accumulates) {
-  auto* in_old_num_accumulates = ctx.Input<Tensor>("in_old_num_accumulates");
-  auto* in_num_accumulates = ctx.Input<Tensor>("in_num_accumulates");
-  auto* in_num_updates = ctx.Input<Tensor>("in_num_updates");
-
-  *old_num_accumulates = in_old_num_accumulates->data<int64_t>()[0];
-  *num_accumulates = in_num_accumulates->data<int64_t>()[0];
-  *num_updates = in_num_updates->data<int64_t>()[0];
-}
-
-template <>
-void SetAccumulators<phi::CPUContext>(const framework::ExecutionContext& ctx,
-                                      int64_t num_updates,
-                                      int64_t num_accumulates,
-                                      int64_t old_num_accumulates) {
-  auto* out_old_num_accumulates = ctx.Output<Tensor>("out_old_num_accumulates");
-  auto* out_num_accumulates = ctx.Output<Tensor>("out_num_accumulates");
-  auto* out_num_updates = ctx.Output<Tensor>("out_num_updates");
-
-  out_old_num_accumulates->data<int64_t>()[0] = old_num_accumulates;
-  out_num_accumulates->data<int64_t>()[0] = num_accumulates;
-  out_num_updates->data<int64_t>()[0] = num_updates;
-}
-
 class AverageAccumulatesOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
 
-  void InferShape(framework::InferShapeContext* ctx) const override {
-    OP_INOUT_CHECK(
-        ctx->HasInput("param"), "Input", "param", "AverageAccumulates");
-    OP_INOUT_CHECK(
-        ctx->HasInput("in_sum_1"), "Input", "in_sum_1", "AverageAccumulates");
-    OP_INOUT_CHECK(
-        ctx->HasInput("in_sum_2"), "Input", "in_sum_2", "AverageAccumulates");
-    OP_INOUT_CHECK(
-        ctx->HasInput("in_sum_3"), "Input", "in_sum_3", "AverageAccumulates");
-    OP_INOUT_CHECK(ctx->HasInput("in_num_accumulates"),
-                   "Input",
-                   "in_num_accumulates",
-                   "AverageAccumulates");
-    OP_INOUT_CHECK(ctx->HasInput("in_old_num_accumulates"),
-                   "Input",
-                   "in_old_num_accumulates",
-                   "AverageAccumulates");
-    OP_INOUT_CHECK(ctx->HasInput("in_num_updates"),
-                   "Input",
-                   "in_num_updates",
-                   "AverageAccumulates");
-
-    OP_INOUT_CHECK(ctx->HasOutput("out_sum_1"),
-                   "Output",
-                   "out_sum_1",
-                   "AverageAccumulates");
-    OP_INOUT_CHECK(ctx->HasOutput("out_sum_2"),
-                   "Output",
-                   "out_sum_2",
-                   "AverageAccumulates");
-    OP_INOUT_CHECK(ctx->HasOutput("out_sum_3"),
-                   "Output",
-                   "out_sum_3",
-                   "AverageAccumulates");
-    OP_INOUT_CHECK(ctx->HasOutput("out_num_accumulates"),
-                   "Output",
-                   "out_num_accumulates",
-                   "AverageAccumulates");
-    OP_INOUT_CHECK(ctx->HasOutput("out_old_num_accumulates"),
-                   "Output",
-                   "out_old_num_accumulates",
-                   "AverageAccumulates");
-    OP_INOUT_CHECK(ctx->HasOutput("out_num_updates"),
-                   "Output",
-                   "out_num_updates",
-                   "AverageAccumulates");
-    auto in_dim = ctx->GetInputDim("param");
-
-    ctx->SetOutputDim("out_sum_1", in_dim);
-    ctx->SetOutputDim("out_sum_2", in_dim);
-    ctx->SetOutputDim("out_sum_3", in_dim);
-    ctx->SetOutputDim("out_num_accumulates", {1});
-    ctx->SetOutputDim("out_old_num_accumulates", {1});
-    ctx->SetOutputDim("out_num_updates", {1});
-  }
-
  protected:
   framework::OpKernelType GetExpectedKernelType(
       const framework::ExecutionContext& ctx) const override {
@@ -209,12 +129,14 @@ And for a mini-batch in training, accumulators were computed as below steps:
 }  // namespace paddle
 
 namespace ops = paddle::operators;
+DECLARE_INFER_SHAPE_FUNCTOR(average_accumulates,
+                            AverageAccumulatesInferShapeFunctor,
+                            PD_INFER_META(phi::AverageAccumulatesInferMeta));
+
 REGISTER_OPERATOR(
     average_accumulates,
     ops::AverageAccumulatesOp,
     ops::AverageAccumulatesOpMaker,
     paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
-    paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
-REGISTER_OP_CPU_KERNEL(average_accumulates,
-                       ops::AverageAccumulatesKernel<phi::CPUContext, float>,
-                       ops::AverageAccumulatesKernel<phi::CPUContext, double>);
+    paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>,
+    AverageAccumulatesInferShapeFunctor);
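Review note: the hand-written `InferShape` above is deleted in favor of an infer-shape functor that forwards to `phi::AverageAccumulatesInferMeta` (added in `multiary.cc` later in this patch), so shape and dtype inference lives in one place for both static graph and eager mode. A minimal sketch of the binding pattern, shown for a hypothetical operator `my_op` and meta function `phi::MyOpInferMeta` (both names are placeholders, not part of this patch):

```cpp
// Sketch of the fluid -> phi infer-shape binding used in the file above.
// "my_op", MyOp, MyOpMaker and phi::MyOpInferMeta are hypothetical.
#include "paddle/fluid/framework/infershape_utils.h"
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/phi/infermeta/multiary.h"

namespace ops = paddle::operators;

// Wraps the phi InferMeta so the fluid op no longer overrides InferShape.
DECLARE_INFER_SHAPE_FUNCTOR(my_op,
                            MyOpInferShapeFunctor,
                            PD_INFER_META(phi::MyOpInferMeta));

REGISTER_OPERATOR(my_op,
                  ops::MyOp,
                  ops::MyOpMaker,
                  MyOpInferShapeFunctor);  // passed instead of InferShape
```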
diff --git a/paddle/fluid/operators/average_accumulates_op.cu b/paddle/fluid/operators/average_accumulates_op.cu
deleted file mode 100644
index d793d528a5b18ce626f2c893718ac5c7fc5b0833..0000000000000000000000000000000000000000
--- a/paddle/fluid/operators/average_accumulates_op.cu
+++ /dev/null
@@ -1,90 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/operators/average_accumulates_op.h"
-#include "paddle/fluid/platform/device/gpu/gpu_info.h"
-
-namespace paddle {
-namespace operators {
-template <>
-void GetAccumulators<platform::CUDADeviceContext>(
-    const framework::ExecutionContext& ctx,
-    int64_t* num_updates_,
-    int64_t* num_accumulates_,
-    int64_t* old_num_accumulates_) {
-  auto* in_old_num_accumulates = ctx.Input<Tensor>("in_old_num_accumulates");
-  auto* in_num_accumulates = ctx.Input<Tensor>("in_num_accumulates");
-  auto* in_num_updates = ctx.Input<Tensor>("in_num_updates");
-  auto stream = ctx.cuda_device_context().stream();
-  auto cuda_place = in_old_num_accumulates->place();
-  memory::Copy(platform::CPUPlace(),
-               old_num_accumulates_,
-               cuda_place,
-               in_old_num_accumulates->data<int64_t>(),
-               sizeof(int64_t),
-               stream);
-  memory::Copy(platform::CPUPlace(),
-               num_accumulates_,
-               cuda_place,
-               in_num_accumulates->data<int64_t>(),
-               sizeof(int64_t),
-               stream);
-  memory::Copy(platform::CPUPlace(),
-               num_updates_,
-               cuda_place,
-               in_num_updates->data<int64_t>(),
-               sizeof(int64_t),
-               stream);
-}
-
-template <>
-void SetAccumulators<platform::CUDADeviceContext>(
-    const framework::ExecutionContext& ctx,
-    int64_t num_updates_,
-    int64_t num_accumulates_,
-    int64_t old_num_accumulates_) {
-  auto stream = ctx.cuda_device_context().stream();
-  auto* out_old_num_accumulates = ctx.Output<Tensor>("out_old_num_accumulates");
-  auto* out_num_accumulates = ctx.Output<Tensor>("out_num_accumulates");
-  auto* out_num_updates = ctx.Output<Tensor>("out_num_updates");
-  auto cuda_place = out_old_num_accumulates->place();
-
-  memory::Copy(cuda_place,
-               out_old_num_accumulates->data<int64_t>(),
-               platform::CPUPlace(),
-               &old_num_accumulates_,
-               sizeof(int64_t),
-               stream);
-  memory::Copy(cuda_place,
-               out_num_accumulates->data<int64_t>(),
-               platform::CPUPlace(),
-               &num_accumulates_,
-               sizeof(int64_t),
-               stream);
-  memory::Copy(cuda_place,
-               out_num_updates->data<int64_t>(),
-               platform::CPUPlace(),
-               &num_updates_,
-               sizeof(int64_t),
-               stream);
-}
-
-}  // namespace operators
-}  // namespace paddle
-
-namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(
-    average_accumulates,
-    ops::AverageAccumulatesKernel<paddle::platform::CUDADeviceContext, float>,
-    ops::AverageAccumulatesKernel<paddle::platform::CUDADeviceContext, double>);
diff --git a/paddle/fluid/operators/average_accumulates_op.h b/paddle/fluid/operators/average_accumulates_op.h
deleted file mode 100644
index afa43f8c240c5826df04e19259b5737fc7d7f467..0000000000000000000000000000000000000000
--- a/paddle/fluid/operators/average_accumulates_op.h
+++ /dev/null
@@ -1,119 +0,0 @@
-/* Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-You may obtain a copy of the License at
-
-    http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#pragma once
-#include <algorithm>
-
-#include "paddle/fluid/framework/eigen.h"
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/phi/kernels/funcs/math_function.h"
-
-namespace paddle {
-namespace operators {
-
-using Tensor = framework::Tensor;
-
-template <typename DeviceContext>
-void GetAccumulators(const framework::ExecutionContext& ctx,
-                     int64_t* num_updates,
-                     int64_t* num_accumulates,
-                     int64_t* old_num_accumulates);
-
-template <typename DeviceContext>
-void SetAccumulators(const framework::ExecutionContext& ctx,
-                     int64_t num_updates,
-                     int64_t num_accumulates,
-                     int64_t old_num_accumulates);
-
-template <typename DeviceContext, typename T>
-class AverageAccumulatesKernel : public framework::OpKernel<T> {
- public:
-  void Compute(const framework::ExecutionContext& ctx) const override {
-    // It is used to avoid loss of precision
-    static const int64_t kMaxNumAccumulates = 16384;
-    // Get accumulators from input
-    int64_t num_updates = 0;
-    int64_t num_accumulates = 0;
-    int64_t old_num_accumulates = 0;
-    GetAccumulators<DeviceContext>(
-        ctx, &num_updates, &num_accumulates, &old_num_accumulates);
-
-    // Get attrs
-    float average_window = ctx.Attr<float>("average_window");
-    int64_t max_average_window = ctx.Attr<int64_t>("max_average_window");
-    int64_t min_average_window = ctx.Attr<int64_t>("min_average_window");
-    PADDLE_ENFORCE_LE(
-        min_average_window,
-        max_average_window,
-        platform::errors::InvalidArgument(
-            "The min_average_window > "
-            "max_average_window is not right, min_average_window is %ld, "
-            "max_average_window is %ld.",
-            min_average_window,
-            max_average_window));
-
-    // Get inputs
-    auto* param = ctx.Input<Tensor>("param");
-    auto* in_sum_1 = ctx.Input<Tensor>("in_sum_1");
-    auto* in_sum_2 = ctx.Input<Tensor>("in_sum_2");
-    auto* in_sum_3 = ctx.Input<Tensor>("in_sum_3");
-    auto param_tensor = framework::EigenVector<T>::Flatten(*param);
-    auto in_sum_1_tensor = framework::EigenVector<T>::Flatten(*in_sum_1);
-    auto in_sum_2_tensor = framework::EigenVector<T>::Flatten(*in_sum_2);
-    auto in_sum_3_tensor = framework::EigenVector<T>::Flatten(*in_sum_3);
-
-    // Get outputs
-    auto* out_sum_1 = ctx.Output<Tensor>("out_sum_1");
-    auto* out_sum_2 = ctx.Output<Tensor>("out_sum_2");
-    auto* out_sum_3 = ctx.Output<Tensor>("out_sum_3");
-    auto out_sum_1_tensor = framework::EigenVector<T>::Flatten(*out_sum_1);
-    auto out_sum_2_tensor = framework::EigenVector<T>::Flatten(*out_sum_2);
-    auto out_sum_3_tensor = framework::EigenVector<T>::Flatten(*out_sum_3);
-
-    // Compute
-    auto& place = *ctx.template device_context<DeviceContext>().eigen_device();
-    phi::funcs::SetConstant<DeviceContext, T> constant_functor;
-    ++num_updates;
-    ++num_accumulates;
-    out_sum_1_tensor.device(place) = in_sum_1_tensor + param_tensor;
-    out_sum_2_tensor.device(place) = in_sum_2_tensor;
-    out_sum_3_tensor.device(place) = in_sum_3_tensor;
-    if (num_updates % kMaxNumAccumulates == 0) {
-      // Move the sum to a different buffer to avoid loss of precision due to
-      // too many sums.
-      out_sum_2_tensor.device(place) = in_sum_2_tensor + in_sum_1_tensor;
-      constant_functor(
-          ctx.template device_context<DeviceContext>(), out_sum_1, 0.0);
-    }
-    if (num_accumulates >= min_average_window &&
-        num_accumulates >= std::min<int64_t>(max_average_window,
-                                             num_updates * average_window)) {
-      // Now the average window is too long, discard the old sum.
-      out_sum_3_tensor.device(place) = in_sum_1_tensor + in_sum_2_tensor;
-      constant_functor(
-          ctx.template device_context<DeviceContext>(), out_sum_1, 0.0);
-      constant_functor(
-          ctx.template device_context<DeviceContext>(), out_sum_2, 0.0);
-      old_num_accumulates = num_accumulates;
-      num_accumulates = 0;
-    }
-
-    // Set accumulators to output
-    SetAccumulators<DeviceContext>(
-        ctx, num_updates, num_accumulates, old_num_accumulates);
-  }
-};
-
-}  // namespace operators
-}  // namespace paddle
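Review note: the control flow of the deleted kernel (which the new phi kernel reproduces) is easier to follow with the tensors collapsed to scalars. A standalone sketch, plain C++ rather than Paddle code, with made-up window attribute values:

```cpp
// Scalar simulation of the accumulator bookkeeping implemented above.
#include <algorithm>
#include <cstdint>
#include <cstdio>

int main() {
  const int64_t kMaxNumAccumulates = 16384;  // fold threshold from the kernel
  // Attribute values chosen for illustration only.
  const float average_window = 0.15f;
  const int64_t min_average_window = 10000;
  const int64_t max_average_window = 20000;

  double sum_1 = 0, sum_2 = 0, sum_3 = 0;
  int64_t num_updates = 0, num_accumulates = 0, old_num_accumulates = 0;

  for (int step = 0; step < 100000; ++step) {
    const double param = 1.0;  // stand-in for the flattened parameter
    ++num_updates;
    ++num_accumulates;
    sum_1 += param;  // out_sum_1 = in_sum_1 + param
    if (num_updates % kMaxNumAccumulates == 0) {
      sum_2 += sum_1;  // fold the short sum into the long sum
      sum_1 = 0;
    }
    if (num_accumulates >= min_average_window &&
        num_accumulates >= std::min<int64_t>(max_average_window,
                                             num_updates * average_window)) {
      sum_3 = sum_1 + sum_2;  // snapshot, then restart the window
      sum_1 = sum_2 = 0;
      old_num_accumulates = num_accumulates;
      num_accumulates = 0;
    }
  }
  // The averaged parameter would be
  // (sum_1 + sum_2 + sum_3) / (num_accumulates + old_num_accumulates).
  std::printf("last window length = %lld\n",
              static_cast<long long>(old_num_accumulates));
  return 0;
}
```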
diff --git a/paddle/phi/api/yaml/legacy_api.yaml b/paddle/phi/api/yaml/legacy_api.yaml
index 600f93683eff357dcc869f6a271d53d60a061d62..bd48617037d283a009fac39aefc6b88f800935ca 100644
--- a/paddle/phi/api/yaml/legacy_api.yaml
+++ b/paddle/phi/api/yaml/legacy_api.yaml
@@ -264,6 +264,17 @@
   kernel :
     func : auc
 
+#average_accumulates
+- api : average_accumulates_
+  args : (Tensor param, Tensor in_sum_1, Tensor in_sum_2, Tensor in_sum_3, Tensor in_num_accumulates, Tensor in_old_num_accumulates, Tensor in_num_updates, float average_window, int64_t max_average_window, int64_t min_average_window)
+  output : Tensor(out_sum_1), Tensor(out_sum_2), Tensor(out_sum_3), Tensor(out_num_accumulates), Tensor(out_old_num_accumulates), Tensor(out_num_updates)
+  infer_meta :
+    func : AverageAccumulatesInferMeta
+  kernel :
+    func : average_accumulates {dense, dense, dense, dense, dense, dense, dense -> dense, dense, dense, dense, dense, dense}
+    data_type : param
+  inplace : (in_sum_1 -> out_sum_1), (in_sum_2 -> out_sum_2), (in_sum_3 -> out_sum_3), (in_num_accumulates -> out_num_accumulates), (in_old_num_accumulates -> out_old_num_accumulates), (in_num_updates -> out_num_updates)
+
 # batch_norm
 - api : batch_norm
   args : (Tensor x, Tensor scale, Tensor bias, Tensor mean, Tensor variance, float momentum, float epsilon, str data_layout, bool is_test, bool use_global_stats, bool trainable_statistics, bool fuse_with_relu)
diff --git a/paddle/phi/infermeta/multiary.cc b/paddle/phi/infermeta/multiary.cc
index 1a05ad495c9813054dad4c0787215cc05e51d231..a524506c7f07b126d205eab93432024ae4978105 100644
--- a/paddle/phi/infermeta/multiary.cc
+++ b/paddle/phi/infermeta/multiary.cc
@@ -434,6 +434,68 @@ void AucInferMeta(const MetaTensor& input,
   }
 }
 
+void AverageAccumulatesInferMeta(const MetaTensor& param,
+                                 const MetaTensor& in_sum_1,
+                                 const MetaTensor& in_sum_2,
+                                 const MetaTensor& in_sum_3,
+                                 const MetaTensor& in_num_accumulates,
+                                 const MetaTensor& in_old_num_accumulates,
+                                 const MetaTensor& in_num_updates,
+                                 float average_window,
+                                 int64_t max_average_window,
+                                 int64_t min_average_window,
+                                 MetaTensor* out_sum_1,
+                                 MetaTensor* out_sum_2,
+                                 MetaTensor* out_sum_3,
+                                 MetaTensor* out_num_accumulates,
+                                 MetaTensor* out_old_num_accumulates,
+                                 MetaTensor* out_num_updates) {
+  // auto in_dim = param.dims;
+  PADDLE_ENFORCE_NE(
+      out_sum_1,
+      nullptr,
+      errors::NotFound(
+          "Output(out_sum_1) of AverageAccumulates should not be null."));
+  PADDLE_ENFORCE_NE(
+      out_sum_2,
+      nullptr,
+      errors::NotFound(
+          "Output(out_sum_2) of AverageAccumulates should not be null."));
+  PADDLE_ENFORCE_NE(
+      out_sum_3,
+      nullptr,
+      errors::NotFound(
+          "Output(out_sum_3) of AverageAccumulates should not be null."));
+  PADDLE_ENFORCE_NE(out_num_accumulates,
+                    nullptr,
+                    errors::NotFound("Output(out_num_accumulates) of "
+                                     "AverageAccumulates should not be null."));
+
+  PADDLE_ENFORCE_NE(out_old_num_accumulates,
+                    nullptr,
+                    errors::NotFound("Output(out_old_num_accumulates) of "
+                                     "AverageAccumulates should not be null."));
+
+  PADDLE_ENFORCE_NE(
+      out_num_updates,
+      nullptr,
+      errors::NotFound(
+          "Output(out_num_updates) of AverageAccumulates should not be null."));
+
+  out_sum_1->set_dims(in_sum_1.dims());
+  out_sum_1->set_dtype(in_sum_1.dtype());
+  out_sum_2->set_dims(in_sum_2.dims());
+  out_sum_2->set_dtype(in_sum_2.dtype());
+  out_sum_3->set_dims(in_sum_3.dims());
+  out_sum_3->set_dtype(in_sum_3.dtype());
+  out_num_accumulates->set_dims({1});
+  out_num_accumulates->set_dtype(in_num_accumulates.dtype());
+  out_old_num_accumulates->set_dims({1});
+  out_old_num_accumulates->set_dtype(in_old_num_accumulates.dtype());
+  out_num_updates->set_dims({1});
+  out_num_updates->set_dtype(in_num_updates.dtype());
+}
+
 void BatchNormInferMeta(const MetaTensor& x,
                         const MetaTensor& scale,
                         const MetaTensor& bias,
diff --git a/paddle/phi/infermeta/multiary.h b/paddle/phi/infermeta/multiary.h
index 06d2530cffa2c19f99baaa0a1292b0672c9a018d..60342dc58f5c91543762fe210ab9dbb7fc79604c 100644
--- a/paddle/phi/infermeta/multiary.h
+++ b/paddle/phi/infermeta/multiary.h
@@ -134,6 +134,23 @@ void AucInferMeta(const MetaTensor& input,
                   MetaTensor* stat_neg_out,
                   MetaConfig config = MetaConfig());
 
+void AverageAccumulatesInferMeta(const MetaTensor& param,
+                                 const MetaTensor& in_sum_1,
+                                 const MetaTensor& in_sum_2,
+                                 const MetaTensor& in_sum_3,
+                                 const MetaTensor& in_num_accumulates,
+                                 const MetaTensor& in_old_num_accumulates,
+                                 const MetaTensor& in_num_updates,
+                                 float average_window,
+                                 int64_t max_average_window,
+                                 int64_t min_average_window,
+                                 MetaTensor* out_sum_1,
+                                 MetaTensor* out_sum_2,
+                                 MetaTensor* out_sum_3,
+                                 MetaTensor* out_num_accumulates,
+                                 MetaTensor* out_old_num_accumulates,
+                                 MetaTensor* out_num_updates);
+
 void BatchNormInferMeta(const MetaTensor& x,
                         const MetaTensor& scale,
                         const MetaTensor& bias,
diff --git a/paddle/phi/kernels/average_accumulates_kernel.h b/paddle/phi/kernels/average_accumulates_kernel.h
new file mode 100644
index 0000000000000000000000000000000000000000..63f2b362cfde3a37a292845f4b0530d93e53192c
--- /dev/null
+++ b/paddle/phi/kernels/average_accumulates_kernel.h
@@ -0,0 +1,57 @@
+// Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#pragma once
+
+#include "paddle/phi/core/dense_tensor.h"
+
+namespace phi {
+
+template <typename Context>
+void GetAccumulators(const Context& dev_ctx,
+                     const DenseTensor& in_num_accumulates,
+                     const DenseTensor& in_old_num_accumulates,
+                     const DenseTensor& in_num_updates,
+                     int64_t* num_updates,
+                     int64_t* num_accumulates,
+                     int64_t* old_num_accumulates);
+
+template <typename Context>
+void SetAccumulators(const Context& dev_ctx,
+                     int64_t num_updates,
+                     int64_t num_accumulates,
+                     int64_t old_num_accumulates,
+                     DenseTensor* out_num_accumulates,
+                     DenseTensor* out_old_num_accumulates,
+                     DenseTensor* out_num_updates);
+
+template <typename T, typename Context>
+void AverageAccumulatesKernel(const Context& dev_ctx,
+                              const DenseTensor& param,
+                              const DenseTensor& in_sum_1,
+                              const DenseTensor& in_sum_2,
+                              const DenseTensor& in_sum_3,
+                              const DenseTensor& in_num_accumulates,
+                              const DenseTensor& in_old_num_accumulates,
+                              const DenseTensor& in_num_updates,
+                              float average_window,
+                              int64_t max_average_window,
+                              int64_t min_average_window,
+                              DenseTensor* out_sum_1,
+                              DenseTensor* out_sum_2,
+                              DenseTensor* out_sum_3,
+                              DenseTensor* out_num_accumulates,
+                              DenseTensor* out_old_num_accumulates,
+                              DenseTensor* out_num_updates);
+}  // namespace phi
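Review note: the trailing underscore in the yaml api `average_accumulates_` above marks the generated API as in-place (each `inplace` pair reuses the input buffer for the corresponding output), and `data_type : param` selects the kernel instantiation `T` from the parameter dtype. The `AverageAccumulatesInferMeta` added in `multiary.cc` can be exercised directly, e.g. from a C++ unit test; a minimal sketch, with illustrative shapes (only metadata flows through `MetaTensor`, no kernel runs):

```cpp
// Sketch: drive the new InferMeta by hand to check output dims/dtypes.
#include "paddle/phi/core/ddim.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/meta_tensor.h"
#include "paddle/phi/infermeta/multiary.h"

void InferAverageAccumulatesMeta() {
  phi::DenseTensor param, s1, s2, s3, na, ona, nu;    // inputs (meta only)
  phi::DenseTensor os1, os2, os3, o_na, o_ona, o_nu;  // outputs
  phi::MetaTensor m_param(&param), m_s1(&s1), m_s2(&s2), m_s3(&s3),
      m_na(&na), m_ona(&ona), m_nu(&nu);
  phi::MetaTensor m_os1(&os1), m_os2(&os2), m_os3(&os3), m_o_na(&o_na),
      m_o_ona(&o_ona), m_o_nu(&o_nu);

  m_param.set_dims(phi::make_ddim({256}));  // illustrative parameter shape
  m_s1.set_dims(phi::make_ddim({256}));
  m_s2.set_dims(phi::make_ddim({256}));
  m_s3.set_dims(phi::make_ddim({256}));

  phi::AverageAccumulatesInferMeta(m_param, m_s1, m_s2, m_s3, m_na, m_ona,
                                   m_nu,
                                   /*average_window=*/0.15f,
                                   /*max_average_window=*/20000,
                                   /*min_average_window=*/10000,
                                   &m_os1, &m_os2, &m_os3, &m_o_na, &m_o_ona,
                                   &m_o_nu);
  // Afterwards out_sum_* carry the in_sum_* dims/dtypes and the three
  // counter outputs are shaped {1}, matching the old fluid InferShape.
}
```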
diff --git a/paddle/phi/kernels/cpu/average_accumulates_kernel.cc b/paddle/phi/kernels/cpu/average_accumulates_kernel.cc
new file mode 100644
index 0000000000000000000000000000000000000000..14eb38d5b99b6e3870f54f334e034161e2a1f472
--- /dev/null
+++ b/paddle/phi/kernels/cpu/average_accumulates_kernel.cc
@@ -0,0 +1,56 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/phi/kernels/average_accumulates_kernel.h"
+#include "paddle/phi/kernels/impl/average_accumulates_kernel_impl.h"
+
+#include "paddle/phi/backends/cpu/cpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+
+namespace phi {
+
+template <>
+void GetAccumulators<phi::CPUContext>(const phi::CPUContext& dev_ctx,
+                                      const DenseTensor& in_num_accumulates,
+                                      const DenseTensor& in_old_num_accumulates,
+                                      const DenseTensor& in_num_updates,
+                                      int64_t* num_updates,
+                                      int64_t* num_accumulates,
+                                      int64_t* old_num_accumulates) {
+  *old_num_accumulates = in_old_num_accumulates.data<int64_t>()[0];
+  *num_accumulates = in_num_accumulates.data<int64_t>()[0];
+  *num_updates = in_num_updates.data<int64_t>()[0];
+}
+
+template <>
+void SetAccumulators<phi::CPUContext>(const phi::CPUContext& dev_ctx,
+                                      int64_t num_updates,
+                                      int64_t num_accumulates,
+                                      int64_t old_num_accumulates,
+                                      DenseTensor* out_num_accumulates,
+                                      DenseTensor* out_old_num_accumulates,
+                                      DenseTensor* out_num_updates) {
+  out_old_num_accumulates->data<int64_t>()[0] = old_num_accumulates;
+  out_num_accumulates->data<int64_t>()[0] = num_accumulates;
+  out_num_updates->data<int64_t>()[0] = num_updates;
+}
+
+}  // namespace phi
+
+PD_REGISTER_KERNEL(average_accumulates,
+                   CPU,
+                   ALL_LAYOUT,
+                   phi::AverageAccumulatesKernel,
+                   float,
+                   double) {}
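Review note: `GetAccumulators`/`SetAccumulators` are declared but deliberately left undefined in the shared header; each backend contributes an explicit specialization. The CPU one above reads and writes host memory directly; the GPU one below stages through `memory::Copy`. A skeleton of what a hypothetical additional backend (here `MyContext`, a placeholder; a real backend context derives from `phi::DeviceContext`) would have to provide:

```cpp
// Hypothetical backend sketch; MyContext is NOT a real Paddle type.
#include "paddle/phi/kernels/average_accumulates_kernel.h"

struct MyContext {};  // placeholder for a real device context

namespace phi {
template <>
void GetAccumulators<MyContext>(const MyContext& dev_ctx,
                                const DenseTensor& in_num_accumulates,
                                const DenseTensor& in_old_num_accumulates,
                                const DenseTensor& in_num_updates,
                                int64_t* num_updates,
                                int64_t* num_accumulates,
                                int64_t* old_num_accumulates) {
  // A real backend would bring the three scalar counters into host memory
  // here (directly if host-addressable, via a copy otherwise) and store
  // them through the out-pointers; placeholder values for the sketch.
  *num_updates = *num_accumulates = *old_num_accumulates = 0;
}
}  // namespace phi
```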
diff --git a/paddle/phi/kernels/gpu/average_accumulates_kernel.cu b/paddle/phi/kernels/gpu/average_accumulates_kernel.cu
new file mode 100644
index 0000000000000000000000000000000000000000..98a6699d9754f116fe399a851efd953431de0c6a
--- /dev/null
+++ b/paddle/phi/kernels/gpu/average_accumulates_kernel.cu
@@ -0,0 +1,100 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/phi/kernels/average_accumulates_kernel.h"
+#include "paddle/phi/kernels/impl/average_accumulates_kernel_impl.h"
+
+#include "paddle/phi/backends/gpu/gpu_context.h"
+#include "paddle/phi/core/kernel_registry.h"
+
+namespace phi {
+
+template <>
+void GetAccumulators<phi::GPUContext>(const phi::GPUContext& dev_ctx,
+                                      const DenseTensor& in_num_accumulates,
+                                      const DenseTensor& in_old_num_accumulates,
+                                      const DenseTensor& in_num_updates,
+                                      int64_t* num_updates,
+                                      int64_t* num_accumulates,
+                                      int64_t* old_num_accumulates) {
+  auto stream = dev_ctx.stream();
+  auto cuda_place = in_old_num_accumulates.place();
+  paddle::memory::Copy(phi::CPUPlace(),
+                       old_num_accumulates,
+                       cuda_place,
+                       in_old_num_accumulates.data<int64_t>(),
+                       sizeof(int64_t),
+                       stream);
+  paddle::memory::Copy(phi::CPUPlace(),
+                       num_accumulates,
+                       cuda_place,
+                       in_num_accumulates.data<int64_t>(),
+                       sizeof(int64_t),
+                       stream);
+  paddle::memory::Copy(phi::CPUPlace(),
+                       num_updates,
+                       cuda_place,
+                       in_num_updates.data<int64_t>(),
+                       sizeof(int64_t),
+                       stream);
+}
+
+template <>
+void SetAccumulators<phi::GPUContext>(const phi::GPUContext& dev_ctx,
+                                      int64_t num_updates,
+                                      int64_t num_accumulates,
+                                      int64_t old_num_accumulates,
+                                      DenseTensor* out_num_accumulates,
+                                      DenseTensor* out_old_num_accumulates,
+                                      DenseTensor* out_num_updates) {
+  int64_t* out_num_accumulates_ptr =
+      dev_ctx.template Alloc<int64_t>(out_num_accumulates);
+  int64_t* out_old_num_accumulates_ptr =
+      dev_ctx.template Alloc<int64_t>(out_old_num_accumulates);
+  int64_t* out_num_updates_ptr =
+      dev_ctx.template Alloc<int64_t>(out_num_updates);
+
+  auto stream = dev_ctx.stream();
+
+  auto cuda_place = out_old_num_accumulates->place();
+  paddle::memory::Copy(dev_ctx.GetPlace(),
+                       out_num_accumulates_ptr,
+                       phi::CPUPlace(),
+                       &num_accumulates,
+                       sizeof(int64_t),
+                       stream);
+
+  paddle::memory::Copy(dev_ctx.GetPlace(),
+                       out_old_num_accumulates_ptr,
+                       phi::CPUPlace(),
+                       &old_num_accumulates,
+                       sizeof(int64_t),
+                       stream);
+
+  paddle::memory::Copy(cuda_place,
+                       out_num_updates_ptr,
+                       phi::CPUPlace(),
+                       &num_updates,
+                       sizeof(int64_t),
+                       stream);
+}
+
+}  // namespace phi
+
+PD_REGISTER_KERNEL(average_accumulates,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::AverageAccumulatesKernel,
+                   float,
+                   double) {}
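Review note: unlike the CPU specialization, the GPU one moves the scalar counters through `paddle::memory::Copy` on the context's stream, which is asynchronous with respect to the host. A conservative standalone sketch of the device-to-host staging step, with an explicit wait before the host value is consumed (whether the wait is needed in the kernel above depends on how the framework orders work on that stream):

```cpp
// Sketch of the D2H staging pattern used in the GPU specialization above.
#include "paddle/fluid/memory/memcpy.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/core/dense_tensor.h"

int64_t ReadCounter(const phi::GPUContext& dev_ctx,
                    const phi::DenseTensor& t) {
  int64_t value = 0;
  paddle::memory::Copy(phi::CPUPlace(),    // dst place
                       &value,             // dst host pointer
                       t.place(),          // src (device) place
                       t.data<int64_t>(),  // src device pointer
                       sizeof(int64_t),
                       dev_ctx.stream());
  dev_ctx.Wait();  // make the asynchronous copy visible to the host
  return value;
}
```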
diff --git a/paddle/phi/kernels/impl/average_accumulates_kernel_impl.h b/paddle/phi/kernels/impl/average_accumulates_kernel_impl.h
new file mode 100644
index 0000000000000000000000000000000000000000..8731316317d4774df09c4634340215e0d3647c16
--- /dev/null
+++ b/paddle/phi/kernels/impl/average_accumulates_kernel_impl.h
@@ -0,0 +1,146 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+#include "paddle/phi/kernels/average_accumulates_kernel.h"
+
+#include <algorithm>
+
+#include "paddle/phi/kernels/funcs/eigen/common.h"
+#include "paddle/phi/kernels/funcs/math_function.h"
+
+namespace phi {
+
+template <typename T, typename Context>
+void AverageAccumulatesKernel(const Context& dev_ctx,
+                              const DenseTensor& param,
+                              const DenseTensor& in_sum_1,
+                              const DenseTensor& in_sum_2,
+                              const DenseTensor& in_sum_3,
+                              const DenseTensor& in_num_accumulates,
+                              const DenseTensor& in_old_num_accumulates,
+                              const DenseTensor& in_num_updates,
+                              float average_window,
+                              int64_t max_average_window,
+                              int64_t min_average_window,
+                              DenseTensor* out_sum_1,
+                              DenseTensor* out_sum_2,
+                              DenseTensor* out_sum_3,
+                              DenseTensor* out_num_accumulates,
+                              DenseTensor* out_old_num_accumulates,
+                              DenseTensor* out_num_updates) {
+  // It is used to avoid loss of precision
+  static const int64_t kMaxNumAccumulates = 16384;
+  // Get accumulators from input
+  // int64_t num_updates = 0;
+  // int64_t num_accumulates = 0;
+  // int64_t old_num_accumulates = 0;
+
+  auto num_updates_cpu =
+      paddle::memory::Alloc(phi::CPUPlace(), sizeof(int64_t));
+  int64_t* num_updates_cpu_ptr =
+      reinterpret_cast<int64_t*>(num_updates_cpu->ptr());
+
+  auto num_accumulates_cpu =
+      paddle::memory::Alloc(phi::CPUPlace(), sizeof(int64_t));
+  int64_t* num_accumulates_cpu_ptr =
+      reinterpret_cast<int64_t*>(num_accumulates_cpu->ptr());
+
+  auto old_num_accumulates_cpu =
+      paddle::memory::Alloc(phi::CPUPlace(), sizeof(int64_t));
+  int64_t* old_num_accumulates_cpu_ptr =
+      reinterpret_cast<int64_t*>(old_num_accumulates_cpu->ptr());
+
+  GetAccumulators(dev_ctx,
+                  in_num_accumulates,
+                  in_old_num_accumulates,
+                  in_num_updates,
+                  num_updates_cpu_ptr,
+                  num_accumulates_cpu_ptr,
+                  old_num_accumulates_cpu_ptr);
+  // Get attrs
+  // float average_window = ctx.Attr<float>("average_window");
+  // int64_t max_average_window = ctx.Attr<int64_t>("max_average_window");
+  // int64_t min_average_window = ctx.Attr<int64_t>("min_average_window");
+  PADDLE_ENFORCE_LE(
+      min_average_window,
+      max_average_window,
+      errors::InvalidArgument(
+          "The min_average_window > "
+          "max_average_window is not right, min_average_window is %ld, "
+          "max_average_window is %ld.",
+          min_average_window,
+          max_average_window));
+
+  // Get inputs
+  // auto* param = ctx.Input<Tensor>("param");
+  // auto* in_sum_1 = ctx.Input<Tensor>("in_sum_1");
+  // auto* in_sum_2 = ctx.Input<Tensor>("in_sum_2");
+  // auto* in_sum_3 = ctx.Input<Tensor>("in_sum_3");
+  auto param_tensor = EigenVector<T>::Flatten(param);
+  auto in_sum_1_tensor = EigenVector<T>::Flatten(in_sum_1);
+  auto in_sum_2_tensor = EigenVector<T>::Flatten(in_sum_2);
+  auto in_sum_3_tensor = EigenVector<T>::Flatten(in_sum_3);
+
+  // Get outputs
+  // auto* out_sum_1 = ctx.Output<Tensor>("out_sum_1");
+  // auto* out_sum_2 = ctx.Output<Tensor>("out_sum_2");
+  // auto* out_sum_3 = ctx.Output<Tensor>("out_sum_3");
+  dev_ctx.template Alloc<T>(out_sum_1);
+  dev_ctx.template Alloc<T>(out_sum_2);
+  dev_ctx.template Alloc<T>(out_sum_3);
+
+  auto out_sum_1_tensor = EigenVector<T>::Flatten(*out_sum_1);
+  auto out_sum_2_tensor = EigenVector<T>::Flatten(*out_sum_2);
+  auto out_sum_3_tensor = EigenVector<T>::Flatten(*out_sum_3);
+
+  // Compute
+  // auto& place = *ctx.template device_context<DeviceContext>().eigen_device();
+  auto& place = *dev_ctx.eigen_device();
+
+  funcs::SetConstant<Context, T> constant_functor;
+  ++(*num_updates_cpu_ptr);
+  ++(*num_accumulates_cpu_ptr);
+  out_sum_1_tensor.device(place) = in_sum_1_tensor + param_tensor;
+  out_sum_2_tensor.device(place) = in_sum_2_tensor;
+  out_sum_3_tensor.device(place) = in_sum_3_tensor;
+  if ((*num_updates_cpu_ptr) % kMaxNumAccumulates == 0) {
+    // Move the sum to a different buffer to avoid loss of precision due to
+    // too many sums.
+    out_sum_2_tensor.device(place) = in_sum_2_tensor + in_sum_1_tensor;
+    constant_functor(dev_ctx, out_sum_1, static_cast<T>(0));
+  }
+  if ((*num_accumulates_cpu_ptr) >= min_average_window &&
+      (*num_accumulates_cpu_ptr) >=
+          std::min<int64_t>(max_average_window,
+                            (*num_updates_cpu_ptr) * average_window)) {
+    // Now the average window is too long, discard the old sum.
+    out_sum_3_tensor.device(place) = in_sum_1_tensor + in_sum_2_tensor;
+    constant_functor(dev_ctx, out_sum_1, static_cast<T>(0));
+    constant_functor(dev_ctx, out_sum_2, static_cast<T>(0));
+    (*old_num_accumulates_cpu_ptr) = (*num_accumulates_cpu_ptr);
+    (*num_accumulates_cpu_ptr) = 0;
+  }
+
+  // Set accumulators to output
+  SetAccumulators(dev_ctx,
+                  *num_updates_cpu_ptr,
+                  *num_accumulates_cpu_ptr,
+                  *old_num_accumulates_cpu_ptr,
+                  out_num_accumulates,
+                  out_old_num_accumulates,
+                  out_num_updates);
+}
+
+}  // namespace phi
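Review note: the `kMaxNumAccumulates` fold above is a precision trick, not an algorithmic step: repeatedly adding a small value to one large float accumulator eventually loses every low-order bit, while folding bounded partial sums preserves them. A standalone illustration (plain C++, not Paddle code):

```cpp
// Demonstrates why sum_1 is folded into sum_2 every 16384 steps.
#include <cstdio>

int main() {
  const long kSteps = 1L << 25;    // 33,554,432 additions of 1.0f
  const long kFold = 16384;        // kMaxNumAccumulates in the kernel
  float naive = 0.f;               // one big accumulator
  float sum_1 = 0.f, sum_2 = 0.f;  // short window + folded total
  for (long i = 1; i <= kSteps; ++i) {
    naive += 1.0f;                 // stops growing at 2^24 = 16777216
    sum_1 += 1.0f;                 // stays small, so stays exact
    if (i % kFold == 0) {
      sum_2 += sum_1;              // fold occasionally, like the kernel
      sum_1 = 0.f;
    }
  }
  std::printf("naive  = %.0f\n", naive);          // 16777216 (wrong)
  std::printf("folded = %.0f\n", sum_2 + sum_1);  // 33554432 (exact)
  return 0;
}
```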
diff --git a/paddle/phi/ops/compat/average_accumulates_sig.cc b/paddle/phi/ops/compat/average_accumulates_sig.cc
new file mode 100644
index 0000000000000000000000000000000000000000..c14e8ab3575531a6f534260fe6f1e128f012e293
--- /dev/null
+++ b/paddle/phi/ops/compat/average_accumulates_sig.cc
@@ -0,0 +1,39 @@
+/* Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "paddle/phi/core/compat/op_utils.h"
+
+namespace phi {
+KernelSignature AverageAccumulatesOpArgumentMapping(
+    const ArgumentMappingContext& ctx) {
+  return KernelSignature(
+      "average_accumulates",
+      {"param",
+       "in_sum_1",
+       "in_sum_2",
+       "in_sum_3",
+       "in_num_accumulates",
+       "in_old_num_accumulates",
+       "in_num_updates"},
+      {"average_window", "max_average_window", "min_average_window"},
+      {"out_sum_1",
+       "out_sum_2",
+       "out_sum_3",
+       "out_num_accumulates",
+       "out_old_num_accumulates",
+       "out_num_updates"});
+}
+}  // namespace phi
+
+PD_REGISTER_ARG_MAPPING_FN(average_accumulates,
+                           phi::AverageAccumulatesOpArgumentMapping);
diff --git a/python/paddle/incubate/optimizer/modelaverage.py b/python/paddle/incubate/optimizer/modelaverage.py
index 361827ba48de25fdebe3720e769bd31b39895619..b7d499f77292ec11145ddedeec5df4ddce24f47d 100644
--- a/python/paddle/incubate/optimizer/modelaverage.py
+++ b/python/paddle/incubate/optimizer/modelaverage.py
@@ -21,6 +21,7 @@ import numpy as np
 from paddle.fluid.dygraph import base as imperative_base
 from paddle.fluid.wrapped_decorator import signature_safe_contextmanager
 from paddle import _C_ops
+from paddle.fluid.framework import in_dygraph_mode
 
 __all__ = []
 
@@ -231,7 +232,14 @@ class ModelAverage(Optimizer):
         old_num_accumulates = self._get_accumulator('old_num_accumulates',
                                                     param_and_grad[0])
         num_updates = self._get_accumulator('num_updates', param_and_grad[0])
-        if framework._non_static_mode():
+
+        if in_dygraph_mode():
+            _, _, _, _, _, _ = _C_ops.final_state_average_accumulates_(
+                param_and_grad[0], sum_1, sum_2, sum_3, num_accumulates,
+                old_num_accumulates, num_updates, self.average_window,
+                self.max_average_window, self.min_average_window)
+            return None
+        elif framework._non_static_mode():
             _, _, _, _, _, _ = _C_ops.average_accumulates(
                 param_and_grad[0], sum_1, sum_2, sum_3, num_accumulates,
                 old_num_accumulates, num_updates, sum_1, sum_2, sum_3,