From 0e4bcede044effad239e8b6aa8aa2b23dd96487e Mon Sep 17 00:00:00 2001
From: Leo Chen
Date: Fri, 16 Jul 2021 15:20:24 +0800
Subject: [PATCH] [NPU] add clear_float_status op (#34190)

* add clear_float_status op

* refine infershape

* fix typo

* refine check_finite_and_scale

* refine code
---
 .../amp/check_finite_and_unscale_op_npu.cc    |  4 -
 .../operators/amp/clear_float_status_op.cc    | 77 +++++++++++++++++++
 .../amp/clear_float_status_op_npu.cc          | 54 +++++++++++++
 .../contrib/mixed_precision/decorator.py      |  4 +
 .../test_amp_check_finite_and_scale_op_npu.py | 12 +++
 5 files changed, 147 insertions(+), 4 deletions(-)
 create mode 100644 paddle/fluid/operators/amp/clear_float_status_op.cc
 create mode 100644 paddle/fluid/operators/amp/clear_float_status_op_npu.cc

diff --git a/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu.cc b/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu.cc
index 26280cd2bd1..68da8fd5808 100644
--- a/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu.cc
+++ b/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu.cc
@@ -97,10 +97,6 @@ class CheckFiniteAndUnscaleNPUKernel : public framework::OpKernel<T> {
           NpuOpRunner("Mul", {*x, *tmp_inverse_out}, {*out}, {});
       runner_mul.Run(stream);
     }
-
-    const auto& runner_clear_status =
-        NpuOpRunner("NPUClearFloatStatus", {*float_status}, {tmp});
-    runner_clear_status.Run(stream);
   }
 };

diff --git a/paddle/fluid/operators/amp/clear_float_status_op.cc b/paddle/fluid/operators/amp/clear_float_status_op.cc
new file mode 100644
index 00000000000..7a906a51879
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <cstring>
#include <string>
#include <vector>

#include "paddle/fluid/framework/op_registry.h"

namespace paddle {
namespace operators {

class ClearFloatStatusOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  void InferShape(framework::InferShapeContext* ctx) const override {
    OP_INOUT_CHECK(ctx->HasOutput("FloatStatusOut"), "Output", "FloatStatusOut",
                   "clear_float_status");
    ctx->SetOutputDim("FloatStatusOut", ctx->GetInputDim("FloatStatus"));
  }

 protected:
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    return framework::OpKernelType(framework::proto::VarType::FP32,
                                   ctx.GetPlace());
  }
};

class ClearFloatStatusMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    AddInput("FloatStatus",
             "(Tensor) of shape {8} that holds the float status.");
    AddOutput(
        "FloatStatusOut",
        "(Tensor) of shape {8} that holds the float status, which is cleared.");
    AddComment(R"DOC(
  Clear the float status
)DOC");
  }
};

template <typename DeviceContext, typename T>
class ClearFloatStatusKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    PADDLE_THROW(platform::errors::Unimplemented(
        "Operator clear_float_status is not supported on CPU"));
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;
using CPU = paddle::platform::CPUDeviceContext;

REGISTER_OPERATOR(
    clear_float_status, ops::ClearFloatStatusOp, ops::ClearFloatStatusMaker,
    paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
    paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);

REGISTER_OP_CPU_KERNEL(clear_float_status,
                       ops::ClearFloatStatusKernel<CPU, float>);
diff --git a/paddle/fluid/operators/amp/clear_float_status_op_npu.cc b/paddle/fluid/operators/amp/clear_float_status_op_npu.cc
new file mode 100644
index 00000000000..d5bdcc37c2a
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.

Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at

    http://www.apache.org/licenses/LICENSE-2.0

Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */

#include <memory>
#include <string>

#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/npu_op_runner.h"

namespace paddle {
namespace operators {

using Tensor = framework::Tensor;

template <typename DeviceContext, typename T>
class ClearFloatStatusKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    const auto* float_status = ctx.Input<framework::Tensor>("FloatStatus");
    auto* float_status_out = ctx.Output<framework::Tensor>("FloatStatusOut");
    // NOTE(zhiqiu): NPUClearFloatStatus modifies the input.
    PADDLE_ENFORCE_EQ(float_status_out, float_status,
                      platform::errors::PreconditionNotMet(
                          "The input(FloatStatus) and Output(FloatStatusOut) "
                          "should be the same."));
    Tensor tmp;
    tmp.mutable_data<float>({8}, ctx.GetPlace());
    const auto& runner =
        NpuOpRunner("NPUClearFloatStatus", {*float_status}, {tmp});
    auto stream =
        ctx.template device_context<paddle::platform::NPUDeviceContext>()
            .stream();
    runner.Run(stream);
  }
};

}  // namespace operators
}  // namespace paddle

namespace ops = paddle::operators;

REGISTER_OP_NPU_KERNEL(
    clear_float_status,
    ops::ClearFloatStatusKernel<paddle::platform::NPUDeviceContext, float>);
diff --git a/python/paddle/fluid/contrib/mixed_precision/decorator.py b/python/paddle/fluid/contrib/mixed_precision/decorator.py
index d5d2e7a0d96..09b8629a978 100644
--- a/python/paddle/fluid/contrib/mixed_precision/decorator.py
+++ b/python/paddle/fluid/contrib/mixed_precision/decorator.py
@@ -173,6 +173,10 @@ class OptimizerWithMixedPrecision(object):
             self._train_program.global_block().append_op(
                 type="alloc_float_status",
                 outputs={"FloatStatus": float_status}, )
+            self._train_program.global_block().append_op(
+                type="clear_float_status",
+                inputs={"FloatStatus": float_status},
+                outputs={"FloatStatusOut": float_status}, )
             self._float_status = float_status
         else:
             self._float_status = None
diff --git a/python/paddle/fluid/tests/unittests/npu/test_amp_check_finite_and_scale_op_npu.py b/python/paddle/fluid/tests/unittests/npu/test_amp_check_finite_and_scale_op_npu.py
index 8828892dca3..e92bfbb4d77 100644
--- a/python/paddle/fluid/tests/unittests/npu/test_amp_check_finite_and_scale_op_npu.py
+++ b/python/paddle/fluid/tests/unittests/npu/test_amp_check_finite_and_scale_op_npu.py
@@ -40,6 +40,10 @@ class TestCheckFiniteAndUnscale(unittest.TestCase):
             main_program.global_block().append_op(
                 type="alloc_float_status",
                 outputs={"FloatStatus": float_status}, )
+            main_program.global_block().append_op(
+                type="clear_float_status",
+                inputs={"FloatStatus": float_status},
+                outputs={"FloatStatusOut": float_status}, )
             c = paddle.fluid.layers.elementwise_div(a, b)
             out, found_inf = check_finite_and_unscale(
                 [c], scale, float_status=float_status)
@@ -106,12 +110,20 @@ class TestCheckFiniteAndUnscaleClearFloatStatus(unittest.TestCase):
             main_program.global_block().append_op(
                 type="alloc_float_status",
                 outputs={"FloatStatus": float_status}, )
+            main_program.global_block().append_op(
+                type="clear_float_status",
+                inputs={"FloatStatus": float_status},
+                outputs={"FloatStatusOut": float_status}, )
             c = paddle.fluid.layers.elementwise_div(a, b)
             out, found_inf = check_finite_and_unscale(
                 [c], scale, float_status=float_status)
             main_program.global_block().append_op(
                 type="alloc_float_status",
                 outputs={"FloatStatus": float_status}, )
+            main_program.global_block().append_op(
+                type="clear_float_status",
+                inputs={"FloatStatus": float_status},
+                outputs={"FloatStatusOut": float_status}, )
             d = paddle.fluid.layers.elementwise_add(a, b)
             out, found_inf = check_finite_and_unscale(
                 [d], scale, float_status=float_status)
-- 
GitLab
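
Usage sketch (not part of the patch): the snippet below shows how the new clear_float_status op is meant to be paired with alloc_float_status and check_finite_and_unscale when building an NPU program by hand, mirroring the unit-test changes above. Variable names, the create_var call, and the import path for check_finite_and_unscale are assumptions taken from the test file, not anything this patch prescribes.

# Minimal sketch of the intended usage on NPU, modeled on the unit test above.
# Assumed names/paths: main_program, float_status, and the amp_nn import.
import paddle
import paddle.fluid as fluid
from paddle.fluid.contrib.mixed_precision.amp_nn import check_finite_and_unscale

paddle.enable_static()
main_program = fluid.Program()
startup_program = fluid.Program()
with fluid.program_guard(main_program, startup_program):
    a = fluid.data(name="a", shape=[32, 32], dtype="float32")
    b = fluid.data(name="b", shape=[32, 32], dtype="float32")
    scale = fluid.layers.fill_constant(shape=[1], dtype="float32", value=1.0)

    float_status = main_program.global_block().create_var(
        name="float_status", dtype="float32", persistable=True)
    # Allocate the 8-element float-status buffer once per program ...
    main_program.global_block().append_op(
        type="alloc_float_status", outputs={"FloatStatus": float_status})
    # ... and clear it right before the ops whose overflow status will be
    # checked, so stale flags from earlier computation are not reported.
    main_program.global_block().append_op(
        type="clear_float_status",
        inputs={"FloatStatus": float_status},
        outputs={"FloatStatusOut": float_status})

    c = fluid.layers.elementwise_div(a, b)
    out, found_inf = check_finite_and_unscale(
        [c], scale, float_status=float_status)

This matches the change in the first hunk: the status register is no longer cleared inside check_finite_and_unscale after the check, but explicitly cleared up front, before the computation whose status is being collected.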