未验证 提交 0e4bcede 编写于 作者: L Leo Chen 提交者: GitHub

[NPU] add clear_float_status op (#34190)

* add clear_float_status op

* refine infershape

* fix typo

* refine check_finite_and_scale

* refine code
上级 7049af57
...@@ -97,10 +97,6 @@ class CheckFiniteAndUnscaleNPUKernel : public framework::OpKernel<T> { ...@@ -97,10 +97,6 @@ class CheckFiniteAndUnscaleNPUKernel : public framework::OpKernel<T> {
NpuOpRunner("Mul", {*x, *tmp_inverse_out}, {*out}, {}); NpuOpRunner("Mul", {*x, *tmp_inverse_out}, {*out}, {});
runner_mul.Run(stream); runner_mul.Run(stream);
} }
const auto& runner_clear_status =
NpuOpRunner("NPUClearFloatStatus", {*float_status}, {tmp});
runner_clear_status.Run(stream);
} }
}; };
......
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <cstring>
#include <string>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
namespace paddle {
namespace operators {
// Operator definition for `clear_float_status`.
//
// Takes a "FloatStatus" tensor and produces "FloatStatusOut" with the same
// dimensions. Kernels for this operator are always selected as FP32 on the
// place of the current execution context.
class ClearFloatStatusOp : public framework::OperatorWithKernel {
 public:
  using framework::OperatorWithKernel::OperatorWithKernel;

  // Validates that both the input and the output are present, then
  // propagates the input dimensions to the output.
  void InferShape(framework::InferShapeContext* ctx) const override {
    // Check the input explicitly so that a missing "FloatStatus" produces a
    // clear diagnostic instead of failing inside GetInputDim below.
    OP_INOUT_CHECK(ctx->HasInput("FloatStatus"), "Input", "FloatStatus",
                   "clear_float_status");
    OP_INOUT_CHECK(ctx->HasOutput("FloatStatusOut"), "Output", "FloatStatusOut",
                   "clear_float_status");
    ctx->SetOutputDim("FloatStatusOut", ctx->GetInputDim("FloatStatus"));
  }

 protected:
  // The kernel data type is fixed to FP32 regardless of the input variables;
  // only the place is taken from the execution context.
  framework::OpKernelType GetExpectedKernelType(
      const framework::ExecutionContext& ctx) const override {
    return framework::OpKernelType(framework::proto::VarType::FP32,
                                   ctx.GetPlace());
  }
};
// Proto maker for `clear_float_status`: declares one input ("FloatStatus"),
// one output ("FloatStatusOut") and the operator's doc comment.
class ClearFloatStatusMaker : public framework::OpProtoAndCheckerMaker {
 public:
  void Make() override {
    // Input: the float-status tensor to be cleared.
    AddInput(
        "FloatStatus",
        "(Tensor) of shape {8} that holds the float status.");

    // Output: the float-status tensor after clearing.
    AddOutput(
        "FloatStatusOut",
        "(Tensor) of shape {8} that holds the float status, which is cleared.");

    AddComment(R"DOC(
Clear the float status
)DOC");
  }
};
// Placeholder kernel for `clear_float_status`: it performs no computation and
// unconditionally raises an Unimplemented error when invoked.
template <typename DeviceContext, typename T>
class ClearFloatStatusKernel : public framework::OpKernel<T> {
 public:
  // Always throws; `ctx` is not inspected.
  void Compute(const framework::ExecutionContext& ctx) const override {
    PADDLE_THROW(
        platform::errors::Unimplemented(
            "Operator clear_float_status is not supported on CPU"));
  }
};
} // namespace operators
} // namespace paddle
// Short aliases used by the registration macros below.
namespace ops = paddle::operators;
using CPU = paddle::platform::CPUDeviceContext;
// Register the `clear_float_status` operator with its op class and proto
// maker. Both grad-op makers (static-graph OpDesc and imperative OpBase) are
// EmptyGradOpMaker.
// NOTE(review): presumably this means no gradient op is generated for this
// operator -- confirm against EmptyGradOpMaker's definition.
REGISTER_OPERATOR(
clear_float_status, ops::ClearFloatStatusOp, ops::ClearFloatStatusMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
// Register the CPU kernel for float. This kernel's Compute only throws an
// Unimplemented error.
REGISTER_OP_CPU_KERNEL(clear_float_status,
ops::ClearFloatStatusKernel<CPU, float>);
/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <cmath>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/operators/npu_op_runner.h"
namespace paddle {
namespace operators {
using Tensor = framework::Tensor;
// NPU kernel for `clear_float_status`.
//
// Requires "FloatStatus" and "FloatStatusOut" to be the very same tensor
// object, then runs the "NPUClearFloatStatus" NPU op on it. The op's declared
// output is a local scratch tensor of 8 floats that is not used afterwards.
template <typename DeviceContext, typename T>
class ClearFloatStatusKernel : public framework::OpKernel<T> {
 public:
  void Compute(const framework::ExecutionContext& ctx) const override {
    const auto* status_in = ctx.Input<framework::Tensor>("FloatStatus");
    auto* status_out = ctx.Output<framework::Tensor>("FloatStatusOut");

    // NOTE(zhiqiu): NPUClearFloatStatus modifies the input, so the op is only
    // valid when the output variable is the same tensor as the input.
    PADDLE_ENFORCE_EQ(status_out, status_in,
                      platform::errors::PreconditionNotMet(
                          "The input(FloatStatus) and Output(FloatStatusOut) "
                          "should be the same."));

    // Stream of the NPU device context the op will be launched on.
    auto npu_stream =
        ctx.template device_context<paddle::platform::NPUDeviceContext>()
            .stream();

    // Scratch tensor passed as the op's output; allocated on the current place.
    framework::Tensor scratch;
    scratch.mutable_data<float>({8}, ctx.GetPlace());

    const NpuOpRunner clear_status_op("NPUClearFloatStatus", {*status_in},
                                      {scratch});
    clear_status_op.Run(npu_stream);
  }
};
} // namespace operators
} // namespace paddle
// Short alias used by the registration macro below.
namespace ops = paddle::operators;
// Register the NPU kernel of `clear_float_status` for float on
// NPUDeviceContext.
REGISTER_OP_NPU_KERNEL(
clear_float_status,
ops::ClearFloatStatusKernel<paddle::platform::NPUDeviceContext, float>);
...@@ -173,6 +173,10 @@ class OptimizerWithMixedPrecision(object): ...@@ -173,6 +173,10 @@ class OptimizerWithMixedPrecision(object):
self._train_program.global_block().append_op( self._train_program.global_block().append_op(
type="alloc_float_status", type="alloc_float_status",
outputs={"FloatStatus": float_status}, ) outputs={"FloatStatus": float_status}, )
self._train_program.global_block().append_op(
type="clear_float_status",
inputs={"FloatStatus": float_status},
outputs={"FloatStatusOut": float_status}, )
self._float_status = float_status self._float_status = float_status
else: else:
self._float_status = None self._float_status = None
......
...@@ -40,6 +40,10 @@ class TestCheckFiniteAndUnscale(unittest.TestCase): ...@@ -40,6 +40,10 @@ class TestCheckFiniteAndUnscale(unittest.TestCase):
main_program.global_block().append_op( main_program.global_block().append_op(
type="alloc_float_status", type="alloc_float_status",
outputs={"FloatStatus": float_status}, ) outputs={"FloatStatus": float_status}, )
main_program.global_block().append_op(
type="clear_float_status",
inputs={"FloatStatus": float_status},
outputs={"FloatStatusOut": float_status}, )
c = paddle.fluid.layers.elementwise_div(a, b) c = paddle.fluid.layers.elementwise_div(a, b)
out, found_inf = check_finite_and_unscale( out, found_inf = check_finite_and_unscale(
[c], scale, float_status=float_status) [c], scale, float_status=float_status)
...@@ -106,12 +110,20 @@ class TestCheckFiniteAndUnscaleClearFloatStatus(unittest.TestCase): ...@@ -106,12 +110,20 @@ class TestCheckFiniteAndUnscaleClearFloatStatus(unittest.TestCase):
main_program.global_block().append_op( main_program.global_block().append_op(
type="alloc_float_status", type="alloc_float_status",
outputs={"FloatStatus": float_status}, ) outputs={"FloatStatus": float_status}, )
main_program.global_block().append_op(
type="clear_float_status",
inputs={"FloatStatus": float_status},
outputs={"FloatStatusOut": float_status}, )
c = paddle.fluid.layers.elementwise_div(a, b) c = paddle.fluid.layers.elementwise_div(a, b)
out, found_inf = check_finite_and_unscale( out, found_inf = check_finite_and_unscale(
[c], scale, float_status=float_status) [c], scale, float_status=float_status)
main_program.global_block().append_op( main_program.global_block().append_op(
type="alloc_float_status", type="alloc_float_status",
outputs={"FloatStatus": float_status}, ) outputs={"FloatStatus": float_status}, )
main_program.global_block().append_op(
type="clear_float_status",
inputs={"FloatStatus": float_status},
outputs={"FloatStatusOut": float_status}, )
d = paddle.fluid.layers.elementwise_add(a, b) d = paddle.fluid.layers.elementwise_add(a, b)
out, found_inf = check_finite_and_unscale( out, found_inf = check_finite_and_unscale(
[d], scale, float_status=float_status) [d], scale, float_status=float_status)
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册