From d7a1a178e89dcadd5f3ef2caf59717c0e29de2ea Mon Sep 17 00:00:00 2001 From: jjyaoao <88936287+jjyaoao@users.noreply.github.com> Date: Mon, 10 Apr 2023 17:19:36 +0800 Subject: [PATCH] delete paddle/fluid/operators/amp/*_npu.* (#52673) * delete paddle/fluid/operators/*_npu.* * try pass code-style --- .gitignore | 1 + .../amp/alloc_float_status_op_npu.cc | 46 --- .../amp/check_finite_and_unscale_op_npu.cc | 111 ------- .../check_finite_and_unscale_op_npu_test.cc | 131 -------- .../amp/clear_float_status_op_npu.cc | 53 ---- .../operators/amp/get_float_status_op_npu.cc | 53 ---- .../amp/update_loss_scaling_op_npu.cc | 293 ------------------ 7 files changed, 1 insertion(+), 687 deletions(-) delete mode 100644 paddle/fluid/operators/amp/alloc_float_status_op_npu.cc delete mode 100644 paddle/fluid/operators/amp/check_finite_and_unscale_op_npu.cc delete mode 100644 paddle/fluid/operators/amp/check_finite_and_unscale_op_npu_test.cc delete mode 100644 paddle/fluid/operators/amp/clear_float_status_op_npu.cc delete mode 100644 paddle/fluid/operators/amp/get_float_status_op_npu.cc delete mode 100644 paddle/fluid/operators/amp/update_loss_scaling_op_npu.cc diff --git a/.gitignore b/.gitignore index c0bdf7e4bf5..047d9684b4c 100644 --- a/.gitignore +++ b/.gitignore @@ -77,6 +77,7 @@ tools/nvcc_lazy paddle/fluid/pybind/eager_op_function.cc tools/nvcc_lazy + # these files (directories) are generated before build system generation paddle/fluid/operators/generated_op*.cc paddle/fluid/operators/generated_sparse_op.cc diff --git a/paddle/fluid/operators/amp/alloc_float_status_op_npu.cc b/paddle/fluid/operators/amp/alloc_float_status_op_npu.cc deleted file mode 100644 index 424c2326ab2..00000000000 --- a/paddle/fluid/operators/amp/alloc_float_status_op_npu.cc +++ /dev/null @@ -1,46 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include - -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace operators { - -template -class AllocFloatStatusKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto* float_status = ctx.Output("FloatStatus"); - float_status->mutable_data(ctx.GetPlace()); - - const auto& runner = - NpuOpRunner("NPUAllocFloatStatus", {}, {*float_status}); - auto stream = - ctx.template device_context() - .stream(); - runner.Run(stream); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OP_NPU_KERNEL( - alloc_float_status, - ops::AllocFloatStatusKernel); diff --git a/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu.cc b/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu.cc deleted file mode 100644 index 63e16fb3570..00000000000 --- a/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu.cc +++ /dev/null @@ -1,111 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - -http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include - -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/tensor_util.h" - -namespace paddle { -namespace operators { - -// NOTE(zhiqiu): The CheckFiniteAndUnscaleNPUKernel is different from CUDA. -// On NPU, we do not really check the data of input tensors, -// but use NPUGetFloatStatus to check whether the nan/inf occurs on device, -// and clear it after this op. -// Which may leads to wrong result if the input tensors is not calculated -// on NPU device, but got from other way, for example, feeding. -template -class CheckFiniteAndUnscaleNPUKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const { - const auto xs = ctx.MultiInput("X"); - const auto* scale = ctx.Input("Scale"); - const auto* float_status = ctx.Input("FloatStatus"); - auto outs = ctx.MultiOutput("Out"); - auto* found_inf = ctx.Output("FoundInfinite"); - - found_inf->mutable_data(ctx.GetPlace()); - - auto stream = - ctx.template device_context() - .stream(); - - // step1: inverse scale - phi::DenseTensor const_tensor; - const_tensor.mutable_data({1}, ctx.GetPlace()); - FillNpuTensorWithConstant(&const_tensor, static_cast(1.0)); - - // Inverse(1.0/scale) - phi::DenseTensor* tmp_inverse_out = const_cast(scale); - phi::DenseTensor inverse_out(scale->type()); - inverse_out.Resize(scale->dims()); - inverse_out.mutable_data(ctx.GetPlace()); - const auto& runner_inverse = - NpuOpRunner("Div", {const_tensor, *scale}, {inverse_out}, {}); - runner_inverse.Run(stream); - tmp_inverse_out = &inverse_out; - - // NOTE(zhiqiu): - phi::DenseTensor tmp; - tmp.mutable_data({8}, ctx.GetPlace()); - // NOTE(zhiqiu): NPUGetFloatStatus updates data on input in-place. - // tmp is only placeholder. - const auto& runner_float_status = - NpuOpRunner("NPUGetFloatStatus", - {*float_status}, - {tmp}, - {{"message", std::string("check_nan_and_inf")}}); - runner_float_status.Run(stream); - - phi::DenseTensor sum; - sum.mutable_data({1}, ctx.GetPlace()); - const auto& runner_reduce_sum = - NpuOpRunner("ReduceSumD", - {*float_status}, - {sum}, - {{"axes", std::vector{0}}, {"keep_dims", true}}); - runner_reduce_sum.Run(stream); - - const auto& runner_greater = - NpuOpRunner("GreaterEqual", {sum, const_tensor}, {*found_inf}, {}); - runner_greater.Run(stream); - - // NOTE(zhiqiu): The normal logic is : - // out = in, if found_inf = true - // out = in/scale, if found_inf = false - // However, on NPU, in order to avoid stream sync, we do not copy the - // found_inf data to cpu to check whether to unscale or not. - // Instead, we do the Mul no matter found_inf or not. - // And, a fact is, only few steps contains nan/inf during training. - for (size_t i = 0; i < xs.size(); ++i) { - const auto* x = xs[i]; - auto* out = outs[i]; - out->mutable_data(ctx.GetPlace()); - const auto& runner_mul = - NpuOpRunner("Mul", {*x, *tmp_inverse_out}, {*out}, {}); - runner_mul.Run(stream); - } - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; -namespace plat = paddle::platform; -REGISTER_OP_NPU_KERNEL(check_finite_and_unscale, - ops::CheckFiniteAndUnscaleNPUKernel, - ops::CheckFiniteAndUnscaleNPUKernel); diff --git a/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu_test.cc b/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu_test.cc deleted file mode 100644 index bf7272ba8b8..00000000000 --- a/paddle/fluid/operators/amp/check_finite_and_unscale_op_npu_test.cc +++ /dev/null @@ -1,131 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#ifndef _WIN32 -#include -#endif - -#include -#include -#include -#include - -#include "gtest/gtest.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/framework/operator.h" -#include "paddle/fluid/framework/program_desc.h" -#include "paddle/fluid/platform/enforce.h" -#include "paddle/phi/kernels/funcs/math_function.h" - -namespace f = paddle::framework; -namespace p = paddle::platform; - -USE_OP_ITSELF(check_finite_and_unscale); -USE_OP_DEVICE_KERNEL(check_finite_and_unscale, NPU); - -struct InputVars { - std::string name; - phi::DenseTensor *tensor; -}; - -template -void Compare(f::Scope *scope, const p::DeviceContext &ctx) { - const f::DDim dims = phi::make_ddim({2, 2}); - auto place = ctx.GetPlace(); - - // init input - std::vector input_names = { - {"x", scope->Var("x")->GetMutable()}, - {"x1", scope->Var("x1")->GetMutable()}}; - - auto *scale = scope->Var("scale")->GetMutable(); - - // init output - auto *out = scope->Var("out")->GetMutable(); - auto *out1 = scope->Var("out1")->GetMutable(); - auto *found_inf = scope->Var("found_inf")->GetMutable(); - - // Initialize input data - const int num_inputs = input_names.size(); - size_t numel = static_cast(phi::product(dims)); - - for (int i = 0; i < num_inputs; ++i) { - std::vector init_xs; - for (size_t j = 0; j < numel; ++j) { - if (j == 0) { - init_xs.push_back(static_cast(NAN)); - } else { - init_xs.push_back(static_cast(j + 1)); - } - } - f::TensorFromVector(init_xs, ctx, input_names[i].tensor); - input_names[i].tensor->Resize(dims); - } - - f::TensorFromVector(std::vector{static_cast(0.5)}, ctx, scale); - - ctx.Wait(); - - // run - f::AttributeMap attrs; - auto op = f::OpRegistry::CreateOp( - "check_finite_and_unscale", - {{"X", {"x", "x1"}}, {"Scale", {"scale"}}}, - {{"Out", {"out", "out1"}}, {"FoundInfinite", {"found_inf"}}}, - attrs); - op->Run(*scope, place); - ctx.Wait(); - - // out0 - std::vector out_vec; - f::TensorToVector(*out, ctx, &out_vec); - EXPECT_EQ(out_vec.size(), static_cast(4)); - for (size_t j = 0; j < out_vec.size(); ++j) { - VLOG(3) << "out_vec[" << j << "]:" << out_vec[j]; - } - - ctx.Wait(); - - // out0 - std::vector out1_vec; - f::TensorToVector(*out1, ctx, &out1_vec); - EXPECT_EQ(out1_vec.size(), static_cast(4)); - for (size_t j = 0; j < out1_vec.size(); ++j) { - VLOG(3) << "out1_vec[" << j << "]:" << out1_vec[j]; - } - - ctx.Wait(); - - // out found_inf - phi::DenseTensor found_inf_tensor; - found_inf_tensor.Resize({1}); - bool *found_inf_data = - found_inf_tensor.mutable_data(paddle::platform::CPUPlace()); - f::TensorCopy(*found_inf, place, &found_inf_tensor); - EXPECT_TRUE(*found_inf_data); - - ctx.Wait(); -} - -TEST(check_finite_and_unscale, NPU_fp32) { - f::Scope scope; - auto *ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0)); - Compare(&scope, *ctx); -} - -TEST(check_finite_and_unscale, NPU_fp16) { - f::Scope scope; - auto *ctx = p::DeviceContextPool::Instance().Get(p::NPUPlace(0)); - Compare(&scope, *ctx); -} diff --git a/paddle/fluid/operators/amp/clear_float_status_op_npu.cc b/paddle/fluid/operators/amp/clear_float_status_op_npu.cc deleted file mode 100644 index 1f3e54421f0..00000000000 --- a/paddle/fluid/operators/amp/clear_float_status_op_npu.cc +++ /dev/null @@ -1,53 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include - -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace operators { - -template -class ClearFloatStatusKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - const auto* float_status = ctx.Input("FloatStatus"); - auto* float_status_out = ctx.Output("FloatStatusOut"); - // NOTE(zhiqiu): NPUClearFloatStatus modifies the input. - PADDLE_ENFORCE_EQ(float_status_out, - float_status, - platform::errors::PreconditionNotMet( - "The input(FloatStatus) and Output(FloatStatusOut) " - "should be the same.")); - phi::DenseTensor tmp; - tmp.mutable_data({8}, ctx.GetPlace()); - const auto& runner = - NpuOpRunner("NPUClearFloatStatus", {tmp}, {*float_status_out}); - auto stream = - ctx.template device_context() - .stream(); - runner.Run(stream); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OP_NPU_KERNEL( - clear_float_status, - ops::ClearFloatStatusKernel); diff --git a/paddle/fluid/operators/amp/get_float_status_op_npu.cc b/paddle/fluid/operators/amp/get_float_status_op_npu.cc deleted file mode 100644 index 5d8f88cc85f..00000000000 --- a/paddle/fluid/operators/amp/get_float_status_op_npu.cc +++ /dev/null @@ -1,53 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include - -#include "paddle/fluid/framework/op_registry.h" - -namespace paddle { -namespace operators { - -template -class GetFloatStatusKernel : public framework::OpKernel { - public: - void Compute(const framework::ExecutionContext& ctx) const override { - const auto* float_status = ctx.Input("FloatStatus"); - auto* float_status_out = ctx.Output("FloatStatusOut"); - // GetClearFloatStatus modifies the input. - PADDLE_ENFORCE_EQ(float_status_out, - float_status, - platform::errors::PreconditionNotMet( - "The input(FloatStatus) and Output(FloatStatusOut) " - "should be the same.")); - phi::DenseTensor tmp; - tmp.mutable_data({8}, ctx.GetPlace()); - auto stream = - ctx.template device_context() - .stream(); - // NPUGetFloatStatus updates data on input in-place. - // tmp is only placeholder. - NpuOpRunner("NPUGetFloatStatus", {*float_status}, {tmp}).Run(stream); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OP_NPU_KERNEL( - get_float_status, - ops::GetFloatStatusKernel); diff --git a/paddle/fluid/operators/amp/update_loss_scaling_op_npu.cc b/paddle/fluid/operators/amp/update_loss_scaling_op_npu.cc deleted file mode 100644 index d4565c17809..00000000000 --- a/paddle/fluid/operators/amp/update_loss_scaling_op_npu.cc +++ /dev/null @@ -1,293 +0,0 @@ -/* Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved. - -Licensed under the Apache License, Version 2.0 (the "License"); -you may not use this file except in compliance with the License. -You may obtain a copy of the License at - - http://www.apache.org/licenses/LICENSE-2.0 - -Unless required by applicable law or agreed to in writing, software -distributed under the License is distributed on an "AS IS" BASIS, -WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -See the License for the specific language governing permissions and -limitations under the License. */ - -#include -#include - -#include "paddle/fluid/framework/data_type.h" -#include "paddle/fluid/framework/op_registry.h" -#include "paddle/fluid/operators/amp/fp16_type_traits.h" - -DECLARE_int32(min_loss_scaling); - -namespace paddle { -namespace operators { - -template -void Update(const platform::NPUDeviceContext& ctx, - const std::vector found_inf_vec, - const phi::DenseTensor* pre_loss_scaling_tensor, - const phi::DenseTensor* good_in_tensor, - const phi::DenseTensor* bad_in_tensor, - const int incr_every_n_steps, - const int decr_every_n_nan_or_inf, - const float incr_ratio, - const float decr_ratio, - phi::DenseTensor* updated_loss_scaling_tensor, - phi::DenseTensor* good_out_tensor, - phi::DenseTensor* bad_out_tensor) { - auto place = ctx.GetPlace(); - auto stream = ctx.stream(); - if (found_inf_vec[0]) { - // good_out_data = 0 - auto g = good_out_tensor->mutable_data(place); - platform::NPUMemsetAsync(static_cast(g), - 0, - good_out_tensor->numel() * sizeof(int), - stream); - // bad_out_data = bad_in_data + 1 - phi::DenseTensor factor_tensor(bad_out_tensor->dtype()); - factor_tensor.mutable_data({1}, place); - FillNpuTensorWithConstant(&factor_tensor, static_cast(1)); - const auto& runner_p2 = NpuOpRunner( - "Add", {*bad_in_tensor, factor_tensor}, {*bad_out_tensor}, {}); - runner_p2.Run(stream); - - std::vector bad_out_data; - paddle::framework::TensorToVector(*bad_out_tensor, ctx, &bad_out_data); - if (bad_out_data[0] >= decr_every_n_nan_or_inf) { - const auto& runner_p3 = NpuOpRunner("Power", - {*pre_loss_scaling_tensor}, - {*updated_loss_scaling_tensor}, - {{"power", static_cast(1)}, - {"scale", decr_ratio}, - {"shift", static_cast(0)}}); - - runner_p3.Run(stream); - - std::vector new_loss_scaling; - paddle::framework::TensorToVector( - *updated_loss_scaling_tensor, ctx, &new_loss_scaling); - float min_value = 1.0; - if (FLAGS_min_loss_scaling > 1) { - min_value = static_cast(FLAGS_min_loss_scaling); - } - - if (new_loss_scaling[0] < min_value) { - // updated_loss_scaling_data = 1 - const auto& runner_p4 = - NpuOpRunner("Power", - {*pre_loss_scaling_tensor}, - {*updated_loss_scaling_tensor}, - {{"power", static_cast(1)}, - {"scale", static_cast(0)}, - {"shift", static_cast(min_value)}}); - - runner_p4.Run(stream); - } - - // bad_out_data = 0 - auto b = bad_out_tensor->mutable_data(place); - platform::NPUMemsetAsync(static_cast(b), - 0, - bad_out_tensor->numel() * sizeof(int), - stream); - } - } else { - // bad_out_data = 0 - auto b = bad_out_tensor->mutable_data(place); - platform::NPUMemsetAsync(static_cast(b), - 0, - bad_out_tensor->numel() * sizeof(int), - stream); - - // good_out_data = good_in_data + 1 - phi::DenseTensor factor_tensor(good_out_tensor->dtype()); - factor_tensor.mutable_data({1}, place); - FillNpuTensorWithConstant(&factor_tensor, static_cast(1)); - const auto& runner_p2 = NpuOpRunner( - "Add", {*good_in_tensor, factor_tensor}, {*good_out_tensor}, {}); - runner_p2.Run(stream); - - std::vector good_out_data; - paddle::framework::TensorToVector(*good_out_tensor, ctx, &good_out_data); - - if (good_out_data[0] >= incr_every_n_steps) { - const auto& runner_p3 = NpuOpRunner("Power", - {*pre_loss_scaling_tensor}, - {*updated_loss_scaling_tensor}, - {{"power", static_cast(1)}, - {"scale", incr_ratio}, - {"shift", static_cast(0)}}); - runner_p3.Run(stream); - - std::vector new_loss_scaling; - paddle::framework::TensorToVector( - *updated_loss_scaling_tensor, ctx, &new_loss_scaling); - if (!std::isfinite(new_loss_scaling[0])) { - // updated_loss_scaling_data = pre_loss_scaling_data - const auto& runner_p4 = NpuOpRunner("Power", - {*pre_loss_scaling_tensor}, - {*updated_loss_scaling_tensor}, - {{"power", static_cast(1)}, - {"scale", static_cast(1)}, - {"shift", static_cast(0)}}); - - runner_p4.Run(stream); - } - // good_out_data = 0 - auto g = good_out_tensor->mutable_data(place); - platform::NPUMemsetAsync(static_cast(g), - 0, - good_out_tensor->numel() * sizeof(int), - stream); - } - } -} - -template -class UpdateLossScalingFunctor { - public: - void operator()(const platform::NPUDeviceContext& dev_ctx, - const std::vector found_inf_vec, - const phi::DenseTensor* pre_loss_scaling_tensor, - const phi::DenseTensor* good_in_tensor, - const phi::DenseTensor* bad_in_tensor, - const int incr_every_n_steps, - const int decr_every_n_nan_or_inf, - const float incr_ratio, - const float decr_ratio, - phi::DenseTensor* updated_loss_scaling_tensor, - phi::DenseTensor* good_out_tensor, - phi::DenseTensor* bad_out_tensor) const { - Update(dev_ctx, - found_inf_vec, - pre_loss_scaling_tensor, - good_in_tensor, - bad_in_tensor, - incr_every_n_steps, - decr_every_n_nan_or_inf, - incr_ratio, - decr_ratio, - updated_loss_scaling_tensor, - good_out_tensor, - bad_out_tensor); - } -}; - -template -class LazyZerosNPU { - public: - void operator()(const platform::NPUDeviceContext& dev_ctx, - const std::vector found_inf_vec, - const std::vector& xs, - const std::vector& outs) const { - if (!xs.size()) { - return; - } - auto place = dev_ctx.GetPlace(); - auto stream = dev_ctx.stream(); - phi::DenseTensor* zero_tensor = nullptr; - void* zero_ptr = nullptr; - if (found_inf_vec[0]) { - int max_num = -1; - for (size_t i = 0; i < xs.size(); ++i) { - auto* out = outs[i]; - int num = out->numel(); - if (max_num < num) { - max_num = num; - zero_tensor = out; - } - } - - zero_tensor->mutable_data(place); - const auto& runner_zeros = - NpuOpRunner("ZerosLike", {*zero_tensor}, {*zero_tensor}); - runner_zeros.Run(stream); - zero_tensor->check_memory_size(); - zero_ptr = zero_tensor->data(); - } - - for (size_t i = 0; i < xs.size(); ++i) { - auto* out = outs[i]; - auto* x = xs[i]; - auto dst_ptr = out->mutable_data(place); - if (!found_inf_vec[0]) { - framework::TensorCopy(*x, place, dev_ctx, out); - } else if (zero_ptr != dst_ptr) { - auto size = out->numel() * phi::SizeOf(out->dtype()); - memory::Copy(place, dst_ptr, place, zero_ptr, size, stream); - } - } - } -}; - -template -class UpdateLossScalingNPUKernel : public framework::OpKernel { - using MPDType = typename details::MPTypeTrait::Type; - - public: - void Compute(const framework::ExecutionContext& ctx) const override { - auto& dev_ctx = ctx.template device_context(); - - const auto xs = ctx.MultiInput("X"); - auto outs = ctx.MultiOutput("Out"); - const auto* found_inf = ctx.Input("FoundInfinite"); - PADDLE_ENFORCE_EQ(found_inf->numel(), - 1, - platform::errors::InvalidArgument( - "FoundInfinite must has only one element.")); - - std::vector found_inf_vec; - paddle::framework::TensorToVector( - *found_inf, ctx.device_context(), &found_inf_vec); - - LazyZerosNPU{}(dev_ctx, found_inf_vec, xs, outs); - const bool stop_update = ctx.Attr("stop_update"); - if (stop_update) { - return; - } - - const auto* pre_loss_scaling = - ctx.Input("PrevLossScaling"); - const auto* good_in = ctx.Input("InGoodSteps"); - const auto* bad_in = ctx.Input("InBadSteps"); - auto* updated_loss_scaling = ctx.Output("LossScaling"); - auto* good_out = ctx.Output("OutGoodSteps"); - auto* bad_out = ctx.Output("OutBadSteps"); - - updated_loss_scaling->mutable_data(dev_ctx.GetPlace()); - good_out->mutable_data(dev_ctx.GetPlace()); - bad_out->mutable_data(dev_ctx.GetPlace()); - - const int incr_every_n_steps = ctx.Attr("incr_every_n_steps"); - const int decr_every_n_nan_or_inf = - ctx.Attr("decr_every_n_nan_or_inf"); - const float incr_ratio = ctx.Attr("incr_ratio"); - const float decr_ratio = ctx.Attr("decr_ratio"); - UpdateLossScalingFunctor{}(dev_ctx, - found_inf_vec, - pre_loss_scaling, - good_in, - bad_in, - incr_every_n_steps, - decr_every_n_nan_or_inf, - incr_ratio, - decr_ratio, - updated_loss_scaling, - good_out, - bad_out); - } -}; - -} // namespace operators -} // namespace paddle - -namespace ops = paddle::operators; - -REGISTER_OP_NPU_KERNEL( - update_loss_scaling, - ops::UpdateLossScalingNPUKernel, - ops::UpdateLossScalingNPUKernel); -- GitLab