Feature/add amp_checkout_finite_and_scale op (#24875)

* add amp_check_finite_and_scale op, test=develop * add cpu kernel, test=develop * use bool, test=develop * follow comments, test=develop

Feature/add amp_checkout_finite_and_scale op (#24875)
* add amp_check_finite_and_scale op, test=develop * add cpu kernel, test=develop * use bool, test=develop * follow comments, test=develop
1e818158 · Leo Chen · GitHub · 576d6808 · 1e818158 · 1e818158
8 changed file
--- a/paddle/fluid/operators/CMakeLists.txt
+++ b/paddle/fluid/operators/CMakeLists.txt
@@ -23,6 +23,8 @@ if(WITH_DISTRIBUTE)
    add_subdirectory(collective)
 endif()
+add_subdirectory(amp)
 add_subdirectory(reader)
 if (NOT WIN32)

--- a/paddle/fluid/operators/amp/CMakeLists.txt
+++ b/paddle/fluid/operators/amp/CMakeLists.txt
+include(operators)
+register_operators()
--- a/paddle/fluid/operators/amp/amp_check_finite_and_scale_op.cc
+++ b/paddle/fluid/operators/amp/amp_check_finite_and_scale_op.cc
+/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include "paddle/fluid/operators/amp/amp_check_finite_and_scale_op.h"
+#include <string>
+#include <vector>
+namespace paddle {
+namespace operators {
+class AmpCheckFiniteAndScaleOp : public framework::OperatorWithKernel {
+ public:
+  AmpCheckFiniteAndScaleOp(const std::string &type,
+                           const framework::VariableNameMap &inputs,
+                           const framework::VariableNameMap &outputs,
+                           const framework::AttributeMap &attrs)
+      : OperatorWithKernel(type, inputs, outputs, attrs) {}
+  void InferShape(framework::InferShapeContext *ctx) const override {
+    OP_INOUT_CHECK(ctx->HasInputs("X"), "Input", "X",
+                   "amp_check_finite_and_unscale");
+    OP_INOUT_CHECK(ctx->HasOutputs("Out"), "Output", "Out",
+                   "amp_check_finite_and_unscale");
+    PADDLE_ENFORCE_EQ(
+        ctx->Inputs("X").size(), ctx->Outputs("Out").size(),
+        platform::errors::InvalidArgument(
+            "The input(X) and output(Out) should have same size in "
+            "Operator(amp_check_finite_and_unscale), size of input(X) is %d "
+            "and size of output(Out) is %d.",
+            ctx->Inputs("X").size(), ctx->Outputs("Out").size()));
+    auto x_dims = ctx->GetInputsDim("X");
+    ctx->SetOutputsDim("Out", x_dims);
+    ctx->SetOutputDim("FoundInfinite", {1});
+  }
+ protected:
+  framework::OpKernelType GetExpectedKernelType(
+      const framework::ExecutionContext &ctx) const override {
+    return framework::OpKernelType(
+        OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace());
+  }
+};
+class AmpCheckFiniteAndScaleOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() override {
+    AddInput(
+        "X",
+        "(Tensors) The input tensors of amp_check_finite_and_scale operator.")
+        .AsDuplicable();
+    AddInput("Scale",
+             "(Tensor) 1-dim tensor, the scale of amp_check_finite_and_scale "
+             "operator.");
+    AddOutput("Out",
+              "(Tensors) The scaled output tensor of "
+              "amp_check_finite_and_unscale operator.")
+        .AsDuplicable();
+    AddOutput("FoundInfinite",
+              "(Tensor) 1-dim tensor, contains a int scalar, which indicates "
+              "if there there is infinite or nan item in input X.");
+    AddComment(R"DOC(
+amp_check_finite_and_scale operator.
+Check if input X contains all finite data, if yes, scale it by input Scale.
+$$Out = X * scale$$
+If any tensor in X contains Inf or Nan, the Out will generate a indicator.
+FoundInfinite will be 1 (True), and Out will not be scaled. In this case, the data of 
+Out should not be used, and its data may not be deterministic. 
+Otherwise, FoundInfinite will be 0 (False).
+)DOC");
+  }
+};
+}  // namespace operators
+}  // namespace paddle
+namespace ops = paddle::operators;
+REGISTER_OPERATOR(
+    amp_check_finite_and_scale, ops::AmpCheckFiniteAndScaleOp,
+    ops::AmpCheckFiniteAndScaleOpMaker,
+    paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
+    paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
+REGISTER_OP_CPU_KERNEL(
+    amp_check_finite_and_scale,
+    ops::AmpCheckFiniteAndScaleKernel<paddle::platform::CPUDeviceContext,
+                                      float>,
+    ops::AmpCheckFiniteAndScaleKernel<paddle::platform::CPUDeviceContext,
+                                      double>);
--- a/paddle/fluid/operators/amp/amp_check_finite_and_scale_op.cu
+++ b/paddle/fluid/operators/amp/amp_check_finite_and_scale_op.cu
+/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#include <cuda.h>
+#include "paddle/fluid/operators/amp/amp_check_finite_and_scale_op.h"
+#include "paddle/fluid/platform/float16.h"
+namespace paddle {
+namespace operators {
+template <typename T>
+__global__ void AmpCheckFiniteAndScale(const T* in, const T* scale, int num,
+                                       int* found_inf, T* out) {
+  const int idx = threadIdx.x + blockIdx.x * blockDim.x;
+  if (idx < num) {
+    if (!std::isfinite(in[idx])) {
+      *found_inf = 1;
+    }
+    out[idx] = *found_inf ? in[idx] : in[idx] * scale[0];
+  }
+}
+template <typename T>
+class AmpCheckFiniteAndScaleKernel<platform::CUDADeviceContext, T>
+    : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const {
+    auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
+    const auto xs = ctx.MultiInput<framework::Tensor>("X");
+    const auto* scale = ctx.Input<framework::Tensor>("Scale");
+    auto outs = ctx.MultiOutput<framework::Tensor>("Out");
+    auto* found_inf = ctx.Output<framework::Tensor>("FoundInfinite");
+    const T* scale_data = scale->data<T>();
+    int* found_inf_data = found_inf->mutable_data<int>(dev_ctx.GetPlace());
+    cudaMemset(found_inf_data, false, found_inf->numel() * sizeof(bool));
+    for (size_t i = 0; i < xs.size(); ++i) {
+      const auto* x = xs[i];
+      auto* out = outs[i];
+      const T* x_data = x->data<T>();
+      T* out_data = out->mutable_data<T>(dev_ctx.GetPlace());
+      int num = x->numel();
+      int block = 512;
+      int grid = (num + block - 1) / block;
+      VLOG(3) << "launch kernel";
+      AmpCheckFiniteAndScale<T><<<grid, block, 0, dev_ctx.stream()>>>(
+          x_data, scale_data, num, found_inf_data, out_data);
+      VLOG(3) << "finish kernel";
+    }
+  }
+};
+}  // namespace operators
+}  // namespace paddle
+namespace ops = paddle::operators;
+REGISTER_OP_CUDA_KERNEL(
+    amp_check_finite_and_scale,
+    ops::AmpCheckFiniteAndScaleKernel<paddle::platform::CUDADeviceContext,
+                                      float>,
+    ops::AmpCheckFiniteAndScaleKernel<paddle::platform::CUDADeviceContext,
+                                      double>);
--- a/paddle/fluid/operators/amp/amp_check_finite_and_scale_op.h
+++ b/paddle/fluid/operators/amp/amp_check_finite_and_scale_op.h
+/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+#pragma once
+#include <string>
+#include <vector>
+#include "paddle/fluid/framework/tensor_util.h"
+#include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
+#include "paddle/fluid/operators/isfinite_op.h"
+namespace paddle {
+namespace operators {
+template <typename DeviceContext, typename T>
+class AmpCheckFiniteAndScaleKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& ctx) const {
+    auto& dev_ctx = ctx.template device_context<DeviceContext>();
+    const auto xs = ctx.MultiInput<framework::Tensor>("X");
+    const auto* scale = ctx.Input<framework::Tensor>("Scale");
+    auto outs = ctx.MultiOutput<framework::Tensor>("Out");
+    auto* found_inf = ctx.Output<framework::Tensor>("FoundInfinite");
+    const T* scale_data = scale->data<T>();
+    bool* found_inf_data = found_inf->mutable_data<bool>(dev_ctx.GetPlace());
+    *found_inf_data = false;
+    framework::Tensor is_finite =
+        ctx.AllocateTmpTensor<bool, DeviceContext>({1}, dev_ctx);
+    bool* is_finite_data = is_finite.template data<bool>();
+    auto& dev = *ctx.template device_context<DeviceContext>().eigen_device();
+    for (size_t i = 0; i < xs.size(); ++i) {
+      const auto* x = xs[i];
+      auto* out = outs[i];
+      out->mutable_data<T>(dev_ctx.GetPlace());
+      if (!(*found_inf_data)) {
+        framework::TensorIsfinite(*x, &is_finite);
+        if (*is_finite_data) {
+          auto eigen_out = framework::EigenVector<T>::Flatten(*out);
+          auto eigen_in = framework::EigenVector<T>::Flatten(*x);
+          eigen_out.device(dev) = (*scale_data) * eigen_in;
+        } else {
+          *found_inf_data = true;
+          break;
+        }
+      }
+    }
+    return;
+  }
+};
+}  // namespace operators
+}  // namespace paddle
--- a/paddle/fluid/pybind/op_function_generator.cc
+++ b/paddle/fluid/pybind/op_function_generator.cc
@@ -76,6 +76,7 @@ std::map<std::string, std::set<std::string>> op_passing_outs_map = {
    {"matmul", {"Out"}},
    {"fake_quantize_dequantize_moving_average_abs_max",
     {"Out", "OutScale", "OutAccum", "OutState"}},
+    {"amp_check_finite_and_scale", {"Out", "FoundInfinite"}},
 };
 // clang-format off

--- a/python/paddle/fluid/tests/unittests/test_amp_check_finite_and_scale_op.py
+++ b/python/paddle/fluid/tests/unittests/test_amp_check_finite_and_scale_op.py
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+import unittest
+import numpy as np
+from op_test import OpTest, skip_check_grad_ci
+import paddle.fluid as fluid
+class TestAmpCheckFiniteAndScaleOp(OpTest):
+    def setUp(self):
+        self.op_type = "amp_check_finite_and_scale"
+        self.init_dtype()
+        x = np.random.random((1024, 1024)).astype(self.dtype)
+        scale = np.random.random((1)).astype(self.dtype)
+        self.inputs = {'X': [('x0', x)], 'Scale': scale}
+        self.outputs = {
+            'FoundInfinite': np.array([0]),
+            'Out': [('out0', x * scale)],
+        }
+    def init_dtype(self):
+        self.dtype = np.float32
+    def test_check_output(self):
+        self.check_output()
+class TestAmpCheckFiniteAndScaleOpWithNan(OpTest):
+    def setUp(self):
+        self.op_type = "amp_check_finite_and_scale"
+        self.init_dtype()
+        x = np.random.random((1024, 1024)).astype(self.dtype)
+        x[128][128] = np.nan
+        scale = np.random.random((1)).astype(self.dtype)
+        self.inputs = {'X': [('x0', x)], 'Scale': scale}
+        self.outputs = {
+            'FoundInfinite': np.array([1]),
+            'Out': [('out0', x)],
+        }
+    def init_dtype(self):
+        self.dtype = np.float32
+    def test_check_output(self):
+        # When input contains nan, do not check the output, 
+        # since the output may be nondeterministic and will be discarded.
+        self.check_output(no_check_set=['Out'])
+class TestAmpCheckFiniteAndScaleOpWithInf(OpTest):
+    def setUp(self):
+        self.op_type = "amp_check_finite_and_scale"
+        self.init_dtype()
+        x = np.random.random((1024, 1024)).astype(self.dtype)
+        x[128][128] = np.inf
+        scale = np.random.random((1)).astype(self.dtype)
+        self.inputs = {'X': [('x0', x)], 'Scale': scale}
+        self.outputs = {
+            'FoundInfinite': np.array([1]),
+            'Out': [('out0', x)],
+        }
+    def init_dtype(self):
+        self.dtype = np.float32
+    def test_check_output(self):
+        # When input contains inf, do not check the output, 
+        # since the output may be nondeterministic and will be discarded.
+        self.check_output(no_check_set=['Out'])
+if __name__ == '__main__':
+    unittest.main()
--- a/python/paddle/fluid/tests/unittests/white_list/no_check_set_white_list.py
+++ b/python/paddle/fluid/tests/unittests/white_list/no_check_set_white_list.py
@@ -23,4 +23,5 @@ no_check_set_white_list = [
    'unsqueeze2',
    'cross_entropy2',
    'seed',
+    'amp_check_finite_and_scale',
 ]