未验证 提交 1e818158 编写于 作者: L Leo Chen 提交者: GitHub

Feature/add amp_checkout_finite_and_scale op (#24875)

* add amp_check_finite_and_scale op, test=develop

* add cpu kernel, test=develop

* use bool, test=develop

* follow comments, test=develop
上级 576d6808
...@@ -23,6 +23,8 @@ if(WITH_DISTRIBUTE) ...@@ -23,6 +23,8 @@ if(WITH_DISTRIBUTE)
add_subdirectory(collective) add_subdirectory(collective)
endif() endif()
add_subdirectory(amp)
add_subdirectory(reader) add_subdirectory(reader)
if (NOT WIN32) if (NOT WIN32)
......
include(operators)
register_operators()
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/fluid/operators/amp/amp_check_finite_and_scale_op.h"
#include <string>
#include <vector>
namespace paddle {
namespace operators {
class AmpCheckFiniteAndScaleOp : public framework::OperatorWithKernel {
public:
AmpCheckFiniteAndScaleOp(const std::string &type,
const framework::VariableNameMap &inputs,
const framework::VariableNameMap &outputs,
const framework::AttributeMap &attrs)
: OperatorWithKernel(type, inputs, outputs, attrs) {}
void InferShape(framework::InferShapeContext *ctx) const override {
OP_INOUT_CHECK(ctx->HasInputs("X"), "Input", "X",
"amp_check_finite_and_unscale");
OP_INOUT_CHECK(ctx->HasOutputs("Out"), "Output", "Out",
"amp_check_finite_and_unscale");
PADDLE_ENFORCE_EQ(
ctx->Inputs("X").size(), ctx->Outputs("Out").size(),
platform::errors::InvalidArgument(
"The input(X) and output(Out) should have same size in "
"Operator(amp_check_finite_and_unscale), size of input(X) is %d "
"and size of output(Out) is %d.",
ctx->Inputs("X").size(), ctx->Outputs("Out").size()));
auto x_dims = ctx->GetInputsDim("X");
ctx->SetOutputsDim("Out", x_dims);
ctx->SetOutputDim("FoundInfinite", {1});
}
protected:
framework::OpKernelType GetExpectedKernelType(
const framework::ExecutionContext &ctx) const override {
return framework::OpKernelType(
OperatorWithKernel::IndicateVarDataType(ctx, "X"), ctx.GetPlace());
}
};
class AmpCheckFiniteAndScaleOpMaker : public framework::OpProtoAndCheckerMaker {
public:
void Make() override {
AddInput(
"X",
"(Tensors) The input tensors of amp_check_finite_and_scale operator.")
.AsDuplicable();
AddInput("Scale",
"(Tensor) 1-dim tensor, the scale of amp_check_finite_and_scale "
"operator.");
AddOutput("Out",
"(Tensors) The scaled output tensor of "
"amp_check_finite_and_unscale operator.")
.AsDuplicable();
AddOutput("FoundInfinite",
"(Tensor) 1-dim tensor, contains a int scalar, which indicates "
"if there there is infinite or nan item in input X.");
AddComment(R"DOC(
amp_check_finite_and_scale operator.
Check if input X contains all finite data, if yes, scale it by input Scale.
$$Out = X * scale$$
If any tensor in X contains Inf or Nan, the Out will generate a indicator.
FoundInfinite will be 1 (True), and Out will not be scaled. In this case, the data of
Out should not be used, and its data may not be deterministic.
Otherwise, FoundInfinite will be 0 (False).
)DOC");
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OPERATOR(
amp_check_finite_and_scale, ops::AmpCheckFiniteAndScaleOp,
ops::AmpCheckFiniteAndScaleOpMaker,
paddle::framework::EmptyGradOpMaker<paddle::framework::OpDesc>,
paddle::framework::EmptyGradOpMaker<paddle::imperative::OpBase>);
REGISTER_OP_CPU_KERNEL(
amp_check_finite_and_scale,
ops::AmpCheckFiniteAndScaleKernel<paddle::platform::CPUDeviceContext,
float>,
ops::AmpCheckFiniteAndScaleKernel<paddle::platform::CPUDeviceContext,
double>);
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <cuda.h>
#include "paddle/fluid/operators/amp/amp_check_finite_and_scale_op.h"
#include "paddle/fluid/platform/float16.h"
namespace paddle {
namespace operators {
template <typename T>
__global__ void AmpCheckFiniteAndScale(const T* in, const T* scale, int num,
int* found_inf, T* out) {
const int idx = threadIdx.x + blockIdx.x * blockDim.x;
if (idx < num) {
if (!std::isfinite(in[idx])) {
*found_inf = 1;
}
out[idx] = *found_inf ? in[idx] : in[idx] * scale[0];
}
}
template <typename T>
class AmpCheckFiniteAndScaleKernel<platform::CUDADeviceContext, T>
: public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const {
auto& dev_ctx = ctx.template device_context<platform::CUDADeviceContext>();
const auto xs = ctx.MultiInput<framework::Tensor>("X");
const auto* scale = ctx.Input<framework::Tensor>("Scale");
auto outs = ctx.MultiOutput<framework::Tensor>("Out");
auto* found_inf = ctx.Output<framework::Tensor>("FoundInfinite");
const T* scale_data = scale->data<T>();
int* found_inf_data = found_inf->mutable_data<int>(dev_ctx.GetPlace());
cudaMemset(found_inf_data, false, found_inf->numel() * sizeof(bool));
for (size_t i = 0; i < xs.size(); ++i) {
const auto* x = xs[i];
auto* out = outs[i];
const T* x_data = x->data<T>();
T* out_data = out->mutable_data<T>(dev_ctx.GetPlace());
int num = x->numel();
int block = 512;
int grid = (num + block - 1) / block;
VLOG(3) << "launch kernel";
AmpCheckFiniteAndScale<T><<<grid, block, 0, dev_ctx.stream()>>>(
x_data, scale_data, num, found_inf_data, out_data);
VLOG(3) << "finish kernel";
}
}
};
} // namespace operators
} // namespace paddle
namespace ops = paddle::operators;
REGISTER_OP_CUDA_KERNEL(
amp_check_finite_and_scale,
ops::AmpCheckFiniteAndScaleKernel<paddle::platform::CUDADeviceContext,
float>,
ops::AmpCheckFiniteAndScaleKernel<paddle::platform::CUDADeviceContext,
double>);
/* Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <string>
#include <vector>
#include "paddle/fluid/framework/tensor_util.h"
#include "paddle/fluid/operators/elementwise/elementwise_op_function.h"
#include "paddle/fluid/operators/isfinite_op.h"
namespace paddle {
namespace operators {
template <typename DeviceContext, typename T>
class AmpCheckFiniteAndScaleKernel : public framework::OpKernel<T> {
public:
void Compute(const framework::ExecutionContext& ctx) const {
auto& dev_ctx = ctx.template device_context<DeviceContext>();
const auto xs = ctx.MultiInput<framework::Tensor>("X");
const auto* scale = ctx.Input<framework::Tensor>("Scale");
auto outs = ctx.MultiOutput<framework::Tensor>("Out");
auto* found_inf = ctx.Output<framework::Tensor>("FoundInfinite");
const T* scale_data = scale->data<T>();
bool* found_inf_data = found_inf->mutable_data<bool>(dev_ctx.GetPlace());
*found_inf_data = false;
framework::Tensor is_finite =
ctx.AllocateTmpTensor<bool, DeviceContext>({1}, dev_ctx);
bool* is_finite_data = is_finite.template data<bool>();
auto& dev = *ctx.template device_context<DeviceContext>().eigen_device();
for (size_t i = 0; i < xs.size(); ++i) {
const auto* x = xs[i];
auto* out = outs[i];
out->mutable_data<T>(dev_ctx.GetPlace());
if (!(*found_inf_data)) {
framework::TensorIsfinite(*x, &is_finite);
if (*is_finite_data) {
auto eigen_out = framework::EigenVector<T>::Flatten(*out);
auto eigen_in = framework::EigenVector<T>::Flatten(*x);
eigen_out.device(dev) = (*scale_data) * eigen_in;
} else {
*found_inf_data = true;
break;
}
}
}
return;
}
};
} // namespace operators
} // namespace paddle
...@@ -76,6 +76,7 @@ std::map<std::string, std::set<std::string>> op_passing_outs_map = { ...@@ -76,6 +76,7 @@ std::map<std::string, std::set<std::string>> op_passing_outs_map = {
{"matmul", {"Out"}}, {"matmul", {"Out"}},
{"fake_quantize_dequantize_moving_average_abs_max", {"fake_quantize_dequantize_moving_average_abs_max",
{"Out", "OutScale", "OutAccum", "OutState"}}, {"Out", "OutScale", "OutAccum", "OutState"}},
{"amp_check_finite_and_scale", {"Out", "FoundInfinite"}},
}; };
// clang-format off // clang-format off
......
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import unittest
import numpy as np
from op_test import OpTest, skip_check_grad_ci
import paddle.fluid as fluid
class TestAmpCheckFiniteAndScaleOp(OpTest):
def setUp(self):
self.op_type = "amp_check_finite_and_scale"
self.init_dtype()
x = np.random.random((1024, 1024)).astype(self.dtype)
scale = np.random.random((1)).astype(self.dtype)
self.inputs = {'X': [('x0', x)], 'Scale': scale}
self.outputs = {
'FoundInfinite': np.array([0]),
'Out': [('out0', x * scale)],
}
def init_dtype(self):
self.dtype = np.float32
def test_check_output(self):
self.check_output()
class TestAmpCheckFiniteAndScaleOpWithNan(OpTest):
def setUp(self):
self.op_type = "amp_check_finite_and_scale"
self.init_dtype()
x = np.random.random((1024, 1024)).astype(self.dtype)
x[128][128] = np.nan
scale = np.random.random((1)).astype(self.dtype)
self.inputs = {'X': [('x0', x)], 'Scale': scale}
self.outputs = {
'FoundInfinite': np.array([1]),
'Out': [('out0', x)],
}
def init_dtype(self):
self.dtype = np.float32
def test_check_output(self):
# When input contains nan, do not check the output,
# since the output may be nondeterministic and will be discarded.
self.check_output(no_check_set=['Out'])
class TestAmpCheckFiniteAndScaleOpWithInf(OpTest):
def setUp(self):
self.op_type = "amp_check_finite_and_scale"
self.init_dtype()
x = np.random.random((1024, 1024)).astype(self.dtype)
x[128][128] = np.inf
scale = np.random.random((1)).astype(self.dtype)
self.inputs = {'X': [('x0', x)], 'Scale': scale}
self.outputs = {
'FoundInfinite': np.array([1]),
'Out': [('out0', x)],
}
def init_dtype(self):
self.dtype = np.float32
def test_check_output(self):
# When input contains inf, do not check the output,
# since the output may be nondeterministic and will be discarded.
self.check_output(no_check_set=['Out'])
if __name__ == '__main__':
unittest.main()
...@@ -23,4 +23,5 @@ no_check_set_white_list = [ ...@@ -23,4 +23,5 @@ no_check_set_white_list = [
'unsqueeze2', 'unsqueeze2',
'cross_entropy2', 'cross_entropy2',
'seed', 'seed',
'amp_check_finite_and_scale',
] ]
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册