From e8ac7fc30a25e5d4626d8b483bf936bb9abe2e93 Mon Sep 17 00:00:00 2001
From: zhangbo9674 <82555433+zhangbo9674@users.noreply.github.com>
Date: Thu, 10 Feb 2022 13:26:09 +0800
Subject: [PATCH] [bf16] add bf16 kernel: dropout & reshape & slice (#39395)

* add dropout

* add reshape

* add slice

* refine slice unittest

* refine slice unittest

* add cpu bf16 kernel
---
 paddle/fluid/operators/dropout_op.cc          |  8 +++--
 paddle/fluid/operators/dropout_op.cu          |  3 ++
 paddle/fluid/operators/reshape_op.cc          | 11 ++++---
 paddle/fluid/operators/slice_op.cc            | 12 +++++--
 paddle/pten/kernels/funcs/eigen/pad.cc        |  2 ++
 .../fluid/tests/unittests/test_dropout_op.py  | 23 +++++++++++++-
 .../fluid/tests/unittests/test_reshape_op.py  | 30 +++++++++++++++++-
 .../fluid/tests/unittests/test_slice_op.py    | 31 ++++++++++++++++++-
 8 files changed, 109 insertions(+), 11 deletions(-)

diff --git a/paddle/fluid/operators/dropout_op.cc b/paddle/fluid/operators/dropout_op.cc
index cbfb795d6a..7613b04bcc 100644
--- a/paddle/fluid/operators/dropout_op.cc
+++ b/paddle/fluid/operators/dropout_op.cc
@@ -179,8 +179,12 @@ REGISTER_OPERATOR(dropout, ops::DropoutOp, ops::DropoutOpMaker,
 REGISTER_OPERATOR(dropout_grad, ops::DropoutOpGrad);
 REGISTER_OP_CPU_KERNEL(
     dropout, ops::CPUDropoutKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::CPUDropoutKernel<paddle::platform::CPUDeviceContext, double>);
+    ops::CPUDropoutKernel<paddle::platform::CPUDeviceContext, double>,
+    ops::CPUDropoutKernel<paddle::platform::CPUDeviceContext,
+                          paddle::platform::bfloat16>);
 REGISTER_OP_CPU_KERNEL(
     dropout_grad,
     ops::DropoutGradKernel<paddle::platform::CPUDeviceContext, float>,
-    ops::DropoutGradKernel<paddle::platform::CPUDeviceContext, double>);
+    ops::DropoutGradKernel<paddle::platform::CPUDeviceContext, double>,
+    ops::DropoutGradKernel<paddle::platform::CPUDeviceContext,
+                           paddle::platform::bfloat16>);
diff --git a/paddle/fluid/operators/dropout_op.cu b/paddle/fluid/operators/dropout_op.cu
index 0d5ee41c5c..f6ddff1d03 100644
--- a/paddle/fluid/operators/dropout_op.cu
+++ b/paddle/fluid/operators/dropout_op.cu
@@ -17,6 +17,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/dropout_impl.cu.h"
 #include "paddle/fluid/operators/dropout_op.h"
+#include "paddle/fluid/platform/bfloat16.h"
 #include "paddle/fluid/platform/float16.h"
 
 namespace paddle {
@@ -84,8 +85,10 @@ namespace plat = paddle::platform;
 REGISTER_OP_CUDA_KERNEL(
     dropout, ops::GPUDropoutKernel<plat::CUDADeviceContext, float>,
     ops::GPUDropoutKernel<plat::CUDADeviceContext, plat::float16>,
+    ops::GPUDropoutKernel<plat::CUDADeviceContext, plat::bfloat16>,
     ops::GPUDropoutKernel<plat::CUDADeviceContext, double>);
 REGISTER_OP_CUDA_KERNEL(
     dropout_grad, ops::GPUDropoutGradKernel<plat::CUDADeviceContext, float>,
     ops::GPUDropoutGradKernel<plat::CUDADeviceContext, plat::float16>,
+    ops::GPUDropoutGradKernel<plat::CUDADeviceContext, plat::bfloat16>,
     ops::GPUDropoutGradKernel<plat::CUDADeviceContext, double>);
diff --git a/paddle/fluid/operators/reshape_op.cc b/paddle/fluid/operators/reshape_op.cc
index 43da63aae7..1ef90ff2b7 100644
--- a/paddle/fluid/operators/reshape_op.cc
+++ b/paddle/fluid/operators/reshape_op.cc
@@ -698,13 +698,14 @@ REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape, float, ops::ReshapeKernel, double,
                                 ops::ReshapeKernel, int, ops::ReshapeKernel,
                                 uint8_t, ops::ReshapeKernel, int64_t,
                                 ops::ReshapeKernel, plat::float16,
+                                ops::ReshapeKernel, plat::bfloat16,
                                 ops::ReshapeKernel);
 REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape_grad, float, ops::ReshapeGradKernel,
                                 double, ops::ReshapeGradKernel, int,
                                 ops::ReshapeGradKernel, int64_t,
                                 ops::ReshapeGradKernel, uint8_t,
                                 ops::ReshapeGradKernel, plat::float16,
-
+                                ops::ReshapeGradKernel, plat::bfloat16,
                                 ops::ReshapeGradKernel);
 REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape2, float, ops::ReshapeKernel, double,
                                 ops::ReshapeKernel, int, ops::ReshapeKernel,
@@ -712,13 +713,15 @@ REGISTER_OP_CUDA_KERNEL_FUNCTOR(reshape2, float, ops::ReshapeKernel, double,
                                 ops::ReshapeKernel, plat::float16,
                                 ops::ReshapeKernel, bool, ops::ReshapeKernel,
                                 plat::complex<float>, ops::ReshapeKernel,
-                                plat::complex<double>, ops::ReshapeKernel);
+                                plat::complex<double>, ops::ReshapeKernel,
+                                plat::bfloat16, ops::ReshapeKernel);
 REGISTER_OP_CUDA_KERNEL_FUNCTOR(
     reshape2_grad, float, ops::ReshapeGradKernel, double,
     ops::ReshapeGradKernel, int, ops::ReshapeGradKernel, uint8_t,
     ops::ReshapeGradKernel, int64_t, ops::ReshapeGradKernel, plat::float16,
     ops::ReshapeGradKernel, bool, ops::ReshapeGradKernel, plat::complex<float>,
-    ops::ReshapeGradKernel, plat::complex<double>, ops::ReshapeGradKernel);
+    ops::ReshapeGradKernel, plat::complex<double>, ops::ReshapeGradKernel,
+    plat::bfloat16, ops::ReshapeGradKernel);
 REGISTER_OP_CUDA_KERNEL_FUNCTOR(
     reshape2_grad_grad, float, ops::ReshapeDoubleGradKernel, double,
@@ -727,7 +730,7 @@ REGISTER_OP_CUDA_KERNEL_FUNCTOR(
     plat::float16, ops::ReshapeDoubleGradKernel, bool,
     ops::ReshapeDoubleGradKernel, plat::complex<float>,
     ops::ReshapeDoubleGradKernel, plat::complex<double>,
-    ops::ReshapeDoubleGradKernel);
+    ops::ReshapeDoubleGradKernel, plat::bfloat16, ops::ReshapeDoubleGradKernel);
 #endif
 
 #ifdef PADDLE_WITH_XPU
diff --git a/paddle/fluid/operators/slice_op.cc b/paddle/fluid/operators/slice_op.cc
index 4965e5e156..37c79b9898 100644
--- a/paddle/fluid/operators/slice_op.cc
+++ b/paddle/fluid/operators/slice_op.cc
@@ -442,7 +442,9 @@ REGISTER_OP_CPU_KERNEL(
     ops::SliceKernel<paddle::platform::CPUDeviceContext,
                      paddle::platform::complex<float>>,
     ops::SliceKernel<paddle::platform::CPUDeviceContext,
-                     paddle::platform::complex<double>>);
+                     paddle::platform::complex<double>>,
+    ops::SliceKernel<paddle::platform::CPUDeviceContext,
+                     paddle::platform::bfloat16>);
 
 REGISTER_OP_CPU_KERNEL(
     slice_grad, ops::SliceGradKernel<paddle::platform::CPUDeviceContext, bool>,
@@ -453,7 +455,9 @@ REGISTER_OP_CPU_KERNEL(
     ops::SliceGradKernel<paddle::platform::CPUDeviceContext,
                          paddle::platform::complex<float>>,
     ops::SliceGradKernel<paddle::platform::CPUDeviceContext,
-                         paddle::platform::complex<double>>);
+                         paddle::platform::complex<double>>,
+    ops::SliceGradKernel<paddle::platform::CPUDeviceContext,
+                         paddle::platform::bfloat16>);
 
 REGISTER_OP_CUDA_KERNEL(
     slice, ops::SliceKernel<paddle::platform::CUDADeviceContext, float>,
@@ -463,6 +467,8 @@ REGISTER_OP_CUDA_KERNEL(
     ops::SliceKernel<paddle::platform::CUDADeviceContext, int64_t>,
     ops::SliceKernel<paddle::platform::CUDADeviceContext,
                      paddle::platform::float16>,
+    ops::SliceKernel<paddle::platform::CUDADeviceContext,
+                     paddle::platform::bfloat16>,
     ops::SliceKernel<paddle::platform::CUDADeviceContext,
                      paddle::platform::complex<float>>,
     ops::SliceKernel<paddle::platform::CUDADeviceContext,
@@ ... @@ REGISTER_OP_CUDA_KERNEL(
     ops::SliceGradKernel<paddle::platform::CUDADeviceContext,
                          paddle::platform::float16>,
+    ops::SliceGradKernel<paddle::platform::CUDADeviceContext,
+                         paddle::platform::bfloat16>,
     ops::SliceGradKernel<paddle::platform::CUDADeviceContext,
                          paddle::platform::complex<float>>,
     ops::SliceGradKernel<paddle::platform::CUDADeviceContext,
                          paddle::platform::complex<double>>);
diff --git a/paddle/pten/kernels/funcs/eigen/pad.cc b/paddle/pten/kernels/funcs/eigen/pad.cc
--- a/paddle/pten/kernels/funcs/eigen/pad.cc
+++ b/paddle/pten/kernels/funcs/eigen/pad.cc
@@ ... @@
+INSTANTIATION(EigenPad, dtype::bfloat16);
 INSTANTIATION(EigenPad, dtype::complex<float>);
 INSTANTIATION(EigenPad, dtype::complex<double>);
 #undef INSTANTIATION
diff --git a/python/paddle/fluid/tests/unittests/test_dropout_op.py b/python/paddle/fluid/tests/unittests/test_dropout_op.py
index cb72248b15..f670f7c380 100644
--- a/python/paddle/fluid/tests/unittests/test_dropout_op.py
+++ b/python/paddle/fluid/tests/unittests/test_dropout_op.py
@@ -17,7 +17,7 @@ from __future__ import print_function
 import unittest
 import numpy as np
 import paddle.fluid.core as core
-from op_test import OpTest, skip_check_grad_ci
+from op_test import OpTest, skip_check_grad_ci, convert_float_to_uint16
 import paddle
 import paddle.static as static
 import paddle.fluid as fluid
@@ -233,6 +233,27 @@ class TestFP16DropoutOp2(TestFP16DropoutOp):
         self.fix_seed = False
 
 
+class TestBF16DropoutOp(OpTest):
+    def setUp(self):
+        self.op_type = "dropout"
+        self.dtype = np.uint16
+
+        x = np.random.random((32, 64)).astype("float32")
+        self.inputs = {'X': convert_float_to_uint16(x)}
+        self.attrs = {'dropout_prob': 1.0, 'fix_seed': True, 'is_test': False}
+        self.outputs = {
+            'Out':
+            convert_float_to_uint16(np.zeros((32, 64)).astype('float32')),
+            'Mask': np.zeros((32, 64)).astype('uint8')
+        }
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad_normal(self):
+        self.check_grad(['X'], 'Out')
+
+
 class TestDropoutOpWithSeedOnCPUPlace(unittest.TestCase):
     def test_seed_cpu_place(self):
         paddle.enable_static()
diff --git a/python/paddle/fluid/tests/unittests/test_reshape_op.py b/python/paddle/fluid/tests/unittests/test_reshape_op.py
index a0063738d3..c860d6972f 100755
--- a/python/paddle/fluid/tests/unittests/test_reshape_op.py
+++ b/python/paddle/fluid/tests/unittests/test_reshape_op.py
@@ -17,11 +17,12 @@ from __future__ import print_function
 
 import unittest
 import numpy as np
-from op_test import OpTest
+from op_test import OpTest, convert_float_to_uint16
 import paddle
 import paddle.fluid as fluid
 from paddle.fluid import compiler
 from paddle.static import Program, program_guard
+import paddle.fluid.core as core
 
 
 # situation 1: have shape( list, no tensor), no actual shape(Tensor)
@@ -48,6 +49,33 @@ class TestReshapeOp(OpTest):
         self.check_grad(["X"], "Out")
 
 
+class TestReshapeBF16Op(OpTest):
+    def setUp(self):
+        self.init_data()
+        self.op_type = "reshape2"
+        self.dtype = np.uint16
+        x = np.random.random(self.ori_shape).astype("float32")
+        out = x.reshape(self.infered_shape)
+        self.inputs = {"X": convert_float_to_uint16(x)}
+        self.attrs = {"shape": self.new_shape}
+        self.outputs = {
+            "Out": convert_float_to_uint16(out),
+            'XShape': convert_float_to_uint16(
+                np.random.random(self.ori_shape).astype("float32"))
+        }
+
+    def init_data(self):
+        self.ori_shape = (2, 60)
+        self.new_shape = (12, 10)
+        self.infered_shape = (12, 10)
+
+    def test_check_output(self):
+        self.check_output(no_check_set=['XShape'])
+
+    def test_check_grad(self):
+        self.check_grad(["X"], "Out")
+
+
 class TestReshapeOpDimInfer1(TestReshapeOp):
     def init_data(self):
         self.ori_shape = (5, 25)
diff --git a/python/paddle/fluid/tests/unittests/test_slice_op.py b/python/paddle/fluid/tests/unittests/test_slice_op.py
index 57d5453ec9..629d61d01b 100644
--- a/python/paddle/fluid/tests/unittests/test_slice_op.py
+++ b/python/paddle/fluid/tests/unittests/test_slice_op.py
@@ -17,7 +17,7 @@ from __future__ import print_function
 import unittest
 import numpy as np
 import paddle.fluid.core as core
-from op_test import OpTest
+from op_test import OpTest, convert_float_to_uint16
 import paddle.fluid as fluid
 import paddle.fluid.layers as layers
 import paddle
@@ -484,6 +484,35 @@ class TestFP16_2(OpTest):
                                    numeric_grad_delta=0.5)
 
 
+class TestBF16(OpTest):
+    def setUp(self):
+        self.op_type = "slice"
+        self.config()
+        self.inputs = {'Input': convert_float_to_uint16(self.input)}
+        self.outputs = {'Out': convert_float_to_uint16(self.out)}
+        self.attrs = {
+            'axes': self.axes,
+            'starts': self.starts,
+            'ends': self.ends,
+            'infer_flags': self.infer_flags
+        }
+
+    def config(self):
+        self.dtype = np.uint16
+        self.input = np.random.random([3, 4, 5, 6]).astype(np.float32)
+        self.starts = [-3, 0, 2]
+        self.ends = [3, 100, -1]
+        self.axes = [0, 1, 3]
+        self.out = self.input[-3:3, 0:100, :, 2:-1]
+        self.infer_flags = [1, 1, 1]
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad_normal(self):
+        self.check_grad(['Input'], 'Out')
+
+
 # Test python API
 class TestSliceAPI(unittest.TestCase):
     def test_1(self):
--
GitLab
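
A note on the bf16 convention shared by all three new tests: NumPy has no native bfloat16 dtype, so the tests store bf16 tensors as np.uint16 arrays holding the upper 16 bits of each float32 value, built with op_test.convert_float_to_uint16. The sketch below illustrates that encoding only; it is a minimal stand-in, not Paddle's actual helper. The function names are hypothetical, it truncates rather than rounds (a production converter may round to nearest even instead), and NaN handling is ignored.

import numpy as np

def float_to_bf16_as_uint16(x):
    """Keep the high 16 bits of each float32 value (bfloat16's bit layout)."""
    bits = np.ascontiguousarray(x, dtype=np.float32).view(np.uint32)
    return (bits >> 16).astype(np.uint16)

def bf16_uint16_to_float(y):
    """Widen stored bf16 bits back to float32 by zero-filling the low 16 bits."""
    return (y.astype(np.uint32) << 16).view(np.float32)

# Mirrors the input built in TestBF16DropoutOp: a (32, 64) float32 array.
x = np.random.random((32, 64)).astype("float32")
x_bf16 = float_to_bf16_as_uint16(x)
assert x_bf16.dtype == np.uint16 and x_bf16.shape == x.shape
# Truncation loses at most one bf16 ulp; for values in [0, 1) that is < 2**-8.
assert np.max(np.abs(x - bf16_uint16_to_float(x_bf16))) < 2.0**-8

This encoding is also why the tests can compare outputs exactly: dropout runs with dropout_prob=1.0 so the expected Out is all zeros, and reshape and slice only move elements, so each expected output is a bit-identical uint16 buffer rather than a float result needing a bf16 rounding tolerance.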