From 59c7aea57901005464392f99ef549efd457969a5 Mon Sep 17 00:00:00 2001 From: zhangbo9674 <82555433+zhangbo9674@users.noreply.github.com> Date: Thu, 10 Feb 2022 13:26:20 +0800 Subject: [PATCH] [bf16] add bf16 kernel: squeeze & unsqueeze & stack (#39402) * add squeeze unsqueeze stack * add unittest * add cpu kernel --- paddle/fluid/operators/squeeze_op.cc | 16 +++++-- paddle/fluid/operators/squeeze_op.cu.cc | 5 ++ paddle/fluid/operators/stack_op.cc | 23 ++++++---- paddle/fluid/operators/stack_op.cu | 6 ++- paddle/fluid/operators/unsqueeze_op.cc | 16 +++++-- paddle/fluid/operators/unsqueeze_op.cu.cc | 6 +++ .../fluid/tests/unittests/test_squeeze_op.py | 29 +++++++++++- .../fluid/tests/unittests/test_stack_op.py | 46 ++++++++++++++++++- .../tests/unittests/test_unsqueeze_op.py | 29 +++++++++++- 9 files changed, 153 insertions(+), 23 deletions(-) diff --git a/paddle/fluid/operators/squeeze_op.cc b/paddle/fluid/operators/squeeze_op.cc index de30eab25f3..46306e185be 100644 --- a/paddle/fluid/operators/squeeze_op.cc +++ b/paddle/fluid/operators/squeeze_op.cc @@ -393,7 +393,9 @@ REGISTER_OP_CPU_KERNEL( ops::SqueezeKernel>, ops::SqueezeKernel>); + paddle::platform::complex>, + ops::SqueezeKernel); REGISTER_OP_CPU_KERNEL( squeeze_grad, ops::SqueezeGradKernel, @@ -406,7 +408,9 @@ REGISTER_OP_CPU_KERNEL( ops::SqueezeGradKernel>, ops::SqueezeGradKernel>); + paddle::platform::complex>, + ops::SqueezeGradKernel); REGISTER_OP_CPU_KERNEL( squeeze2, ops::Squeeze2Kernel, @@ -419,7 +423,9 @@ REGISTER_OP_CPU_KERNEL( ops::Squeeze2Kernel>, ops::Squeeze2Kernel>); + paddle::platform::complex>, + ops::Squeeze2Kernel); REGISTER_OP_CPU_KERNEL( squeeze2_grad, @@ -433,4 +439,6 @@ REGISTER_OP_CPU_KERNEL( ops::Squeeze2GradKernel>, ops::Squeeze2GradKernel>); + paddle::platform::complex>, + ops::Squeeze2GradKernel); diff --git a/paddle/fluid/operators/squeeze_op.cu.cc b/paddle/fluid/operators/squeeze_op.cu.cc index 9b4000c26ff..8d7c0e5b4ff 100644 --- a/paddle/fluid/operators/squeeze_op.cu.cc +++ b/paddle/fluid/operators/squeeze_op.cu.cc @@ -21,6 +21,7 @@ REGISTER_OP_CUDA_KERNEL( squeeze, ops::SqueezeKernel, ops::SqueezeKernel, ops::SqueezeKernel, + ops::SqueezeKernel, ops::SqueezeKernel, ops::SqueezeKernel, ops::SqueezeKernel, @@ -35,6 +36,7 @@ REGISTER_OP_CUDA_KERNEL( ops::SqueezeGradKernel, ops::SqueezeGradKernel, ops::SqueezeGradKernel, + ops::SqueezeGradKernel, ops::SqueezeGradKernel, ops::SqueezeGradKernel, ops::SqueezeGradKernel, @@ -48,6 +50,7 @@ REGISTER_OP_CUDA_KERNEL( squeeze2, ops::Squeeze2Kernel, ops::Squeeze2Kernel, ops::Squeeze2Kernel, + ops::Squeeze2Kernel, ops::Squeeze2Kernel, ops::Squeeze2Kernel, ops::Squeeze2Kernel, @@ -62,6 +65,8 @@ REGISTER_OP_CUDA_KERNEL( ops::Squeeze2GradKernel, ops::Squeeze2GradKernel, ops::Squeeze2GradKernel, + ops::Squeeze2GradKernel, ops::Squeeze2GradKernel, ops::Squeeze2GradKernel, ops::Squeeze2GradKernel, diff --git a/paddle/fluid/operators/stack_op.cc b/paddle/fluid/operators/stack_op.cc index 0a813759aa3..f1629f22222 100644 --- a/paddle/fluid/operators/stack_op.cc +++ b/paddle/fluid/operators/stack_op.cc @@ -173,13 +173,16 @@ REGISTER_OPERATOR(stack, ops::StackOp, ops::StackOpMaker, ops::StackGradOpMaker); REGISTER_OPERATOR(stack_grad, ops::StackOpGrad); -REGISTER_OP_CPU_KERNEL(stack, ops::StackKernel, - ops::StackKernel, - ops::StackKernel, - ops::StackKernel); - -REGISTER_OP_CPU_KERNEL(stack_grad, - ops::StackGradKernel, - ops::StackGradKernel, - ops::StackGradKernel, - ops::StackGradKernel); +REGISTER_OP_CPU_KERNEL( + stack, ops::StackKernel, + ops::StackKernel, + ops::StackKernel, + ops::StackKernel, + ops::StackKernel); + +REGISTER_OP_CPU_KERNEL( + stack_grad, ops::StackGradKernel, + ops::StackGradKernel, + ops::StackGradKernel, + ops::StackGradKernel, + ops::StackGradKernel); diff --git a/paddle/fluid/operators/stack_op.cu b/paddle/fluid/operators/stack_op.cu index 2cebe0e320e..a56dd6aef4f 100644 --- a/paddle/fluid/operators/stack_op.cu +++ b/paddle/fluid/operators/stack_op.cu @@ -196,10 +196,12 @@ class StackGradGPUKernel : public framework::OpKernel { REGISTER_OP_CUDA_KERNEL(stack, ops::StackGPUKernel, ops::StackGPUKernel, ops::StackGPUKernel, ops::StackGPUKernel, - ops::StackGPUKernel); + ops::StackGPUKernel, + ops::StackGPUKernel); REGISTER_OP_CUDA_KERNEL(stack_grad, ops::StackGradGPUKernel, ops::StackGradGPUKernel, ops::StackGradGPUKernel, ops::StackGradGPUKernel, - ops::StackGradGPUKernel); + ops::StackGradGPUKernel, + ops::StackGradGPUKernel); diff --git a/paddle/fluid/operators/unsqueeze_op.cc b/paddle/fluid/operators/unsqueeze_op.cc index 77b06fb2d4b..e2cbf73aa13 100644 --- a/paddle/fluid/operators/unsqueeze_op.cc +++ b/paddle/fluid/operators/unsqueeze_op.cc @@ -366,7 +366,9 @@ REGISTER_OP_CPU_KERNEL( ops::UnsqueezeKernel>, ops::UnsqueezeKernel>); + paddle::platform::complex>, + ops::UnsqueezeKernel); REGISTER_OP_CPU_KERNEL( unsqueeze_grad, ops::UnsqueezeGradKernel, @@ -379,7 +381,9 @@ REGISTER_OP_CPU_KERNEL( ops::UnsqueezeGradKernel>, ops::UnsqueezeGradKernel>); + paddle::platform::complex>, + ops::UnsqueezeGradKernel); REGISTER_OP_CPU_KERNEL( unsqueeze2, ops::UnsqueezeKernel, ops::UnsqueezeKernel, @@ -391,7 +395,9 @@ REGISTER_OP_CPU_KERNEL( ops::UnsqueezeKernel>, ops::UnsqueezeKernel>); + paddle::platform::complex>, + ops::UnsqueezeKernel); REGISTER_OP_CPU_KERNEL( unsqueeze2_grad, ops::Unsqueeze2GradKernel, @@ -404,4 +410,6 @@ REGISTER_OP_CPU_KERNEL( ops::Unsqueeze2GradKernel>, ops::Unsqueeze2GradKernel>); + paddle::platform::complex>, + ops::Unsqueeze2GradKernel); diff --git a/paddle/fluid/operators/unsqueeze_op.cu.cc b/paddle/fluid/operators/unsqueeze_op.cu.cc index d1fe251ef77..9feb66e2a5f 100644 --- a/paddle/fluid/operators/unsqueeze_op.cu.cc +++ b/paddle/fluid/operators/unsqueeze_op.cu.cc @@ -21,6 +21,7 @@ REGISTER_OP_CUDA_KERNEL( unsqueeze, ops::UnsqueezeKernel, ops::UnsqueezeKernel, ops::UnsqueezeKernel, + ops::UnsqueezeKernel, ops::UnsqueezeKernel, ops::UnsqueezeKernel, ops::UnsqueezeKernel, @@ -36,6 +37,8 @@ REGISTER_OP_CUDA_KERNEL( ops::UnsqueezeGradKernel, ops::UnsqueezeGradKernel, + ops::UnsqueezeGradKernel, ops::UnsqueezeGradKernel, ops::UnsqueezeGradKernel, ops::UnsqueezeGradKernel, @@ -50,6 +53,7 @@ REGISTER_OP_CUDA_KERNEL( ops::UnsqueezeKernel, ops::UnsqueezeKernel, ops::UnsqueezeKernel, + ops::UnsqueezeKernel, ops::UnsqueezeKernel, ops::UnsqueezeKernel, ops::UnsqueezeKernel, @@ -65,6 +69,8 @@ REGISTER_OP_CUDA_KERNEL( ops::Unsqueeze2GradKernel, ops::Unsqueeze2GradKernel, + ops::Unsqueeze2GradKernel, ops::Unsqueeze2GradKernel, ops::Unsqueeze2GradKernel, ops::Unsqueeze2GradKernel, diff --git a/python/paddle/fluid/tests/unittests/test_squeeze_op.py b/python/paddle/fluid/tests/unittests/test_squeeze_op.py index a048293c8da..e0e31894cb5 100755 --- a/python/paddle/fluid/tests/unittests/test_squeeze_op.py +++ b/python/paddle/fluid/tests/unittests/test_squeeze_op.py @@ -20,7 +20,8 @@ import numpy as np import paddle import paddle.fluid as fluid from paddle.fluid import compiler, Program, program_guard -from op_test import OpTest +from op_test import OpTest, convert_float_to_uint16 +import paddle.fluid.core as core paddle.enable_static() @@ -49,6 +50,32 @@ class TestSqueezeOp(OpTest): self.attrs = {"axes": self.axes} +class TestSqueezeBF16Op(OpTest): + def setUp(self): + self.op_type = "squeeze" + self.dtype = np.uint16 + self.init_test_case() + x = np.random.random(self.ori_shape).astype("float32") + out = x.reshape(self.new_shape) + self.inputs = {"X": convert_float_to_uint16(x)} + self.init_attrs() + self.outputs = {"Out": convert_float_to_uint16(out)} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(["X"], "Out") + + def init_test_case(self): + self.ori_shape = (1, 3, 1, 40) + self.axes = (0, 2) + self.new_shape = (3, 40) + + def init_attrs(self): + self.attrs = {"axes": self.axes} + + # Correct: There is mins axis. class TestSqueezeOp1(TestSqueezeOp): def init_test_case(self): diff --git a/python/paddle/fluid/tests/unittests/test_stack_op.py b/python/paddle/fluid/tests/unittests/test_stack_op.py index 8dd71c5a558..76f9cf1128a 100644 --- a/python/paddle/fluid/tests/unittests/test_stack_op.py +++ b/python/paddle/fluid/tests/unittests/test_stack_op.py @@ -16,7 +16,8 @@ import numpy as np import unittest import paddle import paddle.fluid as fluid -from op_test import OpTest +from op_test import OpTest, convert_float_to_uint16 +import paddle.fluid.core as core class TestStackOpBase(OpTest): @@ -90,6 +91,49 @@ class TestStackOp6(TestStackOpBase): self.axis = 3 +class TestStackBF16Op(OpTest): + def initDefaultParameters(self): + self.num_inputs = 4 + self.input_dim = (5, 6, 7) + self.axis = 0 + self.dtype = np.uint16 + + def initParameters(self): + pass + + def get_x_names(self): + x_names = [] + for i in range(self.num_inputs): + x_names.append('x{}'.format(i)) + return x_names + + def setUp(self): + self.initDefaultParameters() + self.initParameters() + self.op_type = 'stack' + self.x = [] + for i in range(self.num_inputs): + self.x.append( + np.random.random(size=self.input_dim).astype(np.float32)) + + out = np.stack(self.x, axis=self.axis) + + tmp = [] + x_names = self.get_x_names() + for i in range(self.num_inputs): + tmp.append((x_names[i], convert_float_to_uint16(self.x[i]))) + + self.inputs = {'X': tmp} + self.outputs = {'Y': convert_float_to_uint16(out)} + self.attrs = {'axis': self.axis} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(self.get_x_names(), 'Y') + + class TestStackAPIWithLoDTensorArray(unittest.TestCase): """ Test stack api when the input(x) is a LoDTensorArray. diff --git a/python/paddle/fluid/tests/unittests/test_unsqueeze_op.py b/python/paddle/fluid/tests/unittests/test_unsqueeze_op.py index 9c705837334..c1ec95fc8bf 100755 --- a/python/paddle/fluid/tests/unittests/test_unsqueeze_op.py +++ b/python/paddle/fluid/tests/unittests/test_unsqueeze_op.py @@ -19,7 +19,8 @@ import numpy as np import paddle import paddle.fluid as fluid -from op_test import OpTest +from op_test import OpTest, convert_float_to_uint16 +import paddle.fluid.core as core paddle.enable_static() @@ -48,6 +49,32 @@ class TestUnsqueezeOp(OpTest): self.attrs = {"axes": self.axes} +class TestUnsqueezeBF16Op(OpTest): + def setUp(self): + self.init_test_case() + self.op_type = "unsqueeze" + self.dtype = np.uint16 + x = np.random.random(self.ori_shape).astype("float32") + out = x.reshape(self.new_shape) + self.inputs = {"X": convert_float_to_uint16(x)} + self.init_attrs() + self.outputs = {"Out": convert_float_to_uint16(out)} + + def test_check_output(self): + self.check_output() + + def test_check_grad(self): + self.check_grad(["X"], "Out") + + def init_test_case(self): + self.ori_shape = (3, 40) + self.axes = (1, 2) + self.new_shape = (3, 1, 1, 40) + + def init_attrs(self): + self.attrs = {"axes": self.axes} + + # Correct: Single input index. class TestUnsqueezeOp1(TestUnsqueezeOp): def init_test_case(self): -- GitLab