From 8bfd978f06ca78e6040df0027343dac44c65bcc3 Mon Sep 17 00:00:00 2001
From: NetPunk <69072522+Patrick-Star125@users.noreply.github.com>
Date: Thu, 27 Apr 2023 14:22:50 +0800
Subject: [PATCH] [PaddlePaddle Hackathon 4]: Support the float16 data type in
 the maxout op (#50976)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

* support fp16 for maxout op

* format code

* change api

* add test for static float16

* format code

* formatting code

* atol alignment

* experiment-1

* experiment-2

* experiment-3

* format code
---
 paddle/phi/kernels/funcs/maxouting.cu         |  2 ++
 paddle/phi/kernels/gpu/maxout_grad_kernel.cu  |  9 +++--
 paddle/phi/kernels/gpu/maxout_kernel.cu       |  8 ++++-
 .../fluid/tests/unittests/test_maxout_op.py   | 35 +++++++++++++++++++
 python/paddle/nn/functional/activation.py    |  6 ++--
 5 files changed, 55 insertions(+), 5 deletions(-)

diff --git a/paddle/phi/kernels/funcs/maxouting.cu b/paddle/phi/kernels/funcs/maxouting.cu
index 89450dbd5c6..146bb1aca4c 100644
--- a/paddle/phi/kernels/funcs/maxouting.cu
+++ b/paddle/phi/kernels/funcs/maxouting.cu
@@ -175,9 +175,11 @@ void MaxOutGradFunctor<DeviceContext, T>::operator()(
 }
 
 template class MaxOutGradFunctor<phi::GPUContext, float>;
+template class MaxOutGradFunctor<phi::GPUContext, phi::dtype::float16>;
 template class MaxOutGradFunctor<phi::GPUContext, double>;
 
 template class MaxOutFunctor<phi::GPUContext, float>;
+template class MaxOutFunctor<phi::GPUContext, phi::dtype::float16>;
 template class MaxOutFunctor<phi::GPUContext, double>;
 
 }  // namespace funcs
diff --git a/paddle/phi/kernels/gpu/maxout_grad_kernel.cu b/paddle/phi/kernels/gpu/maxout_grad_kernel.cu
index a405f38523a..7d59436019c 100644
--- a/paddle/phi/kernels/gpu/maxout_grad_kernel.cu
+++ b/paddle/phi/kernels/gpu/maxout_grad_kernel.cu
@@ -15,5 +15,10 @@
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/impl/maxout_grad_kernel_impl.h"
 
-PD_REGISTER_KERNEL(
-    maxout_grad, GPU, ALL_LAYOUT, phi::MaxOutGradKernel, float, double) {}
+PD_REGISTER_KERNEL(maxout_grad,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::MaxOutGradKernel,
+                   float,
+                   phi::dtype::float16,
+                   double) {}
diff --git a/paddle/phi/kernels/gpu/maxout_kernel.cu b/paddle/phi/kernels/gpu/maxout_kernel.cu
index e5407a4925c..48710464502 100644
--- a/paddle/phi/kernels/gpu/maxout_kernel.cu
+++ b/paddle/phi/kernels/gpu/maxout_kernel.cu
@@ -15,4 +15,10 @@
 #include "paddle/phi/core/kernel_registry.h"
 #include "paddle/phi/kernels/impl/maxout_kernel_impl.h"
 
-PD_REGISTER_KERNEL(maxout, GPU, ALL_LAYOUT, phi::MaxOutKernel, float, double) {}
+PD_REGISTER_KERNEL(maxout,
+                   GPU,
+                   ALL_LAYOUT,
+                   phi::MaxOutKernel,
+                   float,
+                   phi::dtype::float16,
+                   double) {}
diff --git a/python/paddle/fluid/tests/unittests/test_maxout_op.py b/python/paddle/fluid/tests/unittests/test_maxout_op.py
index 678dd55fe92..b6d339c3aab 100644
--- a/python/paddle/fluid/tests/unittests/test_maxout_op.py
+++ b/python/paddle/fluid/tests/unittests/test_maxout_op.py
@@ -136,5 +136,40 @@ class TestMaxoutAPI(unittest.TestCase):
         self.assertRaises(ValueError, F.maxout, x_float32, 2, 2)
 
 
+class TestMaxOutOpFP16(TestMaxOutOp):
+    def set_attrs(self):
+        self.dtype = 'float16'
+
+
+class TestMaxoutFP16Case1(TestMaxOutOpFP16):
+    def set_attrs(self):
+        self.axis = -1
+
+
+class TestMaxoutFP16Case2(TestMaxOutOpFP16):
+    def set_attrs(self):
+        self.axis = 3
+
+
+@unittest.skipIf(
+    not core.is_compiled_with_cuda(), "core is not compiled with CUDA"
+)
+class TestMaxoutStaticAPIFP16(unittest.TestCase):
+    def setUp(self):
+        self.x_np = np.random.uniform(-1, 1, [2, 6, 5, 4]).astype(np.float16)
+        self.groups = 2
+        self.axis = 1
+        self.place = paddle.CUDAPlace(0)
+
+    def test_static_api(self):
+        with paddle.static.program_guard(paddle.static.Program()):
+            x = paddle.static.data('X', self.x_np.shape, self.x_np.dtype)
+            out = F.maxout(x, self.groups, self.axis)
+            exe = paddle.static.Executor(self.place)
+            res = exe.run(feed={'X': self.x_np}, fetch_list=[out])
+        out_ref = maxout_forward_naive(self.x_np, self.groups, self.axis)
+        np.testing.assert_allclose(out_ref, res[0], rtol=1e-05)
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/paddle/nn/functional/activation.py b/python/paddle/nn/functional/activation.py
index 04fa9ebc6dd..5bb9d2b1d03 100644
--- a/python/paddle/nn/functional/activation.py
+++ b/python/paddle/nn/functional/activation.py
@@ -787,7 +787,7 @@ def maxout(x, groups, axis=1, name=None):
 
     Parameters:
         x (Tensor): The input is 4-D Tensor with shape [N, C, H, W] or [N, H, W, C], the data type
-            of input is float32 or float64.
+            of input is float16, float32 or float64.
         groups (int): The groups number of maxout. `groups` specifies the
             index of channel dimension where maxout will be performed. This must be
             a factor of number of features.
@@ -822,7 +822,9 @@ def maxout(x, groups, axis=1, name=None):
     if in_dygraph_mode():
         return _C_ops.maxout(x, groups, axis)
     else:
-        check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'maxout')
+        check_variable_and_dtype(
+            x, 'x', ['float16', 'float32', 'float64'], 'maxout'
+        )
         if axis not in [1, -1, 3]:
             raise ValueError(
                 "Attr(axis) should be 1 when data format is NCHW, -1 or 3 when data format is NHWC. Received "
-- 
GitLab
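
Editor's note: for anyone trying the patch out, below is a minimal usage sketch (not part of the patch itself) of the float16 path this change enables. It assumes a CUDA build of PaddlePaddle with this patch applied; the shape, groups, and axis values are arbitrary examples.

    import numpy as np
    import paddle
    import paddle.nn.functional as F

    # maxout expects a 4-D input; `groups` must evenly divide the channel dim.
    x_np = np.random.uniform(-1, 1, [2, 6, 5, 4]).astype(np.float16)
    x = paddle.to_tensor(x_np, place=paddle.CUDAPlace(0))

    # With groups=2 on axis=1, channels go from 6 to 3 via element-wise max.
    out = F.maxout(x, groups=2, axis=1)
    print(out.dtype, out.shape)  # paddle.float16, [2, 3, 5, 4]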