From 9aeca2f1805b48421c402c66f6087972c55cab33 Mon Sep 17 00:00:00 2001
From: Li Min <11663212+limin2021@users.noreply.github.com>
Date: Tue, 26 Oct 2021 14:01:15 +0800
Subject: [PATCH] Move fused_attention and fused_feedforward functional api
 path to incubate (#36704)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Move the Python API entry points added in PR #35905 and PR #35843 to the
incubate directory (paddle.incubate.nn.functional).
---
 paddle/fluid/operators/fused/CMakeLists.txt         |  2 --
 .../fluid/tests/unittests/CMakeLists.txt            |  1 -
 .../tests/unittests/test_fused_attention_op.py      |  3 ++-
 .../unittests/test_fused_feedforward_op.py          | 12 ++++++------
 .../paddle/incubate/nn/functional/__init__.py       | 18 ++++++++++++++++++
 .../nn/functional/fused_transformer.py              | 10 +++++-----
 python/paddle/nn/functional/__init__.py             |  4 ----
 7 files changed, 31 insertions(+), 19 deletions(-)
 create mode 100644 python/paddle/incubate/nn/functional/__init__.py
 rename python/paddle/{ => incubate}/nn/functional/fused_transformer.py (97%)

diff --git a/paddle/fluid/operators/fused/CMakeLists.txt b/paddle/fluid/operators/fused/CMakeLists.txt
index 0e2dae75071..eec925b2c05 100644
--- a/paddle/fluid/operators/fused/CMakeLists.txt
+++ b/paddle/fluid/operators/fused/CMakeLists.txt
@@ -81,10 +81,8 @@ if (WITH_GPU OR WITH_ROCM)
     nv_test(test_fused_dropout_act_bias SRCS fused_dropout_act_bias_test.cu DEPS tensor op_registry dropout_op layer_norm_op device_context generator memory)
     nv_test(test_fused_layernorm_residual_dropout_bias SRCS fused_layernorm_residual_dropout_bias_test.cu DEPS tensor op_registry dropout_op layer_norm_op device_context generator memory)
 
-
     op_library(fused_feedforward_op)
     file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(fused_feedforward);\n")
-
     # fused_attention_op
     op_library(fused_attention_op)
     file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(fused_attention);\n")
diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt
index 5b1c02e71ab..d8212216d3f 100644
--- a/python/paddle/fluid/tests/unittests/CMakeLists.txt
+++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt
@@ -98,7 +98,6 @@ foreach(TEST_OP ${MIXED_DIST_TEST_OPS})
 endforeach()
 
 if(NOT WITH_GPU)
-
     LIST(REMOVE_ITEM TEST_OPS test_fused_feedforward_op)
     LIST(REMOVE_ITEM TEST_OPS test_fused_attention_op)
 endif()
diff --git a/python/paddle/fluid/tests/unittests/test_fused_attention_op.py b/python/paddle/fluid/tests/unittests/test_fused_attention_op.py
index a5578d71c5c..1e0d83f8ac7 100644
--- a/python/paddle/fluid/tests/unittests/test_fused_attention_op.py
+++ b/python/paddle/fluid/tests/unittests/test_fused_attention_op.py
@@ -18,6 +18,7 @@ import paddle
 import paddle.nn as nn
 import paddle.fluid.core as core
 import paddle.nn.functional as F
+import paddle.incubate.nn.functional as incubate_f
 from paddle.nn.layer.norm import LayerNorm
 from paddle.nn.layer.common import Linear, Dropout
 from paddle.nn.layer.transformer import _convert_attention_mask
@@ -190,7 +191,7 @@ class TestFusedAttentionOp(OpTest):
         if attn_mask is not None:
             attn_mask = _convert_attention_mask(attn_mask, x.dtype)
 
-        final_out = F.fused_multi_head_attention(
+        final_out = incubate_f.fused_multi_head_attention(
             x, qkv_weight_tensor, out_linear_weight, self.pre_layer_norm,
             ln1_scale, ln1_bias, ln2_scale, ln2_bias, epsilon, qkv_bias_tensor,
             out_linear_bias, attn_mask, self.dropout_prob,
diff --git a/python/paddle/fluid/tests/unittests/test_fused_feedforward_op.py b/python/paddle/fluid/tests/unittests/test_fused_feedforward_op.py
index d926512b592..5ea43d2edf0 100644
--- a/python/paddle/fluid/tests/unittests/test_fused_feedforward_op.py
+++ b/python/paddle/fluid/tests/unittests/test_fused_feedforward_op.py
@@ -18,6 +18,7 @@ import paddle.fluid as fluid
 import paddle.fluid.core as core
 from paddle.nn.layer import transformer
 import paddle.nn.functional as F
+import paddle.incubate.nn.functional as incubate_f
 from paddle.nn.layer.norm import LayerNorm
 from paddle.nn.layer.common import Linear, Dropout
 import unittest
@@ -121,7 +122,7 @@ class TestFusedFFNOp(OpTest):
         ln2_scale = paddle.to_tensor(self.norm2.weight, stop_gradient=False)
         ln2_bias = paddle.to_tensor(self.norm2.bias, stop_gradient=False)
         x = paddle.to_tensor(self.src, stop_gradient=False)
-        out = F.fused_feedforward(
+        out = incubate_f.fused_feedforward(
             x,
             linear1_weight,
             linear2_weight,
@@ -215,7 +216,7 @@ class APITestStaticFusedFFN(unittest.TestCase):
             ln2_scale = paddle.static.data(name='ln2_scale', shape=[d_model])
             ln2_bias = paddle.static.data(name='ln2_scale', shape=[d_model])
 
-            fused_out = F.fused_feedforward(
+            fused_out = incubate_f.fused_feedforward(
                 x,
                 linear1_weight,
                 linear2_weight,
@@ -295,8 +296,7 @@ class TestFusedFFNOpError(unittest.TestCase):
                     name='linear1_weight', shape=[1, 10, 10], dtype="float32")
                 linear2_weight = paddle.static.data(
                     name='linear2_weight', shape=[1, 10, 10], dtype="float32")
-                paddle.nn.functional.fused_feedforward(x, linear1_weight,
-                                                       linear2_weight)
+                incubate_f.fused_feedforward(x, linear1_weight, linear2_weight)
 
             self.assertRaises(TypeError, test_dtype)
 
@@ -307,7 +307,7 @@ class TestFusedFFNOpError(unittest.TestCase):
                     name='linear1_weight1', shape=[10, 10], dtype="float32")
                 linear2_weight = paddle.static.data(
                     name='linear2_weight1', shape=[10, 10], dtype="float32")
-                paddle.nn.functional.fused_feedforward(
+                incubate_f.fused_feedforward(
                     x, linear1_weight, linear2_weight, dropout1_rate="a")
 
             self.assertRaises(TypeError, test_dropout_rate_type)
@@ -319,7 +319,7 @@ class TestFusedFFNOpError(unittest.TestCase):
                     name='linear1_weight2', shape=[10, 10], dtype="float32")
                 linear2_weight = paddle.static.data(
                     name='linear2_weight2', shape=[10, 10], dtype="float32")
-                paddle.nn.functional.fused_feedforward(
+                incubate_f.fused_feedforward(
                     x, linear1_weight, linear2_weight, dropout2_rate=-1)
 
             self.assertRaises(ValueError, test_dropout_rate_value)
diff --git a/python/paddle/incubate/nn/functional/__init__.py b/python/paddle/incubate/nn/functional/__init__.py
new file mode 100644
index 00000000000..4d1c3eee025
--- /dev/null
+++ b/python/paddle/incubate/nn/functional/__init__.py
@@ -0,0 +1,18 @@
+# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+from .fused_transformer import fused_multi_head_attention
+from .fused_transformer import fused_feedforward
+
+__all__ = ['fused_multi_head_attention', 'fused_feedforward']
diff --git a/python/paddle/nn/functional/fused_transformer.py b/python/paddle/incubate/nn/functional/fused_transformer.py
similarity index 97%
rename from python/paddle/nn/functional/fused_transformer.py
rename to python/paddle/incubate/nn/functional/fused_transformer.py
index d0792749149..75bf9f10cef 100644
--- a/python/paddle/nn/functional/fused_transformer.py
+++ b/python/paddle/incubate/nn/functional/fused_transformer.py
@@ -12,9 +12,9 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-from ...fluid.layer_helper import LayerHelper
-from ...fluid.framework import in_dygraph_mode
-from ...fluid.data_feeder import check_variable_and_dtype, check_dtype
+from paddle.fluid.layer_helper import LayerHelper
+from paddle.fluid.framework import in_dygraph_mode
+from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype
 from paddle import _C_ops
 
 __all__ = []
@@ -90,7 +90,7 @@ def fused_feedforward(x,
             x = paddle.to_tensor(x_data)
             linear1_weight = paddle.to_tensor(linear1_weight_data)
             linear2_weight = paddle.to_tensor(linear2_weight_data)
-            out = paddle.nn.functional.fused_feedforward(x, linear1_weight, linear2_weight)
+            out = paddle.incubate.nn.functional.fused_feedforward(x, linear1_weight, linear2_weight)
             print(out.numpy().shape)
             # (1, 8, 8)
     """
@@ -244,7 +244,7 @@ def fused_multi_head_attention(x,
 
             # required: gpu
             import paddle
-            import paddle.nn.functional as F
+            import paddle.incubate.nn.functional as F
 
             # input: [batch_size, seq_len, embed_dim]
             x = paddle.rand(shape=(2, 4, 128), dtype="float32")
diff --git a/python/paddle/nn/functional/__init__.py b/python/paddle/nn/functional/__init__.py
index 2c0c4461330..1af53e0826b 100644
--- a/python/paddle/nn/functional/__init__.py
+++ b/python/paddle/nn/functional/__init__.py
@@ -61,7 +61,6 @@ from .common import class_center_sample  # noqa: F401
 from .conv import conv1d  # noqa: F401
 from .conv import conv1d_transpose  # noqa: F401
 from .common import linear  # noqa: F401
-from .fused_transformer import fused_multi_head_attention  # noqa: F401
 from .conv import conv2d  # noqa: F401
 from .conv import conv2d_transpose  # noqa: F401
 from .conv import conv3d  # noqa: F401
@@ -111,7 +110,6 @@ from .vision import grid_sample  # noqa: F401
 from .vision import pixel_shuffle  # noqa: F401
 from .input import one_hot  # noqa: F401
 from .input import embedding  # noqa: F401
-from .fused_transformer import fused_feedforward  # noqa: F401
 from ...fluid.layers import gather_tree  # noqa: F401
 from ...fluid.layers import temporal_shift  # noqa: F401
 
@@ -213,7 +211,5 @@ __all__ = [ #noqa
     'layer_norm',
     'instance_norm',
     'class_center_sample',
-    'fused_feedforward',
-    'fused_multi_head_attention',
     'sparse_attention',
 ]
--
GitLab
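
Usage note (not part of the patch): the sketch below shows the two relocated APIs called from their new paddle.incubate.nn.functional path, adapted from the docstring examples in this diff. Everything beyond the import path is an illustrative assumption, not taken from the PR: the 8x8 feed-forward weights and the attention shapes (embed_dim=128, num_heads=4, head_dim=32) are chosen only to be self-consistent. Both ops are registered via USE_CUDA_ONLY_OP, so a GPU build of Paddle is required.

    # Hypothetical usage sketch after this PR; requires a GPU build of Paddle.
    import paddle
    import paddle.incubate.nn.functional as incubate_f

    # fused_feedforward: x is [batch_size, seq_len, d_model]; the two weights map
    # d_model -> dim_feedforward -> d_model. Shapes here are assumed (8x8) to match
    # the (1, 8, 8) output shown in the docstring example above.
    x = paddle.rand(shape=(1, 8, 8), dtype="float32")
    linear1_weight = paddle.rand(shape=(8, 8), dtype="float32")
    linear2_weight = paddle.rand(shape=(8, 8), dtype="float32")
    out = incubate_f.fused_feedforward(x, linear1_weight, linear2_weight)
    print(out.shape)  # [1, 8, 8]

    # fused_multi_head_attention: positional arguments mirror the call in
    # test_fused_attention_op.py above (x, qkv_weight, linear_weight, pre_layer_norm,
    # ln1_scale, ln1_bias, ln2_scale, ln2_bias, epsilon, qkv_bias, linear_bias, attn_mask).
    x = paddle.rand(shape=(2, 4, 128), dtype="float32")               # [batch_size, seq_len, embed_dim]
    qkv_weight = paddle.rand(shape=(3, 4, 32, 128), dtype="float32")  # [3, num_heads, head_dim, embed_dim] (assumed)
    qkv_bias = paddle.rand(shape=(3, 4, 32), dtype="float32")         # [3, num_heads, head_dim] (assumed)
    linear_weight = paddle.rand(shape=(128, 128), dtype="float32")    # [embed_dim, embed_dim] (assumed)
    linear_bias = paddle.rand(shape=(128,), dtype="float32")          # [embed_dim] (assumed)
    attn_mask = paddle.rand(shape=(2, 4, 4, 4), dtype="float32")      # [batch_size, num_heads, seq_len, seq_len] (assumed)
    attn_out = incubate_f.fused_multi_head_attention(
        x, qkv_weight, linear_weight, False, None, None, None, None,
        1e-5, qkv_bias, linear_bias, attn_mask)
    print(attn_out.shape)  # [2, 4, 128]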