diff --git a/paddle/fluid/operators/fused/CMakeLists.txt b/paddle/fluid/operators/fused/CMakeLists.txt index 0e2dae75071e7fa1a0b1287a9c0140aa73880d35..eec925b2c057b7ffeec18a53fefde019c825e555 100644 --- a/paddle/fluid/operators/fused/CMakeLists.txt +++ b/paddle/fluid/operators/fused/CMakeLists.txt @@ -81,10 +81,8 @@ if (WITH_GPU OR WITH_ROCM) nv_test(test_fused_dropout_act_bias SRCS fused_dropout_act_bias_test.cu DEPS tensor op_registry dropout_op layer_norm_op device_context generator memory) nv_test(test_fused_layernorm_residual_dropout_bias SRCS fused_layernorm_residual_dropout_bias_test.cu DEPS tensor op_registry dropout_op layer_norm_op device_context generator memory) - op_library(fused_feedforward_op) file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(fused_feedforward);\n") - # fused_attention_op op_library(fused_attention_op) file(APPEND ${pybind_file} "USE_CUDA_ONLY_OP(fused_attention);\n") diff --git a/python/paddle/fluid/tests/unittests/CMakeLists.txt b/python/paddle/fluid/tests/unittests/CMakeLists.txt index 5b1c02e71abce1a4b14cbaa424e127a054e95a21..d8212216d3f182757768dfc2fb742f5fcdc718a6 100644 --- a/python/paddle/fluid/tests/unittests/CMakeLists.txt +++ b/python/paddle/fluid/tests/unittests/CMakeLists.txt @@ -98,7 +98,6 @@ foreach(TEST_OP ${MIXED_DIST_TEST_OPS}) endforeach() if(NOT WITH_GPU) - LIST(REMOVE_ITEM TEST_OPS test_fused_feedforward_op) LIST(REMOVE_ITEM TEST_OPS test_fused_attention_op) endif() diff --git a/python/paddle/fluid/tests/unittests/test_fused_attention_op.py b/python/paddle/fluid/tests/unittests/test_fused_attention_op.py index a5578d71c5cd06e62003aef13db76bd252e64a52..1e0d83f8ac7759407d8218d418d72b5f75cb9e9b 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_attention_op.py +++ b/python/paddle/fluid/tests/unittests/test_fused_attention_op.py @@ -18,6 +18,7 @@ import paddle import paddle.nn as nn import paddle.fluid.core as core import paddle.nn.functional as F +import paddle.incubate.nn.functional as incubate_f from paddle.nn.layer.norm import LayerNorm from paddle.nn.layer.common import Linear, Dropout from paddle.nn.layer.transformer import _convert_attention_mask @@ -190,7 +191,7 @@ class TestFusedAttentionOp(OpTest): if attn_mask is not None: attn_mask = _convert_attention_mask(attn_mask, x.dtype) - final_out = F.fused_multi_head_attention( + final_out = incubate_f.fused_multi_head_attention( x, qkv_weight_tensor, out_linear_weight, self.pre_layer_norm, ln1_scale, ln1_bias, ln2_scale, ln2_bias, epsilon, qkv_bias_tensor, out_linear_bias, attn_mask, self.dropout_prob, diff --git a/python/paddle/fluid/tests/unittests/test_fused_feedforward_op.py b/python/paddle/fluid/tests/unittests/test_fused_feedforward_op.py index d926512b592d74cf25a23b0ef37f46e70998a900..5ea43d2edf0e668baf7a671ab6bb856eec2f56d6 100644 --- a/python/paddle/fluid/tests/unittests/test_fused_feedforward_op.py +++ b/python/paddle/fluid/tests/unittests/test_fused_feedforward_op.py @@ -18,6 +18,7 @@ import paddle.fluid as fluid import paddle.fluid.core as core from paddle.nn.layer import transformer import paddle.nn.functional as F +import paddle.incubate.nn.functional as incubate_f from paddle.nn.layer.norm import LayerNorm from paddle.nn.layer.common import Linear, Dropout import unittest @@ -121,7 +122,7 @@ class TestFusedFFNOp(OpTest): ln2_scale = paddle.to_tensor(self.norm2.weight, stop_gradient=False) ln2_bias = paddle.to_tensor(self.norm2.bias, stop_gradient=False) x = paddle.to_tensor(self.src, stop_gradient=False) - out = F.fused_feedforward( + out = incubate_f.fused_feedforward( x, linear1_weight, linear2_weight, @@ -215,7 +216,7 @@ class APITestStaticFusedFFN(unittest.TestCase): ln2_scale = paddle.static.data(name='ln2_scale', shape=[d_model]) ln2_bias = paddle.static.data(name='ln2_scale', shape=[d_model]) - fused_out = F.fused_feedforward( + fused_out = incubate_f.fused_feedforward( x, linear1_weight, linear2_weight, @@ -295,8 +296,7 @@ class TestFusedFFNOpError(unittest.TestCase): name='linear1_weight', shape=[1, 10, 10], dtype="float32") linear2_weight = paddle.static.data( name='linear2_weight', shape=[1, 10, 10], dtype="float32") - paddle.nn.functional.fused_feedforward(x, linear1_weight, - linear2_weight) + incubate_f.fused_feedforward(x, linear1_weight, linear2_weight) self.assertRaises(TypeError, test_dtype) @@ -307,7 +307,7 @@ class TestFusedFFNOpError(unittest.TestCase): name='linear1_weight1', shape=[10, 10], dtype="float32") linear2_weight = paddle.static.data( name='linear2_weight1', shape=[10, 10], dtype="float32") - paddle.nn.functional.fused_feedforward( + incubate_f.fused_feedforward( x, linear1_weight, linear2_weight, dropout1_rate="a") self.assertRaises(TypeError, test_dropout_rate_type) @@ -319,7 +319,7 @@ class TestFusedFFNOpError(unittest.TestCase): name='linear1_weight2', shape=[10, 10], dtype="float32") linear2_weight = paddle.static.data( name='linear2_weight2', shape=[10, 10], dtype="float32") - paddle.nn.functional.fused_feedforward( + incubate_f.fused_feedforward( x, linear1_weight, linear2_weight, dropout2_rate=-1) self.assertRaises(ValueError, test_dropout_rate_value) diff --git a/python/paddle/incubate/nn/functional/__init__.py b/python/paddle/incubate/nn/functional/__init__.py new file mode 100644 index 0000000000000000000000000000000000000000..4d1c3eee025b04a7b57b0c61a1dc83946b7a6353 --- /dev/null +++ b/python/paddle/incubate/nn/functional/__init__.py @@ -0,0 +1,18 @@ +# Copyright (c) 2021 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .fused_transformer import fused_multi_head_attention +from .fused_transformer import fused_feedforward + +__all__ = ['fused_multi_head_attention', 'fused_feedforward'] diff --git a/python/paddle/nn/functional/fused_transformer.py b/python/paddle/incubate/nn/functional/fused_transformer.py similarity index 97% rename from python/paddle/nn/functional/fused_transformer.py rename to python/paddle/incubate/nn/functional/fused_transformer.py index d07927491491b87c194cac946acd3ec336971e39..75bf9f10cef314f4ebe640a35ebbf3bc1a2bd2da 100644 --- a/python/paddle/nn/functional/fused_transformer.py +++ b/python/paddle/incubate/nn/functional/fused_transformer.py @@ -12,9 +12,9 @@ # See the License for the specific language governing permissions and # limitations under the License. -from ...fluid.layer_helper import LayerHelper -from ...fluid.framework import in_dygraph_mode -from ...fluid.data_feeder import check_variable_and_dtype, check_dtype +from paddle.fluid.layer_helper import LayerHelper +from paddle.fluid.framework import in_dygraph_mode +from paddle.fluid.data_feeder import check_variable_and_dtype, check_dtype from paddle import _C_ops __all__ = [] @@ -90,7 +90,7 @@ def fused_feedforward(x, x = paddle.to_tensor(x_data) linear1_weight = paddle.to_tensor(linear1_weight_data) linear2_weight = paddle.to_tensor(linear2_weight_data) - out = paddle.nn.functional.fused_feedforward(x, linear1_weight, linear2_weight) + out = paddle.incubate.nn.functional.fused_feedforward(x, linear1_weight, linear2_weight) print(out.numpy().shape) # (1, 8, 8) """ @@ -244,7 +244,7 @@ def fused_multi_head_attention(x, # required: gpu import paddle - import paddle.nn.functional as F + import paddle.incubate.nn.functional as F # input: [batch_size, seq_len, embed_dim] x = paddle.rand(shape=(2, 4, 128), dtype="float32") diff --git a/python/paddle/nn/functional/__init__.py b/python/paddle/nn/functional/__init__.py index 2c0c4461330cd2dd1af546e79881a4ebc978e963..1af53e0826be878d8734fa602160881225eff13d 100644 --- a/python/paddle/nn/functional/__init__.py +++ b/python/paddle/nn/functional/__init__.py @@ -61,7 +61,6 @@ from .common import class_center_sample # noqa: F401 from .conv import conv1d # noqa: F401 from .conv import conv1d_transpose # noqa: F401 from .common import linear # noqa: F401 -from .fused_transformer import fused_multi_head_attention # noqa: F401 from .conv import conv2d # noqa: F401 from .conv import conv2d_transpose # noqa: F401 from .conv import conv3d # noqa: F401 @@ -111,7 +110,6 @@ from .vision import grid_sample # noqa: F401 from .vision import pixel_shuffle # noqa: F401 from .input import one_hot # noqa: F401 from .input import embedding # noqa: F401 -from .fused_transformer import fused_feedforward # noqa: F401 from ...fluid.layers import gather_tree # noqa: F401 from ...fluid.layers import temporal_shift # noqa: F401 @@ -213,7 +211,5 @@ __all__ = [ #noqa 'layer_norm', 'instance_norm', 'class_center_sample', - 'fused_feedforward', - 'fused_multi_head_attention', 'sparse_attention', ]