#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

import numpy as np

import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core


class TestMultiheadAttention(unittest.TestCase):
    def gen_random_input(self):
        """Generate random input data."""
        # batch_size, max_sequence_length, hidden dimension
        self.input_shape = (3, 13, 16)
        self.queries = np.random.random(size=self.input_shape).astype("float32")
        self.keys = np.random.random(size=self.input_shape).astype("float32")

    def set_program(self):
        """Build the test program."""
        queries = fluid.layers.data(
            name="queries",
            shape=self.input_shape,
            dtype="float32",
            append_batch_size=False,
        )
        queries.stop_gradient = False
        keys = fluid.layers.data(
            name="keys",
            shape=self.input_shape,
            dtype="float32",
            append_batch_size=False,
        )
        keys.stop_gradient = False

        contexts = fluid.nets.scaled_dot_product_attention(
            queries=queries,
            keys=keys,
            values=keys,
            num_heads=8,
            dropout_rate=0.0,
        )
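        # Reduce the attention output to a scalar so a backward pass can be appended.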
        out = paddle.sum(contexts, axis=None)
        fluid.backward.append_backward(loss=out)

        self.fetch_list = [contexts]

    def run_program(self):
        """Run the test program."""
        places = [core.CPUPlace()]
        if core.is_compiled_with_cuda():
            places.append(core.CUDAPlace(0))

        for place in places:
            self.set_inputs(place)
            exe = fluid.Executor(place)

            exe.run(fluid.default_startup_program())
            output = exe.run(
                fluid.default_main_program(),
                feed=self.inputs,
                fetch_list=self.fetch_list,
                return_numpy=True,
            )
            self.op_output = output

    def set_inputs(self, place):
        """Set the randomly generated data to the test program."""
        self.inputs = {}
        queries = fluid.Tensor()
        queries.set(self.queries, place)

        keys = fluid.Tensor()
        keys.set(self.keys, place)

        self.inputs["keys"] = keys
        self.inputs["queries"] = queries

    def test_multihead_attention(self):
        self.gen_random_input()

        self.set_program()
        self.run_program()

        # FIXME(caoying): add more meaningful unit tests.


if __name__ == '__main__':
    unittest.main()