#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import collections
import unittest
import numpy as np
import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.initializer import ConstantInitializer
from paddle.fluid.param_attr import WeightNormParamAttr


class TestWeightNormalization(unittest.TestCase):
    """Compare a weight-normalized FC layer against a NumPy reference.

    The static program applies weight normalization (w = g * v / ||v||) to an
    FC layer's weight, runs forward plus backward, and the test checks the
    fetched g, v and g's gradient against `weight_normalize`'s NumPy values.
    """

    # Number of input rows when the input carries no LoD information.
    batch_size = 3
    # Output width of the FC layer under test.
    hidden_size = 5
    # Single input: name 'x', feature width 10, LoD level 0 (dense tensor).
    data_desc = (['x', [10], 0],)

    @classmethod
    def setUpClass(cls):
        # Build the program graph once for all test methods.
        cls.set_program()

    @classmethod
    def set_program(cls):
        """Construct the static graph: data -> weight-normalized FC -> sum loss.

        Appends backward ops and records the parameter/gradient variable names
        to fetch. The FC weight is constant-initialized to 1.0 so the expected
        values in `weight_normalize` are deterministic.
        """
        data = fluid.layers.data(
            name=cls.data_desc[0][0], shape=cls.data_desc[0][1]
        )
        out = paddle.static.nn.fc(
            x=data,
            size=cls.hidden_size,
            weight_attr=WeightNormParamAttr(
                # dim=None: normalize over the whole weight tensor at once.
                dim=None,
                name='weight_norm_param',
                initializer=ConstantInitializer(1.0),
            ),
            bias_attr=False,
            activation=None,
        )
        loss = paddle.sum(out)
        fluid.backward.append_backward(loss=loss)
        cls.fetch_list = [
            'weight_norm_param_g',
            'weight_norm_param_v',
            'weight_norm_param_g@GRAD',
        ]

    def run_program(self):
        """Execute the program on CPU (and CUDA if available); store fetches.

        Results are collected per place into `self.actual_outputs` as LoD
        tensors (`return_numpy=False`) so LoD inputs round-trip unchanged.
        """
        outputs = []
        places = [core.CPUPlace()]
        if core.is_compiled_with_cuda():
            places.append(core.CUDAPlace(0))
        for place in places:
            self.set_inputs(place)
            exe = fluid.Executor(place)
            exe.run(fluid.default_startup_program())
            output = exe.run(
                fluid.default_main_program(),
                feed=self.inputs,
                fetch_list=self.fetch_list,
                return_numpy=False,
            )
            outputs.append(output)
        self.actual_outputs = outputs

    def set_data(self):
        """Generate random input data (and recursive LoD when requested).

        For each descriptor [name, shape, lod_level], builds `lod_level`
        nested sequence-length lists, then a float32 value tensor whose
        leading dimension is either the total sequence length (LoD case)
        or `batch_size` (dense case).
        """
        self.data = collections.OrderedDict()
        for desc in self.data_desc:
            data_name = desc[0]
            data_shape = desc[1]
            data_lod_level = desc[2]
            data_lod = []
            for i in range(data_lod_level):
                # Level 0 has batch_size sequences; each deeper level has one
                # entry per sequence of the previous level.
                num_seqs = self.batch_size if i == 0 else sum(data_lod[-1])
                lod_level_i = np.random.randint(
                    low=1, high=5, size=num_seqs
                ).tolist()
                data_lod.append(lod_level_i)
            data_value = np.random.random(
                size=[sum(data_lod[-1]) if data_lod else self.batch_size]
                + data_shape
            ).astype('float32')
            self.data[data_name] = (data_value, data_lod)

    def set_inputs(self, place):
        """Wrap the generated numpy arrays into LoD tensors on `place`."""
        self.inputs = {}
        for desc in self.data_desc:
            tensor = fluid.Tensor()
            tensor.set(self.data[desc[0]][0], place)
            if self.data[desc[0]][1]:
                tensor.set_recursive_sequence_lengths(self.data[desc[0]][1])
            self.inputs[desc[0]] = tensor

    def weight_normalize(self):
        """NumPy reference for the weight-normalized FC forward/backward.

        With constant-1 init: v = ones, g = ||v||, w = g * v / ||v|| (= v).
        The loss is sum(out), so d(loss)/d(out) = ones, and g's gradient is
        sum over all entries of (x^T @ ones) * (v / ||v||).

        Returns:
            (g, v, g_grad) matching the fetch_list order.
        """
        v = np.ones(
            (self.data[self.data_desc[0][0]][0].shape[-1], self.hidden_size)
        )
        g = np.linalg.norm(v, axis=None, keepdims=True)
        w = g * v / np.linalg.norm(v, axis=None, keepdims=True)
        x = self.data[self.data_desc[0][0]][0]
        out = np.dot(x, w)
        g_grad = (
            np.dot(x.T, np.ones_like(out))
            * (v / np.linalg.norm(v, axis=None, keepdims=True))
        ).sum(axis=None, keepdims=True)
        return g, v, g_grad

    def test_weight_normalization(self):
        self.set_data()
        self.run_program()
        expect_output = self.weight_normalize()
        # Plain loop (not a side-effect comprehension): compare every fetched
        # tensor against its NumPy reference, for every execution place.
        for actual_output in self.actual_outputs:
            for expect, actual in zip(expect_output, actual_output):
                np.testing.assert_allclose(
                    np.array(actual), expect, rtol=1e-05, atol=0.001
                )


if __name__ == '__main__':
    unittest.main()