#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import collections
import unittest

import numpy as np

import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.initializer import ConstantInitializer
from paddle.fluid.param_attr import WeightNormParamAttr


class TestWeightNormalization(unittest.TestCase):
    """Compare an fc layer using ``WeightNormParamAttr`` with a NumPy reference.

    The network is a single bias-free ``fc`` layer whose weight is declared
    through ``WeightNormParamAttr(dim=None)`` with a constant initializer of
    1.0. The test fetches the ``g`` and ``v`` parameters and the gradient of
    ``g`` and checks them against values computed by ``weight_normalize``.
    """

    batch_size = 3
    hidden_size = 5
    # One entry per network input: [name, per-sample shape, lod level].
    data_desc = (['x', [10], 0],)

    @classmethod
    def setUpClass(cls):
        """Build the program once for the whole test case."""
        cls.set_program()

    @classmethod
    def set_program(cls):
        """Append the fc layer, loss and backward ops to the default program.

        Also records in ``cls.fetch_list`` the variable names that
        ``run_program`` fetches: ``g``, ``v`` and the gradient of ``g``.
        """
        data = fluid.layers.data(
            name=cls.data_desc[0][0], shape=cls.data_desc[0][1]
        )
        out = fluid.layers.fc(
            input=data,
            size=cls.hidden_size,
            param_attr=WeightNormParamAttr(
                dim=None,
                name='weight_norm_param',
                initializer=ConstantInitializer(1.0),
            ),
            bias_attr=False,
            act=None,
        )
        loss = fluid.layers.reduce_sum(out)
        fluid.backward.append_backward(loss=loss)
        # Order matters: it must match the tuple returned by
        # ``weight_normalize`` because results are compared pairwise.
        cls.fetch_list = [
            'weight_norm_param_g',
            'weight_norm_param_v',
            'weight_norm_param_g@GRAD',
        ]

    def run_program(self):
        """Run the default programs on every available place.

        CPU is always used; ``CUDAPlace(0)`` is added when paddle is compiled
        with CUDA. The fetched results (one list per place) are stored in
        ``self.actual_outputs``.
        """
        outputs = []
        places = [core.CPUPlace()]
        if core.is_compiled_with_cuda():
            places.append(core.CUDAPlace(0))
        for place in places:
            self.set_inputs(place)
            exe = fluid.Executor(place)
            # Re-running startup resets the parameters for each place.
            exe.run(fluid.default_startup_program())
            output = exe.run(
                fluid.default_main_program(),
                feed=self.inputs,
                fetch_list=self.fetch_list,
                return_numpy=False,
            )
            outputs.append(output)
        self.actual_outputs = outputs

    def set_data(self):
        """Generate random float32 inputs (and lod) for every ``data_desc``.

        Fills ``self.data`` with ``name -> (value, lod)`` where ``lod`` is a
        list of sequence-length lists, one per lod level (empty when the lod
        level is 0). The leading dimension of ``value`` is ``batch_size``, or
        the total length of the innermost lod level when lod is present.
        """
        self.data = collections.OrderedDict()
        for desc in self.data_desc:
            data_name, data_shape, data_lod_level = desc[:3]
            data_lod = []
            for _ in range(data_lod_level):
                # The outermost level holds ``batch_size`` sequences; every
                # deeper level has one length per element of the level above.
                num_seqs = sum(data_lod[-1]) if data_lod else self.batch_size
                level_lengths = np.random.randint(
                    low=1, high=5, size=num_seqs
                ).tolist()
                data_lod.append(level_lengths)
            num_rows = sum(data_lod[-1]) if data_lod else self.batch_size
            data_value = np.random.random(
                size=[num_rows] + data_shape
            ).astype('float32')
            self.data[data_name] = (data_value, data_lod)

    def set_inputs(self, place):
        """Wrap the generated data into tensors on ``place``.

        Stores the feed dict in ``self.inputs``; sequence lengths are attached
        only for inputs whose lod is non-empty.
        """
        self.inputs = {}
        for desc in self.data_desc:
            name = desc[0]
            value, lod = self.data[name]
            tensor = fluid.Tensor()
            tensor.set(value, place)
            if lod:
                tensor.set_recursive_sequence_lengths(lod)
            self.inputs[name] = tensor

    def weight_normalize(self):
        """Compute the expected ``g``, ``v`` and ``g`` gradient with NumPy.

        ``v`` is all ones (matching the constant initializer used in
        ``set_program``) and the norm is taken over the whole tensor
        (``axis=None``). With ``w = g * v / ||v||`` and
        ``loss = sum(x . w)``, the gradient w.r.t. ``g`` is
        ``sum((x^T . 1) * v / ||v||)``.

        Returns:
            tuple: ``(g, v, g_grad)`` in the same order as ``fetch_list``.
        """
        x = self.data[self.data_desc[0][0]][0]
        v = np.ones((x.shape[-1], self.hidden_size))
        # The same norm is needed for g, w and the gradient; compute it once.
        v_norm = np.linalg.norm(v, axis=None, keepdims=True)
        g = v_norm
        w = g * v / v_norm
        out = np.dot(x, w)
        g_grad = (np.dot(x.T, np.ones_like(out)) * (v / v_norm)).sum(
            axis=None, keepdims=True
        )
        return g, v, g_grad

    def test_weight_normalization(self):
        """Fetched values on every place must match the NumPy reference."""
        self.set_data()
        self.run_program()
        expect_output = self.weight_normalize()
        for actual_output in self.actual_outputs:
            # zip() would silently drop unmatched entries, so check the
            # counts agree before comparing pairwise.
            self.assertEqual(len(actual_output), len(expect_output))
            for expect, actual in zip(expect_output, actual_output):
                np.testing.assert_allclose(
                    np.array(actual), expect, rtol=1e-05, atol=0.001
                )


# Run the test case through unittest when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()