# test_regularizer_api.py
#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import contextlib
import random
import unittest
from functools import partial

import numpy as np

import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core


def bow_net(
    data,
    label,
    dict_dim,
    is_sparse=False,
    emb_dim=8,
    hid_dim=8,
    hid_dim2=6,
    class_dim=2,
):
    """
    BOW net
    This model is from https://github.com/PaddlePaddle/models:
    fluid/PaddleNLP/text_classification/nets.py
    """
    emb = fluid.layers.embedding(
        input=data, is_sparse=is_sparse, size=[dict_dim, emb_dim]
    )
    bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')
    bow_tanh = paddle.tanh(bow)
    fc_1 = paddle.static.nn.fc(x=bow_tanh, size=hid_dim, activation="tanh")
    fc_2 = paddle.static.nn.fc(x=fc_1, size=hid_dim2, activation="tanh")
    prediction = paddle.static.nn.fc(
        x=[fc_2], size=class_dim, activation="softmax"
    )
    cost = paddle.nn.functional.cross_entropy(
        input=prediction, label=label, reduction='none', use_softmax=False
    )
    avg_cost = paddle.mean(x=cost)

    return avg_cost


class TestRegularizer(unittest.TestCase):
    def setUp(self):
        self.word_len = 1500
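        # two one-sample batches: ten random word ids paired with label 0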
        self.train_data = [
            [(random.sample(range(1000), 10), [0])] for _ in range(2)
        ]

    def get_places(self):
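        # always test on CPU; add GPU when paddle is compiled with CUDA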
        places = [core.CPUPlace()]
        if core.is_compiled_with_cuda():
            places.append(core.CUDAPlace(0))
        return places

    @contextlib.contextmanager
    def scope_prog_guard(self, main_prog, startup_prog):
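        # run each test program in its own scope and unique-name context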
        scope = fluid.core.Scope()
        with fluid.unique_name.guard():
            with fluid.scope_guard(scope):
                with fluid.program_guard(main_prog, startup_prog):
                    yield

    def run_program(self, place, feed_list):
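        # train on self.train_data and record the sum of |param| after each step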
        exe = fluid.Executor(place)
        feeder = fluid.DataFeeder(feed_list=feed_list, place=place)
        exe.run(fluid.default_startup_program())

        main_prog = fluid.default_main_program()
        param_list = [var.name for var in main_prog.block(0).all_parameters()]

        param_sum = []
        for data in self.train_data:
            out = exe.run(
                main_prog, feed=feeder.feed(data), fetch_list=param_list
            )
            p_sum = 0
            for v in out:
                p_sum += np.sum(np.abs(v))
            param_sum.append(p_sum)
        return param_sum

    def check_l2decay_regularizer(self, place, model):
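        # L2 decay applied by the framework via the optimizer's regularization argument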
        paddle.seed(1)
        paddle.framework.random._manual_program_seed(1)
        main_prog = fluid.framework.Program()
        startup_prog = fluid.framework.Program()
        with self.scope_prog_guard(
            main_prog=main_prog, startup_prog=startup_prog
        ):
            data = fluid.layers.data(
                name="words", shape=[1], dtype="int64", lod_level=1
            )
            label = fluid.layers.data(name="label", shape=[1], dtype="int64")

            avg_cost = model(data, label, self.word_len)

            optimizer = fluid.optimizer.Adagrad(
                learning_rate=0.1,
                regularization=paddle.regularizer.L2Decay(1.0),
            )
            optimizer.minimize(avg_cost)
            param_sum = self.run_program(place, [data, label])
        return param_sum

    def check_l2decay(self, place, model):
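        # reference run: add the L2 penalty to the loss by hand, no regularizer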
        paddle.seed(1)
        paddle.framework.random._manual_program_seed(1)
        main_prog = fluid.framework.Program()
        startup_prog = fluid.framework.Program()

        with self.scope_prog_guard(
            main_prog=main_prog, startup_prog=startup_prog
        ):
            data = fluid.layers.data(
                name="words", shape=[1], dtype="int64", lod_level=1
            )
            label = fluid.layers.data(name="label", shape=[1], dtype="int64")

            avg_cost_l2 = model(data, label, self.word_len)

            param_list = fluid.default_main_program().block(0).all_parameters()
            para_sum = []
            for para in param_list:
                para_mul = paddle.square(x=para)
                para_sum.append(paddle.sum(para_mul))
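            # 0.5 * sum of squared parameters is the loss-level equivalent of L2Decay(1.0)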
            avg_cost_l2 += fluid.layers.sums(para_sum) * 0.5

            optimizer = fluid.optimizer.Adagrad(learning_rate=0.1)
            optimizer.minimize(avg_cost_l2)
            param_sum = self.run_program(place, [data, label])
        return param_sum

    def test_l2(self):
        paddle.enable_static()
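        # the optimizer-applied L2Decay and the manual penalty must yield the
        # same trained parameters, for both sparse and dense embeddings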
        for place in self.get_places():
            dense_sparse_p_sum = []
            for sparse in [True, False]:
                model = partial(bow_net, is_sparse=sparse)
                framework_l2 = self.check_l2decay_regularizer(place, model)
                l2 = self.check_l2decay(place, model)
                assert len(l2) == len(framework_l2)
                for i in range(len(l2)):
                    assert np.isclose(a=framework_l2[i], b=l2[i], rtol=5e-5)
                dense_sparse_p_sum.append(framework_l2)

            assert len(dense_sparse_p_sum[0]) == len(dense_sparse_p_sum[1])
            for i in range(len(dense_sparse_p_sum[0])):
                assert np.isclose(
                    a=dense_sparse_p_sum[0][i],
                    b=dense_sparse_p_sum[1][i],
                    rtol=5e-5,
                )

    def test_repeated_regularization(self):
        paddle.enable_static()
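        # a regularizer set in ParamAttr should override the one passed to
        # the optimizer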
        l1 = paddle.regularizer.L1Decay(0.1)
        l2 = paddle.regularizer.L2Decay(0.01)
        fc_param_attr = paddle.ParamAttr(
            regularizer=paddle.regularizer.L1Decay()
        )
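        # static graph: exercise minimize() with a regularizer set in both
        # ParamAttr and the optimizer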
        with fluid.program_guard(fluid.Program(), fluid.Program()):
            x = paddle.uniform([2, 2, 3])
            out = paddle.static.nn.fc(x, 5, weight_attr=fc_param_attr)
            loss = paddle.sum(out)
            sgd = fluid.optimizer.SGD(learning_rate=0.1, regularization=l2)
            sgd.minimize(loss)
        with fluid.dygraph.guard():
            input = fluid.dygraph.to_variable(
                np.random.randn(3, 2).astype('float32')
            )
            paddle.seed(1)
            paddle.framework.random._manual_program_seed(1)

            linear1 = paddle.nn.Linear(
                2, 2, weight_attr=fc_param_attr, bias_attr=fc_param_attr
            )
            linear2 = paddle.nn.Linear(
                2, 2, weight_attr=fc_param_attr, bias_attr=fc_param_attr
            )

            loss1 = linear1(input)
            loss1.backward()
            # set the L2 regularizer in the optimizer, but L1 in fluid.ParamAttr

            fluid.optimizer.SGD(
                parameter_list=linear1.parameters(),
                learning_rate=1e-2,
                regularization=l2,
            ).minimize(loss1)
            # only set L1 in fluid.ParamAttr, nothing in the optimizer
            loss2 = linear2(input)
            loss2.backward()
            fluid.optimizer.SGD(
                parameter_list=linear2.parameters(), learning_rate=1e-2
            ).minimize(loss2)
            # both should end up regularized by L1 only, so they must match
            np.testing.assert_allclose(
                linear1.weight.numpy(),
                linear2.weight.numpy(),
                rtol=1e-05,
                err_msg='weight should use the regularization in fluid.ParamAttr!',
            )
            np.testing.assert_allclose(
                linear1.bias.numpy(),
                linear2.bias.numpy(),
                rtol=1e-05,
                err_msg='bias should use the regularization in fluid.ParamAttr!',
            )


if __name__ == '__main__':
    unittest.main()