#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import contextlib
import random
import unittest
from functools import partial

import numpy as np

import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core


def bow_net(
    data,
    label,
    dict_dim,
    is_sparse=False,
    emb_dim=8,
    hid_dim=8,
    hid_dim2=6,
    class_dim=2,
):
    """
    BOW net
    This model is from https://github.com/PaddlePaddle/models:
    fluid/PaddleNLP/text_classification/nets.py
    """
    emb = fluid.layers.embedding(
        input=data, is_sparse=is_sparse, size=[dict_dim, emb_dim]
    )
    bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')
    bow_tanh = paddle.tanh(bow)
    fc_1 = paddle.static.nn.fc(x=bow_tanh, size=hid_dim, activation="tanh")
    fc_2 = paddle.static.nn.fc(x=fc_1, size=hid_dim2, activation="tanh")
    prediction = paddle.static.nn.fc(
        x=[fc_2], size=class_dim, activation="softmax"
    )
    cost = paddle.nn.functional.cross_entropy(
        input=prediction, label=label, reduction='none', use_softmax=False
    )
    avg_cost = paddle.mean(x=cost)

    return avg_cost


class TestRegularizer(unittest.TestCase):
    def setUp(self):
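        # Vocabulary size for the embedding table, and two tiny batches each
        # holding one sample of 10 random word ids with label 0.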
        self.word_len = 1500
        self.train_data = [
            [(random.sample(range(1000), 10), [0])] for _ in range(2)
        ]

    def get_places(self):
        places = [core.CPUPlace()]
        if core.is_compiled_with_cuda():
            places.append(core.CUDAPlace(0))
        return places

    @contextlib.contextmanager
    def scope_prog_guard(self, main_prog, startup_prog):
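        # Run each model build in a fresh scope, a fresh unique-name namespace
        # and its own programs so consecutive builds do not interfere.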
        scope = fluid.core.Scope()
        with fluid.unique_name.guard():
            with fluid.scope_guard(scope):
                with fluid.program_guard(main_prog, startup_prog):
                    yield

    def run_program(self, place, feed_list):
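        # Train on every batch and record the sum of absolute parameter values
        # after each step, so two training setups can be compared numerically.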
        exe = fluid.Executor(place)
        feeder = fluid.DataFeeder(feed_list=feed_list, place=place)
        exe.run(fluid.default_startup_program())

        main_prog = fluid.default_main_program()
        param_list = [var.name for var in main_prog.block(0).all_parameters()]

        param_sum = []
        for data in self.train_data:
            out = exe.run(
                main_prog, feed=feeder.feed(data), fetch_list=param_list
            )
            p_sum = 0
            for v in out:
                p_sum += np.sum(np.abs(v))
            param_sum.append(p_sum)
        return param_sum

    def check_l2decay_regularizer(self, place, model):
        paddle.seed(1)
        paddle.framework.random._manual_program_seed(1)
        main_prog = fluid.framework.Program()
        startup_prog = fluid.framework.Program()
        with self.scope_prog_guard(
            main_prog=main_prog, startup_prog=startup_prog
        ):
            data = paddle.static.data(
                name="words", shape=[-1, 1], dtype="int64", lod_level=1
            )
            label = paddle.static.data(
                name="label", shape=[-1, 1], dtype="int64"
            )

            avg_cost = model(data, label, self.word_len)

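            # L2 weight decay is applied by the framework via the optimizer's
            # `regularization` argument.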
            optimizer = fluid.optimizer.Adagrad(
                learning_rate=0.1,
                regularization=paddle.regularizer.L2Decay(1.0),
            )
            optimizer.minimize(avg_cost)
            param_sum = self.run_program(place, [data, label])
        return param_sum

    def check_l2decay(self, place, model):
        paddle.seed(1)
        paddle.framework.random._manual_program_seed(1)
        main_prog = fluid.framework.Program()
        startup_prog = fluid.framework.Program()

        with self.scope_prog_guard(
            main_prog=main_prog, startup_prog=startup_prog
        ):
            data = paddle.static.data(
                name="words", shape=[-1, 1], dtype="int64", lod_level=1
            )
            label = paddle.static.data(
                name="label", shape=[-1, 1], dtype="int64"
            )

            avg_cost_l2 = model(data, label, self.word_len)

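            # Reproduce L2 decay by hand: add 0.5 * sum(param ** 2) over all
            # parameters to the loss instead of passing a regularizer.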
            param_list = fluid.default_main_program().block(0).all_parameters()
            para_sum = []
            for para in param_list:
                para_mul = paddle.square(x=para)
                para_sum.append(paddle.sum(para_mul))
            avg_cost_l2 += fluid.layers.sums(para_sum) * 0.5

            optimizer = fluid.optimizer.Adagrad(learning_rate=0.1)
            optimizer.minimize(avg_cost_l2)
            param_sum = self.run_program(place, [data, label])
        return param_sum

    def test_l2(self):
        paddle.enable_static()
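        # For both sparse and dense embeddings, training with the framework
        # L2Decay regularizer should match training with the manual L2 penalty
        # added to the loss in check_l2decay.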
        for place in self.get_places():
            dense_sparse_p_sum = []
            for sparse in [True, False]:
                model = partial(bow_net, is_sparse=sparse)
                framework_l2 = self.check_l2decay_regularizer(place, model)
                l2 = self.check_l2decay(place, model)
                assert len(l2) == len(framework_l2)
                for i in range(len(l2)):
                    assert np.isclose(a=framework_l2[i], b=l2[i], rtol=5e-5)
                dense_sparse_p_sum.append(framework_l2)

            assert len(dense_sparse_p_sum[0]) == len(dense_sparse_p_sum[1])
            for i in range(len(dense_sparse_p_sum[0])):
                assert np.isclose(
                    a=dense_sparse_p_sum[0][i],
                    b=dense_sparse_p_sum[1][i],
                    rtol=5e-5,
                )

    def test_repeated_regularization(self):
        paddle.enable_static()
        l1 = paddle.regularizer.L1Decay(0.1)
        l2 = paddle.regularizer.L2Decay(0.01)
        fc_param_attr = paddle.ParamAttr(
            regularizer=paddle.regularizer.L1Decay()
        )
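        # Static graph: the fc weight carries an L1 regularizer via ParamAttr
        # while the optimizer is given L2; building and minimizing the network
        # with both present must succeed.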
        with fluid.program_guard(fluid.Program(), fluid.Program()):
            x = paddle.uniform([2, 2, 3])
            out = paddle.static.nn.fc(x, 5, weight_attr=fc_param_attr)
            loss = paddle.sum(out)
            sgd = fluid.optimizer.SGD(learning_rate=0.1, regularization=l2)
            sgd.minimize(loss)
        with fluid.dygraph.guard():
            input = fluid.dygraph.to_variable(
                np.random.randn(3, 2).astype('float32')
            )
            paddle.seed(1)
            paddle.framework.random._manual_program_seed(1)

            linear1 = paddle.nn.Linear(
                2, 2, weight_attr=fc_param_attr, bias_attr=fc_param_attr
            )
            linear2 = paddle.nn.Linear(
                2, 2, weight_attr=fc_param_attr, bias_attr=fc_param_attr
            )

            loss1 = linear1(input)
            loss1.backward()
            # set the l2 regularizer in the optimizer, but l1 in fluid.ParamAttr

            fluid.optimizer.SGD(
                parameter_list=linear1.parameters(),
                learning_rate=1e-2,
                regularization=l2,
            ).minimize(loss1)
            # only set l1 in fluid.ParamAttr
            loss2 = linear2(input)
            loss2.backward()
            fluid.optimizer.SGD(
                parameter_list=linear2.parameters(), learning_rate=1e-2
            ).minimize(loss2)
            # in both cases only the l1 from fluid.ParamAttr should be applied,
            # so the resulting parameters should be identical
            np.testing.assert_allclose(
                linear1.weight.numpy(),
                linear2.weight.numpy(),
                rtol=1e-05,
                err_msg='weight should use the regularization in fluid.ParamAttr!',
            )
            np.testing.assert_allclose(
                linear1.bias.numpy(),
                linear2.bias.numpy(),
                rtol=1e-05,
                err_msg='bias should use the regularization in fluid.ParamAttr!',
            )


if __name__ == '__main__':
    unittest.main()