test_weight_decay.py 6.4 KB
Newer Older
C
chengduo 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17
#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import contextlib
import unittest
from functools import partial
18

C
chengduo 已提交
19 20
import numpy as np

21
import paddle
C
chengduo 已提交
22
import paddle.fluid as fluid
23
import paddle.fluid.core as core
24
from paddle.fluid import compiler
C
chengduo 已提交
25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42


def get_places():
    """Return the list of devices the test should run on.

    The list holds ``core.CUDAPlace(0)`` when Paddle reports that it was
    compiled with CUDA, and is empty otherwise.
    """
    return [core.CUDAPlace(0)] if core.is_compiled_with_cuda() else []


@contextlib.contextmanager
def prog_scope_guard(main_prog, startup_prog):
    """Context manager that runs its body inside a fresh scope and programs.

    A new ``fluid.core.Scope`` is created, then the unique-name guard, the
    scope guard for that new scope, and the program guard for
    ``main_prog`` / ``startup_prog`` are entered in that order before
    control is handed to the ``with`` body.
    """
    isolated_scope = fluid.core.Scope()
    # A multi-item ``with`` enters left-to-right and exits right-to-left,
    # exactly like the equivalent nested ``with`` statements.
    with fluid.unique_name.guard(), \
            fluid.scope_guard(isolated_scope), \
            fluid.program_guard(main_prog, startup_prog):
        yield


43 44 45 46 47 48 49 50 51 52
def bow_net(
    data,
    label,
    dict_dim,
    is_sparse=False,
    emb_dim=128,
    hid_dim=128,
    hid_dim2=96,
    class_dim=2,
):
    """
    BOW net
    This model is from https://github.com/PaddlePaddle/models:
    fluid/PaddleNLP/text_classification/nets.py

    Layers, in order: embedding lookup of ``data`` -> sum sequence pooling
    -> tanh -> two tanh-activated fc layers (``hid_dim``, ``hid_dim2``)
    -> softmax-activated fc layer (``class_dim``) -> cross entropy against
    ``label`` -> mean.

    Args:
        data: input variable fed to the embedding layer.
        label: label variable passed to the cross-entropy loss.
        dict_dim: first dimension of the embedding table.
        is_sparse: forwarded to the embedding layer's ``is_sparse``.
        emb_dim: second dimension of the embedding table.
        hid_dim: output size of the first fc layer.
        hid_dim2: output size of the second fc layer.
        class_dim: output size of the final (softmax) fc layer.

    Returns:
        The mean of the per-sample cross-entropy cost.
    """
    emb = fluid.layers.embedding(
        input=data, is_sparse=is_sparse, size=[dict_dim, emb_dim]
    )
    bow = fluid.layers.sequence_pool(input=emb, pool_type='sum')
    bow_tanh = paddle.tanh(bow)
    fc_1 = paddle.static.nn.fc(x=bow_tanh, size=hid_dim, activation="tanh")
    fc_2 = paddle.static.nn.fc(x=fc_1, size=hid_dim2, activation="tanh")
    prediction = paddle.static.nn.fc(
        x=[fc_2], size=class_dim, activation="softmax"
    )
    # The final fc layer already applies softmax, so the loss is told not to
    # apply it again (use_softmax=False).
    cost = paddle.nn.functional.cross_entropy(
        input=prediction, label=label, reduction='none', use_softmax=False
    )
    avg_cost = paddle.mean(x=cost)

    return avg_cost


class TestWeightDecay(unittest.TestCase):
    """Compare losses from a plain executor run and a data-parallel run.

    The same program (model + Adagrad + a manual per-parameter update) is
    trained on the same five IMDB batches once through ``fluid.Executor``
    directly and once through a data-parallel ``CompiledProgram``; the
    per-step losses are expected to agree within a relative tolerance.
    """

    def setUp(self):
        """Load the IMDB word dictionary and cache five batches of size 4."""
        self.word_dict = paddle.dataset.imdb.word_dict()
        reader = paddle.batch(
            paddle.dataset.imdb.train(self.word_dict), batch_size=4
        )()
        self.train_data = [next(reader) for _ in range(5)]
        self.learning_rate = 0.5

    def run_executor(self, place, feed_list, loss):
        """Train the default main program step by step on ``place``.

        Args:
            place: device to run on.
            feed_list: variables the cached batches are fed into.
            loss: variable whose value is fetched after every step.

        Returns:
            A list with the averaged fetched loss for each cached batch.
        """
        exe = fluid.Executor(place)
        feeder = fluid.DataFeeder(feed_list=feed_list, place=place)
        exe.run(fluid.default_startup_program())
        main_prog = fluid.default_main_program()
        loss_set = []
        for data in self.train_data:
            out = exe.run(
                main_prog, feed=feeder.feed(data), fetch_list=[loss.name]
            )

            # Average once and reuse it for both the log line and the result.
            step_loss = np.average(out)
            print("loss              %s" % (step_loss))
            loss_set.append(step_loss)

        return loss_set

    def run_parallel_exe(
        self,
        place,
        feed_list,
        loss,
        use_reduce=False,
        use_fast_executor=False,
        use_ir_memory_optimize=False,
    ):
        """Train the default main program through a data-parallel build.

        Args:
            place: device to run on.
            feed_list: variables the cached batches are fed into.
            loss: variable whose value is fetched after every step; its name
                is also passed as ``loss_name`` to ``with_data_parallel``.
            use_reduce: select the ``Reduce`` strategy instead of
                ``AllReduce``.
            use_fast_executor: set ``use_experimental_executor`` on the
                execution strategy.
            use_ir_memory_optimize: value assigned to the build strategy's
                ``memory_optimize`` flag.

        Returns:
            A list with the averaged fetched loss for each cached batch.
        """
        exe = fluid.Executor(place)
        feeder = fluid.DataFeeder(feed_list=feed_list, place=place)
        exe.run(fluid.default_startup_program())

        exec_strategy = fluid.ExecutionStrategy()
        if use_fast_executor:
            exec_strategy.use_experimental_executor = True

        build_strategy = fluid.BuildStrategy()
        build_strategy.reduce_strategy = (
            fluid.BuildStrategy.ReduceStrategy.Reduce
            if use_reduce
            else fluid.BuildStrategy.ReduceStrategy.AllReduce
        )
        build_strategy.memory_optimize = use_ir_memory_optimize

        train_cp = compiler.CompiledProgram(
            fluid.default_main_program()
        ).with_data_parallel(
            loss_name=loss.name,
            exec_strategy=exec_strategy,
            build_strategy=build_strategy,
        )

        loss_set = []
        for data in self.train_data:
            out = exe.run(
                train_cp, feed=feeder.feed(data), fetch_list=[loss.name]
            )
            loss_set.append(np.average(out))

        return loss_set

    def check_weight_decay(
        self, place, model, use_parallel_exe=False, use_reduce=False
    ):
        """Build the training program in a fresh scope and run it.

        Args:
            place: device to run on.
            model: callable ``model(data, label, dict_dim)`` returning the
                average cost variable.
            use_parallel_exe: run through ``run_parallel_exe`` instead of
                ``run_executor``.
            use_reduce: forwarded to ``run_parallel_exe``.

        Returns:
            The list of per-step losses produced by the chosen runner.
        """
        main_prog = fluid.framework.Program()
        startup_prog = fluid.framework.Program()
        # Fixed seed so both runs start from the same initialization.
        startup_prog.random_seed = 1
        with prog_scope_guard(main_prog=main_prog, startup_prog=startup_prog):
            data = paddle.static.data(
                name="words", shape=[-1, 1], dtype="int64", lod_level=1
            )
            label = paddle.static.data(
                name="label", shape=[-1, 1], dtype="int64"
            )
            avg_cost = model(data, label, len(self.word_dict))

            # Pair every parameter with a copy scaled by the learning rate.
            # NOTE(review): this is built before minimize(), presumably so
            # the scaling is added to the program ahead of the optimizer
            # update -- keep this ordering.
            param_list = [
                (var, var * self.learning_rate)
                for var in main_prog.block(0).all_parameters()
            ]

            optimizer = fluid.optimizer.Adagrad(
                learning_rate=self.learning_rate
            )
            optimizer.minimize(avg_cost)

            # Subtract the scaled copy from each parameter and write the
            # result back into the parameter.
            for params in param_list:
                updated_p = paddle.subtract(x=params[0], y=params[1])
                fluid.layers.assign(input=updated_p, output=params[0])

            if use_parallel_exe:
                loss = self.run_parallel_exe(
                    place, [data, label], loss=avg_cost, use_reduce=use_reduce
                )
            else:
                loss = self.run_executor(place, [data, label], loss=avg_cost)

        return loss

    def test_weight_decay(self):
        """Losses of the plain and data-parallel runs must match per step."""
        model = partial(bow_net, is_sparse=False)
        places = get_places()
        if not places:
            # Without any place the loop below would never execute and the
            # test would pass without checking anything.
            self.skipTest("no place available to run the weight decay check")

        for place in places:
            loss = self.check_weight_decay(place, model, use_parallel_exe=False)

            # TODO(zcd): should test use_reduce=True
            loss2 = self.check_weight_decay(
                place, model, use_parallel_exe=True, use_reduce=False
            )

            # Guard against one run silently producing fewer steps.
            self.assertEqual(
                len(loss),
                len(loss2),
                f"Expect {len(loss)} losses\nBut Got {len(loss2)}"
                f" in class {self.__class__.__name__}",
            )

            for expected, actual in zip(loss, loss2):
                self.assertTrue(
                    np.isclose(a=expected, b=actual, rtol=5e-5),
                    f"Expect {expected!s}\nBut Got {actual!s}"
                    f" in class {self.__class__.__name__}",
                )
C
chengduo 已提交
202 203 204 205


if __name__ == '__main__':
    unittest.main()