test_program_prune_backward.py 12.1 KB
Newer Older
1 2 3 4 5 6 7 8 9 10 11 12 13 14
#   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15
import contextlib
16 17 18 19
import unittest

import numpy as np
import seresnext_net
20 21
from fake_reader import fake_imdb_reader
from simple_nets import fc_with_batchnorm, init_data, simple_fc_net
22 23
from test_parallel_executor_transformer import (
    DeviceType,
24 25
    get_feed_data_reader,
    transformer,
26
)
27

H
hong 已提交
28
import paddle
29 30
import paddle.fluid as fluid
import paddle.fluid.core as core
31 32 33 34 35 36 37 38 39


def lstm_net(use_feed):
    """Build an LSTM text classifier and return its mean cross-entropy loss.

    The graph is: embedding over the "words" input -> fc -> dynamic_lstm ->
    max sequence pooling -> tanh -> fc (tanh) -> fc (softmax), followed by
    cross entropy against "label".

    Args:
        use_feed: Accepted so the signature matches the other network
            builders; it is not read by this function.

    Returns:
        The variable holding the mean of the per-sample cross-entropy loss.
    """
    vocab_size = 5147
    embedding_width = 128
    hidden_width = 128
    fc1_width = 96
    num_classes = 2
    embedding_lr = 30.0
    # Both the projection before the LSTM and the LSTM itself use this size.
    lstm_size = hidden_width * 4

    words = fluid.layers.data(
        name="words", shape=[1], dtype="int64", lod_level=1
    )
    label = fluid.layers.data(name="label", shape=[1], dtype="int64")

    embedded = fluid.layers.embedding(
        input=words,
        size=[vocab_size, embedding_width],
        param_attr=fluid.ParamAttr(learning_rate=embedding_lr),
    )
    projected = fluid.layers.fc(input=embedded, size=lstm_size)
    # Only the first output of dynamic_lstm is consumed below.
    hidden_seq, _unused = fluid.layers.dynamic_lstm(
        input=projected, size=lstm_size, is_reverse=False
    )
    pooled = fluid.layers.sequence_pool(input=hidden_seq, pool_type='max')
    pooled_tanh = paddle.tanh(pooled)

    fc1 = fluid.layers.fc(input=pooled_tanh, size=fc1_width, act='tanh')
    probabilities = fluid.layers.fc(
        input=fc1, size=num_classes, act='softmax'
    )
    per_sample_cost = fluid.layers.cross_entropy(
        input=probabilities, label=label
    )
    return paddle.mean(x=per_sample_cost)


62 63 64 65 66 67 68 69 70 71
def simple_fc_net_with_accuracy(use_feed):
    """Build a 4-hidden-layer FC classifier that also contains an accuracy op.

    Args:
        use_feed: Accepted so the signature matches the other network
            builders; it is not read by this function.

    Returns:
        The variable holding the mean cross-entropy loss. The accuracy
        output is created in the program but is not returned.
    """
    image = fluid.layers.data(name='image', shape=[784], dtype='float32')
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    num_hidden_layers = 4
    features = image
    for _layer in range(num_hidden_layers):
        # A fresh attribute object per layer, exactly as many as there are
        # layers; every bias starts at 1.0.
        ones_bias = fluid.ParamAttr(
            initializer=fluid.initializer.Constant(value=1.0)
        )
        features = fluid.layers.fc(
            features, size=200, act='relu', bias_attr=ones_bias
        )

    probabilities = fluid.layers.fc(features, size=10, act='softmax')
    mean_loss = paddle.mean(
        fluid.layers.cross_entropy(input=probabilities, label=label)
    )
    # The result is intentionally unused: the call is kept because it adds
    # the top-5 accuracy computation to the program being built.
    paddle.static.accuracy(input=probabilities, label=label, k=5)
    return mean_loss


83 84 85 86 87 88 89 90
def cond_net(use_feed=None):
    """Build a network whose loss is selected through ``fluid.layers.case``.

    The predicate compares a constant filled with 2 against 0; the first
    branch computes a mean cross-entropy loss and the default branch a mean
    softmax-with-cross-entropy loss.

    Args:
        use_feed: Accepted so the signature matches the other network
            builders; it is not read by this function.

    Returns:
        The variable produced by ``fluid.layers.case``.
    """
    features = fluid.layers.data(name="x", shape=[4], dtype='float32')
    label = fluid.layers.data('label', shape=[1], dtype='int64')
    prediction = fluid.layers.fc(input=features, size=1, act=None)

    def cross_entropy_branch():
        # The "x" data layer is declared again inside this branch and its
        # result is not used; kept so the branch builds the same program.
        fluid.layers.data(name="x", shape=[4], dtype='float32')
        branch_loss = fluid.layers.cross_entropy(
            input=prediction, label=label
        )
        return paddle.mean(branch_loss, name='mean_cross_entropy_loss')

    def softmax_branch():
        branch_loss = paddle.nn.functional.softmax_with_cross_entropy(
            logits=prediction, label=label
        )
        return paddle.mean(branch_loss, name='mean_softmax_loss')

    constant_two = fluid.layers.fill_constant([1], 'int32', 2)
    is_zero = constant_two == 0
    return fluid.layers.case(
        [(is_zero, cross_entropy_branch)],
        softmax_branch,
    )


def optimization_in_cond_net(with_optimize=False):
    """Build the ``case``-based network, optionally minimizing inside branches.

    Same structure as a two-branch ``fluid.layers.case`` over a
    ``2 == 0`` predicate. When ``with_optimize`` is true, each branch calls
    ``minimize`` on its own mean loss with a shared SGD optimizer.

    Args:
        with_optimize: Whether the branch bodies call ``sgd.minimize``.

    Returns:
        The variable produced by ``fluid.layers.case``.
    """
    features = fluid.layers.data(name="x", shape=[4], dtype='float32')
    label = fluid.layers.data('label', shape=[1], dtype='int64')
    prediction = fluid.layers.fc(input=features, size=1, act=None)

    def finish_branch(branch_loss, mean_name):
        # Shared tail of both branches: average, then optionally optimize.
        mean_loss = paddle.mean(branch_loss, name=mean_name)
        if with_optimize:
            sgd.minimize(mean_loss)
        return mean_loss

    def cross_entropy_branch():
        # The "x" data layer is declared again inside this branch and its
        # result is not used; kept so the branch builds the same program.
        fluid.layers.data(name="x", shape=[4], dtype='float32')
        return finish_branch(
            fluid.layers.cross_entropy(input=prediction, label=label),
            'mean_cross_entropy_loss',
        )

    def softmax_branch():
        return finish_branch(
            paddle.nn.functional.softmax_with_cross_entropy(
                logits=prediction, label=label
            ),
            'mean_softmax_loss',
        )

    # Created before ``case`` invokes the branch builders that reference it.
    sgd = fluid.optimizer.SGD(learning_rate=0.1)
    constant_two = fluid.layers.fill_constant([1], 'int32', 2)
    is_zero = constant_two == 0
    return fluid.layers.case(
        [(is_zero, cross_entropy_branch)],
        softmax_branch,
    )


142 143 144
class TestProgramPruneBackward(unittest.TestCase):
    """Compare test-mode clones of a program taken before and after ``minimize``.

    Each test builds a network, clones the main program with
    ``for_test=True`` both before and after an optimizer has been applied,
    and checks that the two clones have the same structure and produce the
    same loss for the same feed.
    """

    @staticmethod
    def _l2_regularized(optimizer_cls):
        """Return a zero-arg factory for ``optimizer_cls`` with L2 decay.

        The optimizer is created only when the factory is called, so it is
        instantiated after the network has been built, as the tests need.

        Args:
            optimizer_cls: An optimizer class accepting ``learning_rate``
                and ``regularization`` keyword arguments.

        Returns:
            A callable that builds the optimizer with learning rate 0.001
            and ``L2Decay(1e-4)`` regularization.
        """

        def factory():
            return optimizer_cls(
                learning_rate=0.001,
                regularization=fluid.regularizer.L2Decay(1e-4),
            )

        return factory

    @staticmethod
    def _cond_feed():
        """Return a random feed dict for the ``case``-based networks."""
        x_in = np.random.random(size=(10, 4)).astype('float32')
        # randint(1, ...) draws from [0, 1), so every label is 0.
        label_in = np.random.randint(1, size=(10, 1)).astype('int64')
        return {'x': x_in, 'label': label_in}

    def program_compare(self, program_a, program_b):
        """Assert that two programs have the same block/op/var structure.

        Checks, block by block, that the number of ops and vars match, that
        ops at the same position have the same type, and that every var
        name of ``program_a`` exists in the matching block of ``program_b``.

        Args:
            program_a: First ``fluid.framework.Program``.
            program_b: Second ``fluid.framework.Program``.

        Raises:
            AssertionError: If an argument is not a Program or the two
                programs differ in any of the checked properties.
        """
        # unittest assertions instead of bare ``assert`` so the checks are
        # not stripped when Python runs with -O.
        self.assertIsInstance(
            program_a,
            fluid.framework.Program,
            "The first argument should be fluid.framework.Program.",
        )
        self.assertIsInstance(
            program_b,
            fluid.framework.Program,
            "The second argument should be fluid.framework.Program.",
        )

        # Lengths are asserted before zipping so zip never hides a mismatch.
        self.assertEqual(len(program_a.blocks), len(program_b.blocks))
        for block_a, block_b in zip(program_a.blocks, program_b.blocks):
            self.assertEqual(len(block_a.ops), len(block_b.ops))
            self.assertEqual(len(block_a.vars), len(block_b.vars))
            for op_a, op_b in zip(block_a.ops, block_b.ops):
                self.assertEqual(op_a.type, op_b.type)
            for var_key in block_a.vars:
                self.assertTrue(block_b.has_var(var_key))

    def check_prune_correctness(self, method, feed_dict, optimizer):
        """Build a network and compare its test clones around ``minimize``.

        Args:
            method: Network builder called as ``method(use_feed=False)``;
                must return the loss variable.
            feed_dict: Feed passed to ``Executor.run`` for both clones.
            optimizer: Zero-arg callable returning the optimizer whose
                ``minimize`` is applied to the loss.
        """
        loss = method(use_feed=False)

        main_program = fluid.default_main_program()
        test_prog_orig = main_program.clone(for_test=True)
        optimizer().minimize(loss)
        test_prog_prune = main_program.clone(for_test=True)

        self.program_compare(test_prog_orig, test_prog_prune)

        places = [core.CPUPlace()]
        if core.is_compiled_with_cuda():
            places.append(core.CUDAPlace(0))

        for place in places:
            exe = fluid.Executor(place)
            exe.run(fluid.default_startup_program())

            (loss_data_prune,) = exe.run(
                test_prog_prune, feed=feed_dict, fetch_list=[loss.name]
            )
            (loss_data_orig,) = exe.run(
                test_prog_orig, feed=feed_dict, fetch_list=[loss.name]
            )
            self.assertEqual(loss_data_orig, loss_data_prune)

    def test_simple_fc_net(self):
        with self.program_scope_guard():
            img, label = init_data()
            self.check_prune_correctness(
                method=simple_fc_net,
                feed_dict={"image": img, "label": label},
                optimizer=self._l2_regularized(fluid.optimizer.SGD),
            )

    def test_simple_fc_net_with_accuracy(self):
        with self.program_scope_guard():
            img, label = init_data()
            self.check_prune_correctness(
                method=simple_fc_net_with_accuracy,
                feed_dict={"image": img, "label": label},
                optimizer=self._l2_regularized(fluid.optimizer.SGD),
            )

    def test_batchnorm_fc(self):
        with self.program_scope_guard():
            img, label = init_data()
            self.check_prune_correctness(
                method=fc_with_batchnorm,
                feed_dict={"image": img, "label": label},
                optimizer=self._l2_regularized(fluid.optimizer.SGD),
            )

    def test_seresnet(self):
        with self.program_scope_guard():
            self.check_prune_correctness(
                method=seresnext_net.model,
                feed_dict=seresnext_net.feed_dict(use_device=DeviceType.CPU),
                optimizer=seresnext_net.optimizer,
            )

    def test_transformer(self):
        with self.program_scope_guard():
            # the program argument is used to distinguish Program and CompiledProgram
            feed_dict = get_feed_data_reader().get_next(
                fluid.Executor(core.CPUPlace()), fluid.default_main_program()
            )
            self.check_prune_correctness(
                method=transformer,
                feed_dict=feed_dict,
                optimizer=self._l2_regularized(fluid.optimizer.Adam),
            )

    def test_lstm(self):
        with self.program_scope_guard():
            word_dict_size = 5147
            reader = fake_imdb_reader(word_dict_size, 1)
            # These data layers exist only to build the feeder; lstm_net
            # declares layers with the same names when it is called below.
            data = fluid.layers.data(
                name="words", shape=[1], dtype="int64", lod_level=1
            )
            label = fluid.layers.data(name="label", shape=[1], dtype="int64")
            feeder = fluid.DataFeeder(
                feed_list=[data, label], place=core.CPUPlace()
            )
            feed_data = feeder.feed(reader())
            self.check_prune_correctness(
                method=lstm_net,
                feed_dict=feed_data,
                optimizer=self._l2_regularized(fluid.optimizer.Adagrad),
            )

    def test_cond(self):
        def optimizer():
            return fluid.optimizer.SGD(learning_rate=0.01)

        with self.program_scope_guard():
            self.check_prune_correctness(
                method=cond_net,
                feed_dict=self._cond_feed(),
                optimizer=optimizer,
            )

    def _run_optimization_in_cond(self, with_optimize, feed_dict):
        """Build and run ``optimization_in_cond_net`` in a fresh scope on CPU.

        Args:
            with_optimize: Forwarded to ``optimization_in_cond_net``.
            feed_dict: Feed passed to ``Executor.run``.

        Returns:
            A ``(test_program, loss_data)`` tuple: the ``for_test`` clone
            of the main program and the fetched loss value.
        """
        with self.program_scope_guard():
            loss = optimization_in_cond_net(with_optimize)
            test_prog = fluid.default_main_program().clone(for_test=True)

            exe = fluid.Executor(core.CPUPlace())
            exe.run(fluid.default_startup_program())
            (loss_data,) = exe.run(
                test_prog, feed=feed_dict, fetch_list=[loss.name]
            )
        return test_prog, loss_data

    def test_optimization_in_cond(self):
        # The same feed is used for both runs so the losses are comparable.
        feed_dict = self._cond_feed()
        test_prog_orig, loss_data_orig = self._run_optimization_in_cond(
            False, feed_dict
        )
        test_prog_prune, loss_data_prune = self._run_optimization_in_cond(
            True, feed_dict
        )

        self.program_compare(test_prog_orig, test_prog_prune)
        self.assertEqual(loss_data_orig, loss_data_prune)

    @contextlib.contextmanager
    def program_scope_guard(self):
        """Run the ``with`` body under a fresh scope, programs and name guard.

        Creates a new main program, startup program and scope, and enters
        ``scope_guard``, ``program_guard`` and ``unique_name.guard`` in that
        order for the duration of the body.
        """
        prog = fluid.Program()
        startup_prog = fluid.Program()
        scope = fluid.core.Scope()
        # A single ``with`` enters the managers left to right, which is the
        # same order as nesting them.
        with fluid.scope_guard(scope), fluid.program_guard(
            prog, startup_prog
        ), fluid.unique_name.guard():
            yield
338 339 340


if __name__ == '__main__':
    # Enable Paddle's static mode before any test builds a program.
    paddle.enable_static()
    unittest.main()