#   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import contextlib
import unittest

import numpy as np
import seresnext_net
from simple_nets import fc_with_batchnorm, init_data, simple_fc_net
from test_parallel_executor_transformer import (
    DeviceType,
    get_feed_data_reader,
    transformer,
)

import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core


def simple_fc_net_with_accuracy(use_feed):
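    # Builds a 4-layer FC classifier and returns its mean cross-entropy loss.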
    img = paddle.static.data(name='image', shape=[-1, 784], dtype='float32')
    label = paddle.static.data(name='label', shape=[-1, 1], dtype='int64')

    hidden = img
    for _ in range(4):
        hidden = paddle.static.nn.fc(
            hidden,
            size=200,
            activation='relu',
            bias_attr=fluid.ParamAttr(
                initializer=fluid.initializer.Constant(value=1.0)
            ),
        )
    prediction = paddle.static.nn.fc(hidden, size=10, activation='softmax')
    loss = paddle.nn.functional.cross_entropy(
        input=prediction, label=label, reduction='none', use_softmax=False
    )
    loss = paddle.mean(loss)
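    # The top-5 accuracy output below is computed but never used by the returned loss.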
    accuracy_out = paddle.static.accuracy(input=prediction, label=label, k=5)
    return loss


def cond_net(use_feed=None):
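    # Chooses between two loss definitions through paddle.static.nn.case,
    # exercising pruning across control-flow sub-blocks.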
    x = paddle.static.data(name="x", shape=[-1, 4], dtype='float32')
    label = paddle.static.data('label', shape=[-1, 1], dtype='int64')
    prediction = paddle.static.nn.fc(x, size=1, activation=None)

    def loss1(pred, label):
        x = paddle.static.data(name="x", shape=[-1, 4], dtype='float32')
        loss = paddle.nn.functional.cross_entropy(
            input=pred, label=label, reduction='none', use_softmax=False
        )
        avg_loss = paddle.mean(loss, name='mean_cross_entropy_loss')
        return avg_loss

    def loss2(pred, label):
        loss = paddle.nn.functional.softmax_with_cross_entropy(
            logits=pred, label=label
        )
        avg_loss = paddle.mean(loss, name='mean_softmax_loss')
        return avg_loss

    two = fluid.layers.fill_constant([1], 'int32', 2)
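    # `two == 0` is always False, so `case` falls through to the default branch (loss2).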
    pred = two == 0
    avg_loss = paddle.static.nn.case(
        [(pred, lambda: loss1(prediction, label))],
        lambda: loss2(prediction, label),
    )
    return avg_loss


def optimization_in_cond_net(with_optimize=False):
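    # Same control-flow structure as cond_net, but optionally calls
    # optimizer.minimize() inside the selected branch.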
    x = paddle.static.data(name="x", shape=[-1, 4], dtype='float32')
    label = paddle.static.data('label', shape=[-1, 1], dtype='int64')
    prediction = paddle.static.nn.fc(x, size=1, activation=None)

    def loss1(opt, pred, label, with_optimize):
        x = paddle.static.data(name="x", shape=[-1, 4], dtype='float32')
        loss = paddle.nn.functional.cross_entropy(
            input=pred, label=label, reduction='none', use_softmax=False
        )
        avg_loss = paddle.mean(loss, name='mean_cross_entropy_loss')
        if with_optimize:
            opt.minimize(avg_loss)
        return avg_loss

    def loss2(opt, pred, label, with_optimize):
        loss = paddle.nn.functional.softmax_with_cross_entropy(
            logits=pred, label=label
        )
        avg_loss = paddle.mean(loss, name='mean_softmax_loss')
        if with_optimize:
            opt.minimize(avg_loss)
        return avg_loss

    sgd = fluid.optimizer.SGD(learning_rate=0.1)
    two = fluid.layers.fill_constant([1], 'int32', 2)
    pred = two == 0
    avg_loss = paddle.static.nn.case(
        [(pred, lambda: loss1(sgd, prediction, label, with_optimize))],
        lambda: loss2(sgd, prediction, label, with_optimize),
    )
    return avg_loss


class TestProgramPruneBackward(unittest.TestCase):
    def program_compare(self, program_a, program_b):
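        # Two programs are considered equal when they have the same number of
        # blocks and, per block, the same op types (in order) and variable names.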
        assert isinstance(
            program_a, fluid.framework.Program
        ), "The first argument should be fluid.framework.Program."
        assert isinstance(
            program_b, fluid.framework.Program
        ), "The second argument should be fluid.framework Program."

        self.assertEqual(len(program_a.blocks), len(program_b.blocks))
        for idx in range(len(program_a.blocks)):
            block_a = program_a.blocks[idx]
            block_b = program_b.blocks[idx]
            self.assertEqual(len(block_a.ops), len(block_b.ops))
            self.assertEqual(len(block_a.vars), len(block_b.vars))
            for op_idx in range(len(block_a.ops)):
                self.assertEqual(
                    block_a.ops[op_idx].type, block_b.ops[op_idx].type
                )
            for var_key in list(block_a.vars.keys()):
                self.assertTrue(block_b.has_var(var_key))

    def check_prune_correctness(self, method, feed_dict, optimizer):
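        # Clone the program for test both before and after minimize(); pruning the
        # backward/optimizer ops should make the two clones structurally identical
        # and produce the same loss value when executed.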
        loss = method(use_feed=False)

        main_program = fluid.default_main_program()
        test_prog_orig = main_program.clone(for_test=True)
        optimizer().minimize(loss)
        test_prog_prune = main_program.clone(for_test=True)

        self.program_compare(test_prog_orig, test_prog_prune)

        places = [core.CPUPlace()]
        if core.is_compiled_with_cuda():
            places.append(core.CUDAPlace(0))

        for place in places:
            exe = fluid.Executor(place)
            exe.run(fluid.default_startup_program())

            (loss_data_prune,) = exe.run(
                test_prog_prune, feed=feed_dict, fetch_list=[loss.name]
            )
            (loss_data_orig,) = exe.run(
                test_prog_orig, feed=feed_dict, fetch_list=[loss.name]
            )
            self.assertEqual(loss_data_orig, loss_data_prune)

    def test_simple_fc_net(self):
        def optimizer():
            optimizer = fluid.optimizer.SGD(
                learning_rate=0.001,
                regularization=fluid.regularizer.L2Decay(1e-4),
            )
            return optimizer

        with self.program_scope_guard():
            img, label = init_data()
            self.check_prune_correctness(
                method=simple_fc_net,
                feed_dict={"image": img, "label": label},
                optimizer=optimizer,
            )

    def test_simple_fc_net_with_accuracy(self):
        def optimizer():
            optimizer = fluid.optimizer.SGD(
                learning_rate=0.001,
                regularization=fluid.regularizer.L2Decay(1e-4),
            )
            return optimizer

        with self.program_scope_guard():
            img, label = init_data()
            self.check_prune_correctness(
                method=simple_fc_net_with_accuracy,
                feed_dict={"image": img, "label": label},
                optimizer=optimizer,
            )

    def test_batchnorm_fc(self):
        def optimizer():
            optimizer = fluid.optimizer.SGD(
                learning_rate=0.001,
                regularization=fluid.regularizer.L2Decay(1e-4),
            )
            return optimizer

        with self.program_scope_guard():
            img, label = init_data()
            self.check_prune_correctness(
                method=fc_with_batchnorm,
                feed_dict={"image": img, "label": label},
                optimizer=optimizer,
            )

    def test_seresnet(self):
        with self.program_scope_guard():
            self.check_prune_correctness(
                method=seresnext_net.model,
                feed_dict=seresnext_net.feed_dict(use_device=DeviceType.CPU),
                optimizer=seresnext_net.optimizer,
            )

    def test_transformer(self):
        def optimizer():
            optimizer = fluid.optimizer.Adam(
                learning_rate=0.001,
                regularization=fluid.regularizer.L2Decay(1e-4),
            )
            return optimizer

        with self.program_scope_guard():
            # the program argument is used to distinguish Program and CompiledProgram
            feed_dict = get_feed_data_reader().get_next(
                fluid.Executor(core.CPUPlace()), fluid.default_main_program()
            )
            self.check_prune_correctness(
                method=transformer, feed_dict=feed_dict, optimizer=optimizer
            )

    def test_cond(self):
        def optimizer():
            optimizer = fluid.optimizer.SGD(learning_rate=0.01)
            return optimizer

        with self.program_scope_guard():
            x_in = np.random.random(size=(10, 4)).astype('float32')
            label_in = np.random.randint(1, size=(10, 1)).astype('int64')
            feed_dict = {'x': x_in, 'label': label_in}
            self.check_prune_correctness(
                method=cond_net, feed_dict=feed_dict, optimizer=optimizer
            )

    def test_optimization_in_cond(self):
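        # Build the cond net twice, without and with optimizer.minimize() inside
        # the branches, and check the cloned test programs and losses still match.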
        x_in = np.random.random(size=(10, 4)).astype('float32')
        label_in = np.random.randint(1, size=(10, 1)).astype('int64')
        feed_dict = {'x': x_in, 'label': label_in}
        with self.program_scope_guard():
            loss = optimization_in_cond_net(False)
            main_program = fluid.default_main_program()
            test_prog_orig = main_program.clone(for_test=True)
            place = core.CPUPlace()
            exe = fluid.Executor(place)
            exe.run(fluid.default_startup_program())
            (loss_data_orig,) = exe.run(
                test_prog_orig, feed=feed_dict, fetch_list=[loss.name]
            )

        with self.program_scope_guard():
            loss = optimization_in_cond_net(True)
            main_program = fluid.default_main_program()
            test_prog_prune = main_program.clone(for_test=True)

            place = core.CPUPlace()
            exe = fluid.Executor(place)
            exe.run(fluid.default_startup_program())
            (loss_data_prune,) = exe.run(
                test_prog_prune, feed=feed_dict, fetch_list=[loss.name]
            )

        self.program_compare(test_prog_orig, test_prog_prune)
        self.assertEqual(loss_data_orig, loss_data_prune)

    @contextlib.contextmanager
    def program_scope_guard(self):
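        # Give each test a fresh Program, startup Program, Scope and unique-name
        # context so networks built by different tests do not interfere.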
        prog = fluid.Program()
        startup_prog = fluid.Program()
        scope = fluid.core.Scope()
        with fluid.scope_guard(scope):
            with fluid.program_guard(prog, startup_prog):
                with fluid.unique_name.guard():
                    yield


if __name__ == '__main__':
    paddle.enable_static()
    unittest.main()