# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
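
"""Unit tests for the auto-parallel dynamic-to-static helpers
(ProgramHelper, ProxyLayer) and the auto.Engine to_static workflow."""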

import unittest

import numpy as np

import paddle
import paddle.nn.functional as F
from paddle import LazyGuard, nn
from paddle.distributed.auto_parallel.static.helper import (
    ProgramHelper,
    ProxyLayer,
)
from paddle.distributed.fleet import auto
from paddle.framework import in_dynamic_mode
from paddle.io import Dataset
from paddle.jit.dy2static.utils import is_paddle_func
from paddle.nn import Sequential
from paddle.static import InputSpec

batch_size = 4
batch_num = 30
hidden_size = 1024
class_num = 10


class MyDataset(Dataset):
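    """Synthetic dataset yielding random (feature, label) pairs for the tests."""
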
    def __init__(self, num_samples):
        super().__init__()
        self.num_samples = num_samples

    def __getitem__(self, index):
        input = np.random.uniform(size=hidden_size).astype("float32")
        label = np.random.randint(0, class_num - 1, dtype="int64")
        return input, label

    def __len__(self):
        return self.num_samples


class MLPLayer(nn.Layer):
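    """Small MLP used as the test model: LayerNorm -> Linear -> GELU -> Linear
    -> Dropout -> Linear."""
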
    def __init__(
        self,
        hidden_size=1024,
        intermediate_size=4 * 1024,
        dropout_ratio=0.1,
        initializer_range=0.02,
    ):
        super().__init__()
        d_model = hidden_size
        dim_feedforward = intermediate_size
        weight_attr = paddle.ParamAttr(
            initializer=nn.initializer.Normal(mean=0.0, std=initializer_range)
        )

        self.linear0 = nn.Linear(
            d_model, dim_feedforward, weight_attr, bias_attr=None
        )
        self.linear1 = nn.Linear(
            dim_feedforward, d_model, weight_attr, bias_attr=None
        )
        self.linear2 = nn.Linear(d_model, 1, weight_attr, bias_attr=None)
        self.norm = nn.LayerNorm(d_model, epsilon=1e-5)
        self.dropout = nn.Dropout(dropout_ratio, mode="upscale_in_train")

    def forward(self, input):
        out = self.norm(input)
        out = self.linear0(out)
        out = F.gelu(out, approximate=True)
        out = self.linear1(out)
        out = self.dropout(out)
        out = self.linear2(out)

        return out


class TestWholeProgram(unittest.TestCase):
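    """Builds train/eval programs via ProgramHelper and checks that applying
    the SGD optimizer appends the expected optimizer ops to the main program."""
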
    def test_apply_optimizer(self):
        paddle.disable_static()
        mlp = MLPLayer(
            hidden_size=hidden_size,
            intermediate_size=4 * hidden_size,
            dropout_ratio=0.1,
            initializer_range=0.02,
        )
        metrics = paddle.metric.Accuracy()
        loss = paddle.nn.CrossEntropyLoss()
        optimizer = paddle.optimizer.SGD(
            learning_rate=0.00001, parameters=mlp.parameters()
        )
        inputs = InputSpec([batch_size, hidden_size], 'float32', 'x')
        labels = InputSpec([batch_size], 'int64', 'label')

        program_helper = ProgramHelper(mlp, loss, [metrics], [inputs], [labels])
        paddle.enable_static()
        # step 1: build program
        program_helper.build_program(mode='train')
        program_helper.build_program(mode='eval')
        # the helper makes it easy to switch between the built modes
        program_helper.to('train')

        forward_ops = program_helper.main_program.block(0).ops
        self.assertEqual(len(forward_ops), 17)

        # step 2: apply the optimizer to generate the whole program
        optimize_ops, _ = program_helper.apply_optimizer(optimizer)
        all_ops = program_helper.main_program.block(0).ops
        sgd_ops = [
            op
            for op in program_helper.main_program.block(0).ops
            if op.type == 'sgd'
        ]
        self.assertEqual(len(all_ops), 37)
        self.assertEqual(len(optimize_ops), len(sgd_ops))

        program_helper.reset()


class TestToStatic(unittest.TestCase):
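    """Runs auto.Engine fit/evaluate/predict on a random dataset and checks
    that dynamic mode is left once the engine converts the model to static graph."""
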
    def test_to_static(self):
        mlp = MLPLayer(
            hidden_size=hidden_size,
            intermediate_size=4 * hidden_size,
            dropout_ratio=0.1,
            initializer_range=0.02,
        )
        loss = paddle.nn.CrossEntropyLoss()
        optimizer = paddle.optimizer.SGD(
            learning_rate=0.00001, parameters=mlp.parameters()
        )

        dataset = MyDataset(batch_num * batch_size)

        # inputs = InputSpec([batch_size, hidden_size], 'float32', 'x')
        # labels = InputSpec([batch_size], 'int64', 'label')

        assert in_dynamic_mode()
        engine = auto.Engine(
            model=mlp,
            loss=loss,
            optimizer=optimizer,
            metrics=paddle.metric.Accuracy(),
            strategy=None,
        )
        engine.fit(dataset, batch_size=batch_size)
        engine.evaluate(dataset, batch_size=batch_size)
        engine.predict(dataset, batch_size=batch_size)
        assert not in_dynamic_mode()


class TestLazyInit(unittest.TestCase):
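    """Checks that parameters created lazily under LazyGuard are materialized
    by the startup program built by ProgramHelper (one op per startup variable)."""
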
    def test_lazy_init(self):

        with LazyGuard():
            mlp = MLPLayer(
                hidden_size=hidden_size,
                intermediate_size=4 * hidden_size,
                dropout_ratio=0.1,
                initializer_range=0.02,
            )
            loss = paddle.nn.CrossEntropyLoss()

        metrics = paddle.metric.Accuracy()
        loss = paddle.nn.CrossEntropyLoss()
        inputs = InputSpec([batch_size, hidden_size], 'float32', 'x')
        labels = InputSpec([batch_size], 'int64', 'label')

        program_helper = ProgramHelper(mlp, loss, [metrics], [inputs], [labels])
        program_helper.build_program(mode='train')
        ops = program_helper.startup_program.block(0).ops
        vars = program_helper.startup_program.block(0).vars
        assert len(vars.keys()) == len(ops)
        program_helper.reset()


class TestIgnoreProxyLayer(unittest.TestCase):
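    """Checks that ProxyLayer's generated _train/_eval/_predict methods and a
    user-built nn.Sequential are not treated as Paddle-internal functions by dy2static."""
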
    def test_is_paddle_func(self):
        mlp = MLPLayer(
            hidden_size=hidden_size,
            intermediate_size=4 * hidden_size,
            dropout_ratio=0.1,
            initializer_range=0.02,
        )
        loss = paddle.nn.CrossEntropyLoss()
        metrics = paddle.metric.Accuracy()

        proxy_layer = ProxyLayer(mlp, loss, metrics)

        self.assertFalse(is_paddle_func(proxy_layer._train))
        self.assertFalse(is_paddle_func(proxy_layer._eval))
        self.assertFalse(is_paddle_func(proxy_layer._predict))
        # test for nn.Sequential
        net = Sequential(('mlp', mlp))
        self.assertFalse(is_paddle_func(net))


if __name__ == "__main__":
    unittest.main()