# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

import numpy as np

import paddle
import paddle.nn.functional as F
from paddle import LazyGuard, nn
from paddle.distributed.auto_parallel.helper import ProgramHelper, ProxyLayer
from paddle.distributed.fleet import auto
from paddle.fluid.framework import _non_static_mode
from paddle.io import Dataset
from paddle.jit.dy2static.utils import is_paddle_func
from paddle.nn import Sequential
from paddle.static import InputSpec

batch_size = 4
batch_num = 30
hidden_size = 1024
class_num = 10


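# Synthetic dataset: each sample pairs a random float32 feature vector of
# length hidden_size with a random int64 class label.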
class MyDataset(Dataset):
    def __init__(self, num_samples):
        super().__init__()
        self.num_samples = num_samples

    def __getitem__(self, index):
        input = np.random.uniform(size=hidden_size).astype("float32")
        label = np.random.randint(0, class_num - 1, dtype="int64")
        return input, label

    def __len__(self):
        return self.num_samples


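# Feed-forward block (LayerNorm -> Linear -> GELU -> Linear -> Dropout),
# followed by a final Linear projection to a single output feature.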
class MLPLayer(nn.Layer):
    def __init__(
        self,
        hidden_size=1024,
        intermediate_size=4 * 1024,
        dropout_ratio=0.1,
        initializer_range=0.02,
    ):
        super().__init__()
        d_model = hidden_size
        dim_feedforward = intermediate_size
        weight_attr = paddle.ParamAttr(
            initializer=nn.initializer.Normal(mean=0.0, std=initializer_range)
        )

        self.linear0 = nn.Linear(
            d_model, dim_feedforward, weight_attr, bias_attr=None
        )
        self.linear1 = nn.Linear(
            dim_feedforward, d_model, weight_attr, bias_attr=None
        )
        self.linear2 = nn.Linear(d_model, 1, weight_attr, bias_attr=None)
        self.norm = nn.LayerNorm(d_model, epsilon=1e-5)
        self.dropout = nn.Dropout(dropout_ratio, mode="upscale_in_train")

    def forward(self, input):
        out = self.norm(input)
        out = self.linear0(out)
        out = F.gelu(out, approximate=True)
        out = self.linear1(out)
        out = self.dropout(out)
        out = self.linear2(out)

        return out


class TestWholeProgram(unittest.TestCase):
    def test_apply_optimizer(self):
        paddle.disable_static()
        mlp = MLPLayer(
            hidden_size=hidden_size,
            intermediate_size=4 * hidden_size,
            dropout_ratio=0.1,
            initializer_range=0.02,
        )
        metrics = paddle.metric.Accuracy()
        loss = paddle.nn.CrossEntropyLoss()
        optimizer = paddle.optimizer.SGD(
            learning_rate=0.00001, parameters=mlp.parameters()
        )
        inputs = InputSpec([batch_size, hidden_size], 'float32', 'x')
        labels = InputSpec([batch_size], 'int64', 'label')

        program_helper = ProgramHelper(mlp, loss, [metrics], [inputs], [labels])
        paddle.enable_static()
        # step 1: build program
        program_helper.build_program(mode='train')
        program_helper.build_program(mode='eval')
        # switching between modes is straightforward
        program_helper.to('train')

        forward_ops = program_helper.main_program.block(0).ops
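        # Only the forward pass (plus loss and metric ops) has been built so far;
        # backward and optimizer ops are appended by apply_optimizer below.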
        self.assertEqual(len(forward_ops), 17)

        # step 2: apply the optimizer to generate the whole program
        optimize_ops, _ = program_helper.apply_optimizer(optimizer)
        all_ops = program_helper.main_program.block(0).ops
        sgd_ops = [
            op
            for op in program_helper.main_program.block(0).ops
            if op.type == 'sgd'
        ]
        self.assertEqual(len(all_ops), 37)
        self.assertEqual(len(optimize_ops), len(sgd_ops))

        program_helper.reset()


class TestToStatic(unittest.TestCase):
    def test_to_static(self):

        mlp = MLPLayer(
            hidden_size=hidden_size,
            intermediate_size=4 * hidden_size,
            dropout_ratio=0.1,
            initializer_range=0.02,
        )
        loss = paddle.nn.CrossEntropyLoss()
        optimizer = paddle.optimizer.SGD(
            learning_rate=0.00001, parameters=mlp.parameters()
        )

        dataset = MyDataset(batch_num * batch_size)

        # inputs = InputSpec([batch_size, hidden_size], 'float32', 'x')
        # labels = InputSpec([batch_size], 'int64', 'label')

        assert _non_static_mode()
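        # auto.Engine converts the dygraph model into static programs when
        # fit/evaluate/predict run; the asserts before and after check that
        # the process ends up in static mode.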
        engine = auto.Engine(
            model=mlp,
            loss=loss,
            optimizer=optimizer,
            metrics=paddle.metric.Accuracy(),
            strategy=None,
        )
        engine.fit(dataset, batch_size=batch_size)
        engine.evaluate(dataset, batch_size=batch_size)
        engine.predict(dataset, batch_size=batch_size)
        assert not _non_static_mode()


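# LazyGuard defers parameter initialization: parameters declared inside the
# guard are only materialized when the startup program runs.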
class TestLazyInit(unittest.TestCase):
    def test_lazy_init(self):

        with LazyGuard():
            mlp = MLPLayer(
                hidden_size=hidden_size,
                intermediate_size=4 * hidden_size,
                dropout_ratio=0.1,
                initializer_range=0.02,
            )
            loss = paddle.nn.CrossEntropyLoss()

        metrics = paddle.metric.Accuracy()
        loss = paddle.nn.CrossEntropyLoss()
        inputs = InputSpec([batch_size, hidden_size], 'float32', 'x')
        labels = InputSpec([batch_size], 'int64', 'label')

        program_helper = ProgramHelper(mlp, loss, [metrics], [inputs], [labels])
        program_helper.build_program(mode='train')
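        # With lazy init, every variable in the startup program is expected to
        # have exactly one corresponding initialization op.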
        ops = program_helper.startup_program.block(0).ops
        vars = program_helper.startup_program.block(0).vars
        assert len(vars.keys()) == len(ops)
        program_helper.reset()


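# ProxyLayer wraps user code, so is_paddle_func must not classify its
# train/eval/predict methods (or a user-built Sequential) as framework internals.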
class TestIgnoreProxyLayer(unittest.TestCase):
    def test_is_paddle_func(self):
        mlp = MLPLayer(
            hidden_size=hidden_size,
            intermediate_size=4 * hidden_size,
            dropout_ratio=0.1,
            initializer_range=0.02,
        )
        loss = paddle.nn.CrossEntropyLoss()
        metrics = paddle.metric.Accuracy()

        proxy_layer = ProxyLayer(mlp, loss, metrics)

        self.assertFalse(is_paddle_func(proxy_layer._train))
        self.assertFalse(is_paddle_func(proxy_layer._eval))
        self.assertFalse(is_paddle_func(proxy_layer._predict))
        # test for nn.Sequential
        net = Sequential(('mlp', mlp))
        self.assertFalse(is_paddle_func(net))


if __name__ == "__main__":
    unittest.main()