test_lr_grad_clip.py
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

from test_to_static import MLPLayer, MyDataset

import paddle
from paddle.distributed.fleet import auto

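# Run these tests in static graph mode.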
paddle.enable_static()


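# Common setUp: builds an MLPLayer, an SGD optimizer, a small in-memory dataset,
# and an auto-parallel Engine; subclasses override init_optimizer().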
class TestEngineBase(unittest.TestCase):
    def setUp(self):
        self.batch_size = 4
        self.batch_num = 5
        self.hidden_size = 1024

        self.init_model()
        self.init_optimizer()
        self.init_dataset()
        self.init_engine()

    def init_model(self):
        self.mlp = MLPLayer(
            hidden_size=self.hidden_size,
            intermediate_size=4 * self.hidden_size,
            dropout_ratio=0.1,
            initializer_range=0.02,
        )
        self.loss = paddle.nn.CrossEntropyLoss()

    def init_optimizer(self):
        self.optimizer = paddle.optimizer.SGD(
            learning_rate=0.00001, parameters=self.mlp.parameters()
        )

    def init_dataset(self):
        self.dataset = MyDataset(self.batch_num * self.batch_size)

    def init_engine(self):
        # inputs = InputSpec([self.batch_size, self.hidden_size], 'float32', 'x')
        # labels = InputSpec([self.batch_size], 'int64', 'label')

        self.engine = auto.Engine(
            model=self.mlp,
            loss=self.loss,
            optimizer=self.optimizer,
            metrics=paddle.metric.Accuracy(),
        )


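# Uses a CosineAnnealingDecay scheduler instead of a fixed learning rate and
# checks that the engine's optimizer still holds the LRScheduler after fit().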
class TestLRScheduler(TestEngineBase):
    def init_optimizer(self):
        scheduler = paddle.optimizer.lr.CosineAnnealingDecay(
            learning_rate=0.00001, T_max=10
        )
        self.optimizer = paddle.optimizer.SGD(learning_rate=scheduler)

    def test_lr_scheduler(self):
        self.init_engine()
        self.engine.fit(self.dataset, batch_size=self.batch_size)
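        # The optimizer attached to the engine should still expose the scheduler.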
        lr = self.engine._optimizer._learning_rate
        assert isinstance(lr, paddle.optimizer.lr.LRScheduler)


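# Attaches ClipGradByGlobalNorm to the optimizer and verifies that gradient
# clipping ops show up in the engine's main program after fit().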
class TestGradClipByGlobalNorm(TestEngineBase):
    def init_optimizer(self):
        clip = paddle.nn.ClipGradByGlobalNorm(clip_norm=1.0)
        self.optimizer = paddle.optimizer.SGD(
            learning_rate=0.00001, grad_clip=clip
        )

    def test_grad_clip(self):

        self.engine.fit(self.dataset, batch_size=self.batch_size)
        self.check_program()

    def check_program(self):

        ops = self.engine.main_program.global_block().ops
        has_grad_clip = False
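        # Ops inserted by gradient clipping are tagged with an op_namescope
        # that starts with "/gradient_clip".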
        for op in ops:
            if op.desc.has_attr("op_namescope") and op.desc.attr(
                "op_namescope"
            ).startswith("/gradient_clip"):
                has_grad_clip = True
                break
        assert has_grad_clip is True


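# Same check as above, but with per-tensor ClipGradByNorm.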
class TestGradClipByNorm(TestGradClipByGlobalNorm):
    def init_optimizer(self):
        clip = paddle.nn.ClipGradByNorm(clip_norm=1.0)
        self.optimizer = paddle.optimizer.SGD(
            learning_rate=0.00001, grad_clip=clip
        )


if __name__ == "__main__":
    unittest.main()