# test_complex_grad_accumulated.py
#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest
import numpy as np

import paddle

import paddle.fluid.core as core
21
from paddle.fluid.framework import _test_eager_guard
22 23 24


class Optimization_ex1(paddle.nn.Layer):
    """Toy layer whose loss is ``real(s * conj(s))`` with ``s = sum(A + z)``.

    ``z`` is either ``theta0 + theta1 * 1j`` built from the two trainable
    parameters (modes 1 and 2) or the plain tensor ``B`` (mode 3).

    Args:
        shape: Shape of ``theta0``/``theta1``. ``A`` and ``B`` are drawn with
            the same shape so that ``A + z`` is well defined for any
            ``shape`` rather than only for ``[4, 4]``.
        dtype: Real dtype name (e.g. ``'float32'``) used for the parameters
            and for the real/imaginary parts of ``A`` and ``B``.
        param_attr: Attribute/initializer forwarded to ``create_parameter``.
    """

    def __init__(
        self,
        shape,
        dtype,
        param_attr=paddle.nn.initializer.Uniform(low=-5.0, high=5.0),
    ):
        super().__init__()

        self.theta0 = self.create_parameter(
            shape=shape, attr=param_attr, dtype=dtype, is_bias=False
        )
        self.theta1 = self.create_parameter(
            shape=shape, attr=param_attr, dtype=dtype, is_bias=False
        )
        # A is created with to_tensor's default stop_gradient setting.
        self.A = paddle.to_tensor(self._random_complex(shape, dtype))
        # B is explicitly created with stop_gradient=False; it is not
        # registered through create_parameter.
        self.B = paddle.to_tensor(
            self._random_complex(shape, dtype), stop_gradient=False
        )

    @staticmethod
    def _random_complex(shape, dtype):
        """Return a random complex ndarray; real part is drawn first."""
        size = tuple(shape)
        real_part = np.random.random(size).astype(dtype)
        imag_part = np.random.random(size).astype(dtype)
        return real_part + imag_part * 1j

    def forward(self, mode=1):
        """Compute the real-valued loss for the requested ``mode``.

        Args:
            mode: ``1`` builds ``theta0 + theta1 * 1j`` inline in a single
                expression; ``2`` first stores it in ``self.theta`` and then
                uses it; ``3`` uses ``self.B`` instead of the parameters.

        Returns:
            ``loss.real()`` of ``sum(A + z) * sum(A + z).conj()``.

        Raises:
            NotImplementedError: If ``mode`` is not 1, 2 or 3.
        """
        jj = paddle.to_tensor(np.array([1j]).astype(np.complex64))

        # NOTE(review): in every mode the summed term is built twice instead
        # of being reused. Presumably this is intentional, so that gradients
        # reach the inputs through two separate paths -- confirm before
        # deduplicating.
        if mode == 1:
            # Run the whole calculation in one step.
            total = paddle.sum(self.A + (self.theta0 + self.theta1 * jj))
            total_conj = paddle.sum(
                self.A + (self.theta0 + self.theta1 * jj)
            ).conj()
        elif mode == 2:
            # Run in two steps: materialize the complex parameter first.
            self.theta = self.theta0 + self.theta1 * jj
            total = paddle.sum(self.A + self.theta)
            total_conj = paddle.sum(self.A + self.theta).conj()
        elif mode == 3:
            # Run without the trainable parameters.
            total = paddle.sum(self.A + self.B)
            total_conj = paddle.sum(self.A + self.B).conj()
        else:
            raise NotImplementedError(f"unsupported mode: {mode!r}")

        loss = total * total_conj
        return loss.real()


class TestComplexGradAccumulated(unittest.TestCase):
    """Smoke tests that run a few SGD steps on ``Optimization_ex1``.

    Each case trains once with ``optimizer.clear_grad()`` after every step
    and once without it, for every configured device and dtype. The tests
    make no assertions; they pass when nothing raises.
    """

    def setUp(self):
        """Select the devices, dtypes and hyper-parameters to sweep."""
        self.devices = ['cpu']
        if core.is_compiled_with_cuda():
            self.devices.append('gpu')
        self.iter = 3  # number of optimization steps per run
        self.learning_rate = 0.5
        self.dtypes = ['float32', 'float64']
        self.theta_size = [4, 4]

    def _run_training(self, device, dtype, mode, clear_grad):
        """Run ``self.iter`` forward/backward/step iterations.

        Args:
            device: Device string passed to ``paddle.set_device``.
            dtype: Real dtype name forwarded to ``Optimization_ex1``.
            mode: Forward mode forwarded to the layer call.
            clear_grad: Whether to call ``optimizer.clear_grad()`` after
                each ``optimizer.step()``.
        """
        paddle.set_device(device)

        layer = Optimization_ex1(self.theta_size, dtype)
        optimizer = paddle.optimizer.SGD(
            learning_rate=self.learning_rate, parameters=layer.parameters()
        )

        for _ in range(self.iter):
            loss = layer(mode)
            loss.backward()

            optimizer.step()
            if clear_grad:
                optimizer.clear_grad()

    def train(self, device, dtype, mode):
        """Train while clearing gradients after every step."""
        self._run_training(device, dtype, mode, clear_grad=True)

    def train_no_clear_grad(self, device, dtype, mode):
        """Train without ever calling ``optimizer.clear_grad()``."""
        self._run_training(device, dtype, mode, clear_grad=False)

    def _run_mode_on_all_configs(self, mode):
        """Run both training variants for every device/dtype pair."""
        for dev in self.devices:
            for dtype in self.dtypes:
                self.train(dev, dtype, mode)
                self.train_no_clear_grad(dev, dtype, mode)

    def test_case_one_step(self):
        self._run_mode_on_all_configs(1)

    def test_case_two_step(self):
        self._run_mode_on_all_configs(2)

    def test_case_non_param(self):
        self._run_mode_on_all_configs(3)

    def test_eager(self):
        # Re-run all three cases inside the eager-mode test guard.
        with _test_eager_guard():
            self.test_case_one_step()
            self.test_case_two_step()
            self.test_case_non_param()

137 138 139

# Run the unittest runner only when this file is executed as a script.
if __name__ == '__main__':
    unittest.main()