#   Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import unittest

import numpy as np

import paddle
import paddle.fluid as fluid
import paddle.fluid.core as core
from paddle.fluid.dygraph.base import to_variable
from paddle.fluid.framework import _test_eager_guard
from paddle.fluid.optimizer import SGDOptimizer


class SimpleNet(paddle.nn.Layer):
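    # Minimal layer wrapping a sparse Embedding: with sparse=True, the gradient
    # of the embedding weight is a SelectedRows rather than a dense tensor,
    # which is the behaviour exercised by the tests below.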
    def __init__(self, vocab_size, hidden_size, dtype):
        super().__init__()
        self.emb = paddle.nn.Embedding(
            vocab_size,
            hidden_size,
            weight_attr='emb.w',
            sparse=True,
        )

    def forward(self, input):
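        # Return both the looked-up embeddings and the embedding layer itself,
        # so callers can inspect the layer's weight gradient after backward.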
        input_emb = self.emb(input)
        return input_emb, self.emb


class TestSimpleNet(unittest.TestCase):
    def func_selectedrows_gradient1(self):
        places = [fluid.CPUPlace()]
        if core.is_compiled_with_cuda():
            places.append(fluid.CUDAPlace(0))

        for place in places:
            for dtype in ["float32", "float64"]:
                for sort_sum_gradient in [True, False]:
                    paddle.disable_static(place)
                    fluid.set_flags(
                        {'FLAGS_sort_sum_gradient': sort_sum_gradient}
                    )
                    # grad_clip = fluid.clip.GradientClipByGlobalNorm(5.0)

                    input_word = np.array([[1, 2], [2, 1]]).astype('int64')
                    input = paddle.to_tensor(input_word)

                    simplenet = SimpleNet(20, 32, dtype)
                    adam = SGDOptimizer(
                        learning_rate=0.001,
                        parameter_list=simplenet.parameters(),
                    )  # grad_clip=grad_clip
                    input_emb, emb = simplenet(input)

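                    # No gradients should be populated before backward runs.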
                    self.assertIsNone(emb.weight.gradient())
                    self.assertIsNone(input_emb.gradient())

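                    # backward() on the embedding output populates the sparse
                    # (SelectedRows) gradient of emb.weight.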
                    input_emb.backward()
                    adam.minimize(input_emb)
                    self.assertIsNotNone(emb.weight.gradient())

                    emb.clear_gradients()
                    self.assertIsNone(emb.weight.gradient())

                    input_emb.clear_gradient()
                    self.assertIsNotNone(input_emb.gradient())
                    paddle.enable_static()

    def test_selectedrows_gradient1(self):
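        # Retain gradients on intermediate tensors so input_emb.gradient() is
        # observable; run the check both under the eager guard and in legacy
        # dygraph mode.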
        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
        with _test_eager_guard():
            self.func_selectedrows_gradient1()
        self.func_selectedrows_gradient1()
        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})

    def func_selectedrows_gradient2(self):
        places = [fluid.CPUPlace()]
        if core.is_compiled_with_cuda():
            places.append(fluid.CUDAPlace(0))

        for place in places:
            for sort_sum_gradient in [True, False]:
                with fluid.dygraph.guard(place):
                    fluid.set_flags(
                        {'FLAGS_sort_sum_gradient': sort_sum_gradient}
                    )
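                    # Unlike the first test, this one also applies global-norm
                    # gradient clipping to the SelectedRows gradients.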
                    grad_clip = fluid.clip.GradientClipByGlobalNorm(5.0)

                    input_word = np.array([[1, 2], [2, 1]]).astype('int64')
                    input = to_variable(input_word)

                    simplenet = SimpleNet(20, 32, "float32")
                    adam = SGDOptimizer(
                        learning_rate=0.001,
                        parameter_list=simplenet.parameters(),
                        grad_clip=grad_clip,
                    )
                    input_emb, emb = simplenet(input)

                    self.assertIsNone(emb.weight.gradient())
                    self.assertIsNone(input_emb.gradient())

                    input_emb.backward()
                    adam.minimize(input_emb)
                    self.assertIsNotNone(emb.weight.gradient())

                    emb.clear_gradients()
                    self.assertIsNone(emb.weight.gradient())

                    input_emb.clear_gradient()
                    self.assertIsNotNone(input_emb.gradient())

    def test_selectedrows_gradient2(self):
        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": True})
        with _test_eager_guard():
            self.func_selectedrows_gradient2()
        self.func_selectedrows_gradient2()
        fluid.set_flags({"FLAGS_retain_grad_for_all_tensor": False})


if __name__ == '__main__':
    unittest.main()