#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
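
"""Unit tests for the rmsprop operator and the paddle.optimizer.RMSProp API."""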

from __future__ import print_function

import unittest

import numpy as np
import paddle.fluid.core as core
from paddle.fluid.op import Operator
import paddle.fluid as fluid
import paddle


def create_selected_rows_and_tensor(scope, place, height, row_num,
                                    embedding_size):
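    """Create a SelectedRows gradient and its dense-tensor equivalent.

    The same random values are written both as a SelectedRows variable
    ("@selected_rows@") and as an accumulated dense tensor ("grad"), so
    the sparse and dense paths of the rmsprop op can be checked against
    each other.
    """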
    sr = scope.var("@selected_rows@").get_selected_rows()
    tensor = scope.var("grad").get_tensor()

    # Sample (possibly repeated) row indices in [0, height).
    rows = np.random.randint(low=0, high=height,
                             size=[row_num]).astype('int64')
    sr_val = np.random.random(size=[row_num, embedding_size]).astype('float32')

    sr.set_height(height)
    sr.set_rows(rows)
    sr.get_tensor().set(sr_val, place)

    # Dense equivalent: rows that appear multiple times are accumulated.
    tensor_val = np.zeros(shape=[height, embedding_size], dtype='float32')
    for i in range(row_num):
        row = rows[i]
        tensor_val[row, :] += sr_val[i, :]

    tensor.set(tensor_val, place)
    return tensor_val, sr_val


class TestBase(unittest.TestCase):

    def setup(self,
              place,
              is_sparse,
              centered,
              size,
              row_num=None,
              epsilon=1e-6):
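        """Populate the scope and compute NumPy reference results.

        ms_out, mg_out (centered only), moment_out and param_out hold
        the values the rmsprop op is expected to reproduce.
        """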
        np.random.seed(5)  # fix seed

        self.scope = fluid.global_scope()
        self.place = place

        self.param_name = "param"
        self.param = np.random.random(size).astype("float32")

        self.mean_square_name = "mean_square"
        self.mean_square = np.random.uniform(low=1, high=2,
                                             size=size).astype("float32")

        self.mean_grad_name = "mean_grad"
        self.mean_grad = np.random.random(size).astype("float32")

        self.lr_name = "lr"
        self.learning_rate = np.array([0.01]).astype("float32")

        self.grad_name = "grad"

        self.is_sparse = is_sparse
        if self.is_sparse:
            self.grad_sr_name = "@selected_rows@"
            self.grad, self.grad_sr = create_selected_rows_and_tensor(
                self.scope, place, size[0], row_num, size[1])
        else:
            self.grad = np.random.random(size).astype("float32")
            grad_tensor = self.scope.var(self.grad_name).get_tensor()
            grad_tensor.set(self.grad, place)

        self.moment_name = "moment"
        self.moment = np.random.uniform(low=0, high=1,
                                        size=size).astype("float32")

        self.epsilon = epsilon
        self.decay = 0.9
        self.momentum = 0.1
        self.centered = centered

        # NumPy reference implementation of the RMSProp update:
        #   ms_out = decay * mean_square + (1 - decay) * grad**2
        #   mg_out = decay * mean_grad + (1 - decay) * grad      (centered only)
        #   moment_out = momentum * moment
        #                + lr * grad / sqrt(ms_out [- mg_out**2] + epsilon)
        #   param_out = param - moment_out
        self.ms_out = self.decay * self.mean_square + (
            1 - self.decay) * self.grad * self.grad
        if centered:
            self.mg_out = self.decay * self.mean_grad + (
                1 - self.decay) * self.grad
            self.moment_out = self.momentum * self.moment + \
                self.learning_rate * self.grad / np.sqrt(
                    self.ms_out - np.square(self.mg_out) + self.epsilon)
        else:
            self.moment_out = self.momentum * self.moment + \
                self.learning_rate * self.grad / np.sqrt(
                    self.ms_out + self.epsilon)

        self.param_out = self.param - self.moment_out

        # Create and initialize the operator's input variables in the scope.
        self.param_tensor = self.scope.var(self.param_name).get_tensor()
        self.param_tensor.set(self.param, place)

        self.mean_square_tensor = self.scope.var(
            self.mean_square_name).get_tensor()
        self.mean_square_tensor.set(self.mean_square, place)

        lr = self.scope.var(self.lr_name).get_tensor()
        lr.set(self.learning_rate, place)

        self.moment_tensor = self.scope.var(self.moment_name).get_tensor()
        self.moment_tensor.set(self.moment, place)

        if self.centered:
            self.mean_grad_tensor = self.scope.var(
                self.mean_grad_name).get_tensor()
            self.mean_grad_tensor.set(self.mean_grad, place)

    def check(self, actual_t, expect_t, place, out_name, atol=1e-5):
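        """Assert that an op output matches its NumPy reference value."""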
        self.assertTrue(
            np.allclose(actual_t, expect_t, atol=atol),
            "Output (" + out_name + ") has diff at " + str(place) +
            "\nExpect " + str(expect_t) + "\nBut got " + str(actual_t))


class TestRmspropOp(TestBase):

    def check_with_place(self,
                         place,
                         is_sparse,
                         centered,
                         size,
                         row_num=None,
                         epsilon=1e-6):
        self.setup(place, is_sparse, centered, size, row_num, epsilon)
        self.run_and_check()

    def run_and_check(self):
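        """Build the rmsprop op over the scope variables, run it, and
        compare every output against its NumPy reference."""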
        grad_name = self.grad_sr_name if self.is_sparse else self.grad_name

        kwargs = {
            'Param': self.param_name,
            'Grad': grad_name,
            'MeanSquare': self.mean_square_name,
            'Moment': self.moment_name,
            'LearningRate': self.lr_name,
            'ParamOut': self.param_name,
            'MeanSquareOut': self.mean_square_name,
            'MomentOut': self.moment_name,
            'epsilon': self.epsilon,
            'decay': self.decay,
            'momentum': self.momentum,
            'centered': self.centered
        }

        if self.centered:
            kwargs['MeanGrad'] = self.mean_grad_name
            kwargs['MeanGradOut'] = self.mean_grad_name

        # The output vars alias the input vars, so the op updates them in place.
        rmsprop_op = Operator('rmsprop', **kwargs)
        atol = 1e-6

        rmsprop_op.run(self.scope, self.place)

        self.check(np.array(self.mean_square_tensor),
                   self.ms_out,
                   self.place,
                   self.mean_square_name,
                   atol=atol)
        self.check(np.array(self.moment_tensor),
                   self.moment_out,
                   self.place,
                   self.moment_name,
                   atol=atol)
        self.check(np.array(self.param_tensor),
                   self.param_out,
                   self.place,
                   self.param_name,
                   atol=atol)

        if self.centered:
            self.check(np.array(self.mean_grad_tensor), self.mg_out, self.place,
                       self.mean_grad_name)

    def test_rmsprop(self):
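        # Exercise dense and sparse gradients, centered and uncentered,
        # on every available place, each combination in a fresh scope.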
        places = [core.CPUPlace()]
        if core.is_compiled_with_cuda():
            places.append(core.CUDAPlace(0))

        size = (128, 320)
        for place in places:
            for centered in [False, True]:
                with fluid.scope_guard(core.Scope()):
                    self.check_with_place(place,
                                          is_sparse=False,
                                          centered=centered,
                                          size=size)

                with fluid.scope_guard(core.Scope()):
                    self.check_with_place(place,
                                          is_sparse=True,
                                          centered=centered,
                                          row_num=512,
                                          size=size)

                with fluid.scope_guard(core.Scope()):
                    self.check_with_place(place,
                                          is_sparse=True,
                                          centered=centered,
                                          row_num=60,
                                          size=size)


class TestRMSPropV2(unittest.TestCase):

    def test_rmsprop_dygraph(self):
        paddle.disable_static()
        value = np.arange(26).reshape(2, 13).astype("float32")
        a = paddle.to_tensor(value)
        linear = paddle.nn.Linear(13, 5)
        # This can be any optimizer supported by dygraph.
        rmsprop = paddle.optimizer.RMSProp(learning_rate=0.01,
                                           parameters=linear.parameters(),
                                           weight_decay=0.01)
        out = linear(a)
        out.backward()
        rmsprop.step()
        rmsprop.clear_gradients()

    def test_rmsprop(self):
        paddle.enable_static()
        place = fluid.CPUPlace()
        main = fluid.Program()
        with fluid.program_guard(main):
            x = fluid.layers.data(name='x', shape=[13], dtype='float32')
            y = fluid.layers.data(name='y', shape=[1], dtype='float32')
            y_predict = fluid.layers.fc(input=x, size=1, act=None)
            cost = fluid.layers.square_error_cost(input=y_predict, label=y)
            avg_cost = fluid.layers.mean(cost)

            rms_optimizer = paddle.optimizer.RMSProp(learning_rate=0.1)
            rms_optimizer.minimize(avg_cost)

            fetch_list = [avg_cost]
            train_reader = paddle.batch(paddle.dataset.uci_housing.train(),
                                        batch_size=1)
            feeder = fluid.DataFeeder(place=place, feed_list=[x, y])
            exe = fluid.Executor(place)
            exe.run(fluid.default_startup_program())
            for data in train_reader():
                exe.run(main, feed=feeder.feed(data), fetch_list=fetch_list)

    def test_raise_error(self):
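        # Constructing RMSProp with a None hyper-parameter must raise.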
        self.assertRaises(ValueError, paddle.optimizer.RMSProp, None)
        self.assertRaises(ValueError,
                          paddle.optimizer.RMSProp,
                          learning_rate=0.1,
                          rho=None)
        self.assertRaises(ValueError,
                          paddle.optimizer.RMSProp,
                          learning_rate=0.1,
                          epsilon=None)
        self.assertRaises(ValueError,
                          paddle.optimizer.RMSProp,
                          learning_rate=0.1,
                          momentum=None)

    def test_rmsprop_op_invalid_input(self):
        paddle.disable_static()
        linear = paddle.nn.Linear(10, 10)
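        # Out-of-range hyper-parameters must be rejected at construction time.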
        with self.assertRaises(ValueError):
            rmsprop = paddle.optimizer.RMSProp(0.1,
                                               epsilon=-1,
                                               parameters=linear.parameters())
        with self.assertRaises(ValueError):
            rmsprop = paddle.optimizer.RMSProp(0.1,
                                               momentum=-1,
                                               parameters=linear.parameters())
        with self.assertRaises(ValueError):
            rmsprop = paddle.optimizer.RMSProp(0.1,
                                               rho=-1,
                                               parameters=linear.parameters())


class TestRMSPropV2Group(TestRMSPropV2):

    def test_rmsprop_dygraph(self):
        paddle.disable_static()
        value = np.arange(26).reshape(2, 13).astype("float32")
        a = paddle.to_tensor(value)
        linear_1 = paddle.nn.Linear(13, 5)
        linear_2 = paddle.nn.Linear(5, 3)
        # This can be any optimizer supported by dygraph.
        rmsprop = paddle.optimizer.RMSProp(
            learning_rate=0.01,
            parameters=[{
                'params': linear_1.parameters()
            }, {
                'params': linear_2.parameters(),
                'weight_decay': 0.001
            }],
            weight_decay=0.01)
        out = linear_1(a)
        out = linear_2(out)
        out.backward()
        rmsprop.step()
        rmsprop.clear_gradients()


if __name__ == "__main__":
    paddle.enable_static()
    unittest.main()