#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
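
"""Gradient check for fluid.layers.DynamicRNN.

Each test builds the same recurrent computation twice: once as a fluid
DynamicRNN program, and once as a pure-numpy reference (BaseRNN). The
forward outputs must match, and the analytic gradients computed by fluid
must agree with numeric gradients estimated by central finite differences.
"""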

from __future__ import print_function

import numpy
import random
import collections
import paddle.fluid as fluid
import unittest
from decorator_helper import *


class Memory(object):
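    """Two-slot memory for the reference RNN: `ex` holds the value carried
    over from the previous time step and `cur` holds the value produced in
    the current step; next() rotates `cur` into `ex` between steps."""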

    def __init__(self, shape, dtype='float32'):
        self.ex = numpy.zeros(shape=shape, dtype=dtype)
        self.cur = None

    def update(self, val):
        assert val.shape == self.ex.shape
        assert val.dtype == self.ex.dtype
        self.cur = val

    def next(self):
        self.ex = self.cur
        self.cur = None

    def __next__(self):
        self.next()

    def reset(self):
        self.ex = numpy.zeros(shape=self.ex.shape, dtype=self.ex.dtype)
        self.cur = None


class Output(object):
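    """Collects the outputs of the reference RNN, one list per sequence."""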

    def __init__(self):
        self.outs = []

    def next_sequence(self):
        self.outs.append([])

    def out(self, val):
        self.outs[-1].append(val)

    def last(self):
        return self.outs[-1][-1]


class BaseRNN(object):
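    """A pure-numpy reference RNN.

    Subclasses implement step(); exe() runs every sequence step by step and
    returns the last output of each sequence, which the tests compare with
    the fluid DynamicRNN result.
    """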

    def __init__(self, ins, mems, params, outs, num_seq=5, max_seq_len=15):
        self.num_seq = num_seq
        self.inputs = collections.defaultdict(list)

        for _ in range(num_seq):
            seq_len = random.randint(1, max_seq_len - 1)
            for iname in ins:
                ishape = ins[iname].get('shape', None)
                idtype = ins[iname].get('dtype', 'float32')
                lst = []
                for _ in range(seq_len):
                    lst.append(numpy.random.random(size=ishape).astype(idtype))
                self.inputs[iname].append(lst)

        self.mems = dict()
        for mname in mems:
            mshape = mems[mname].get('shape', None)
            mdtype = mems[mname].get('dtype', 'float32')
            self.mems[mname] = Memory(shape=mshape, dtype=mdtype)

        self.params = dict()
        for pname in params:
            pshape = params[pname].get('shape', None)
            pdtype = params[pname].get('dtype', 'float32')
            self.params[pname] = numpy.random.random(size=pshape).astype(pdtype)

        self.outputs = dict()

        for oname in outs:
            self.outputs[oname] = Output()

    def step(self, **kwargs):
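        """One time step of the reference RNN; must be overridden."""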
        raise NotImplementedError()

    def exe(self):
        retv = dict()
        for out in self.outputs:
            retv[out] = []

        for seq_id in range(self.num_seq):
            for mname in self.mems:
                self.mems[mname].reset()
            for out in self.outputs:
                self.outputs[out].next_sequence()

            iname0 = list(self.inputs.keys())[0]
            seq_len = len(self.inputs[iname0][seq_id])

            for step_id in range(seq_len):
                xargs = dict()

                for iname in self.inputs:
                    xargs[iname] = self.inputs[iname][seq_id][step_id]

                for mname in self.mems:
                    xargs[mname] = self.mems[mname]

                for pname in self.params:
                    xargs[pname] = self.params[pname]

                for out in self.outputs:
                    xargs[out] = self.outputs[out]

                self.step(**xargs)

                for mname in self.mems:
                    next(self.mems[mname])

            for out in self.outputs:
                retv[out].append(self.outputs[out].last())

        for out in retv:
            retv[out] = numpy.array(retv[out])
        return retv

    def to_feed(self, place):
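        """Pack the python inputs into LoD tensors that fluid can feed: each
        input's sequences are flattened into one tensor and the per-sequence
        lengths are recorded as its recursive sequence lengths."""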
        feed_dict = dict()

        for iname in self.inputs:
            lod = []
            np_flatten = []
            for seq_id in range(len(self.inputs[iname])):
                seq_len = len(self.inputs[iname][seq_id])
                lod.append(seq_len)
                np_flatten.extend(self.inputs[iname][seq_id])

            t = fluid.Tensor()
            t.set(numpy.array(np_flatten), place)
            t.set_recursive_sequence_lengths([lod])
            feed_dict[iname] = t

        for pname in self.params:
            feed_dict[pname] = self.params[pname]
        return feed_dict

    def get_numeric_gradient_of_param(self, param_name, delta=0.001):
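        """Estimate d(mean of all outputs)/d(param) by central difference:
        g[i][j] ~ (f(p[i][j] + delta) - f(p[i][j] - delta)) / (2 * delta).
        """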
        p = self.params[param_name]
        if len(p.shape) != 2:
            raise ValueError("Not support get numeric gradient of an parameter,"
                             " which is not matrix")
        g = numpy.zeros(shape=p.shape, dtype=p.dtype)

        for i in range(p.shape[0]):
            for j in range(p.shape[1]):
                o = p[i][j]
                p[i][j] += delta
                pos = self._exe_mean_out_()
                p[i][j] -= 2 * delta
                neg = self._exe_mean_out_()
                p[i][j] = o
                g[i][j] = (pos - neg) / (delta * 2)
        return g

    def get_numeric_gradient_of_input(self,
                                      input_name,
                                      delta=0.001,
                                      return_one_tensor=True):
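        """Estimate the numeric gradient of one input with the same central
        difference scheme; if return_one_tensor is set, the per-sequence
        gradients are concatenated into a single flat array."""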
        ipt = self.inputs[input_name]
        grad = []

        for seq in ipt:
            seq_grad = []
            for item in seq:
                item_grad = numpy.zeros(shape=item.shape, dtype=item.dtype)
                if len(item.shape) != 1:
                    raise ValueError("Not support")

                for i in range(len(item)):
                    o = item[i]
                    item[i] += delta
                    pos = self._exe_mean_out_()
                    item[i] -= 2 * delta
                    neg = self._exe_mean_out_()
                    item[i] = o
                    item_grad[i] = (pos - neg) / (delta * 2)
                seq_grad.append(item_grad)
            grad.append(seq_grad)

        if not return_one_tensor:
            return grad

        for i in range(len(grad)):
            grad[i] = numpy.concatenate(grad[i])
        grad = numpy.concatenate(grad)
        return grad

    def _exe_mean_out_(self):
        outs = self.exe()
        return numpy.array([o.mean() for o in outs.values()]).mean()


class SeedFixedTestCase(unittest.TestCase):
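    """Base class that pins numpy's and random's seeds for each test class
    and restores the original RNG states afterwards."""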

    @classmethod
    def setUpClass(cls):
        """Fix random seeds to remove randomness from tests"""
        cls._np_rand_state = numpy.random.get_state()
        cls._py_rand_state = random.getstate()

        numpy.random.seed(123)
        random.seed(124)

    @classmethod
    def tearDownClass(cls):
        """Restore random seeds"""
        numpy.random.set_state(cls._np_rand_state)
        random.setstate(cls._py_rand_state)


class TestSimpleMul(SeedFixedTestCase):
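    """Checks a DynamicRNN whose step is a single fc (Out = X . W) against
    the numpy reference and its numeric gradients."""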
    DATA_NAME = 'X'
    DATA_WIDTH = 32
    PARAM_NAME = 'W'
    HIDDEN_WIDTH = 10
    OUT_NAME = 'Out'

    class SimpleMul(BaseRNN):

        def __init__(self):
            base = TestSimpleMul
            super(base.SimpleMul,
                  self).__init__({base.DATA_NAME: {
                      'shape': [base.DATA_WIDTH]
                  }}, {}, {
                      base.PARAM_NAME: {
                          'shape': [base.DATA_WIDTH, base.HIDDEN_WIDTH]
                      }
                  }, [base.OUT_NAME])

        def step(self, X, W, Out):
            Out.out(numpy.matmul(X, W))

    # Tested many times locally to ensure the fixed random seed does not break CI
    # @many_times(10)
    @prog_scope()
    def test_forward_backward(self):
        py_rnn = TestSimpleMul.SimpleMul()
        dat = fluid.layers.data(name=self.DATA_NAME,
                                shape=[self.DATA_WIDTH],
                                lod_level=1)
        dat.stop_gradient = False

        rnn = fluid.layers.DynamicRNN()
        with rnn.block():
            d = rnn.step_input(dat)
            o = fluid.layers.fc(input=d,
                                param_attr=self.PARAM_NAME,
                                bias_attr=False,
                                size=self.HIDDEN_WIDTH,
                                act=None)
            rnn.output(o)

        out = rnn()
        out = fluid.layers.sequence_pool(out, pool_type='last')
        loss = fluid.layers.mean(out)
        fluid.backward.append_backward(loss)

        cpu = fluid.CPUPlace()
        exe = fluid.Executor(cpu)
        out, w_g, i_g = list(
            map(
                numpy.array,
                exe.run(feed=py_rnn.to_feed(cpu),
                        fetch_list=[
                            out, self.PARAM_NAME + "@GRAD",
                            self.DATA_NAME + "@GRAD"
                        ],
                        return_numpy=False)))
        out_by_python = py_rnn.exe()[self.OUT_NAME]
        self.assertTrue(numpy.allclose(out, out_by_python))
        w_g_num = py_rnn.get_numeric_gradient_of_param(self.PARAM_NAME)
        self.assertTrue(numpy.allclose(w_g_num, w_g, rtol=0.05))
        i_g_num = py_rnn.get_numeric_gradient_of_input(
            input_name=self.DATA_NAME)
        i_g_num = i_g_num.reshape(i_g.shape)
        self.assertTrue(numpy.allclose(i_g_num, i_g, rtol=0.05))


class TestSimpleMulWithMemory(SeedFixedTestCase):
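    """Same check as TestSimpleMul, but the step accumulates into a memory
    (o = X . W + Mem.ex), so gradients flow across time steps."""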
    DATA_WIDTH = 32
    HIDDEN_WIDTH = 20
    DATA_NAME = 'X'
    PARAM_NAME = 'W'

    class SimpleMulWithMemory(BaseRNN):

        def __init__(self):
            super(TestSimpleMulWithMemory.SimpleMulWithMemory, self).__init__(
                {
                    TestSimpleMulWithMemory.DATA_NAME: {
                        'shape': [TestSimpleMulWithMemory.DATA_WIDTH]
                    }
                }, {'Mem': {
                    'shape': [TestSimpleMulWithMemory.HIDDEN_WIDTH]
                }}, {
                    TestSimpleMulWithMemory.PARAM_NAME: {
                        'shape': [
                            TestSimpleMulWithMemory.DATA_WIDTH,
                            TestSimpleMulWithMemory.HIDDEN_WIDTH
                        ]
                    }
                }, ['Out'])

        def step(self, X, Mem, W, Out):
            o = numpy.matmul(X, W)
            assert isinstance(Mem, Memory)
            o += Mem.ex
            Mem.update(o)
            assert isinstance(Out, Output)
            Out.out(o)

    # many_times is used locally for debugging, to make sure the calculation is stable.
    # @many_times(10)
    @prog_scope()
    def test_forward_backward(self):
        py_rnn = TestSimpleMulWithMemory.SimpleMulWithMemory()
        data = fluid.layers.data(name=self.DATA_NAME,
                                 shape=[self.DATA_WIDTH],
                                 lod_level=1)
        data.stop_gradient = False
        rnn = fluid.layers.DynamicRNN()
        with rnn.block():
            d = rnn.step_input(data)
            mem = rnn.memory(value=0.0, shape=[self.HIDDEN_WIDTH])
            hidden = fluid.layers.fc(input=d,
                                     size=self.HIDDEN_WIDTH,
                                     param_attr=self.PARAM_NAME,
                                     bias_attr=False,
                                     act=None)
            o = fluid.layers.elementwise_add(x=hidden, y=mem)
            rnn.update_memory(mem, o)
            rnn.output(o)

        out = rnn()
        last = fluid.layers.sequence_pool(input=out, pool_type='last')
        loss = fluid.layers.mean(last)
        fluid.backward.append_backward(loss)

        cpu = fluid.CPUPlace()
        exe = fluid.Executor(cpu)
        feed = py_rnn.to_feed(cpu)
        last_np, w_g, i_g = list(
            map(
                numpy.array,
                exe.run(feed=feed,
                        fetch_list=[
                            last, self.PARAM_NAME + "@GRAD",
                            self.DATA_NAME + "@GRAD"
                        ],
                        return_numpy=False)))
        last_by_py, = list(py_rnn.exe().values())
        w_g_num = py_rnn.get_numeric_gradient_of_param(self.PARAM_NAME)
        self.assertTrue(numpy.allclose(last_np, last_by_py))

        self.assertTrue(numpy.allclose(w_g_num, w_g, rtol=0.1))
        i_g_num = py_rnn.get_numeric_gradient_of_input(self.DATA_NAME)
        i_g_num = i_g_num.reshape(i_g.shape)

        # Since this RNN performs many floating-point additions, the result
        # can be numerically unstable, so a loose rtol of 0.1 is used.
        self.assertTrue(numpy.allclose(i_g_num, i_g, rtol=0.1))


if __name__ == '__main__':
    unittest.main()