test_lstmp_op.py 11.2 KB
Newer Older
1
#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
2 3 4 5 6 7 8 9 10 11 12 13
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.
14 15

from __future__ import print_function
16 17
import unittest
import numpy as np
18
import test_lstm_op as LstmTest
19

20
# Maps an activation name (as used in the op attributes) to the NumPy
# reference implementation exported by test_lstm_op.
ACTIVATION = {
    name: getattr(LstmTest, name)
    for name in ('identity', 'sigmoid', 'tanh', 'relu')
}


# LSTM with recurrent projection Layer
def lstmp(
        input,  # T x 4D
        lod,  # 1 x N
        h0=None,  # N x P
        c0=None,  # N x D
        w_r=None,  # P x 4D
        w_rh=None,  # D x P
        w_b=None,  # 1 x 4D
        w_c=None,  # 1 x 3D
        is_reverse=False,
        proj_clip=0.0,
        cell_clip=0.0,
        act_gate=None,
        act_cell=None,
        act_cand=None,
        act_proj=None):
    """NumPy reference implementation of an LSTM with recurrent projection.

    T is the total number of time steps over all sequences, N the number
    of sequences, D the cell size and P the projection size.

    Args:
        input: T x 4D array of pre-computed input contributions. Along
            axis 1 the four D-wide chunks are, in order: candidate,
            input gate, forget gate, output gate.
        lod: ``lod[0]`` lists the length of each of the N sequences; the
            sequences are stored back to back in ``input``.
        h0: N x P initial projection state; zeros when None.
        c0: N x D initial cell state; zeros when None.
        w_r: P x 4D recurrent weight applied to the previous projection.
        w_rh: D x P projection weight applied to the hidden output.
        w_b: optional 1 x 4D bias added to every row of ``input``.
        w_c: optional 1 x 3D peephole weights (input, forget, output
            gate); None disables peepholes.
        is_reverse: process every sequence from its last step to its
            first; outputs are returned in the original step order.
        proj_clip: when > 0, clip the projection to [-proj_clip, proj_clip].
        cell_clip: when > 0, clip the cell state to [-cell_clip, cell_clip].
        act_gate, act_cell, act_cand, act_proj: callables applied
            element-wise to the gates, the cell output, the candidate
            and the projection respectively.

    Returns:
        Tuple ``(projection, cell)`` of float64 arrays with shapes
        T x P and T x D.

    Raises:
        ValueError: if a weight matrix or an activation is missing.
    """
    if w_r is None or w_rh is None:
        raise ValueError('lstmp requires both w_r and w_rh')
    if any(act is None for act in (act_gate, act_cell, act_cand, act_proj)):
        raise ValueError('lstmp requires all four activation callables')

    seq_lens = lod[0]
    batch_size = len(seq_lens)
    proj_size = w_r.shape[0]
    hidden_size = input.shape[1] // 4

    # offset[i]:offset[i + 1] is the row range of sequence i.
    offset = [0]
    for seq_len in seq_lens:
        offset.append(offset[-1] + seq_len)

    # A missing initial state means "start from zeros".
    if h0 is None:
        h0 = np.zeros((batch_size, proj_size), dtype='float64')
    if c0 is None:
        c0 = np.zeros((batch_size, hidden_size), dtype='float64')

    # The peephole weights are loop-invariant, so split them only once.
    if w_c is not None:
        w_ic, w_fc, w_oc = np.split(w_c, 3, axis=1)

    def _clip(a, limit):
        # A non-positive limit disables clipping.
        return np.clip(a, -limit, limit) if limit > 0.0 else a

    def _step(x, r_pre, c_pre):
        # One time step: returns the new projection and cell state.
        g = np.dot(r_pre, w_r) + x
        g = np.reshape(g, (1, g.size))  # 1 x 4D
        c, g_i, g_f, g_o = np.split(g, 4, axis=1)
        if w_c is None:
            g_i = act_gate(g_i)  # 1 x D
            g_f = act_gate(g_f)  # 1 x D
        else:
            g_i = act_gate(g_i + w_ic * c_pre)  # 1 x D
            g_f = act_gate(g_f + w_fc * c_pre)  # 1 x D
        c = _clip(g_f * c_pre + g_i * act_cand(c), cell_clip)  # 1 x D

        # The output gate peeks at the *updated* (already clipped) cell.
        if w_c is None:
            g_o = act_gate(g_o)  # 1 x D
        else:
            g_o = act_gate(g_o + w_oc * c)  # 1 x D
        h = g_o * act_cell(c)

        # projection
        r = _clip(act_proj(np.dot(h, w_rh)), proj_clip)  # 1 x P
        return r, c

    def _reverse(x, offset):
        # Flip the step order inside each sequence, keeping sequence order.
        y = np.zeros_like(x)
        for i in range(len(offset) - 1):
            b, e = offset[i], offset[i + 1]
            y[b:e, :] = np.flip(x[b:e, :], 0)
        return y

    steps = _reverse(input, offset) if is_reverse else input
    if w_b is not None:
        steps = steps + w_b  # bias broadcasts over the T rows

    # recurrent projection state
    projection = []
    cell = []
    for i in range(batch_size):
        # compute one sequence
        r_pre = h0[i]
        c_pre = c0[i]  # 1 x D
        for x_t in steps[offset[i]:offset[i + 1], :]:
            # compute one step
            r_pre, c_pre = _step(x_t, r_pre, c_pre)
            projection.append(r_pre.flatten())
            cell.append(c_pre.flatten())

    # reshape keeps the outputs 2-D even when there are no time steps.
    projection = np.array(projection).astype('float64').reshape(-1, proj_size)
    cell = np.array(cell).astype('float64').reshape(-1, hidden_size)

    if is_reverse:
        projection = _reverse(projection, offset)
        cell = _reverse(cell, offset)

    assert projection.shape == (input.shape[0], proj_size)  # T x P
    assert cell.shape == (input.shape[0], hidden_size)  # T x D
    return projection, cell


126
class TestLstmpOp(LstmTest.TestLstmOp):
    """Checks the 'lstmp' operator against the NumPy reference ``lstmp``.

    Subclasses override ``reset_argument`` to vary the configuration.
    """

    def reset_argument(self):
        """Hook called from setUp; the base configuration changes nothing."""
        pass

    def setUp(self):
        """Build random inputs, reference outputs and the op attributes."""
        # set_argument is not defined in this class; presumably inherited
        # from LstmTest.TestLstmOp and the source of lod, D, the activation
        # names and the boolean flags read below.
        self.set_argument()
        self.P = 10  # projection size
        self.act_proj = self.act_cell

        self.reset_argument()
        self.op_type = 'lstmp'

        seq_lens = self.lod[0]
        total_steps = sum(seq_lens)
        num_seqs = len(seq_lens)
        gate_width = 4 * self.D

        # NOTE: the order of the random draws below is kept fixed so that a
        # seeded run produces the same data.
        x = np.random.normal(size=(total_steps, gate_width)).astype('float64')
        if self.has_initial_state:
            h0 = np.random.normal(size=(num_seqs, self.P)).astype('float64')
            c0 = np.random.normal(size=(num_seqs, self.D)).astype('float64')
        else:
            h0 = np.zeros((num_seqs, self.P)).astype('float64')
            c0 = np.zeros((num_seqs, self.D)).astype('float64')
        w = np.random.normal(size=(self.P, gate_width)).astype('float64')

        # With peepholes the bias carries 3 * D extra peephole weights
        # after the 4 * D gate biases.
        bias_width = 7 * self.D if self.use_peepholes else gate_width
        b = np.random.normal(size=(1, bias_width)).astype('float64')
        w_b = b[:, 0:gate_width]
        w_c = None
        if self.use_peepholes:
            w_c = b[:, gate_width:]

        w_rh = np.random.normal(size=(self.D, self.P)).astype('float64')
        proj_clip = 0.1
        cell_clip = 0.1

        activations = [
            ACTIVATION[name]
            for name in (self.act_gate, self.act_cell, self.act_cand,
                         self.act_proj)
        ]
        r, c = lstmp(x, self.lod, h0, c0, w, w_rh, w_b, w_c, self.is_reverse,
                     proj_clip, cell_clip, *activations)

        self.inputs = {
            'Input': (x, self.lod),
            'Weight': w,
            'ProjWeight': w_rh,
            'Bias': b,
        }
        if self.has_initial_state:
            self.inputs.update({'H0': h0, 'C0': c0})

        self.outputs = {
            'Projection': (r, self.lod),
            'Cell': (c, self.lod),
        }
        self.attrs = {
            'use_peepholes': self.use_peepholes,
            'is_reverse': self.is_reverse,
            'proj_clip': proj_clip,
            'cell_clip': cell_clip,
            'gate_activation': self.act_gate,
            'cell_activation': self.act_cell,
            'candidate_activation': self.act_cand,
            'proj_activation': self.act_proj
        }

    def test_check_output(self):
        self.check_output(atol=1e-8)

    def test_check_grad(self):
        # TODO(qingqing) remove following lines after the check_grad is refined.
        num_seqs = len(self.lod[0])
        placeholder_widths = (
            ('BatchGate', 4 * self.D),
            ('BatchHidden', self.D),
            ('BatchCellPreAct', self.D),
        )
        for out_name, width in placeholder_widths:
            self.outputs[out_name] = np.zeros(
                (num_seqs, width)).astype('float64')

        checked_inputs = ['Input', 'Weight', 'ProjWeight', 'Bias']
        self.check_grad(
            checked_inputs, ['Projection'],
            max_relative_error=1e-2,
            numeric_grad_delta=0.0000005)
201 202


203
class TestLstmpOpHasInitial(TestLstmpOp):
    """LSTMP operator test with explicit initial states H0 and C0.

    Besides the full gradient check, each ``test_check_grad_ingore_*``
    case excludes exactly one input from gradient computation.
    """

    def reset_argument(self):
        self.has_initial_state = True

    def _check_projection_grad(self, inputs_to_check, no_grad_set=None):
        """Run check_grad on 'Projection' for the given inputs.

        Args:
            inputs_to_check: names of the inputs whose gradients are checked.
            no_grad_set: optional set of input names excluded from gradient
                computation; omitted from the call when None.
        """
        # TODO(qingqing) remove following lines after the check_grad is refined.
        N = len(self.lod[0])
        self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
        self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
        self.outputs['BatchCellPreAct'] = np.zeros(
            (N, self.D)).astype('float64')

        kwargs = {
            'max_relative_error': 1e-2,
            'numeric_grad_delta': 0.0000005,
        }
        if no_grad_set is not None:
            kwargs['no_grad_set'] = no_grad_set
        self.check_grad(inputs_to_check, ['Projection'], **kwargs)

    # NOTE: the no_grad_set arguments below are one-element sets holding the
    # variable name. set('Bias') would instead build {'B', 'i', 'a', 's'},
    # which never matches the variable.

    def test_check_grad(self):
        self._check_projection_grad(
            ['Input', 'Weight', 'ProjWeight', 'Bias', 'H0', 'C0'])

    def test_check_grad_ingore_bias(self):
        self._check_projection_grad(
            ['Input', 'ProjWeight', 'Weight'], no_grad_set={'Bias'})

    def test_check_grad_ingore_weight(self):
        self._check_projection_grad(
            ['Input', 'ProjWeight', 'Bias'], no_grad_set={'Weight'})

    def test_check_grad_ingore_proj_weight(self):
        self._check_projection_grad(
            ['Input', 'Weight', 'Bias'], no_grad_set={'ProjWeight'})

    def test_check_grad_ingore_input(self):
        self._check_projection_grad(
            ['Weight', 'ProjWeight', 'Bias'], no_grad_set={'Input'})

    def test_check_grad_ingore_h0(self):
        self._check_projection_grad(
            ['Input', 'Weight', 'ProjWeight', 'Bias', 'C0'],
            no_grad_set={'H0'})

    def test_check_grad_ingore_c0(self):
        self._check_projection_grad(
            ['Input', 'Weight', 'ProjWeight', 'Bias', 'H0'],
            no_grad_set={'C0'})


293
class TestLstmpOpRerverse(TestLstmpOp):
    """LSTMP operator test with ``is_reverse`` switched on."""

    def reset_argument(self):
        # Runs inside setUp, after the default arguments have been set.
        self.is_reverse = True


298
class TestLstmpOpNotUsePeepholes(TestLstmpOp):
    """LSTMP operator test with peephole connections switched off."""

    def reset_argument(self):
        # With this flag off, setUp draws a 4 * D wide bias and passes
        # w_c=None to the reference implementation.
        self.use_peepholes = False


303
class TestLstmpOpLinearProjection(TestLstmpOp):
    """LSTMP operator test using the 'identity' projection activation."""

    def reset_argument(self):
        # Replaces the default set in setUp (act_proj = act_cell).
        self.act_proj = 'identity'
Y
Yibing Liu 已提交
306 307


308 309 310 311 312 313 314 315 316 317
class TestLstmpOpLen0Case1(TestLstmpOp):
    """LSTMP operator test where the first and last sequences are empty."""

    def reset_argument(self):
        # Sequence lengths 0, 4, 0: only the middle sequence has steps.
        self.lod = [[0, 4, 0]]


class TestLstmpOpLen0Case2(TestLstmpOp):
    """LSTMP operator test with an empty sequence between two non-empty ones."""

    def reset_argument(self):
        # Sequence lengths 2, 0, 3: the middle sequence has no steps.
        self.lod = [[2, 0, 3]]


318 319
# Run every test case in this module when it is executed as a script.
if __name__ == '__main__':
    unittest.main()