#  Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
#Licensed under the Apache License, Version 2.0 (the "License");
#you may not use this file except in compliance with the License.
#You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
#Unless required by applicable law or agreed to in writing, software
#distributed under the License is distributed on an "AS IS" BASIS,
#WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
#See the License for the specific language governing permissions and
#limitations under the License.

from __future__ import print_function

import unittest
import numpy as np
import test_lstm_op as LstmTest
from paddle import fluid
from paddle.fluid import Program, program_guard


ACTIVATION = {
    'identity': LstmTest.identity,
    'sigmoid': LstmTest.sigmoid,
    'tanh': LstmTest.tanh,
    'relu': LstmTest.relu
}


# LSTM with a recurrent projection layer
def lstmp(
        input,  # T x 4D
        lod,  # 1 x N
        h0=None,  # N x P
        c0=None,  # N x D
        w_r=None,  # P x 4D
        w_rh=None,  # D x P
        w_b=None,  # 1 x 4D
        w_c=None,  # 1 x 3D
        is_reverse=False,
        proj_clip=0.0,
        cell_clip=0.0,
        act_gate=None,
        act_cell=None,
        act_cand=None,
        act_proj=None):
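    """NumPy reference implementation of LSTMP (LSTM with a recurrent
    projection layer). T is the total number of time steps, N the batch
    size, D the hidden size and P the projection size; per-argument
    shapes are noted above. Returns the projected hidden states (T x P)
    and the cell states (T x D).
    """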
    def _step(x, w_r, w_rh, w_c, r_pre, c_pre, proj_clip, cell_clip, act_gate,
              act_cell, act_cand, act_proj):
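        """Advance one time step: form the four gates from the projected
        input x and the previous projection r_pre, update the cell (with
        optional peepholes and clipping), then project and optionally
        clip the output. Returns the new projection r and cell c."""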
        g = np.dot(r_pre, w_r)  # 1 x 4D
        g = g + x
        g = np.reshape(g, (1, g.size))
        c, g_i, g_f, g_o = np.split(g, 4, axis=1)
        if w_c is None:
            g_i = act_gate(g_i)  # 1 x D
            g_f = act_gate(g_f)  # 1 x D
        else:
            w_ic, w_fc, _ = np.split(w_c, 3, axis=1)
            g_i = act_gate(g_i + w_ic * c_pre)  # 1 x D
            g_f = act_gate(g_f + w_fc * c_pre)  # 1 x D
        c = g_f * c_pre + g_i * act_cand(c)  # 1 x D

        def array_clip(a, clip):
            # clip every element to the interval [-clip, clip]
            return np.clip(a, -clip, clip)

        if cell_clip > 0.0:
            c = array_clip(c, cell_clip)
        if w_c is None:
            g_o = act_gate(g_o)  # 1 x D
        else:
            _, _, w_oc = np.split(w_c, 3, axis=1)
            g_o = act_gate(g_o + w_oc * c)  # 1 x D
        h = g_o * act_cell(c)
        # projection
        r = np.dot(h, w_rh)
        r = act_proj(r)
        if proj_clip > 0.0:
            r = array_clip(r, proj_clip)
        return r, c

    def _reverse(x, offset):
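        """Reverse each sequence (delimited by offset) along time."""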
        y = np.zeros_like(x)
        for i in range(len(offset) - 1):
            b, e = offset[i], offset[i + 1]
            y[b:e, :] = np.flip(x[b:e, :], 0)
        return y

    offset = [0]
    for l in lod[0]:
        offset.append(offset[-1] + l)
    batch_size = len(lod[0])
    # recurrent projection state
    projection = []
    cell = []
    input = _reverse(input, offset) if is_reverse else input
    if w_b is not None:
        input = input + np.tile(w_b, (offset[-1], 1))
    for i in range(batch_size):
        # compute one sequence
        seq_len = lod[0][i]
        x = input[offset[i]:offset[i + 1], :]
        r_pre = h0[i]
        c_pre = c0[i]  # 1 x D
        for j in range(seq_len):
            # compute one step
            r_pre, c_pre = _step(x[j], w_r, w_rh, w_c, r_pre, c_pre, proj_clip,
                                 cell_clip, act_gate, act_cell, act_cand,
                                 act_proj)
            projection.append(r_pre.flatten())
            cell.append(c_pre.flatten())

    projection = np.array(projection).astype('float64')
    cell = np.array(cell).astype('float64')

    projection = _reverse(projection, offset) if is_reverse else projection
    cell = _reverse(cell, offset) if is_reverse else cell

    assert projection.shape == (input.shape[0], w_r.shape[0])  # T x P
    assert cell.shape == (input.shape[0], input.shape[1] // 4)  # T x D
    return projection, cell
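

# A minimal smoke check for the NumPy reference above (a sketch only, not
# part of the operator test suite; the helper name and the tiny sizes are
# illustrative assumptions). It runs lstmp() end to end and relies on the
# shape asserts inside it.
def _lstmp_smoke_check():
    D, P = 4, 3
    lod = [[2, 3]]  # two sequences, of lengths 2 and 3
    T, N = sum(lod[0]), len(lod[0])
    x = np.random.normal(size=(T, 4 * D)).astype('float64')
    h0 = np.zeros((N, P)).astype('float64')
    c0 = np.zeros((N, D)).astype('float64')
    w_r = np.random.normal(size=(P, 4 * D)).astype('float64')
    w_rh = np.random.normal(size=(D, P)).astype('float64')
    r, c = lstmp(x, lod, h0, c0, w_r, w_rh,
                 act_gate=ACTIVATION['sigmoid'],
                 act_cell=ACTIVATION['tanh'],
                 act_cand=ACTIVATION['tanh'],
                 act_proj=ACTIVATION['tanh'])
    assert r.shape == (T, P) and c.shape == (T, D)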


class TestLstmpOp(LstmTest.TestLstmOp):
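    """Checks the lstmp operator against the NumPy reference above.

    Subclasses override reset_argument() to vary one setting at a time
    (initial state, reversal, peepholes, projection activation, ...).
    """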
    def reset_argument(self):
        pass

    def setUp(self):
        self.set_argument()
        # projection size
        self.P = 10
        self.act_proj = self.act_cell

        self.reset_argument()
        self.op_type = 'lstmp'

        T = sum(self.lod[0])
        N = len(self.lod[0])
        x = np.random.normal(size=(T, 4 * self.D)).astype('float64')
        if self.has_initial_state:
            h0 = np.random.normal(size=(N, self.P)).astype('float64')
            c0 = np.random.normal(size=(N, self.D)).astype('float64')
        else:
            h0 = np.zeros((N, self.P)).astype('float64')
            c0 = np.zeros((N, self.D)).astype('float64')
        w = np.random.normal(size=(self.P, 4 * self.D)).astype('float64')
        if self.use_peepholes:
            b = np.random.normal(size=(1, 7 * self.D)).astype('float64')
        else:
            b = np.random.normal(size=(1, 4 * self.D)).astype('float64')

        w_b = b[:, 0:4 * self.D]
        w_c = b[:, 4 * self.D:] if self.use_peepholes else None
        w_rh = np.random.normal(size=(self.D, self.P)).astype('float64')
        proj_clip = 0.1
        cell_clip = 0.1
        r, c = lstmp(x, self.lod, h0, c0, w, w_rh, w_b, w_c, self.is_reverse,
                     proj_clip, cell_clip, ACTIVATION[self.act_gate],
                     ACTIVATION[self.act_cell], ACTIVATION[self.act_cand],
                     ACTIVATION[self.act_proj])

        self.inputs = {'Input': (x, self.lod), 'Weight': w, 'ProjWeight': w_rh}

        self.inputs['Bias'] = b

        if self.has_initial_state:
            self.inputs['H0'] = h0
            self.inputs['C0'] = c0

        self.outputs = {
            'Projection': (r, self.lod),
            'Cell': (c, self.lod),
        }
        self.attrs = {
            'use_peepholes': self.use_peepholes,
            'is_reverse': self.is_reverse,
            'proj_clip': proj_clip,
            'cell_clip': cell_clip,
            'gate_activation': self.act_gate,
            'cell_activation': self.act_cell,
            'candidate_activation': self.act_cand,
            'proj_activation': self.act_proj
        }

    def test_check_output(self):
        self.check_output(atol=1e-8, check_dygraph=False)

    def test_check_grad(self):
        # TODO(qingqing) remove following lines after the check_grad is refined.
        N = len(self.lod[0])
        self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
        self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
        self.outputs['BatchCellPreAct'] = np.zeros(
            (N, self.D)).astype('float64')
        self.check_grad(
            ['Input', 'Weight', 'ProjWeight', 'Bias'], ['Projection'],
            numeric_grad_delta=0.0000005,
            check_dygraph=False)


class TestLstmpOpHasInitial(TestLstmpOp):
    def reset_argument(self):
        self.has_initial_state = True

    def test_check_grad(self):
        # TODO(qingqing) remove following lines after the check_grad is refined.
        N = len(self.lod[0])
        self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
        self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
        self.outputs['BatchCellPreAct'] = np.zeros(
            (N, self.D)).astype('float64')
        self.check_grad(
            ['Input', 'Weight', 'ProjWeight', 'Bias', 'H0', 'C0'],
            ['Projection'],
            numeric_grad_delta=0.0000005,
            check_dygraph=False)

    def test_check_grad_ignore_bias(self):
        N = len(self.lod[0])
        self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
        self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
        self.outputs['BatchCellPreAct'] = np.zeros(
            (N, self.D)).astype('float64')
        self.check_grad(
            ['Input', 'ProjWeight', 'Weight'], ['Projection'],
            numeric_grad_delta=0.0000005,
            no_grad_set=set(['Bias']),
            check_dygraph=False)

    def test_check_grad_ignore_weight(self):
        N = len(self.lod[0])
        self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
        self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
        self.outputs['BatchCellPreAct'] = np.zeros(
            (N, self.D)).astype('float64')
        self.check_grad(
            ['Input', 'ProjWeight', 'Bias'], ['Projection'],
            numeric_grad_delta=0.0000005,
            no_grad_set=set(['Weight']),
            check_dygraph=False)

    def test_check_grad_ignore_proj_weight(self):
        N = len(self.lod[0])
        self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
        self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
        self.outputs['BatchCellPreAct'] = np.zeros(
            (N, self.D)).astype('float64')
        self.check_grad(
            ['Input', 'Weight', 'Bias'], ['Projection'],
            numeric_grad_delta=0.0000005,
            no_grad_set=set(['ProjWeight']),
            check_dygraph=False)

    def test_check_grad_ignore_input(self):
        N = len(self.lod[0])
        self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
        self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
        self.outputs['BatchCellPreAct'] = np.zeros(
            (N, self.D)).astype('float64')
        self.check_grad(
            ['Weight', 'ProjWeight', 'Bias'], ['Projection'],
            numeric_grad_delta=0.0000005,
            no_grad_set=set(['Input']),
            check_dygraph=False)

    def test_check_grad_ignore_h0(self):
        N = len(self.lod[0])
        self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
        self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
        self.outputs['BatchCellPreAct'] = np.zeros(
            (N, self.D)).astype('float64')
        self.check_grad(
            ['Input', 'Weight', 'ProjWeight', 'Bias', 'C0'], ['Projection'],
            numeric_grad_delta=0.0000005,
            no_grad_set=set(['H0']),
            check_dygraph=False)

    def test_check_grad_ignore_c0(self):
        N = len(self.lod[0])
        self.outputs['BatchGate'] = np.zeros((N, 4 * self.D)).astype('float64')
        self.outputs['BatchHidden'] = np.zeros((N, self.D)).astype('float64')
        self.outputs['BatchCellPreAct'] = np.zeros(
            (N, self.D)).astype('float64')
        self.check_grad(
            ['Input', 'Weight', 'ProjWeight', 'Bias', 'H0'], ['Projection'],
            numeric_grad_delta=0.0000005,
            no_grad_set=set(['C0']),
            check_dygraph=False)


class TestLstmpOpReverse(TestLstmpOp):
    def reset_argument(self):
        self.is_reverse = True


class TestLstmpOpNotUsePeepholes(TestLstmpOp):
    def reset_argument(self):
        self.use_peepholes = False


class TestLstmpOpLinearProjection(TestLstmpOp):
    def reset_argument(self):
        self.act_proj = 'identity'


class TestLstmpOpLen0Case1(TestLstmpOp):
    def reset_argument(self):
        self.lod = [[0, 4, 0]]


class TestLstmpOpLen0Case2(TestLstmpOp):
    def reset_argument(self):
        self.lod = [[2, 0, 3]]


class TestLstmpOpError(unittest.TestCase):
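    """Checks that fluid.layers.dynamic_lstmp raises TypeError for
    non-Variable inputs and for initial states with a wrong dtype."""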
    def test_errors(self):
        with program_guard(Program(), Program()):

            def test_Variable():
                input_data = np.random.random((1, 2048)).astype("float32")
                fluid.layers.dynamic_lstmp(
                    input=input_data,
                    size=2048,
                    proj_size=256,
                    use_peepholes=False,
                    is_reverse=True,
                    cell_activation="tanh",
                    proj_activation="tanh")

            self.assertRaises(TypeError, test_Variable)

            def test_h_0():
                in_data = fluid.data(
                    name="input", shape=[None, 2048], dtype="float32")
                h = fluid.data(name="h", shape=[None, 512], dtype="int32")
                c = fluid.data(name="c", shape=[None, 512], dtype="float32")
                fluid.layers.dynamic_lstmp(
                    input=in_data,
                    size=2048,
                    proj_size=256,
                    use_peepholes=False,
                    is_reverse=True,
                    cell_activation="tanh",
                    proj_activation="tanh",
                    h_0=h,
                    c_0=c)

            self.assertRaises(TypeError, test_h_0)

            def test_c_0():
                in_data_ = fluid.data(
                    name="input_", shape=[None, 2048], dtype="float32")
                h_ = fluid.data(name="h_", shape=[None, 512], dtype="float32")
                c_ = fluid.data(name="c_", shape=[None, 512], dtype="int32")
                fluid.layers.dynamic_lstmp(
                    input=in_data_,
                    size=2048,
                    proj_size=256,
                    use_peepholes=False,
                    is_reverse=True,
                    cell_activation="tanh",
                    proj_activation="tanh",
                    h_0=h_,
                    c_0=c_)

            self.assertRaises(TypeError, test_c_0)


if __name__ == '__main__':
    unittest.main()