test_gru_op.py 8.2 KB
Newer Older
1
#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

15 16
from __future__ import print_function

G
guosheng 已提交
17 18 19
import unittest
import numpy as np
import math
M
minqiyang 已提交
20
import functools
21
from op_test import OpTest, skip_check_grad_ci
22
from paddle.fluid.tests.unittests.test_lstm_op import ACTIVATION
23 24
from paddle import fluid
from paddle.fluid import Program, program_guard
T
tensor-tang 已提交
25 26 27 28 29 30 31 32 33 34


def gru(
        input,  # T x 3D
        lod,  # 1 x N
        h0,  # N x D
        weight,  # D x 3D
        bias,  # 1 x 3D
        is_reverse,
        act_state,
        act_gate,
        dtype='float32',
        origin_mode=False):
    """NumPy reference implementation of a GRU over variable-length sequences.

    The sequences are packed back to back along the first axis of ``input``;
    ``lod[0]`` lists the length of every sequence. Sequences are processed
    longest-first, one time step ("batch") at a time.

    Args:
        input: Array of shape (T, 3D) holding the projected inputs of all
            sequences, where T is ``sum(lod[0])``.
        lod: One-level list whose first element is the list of sequence
            lengths. Zero-length sequences are allowed; an empty list yields
            empty outputs.
        h0: Array of shape (N, D) with the initial hidden state per sequence.
        weight: Array of shape (D, 3D). Its flattened first ``2 * D * D``
            values are used as the update/reset weights and the remaining
            ``D * D`` values as the candidate weights.
        bias: Array of shape (1, 3D) added to every input row.
        is_reverse: When true, every sequence is walked from its last row to
            its first.
        act_state: Callable applied to the candidate pre-activation.
        act_gate: Callable applied to the update/reset pre-activation.
        dtype: dtype of the returned arrays.
        origin_mode: Selects the blending formula. ``True`` gives
            ``h = (1 - u) * c + u * h_prev``; ``False`` gives
            ``h = u * c + (1 - u) * h_prev``.

    Returns:
        A tuple ``(batch_gate, batch_reset_hidden_prev, batch_hidden,
        hidden)``. The first three are laid out in processing (batch) order
        with shapes (T, 3D), (T, D) and (T, D); ``hidden`` has shape (T, D)
        and is laid out in the same row order as ``input``.
    """

    def _seq_to_batch(lod, is_reverse):
        """Return per-step row indices plus the longest-first sequence order."""
        seq_lens = lod[0]
        seq_starts = [0]
        for seq_len in seq_lens:
            seq_starts.append(seq_starts[-1] + seq_len)

        # sorted() is stable (also with reverse=True), so sequences of equal
        # length keep their original relative order.
        sorted_seqs = sorted(
            range(len(seq_lens)), key=lambda i: seq_lens[i], reverse=True)

        # With no sequences at all there is nothing to step over.
        num_batch = seq_lens[sorted_seqs[0]] if sorted_seqs else 0

        idx_in_seq_list = []
        for batch_idx in range(num_batch):
            idx_in_seq = []
            for seq in sorted_seqs:
                # Sequences are sorted longest-first, so the first one that
                # is already exhausted ends this time step.
                if seq_lens[seq] <= batch_idx:
                    break
                if is_reverse:
                    idx = seq_starts[seq + 1] - 1 - batch_idx
                else:
                    idx = seq_starts[seq] + batch_idx
                idx_in_seq.append(idx)
            idx_in_seq_list.append(idx_in_seq)
        return idx_in_seq_list, sorted_seqs

    def _step(x, h_p, w, b, act_state, act_gate):
        """Run one GRU time step for the rows that are still active."""
        D = w.shape[0]
        # The bias row broadcasts over every active row of x.
        g = x + b
        # The weight is split on its *flattened* buffer, which is not the
        # same as slicing columns of the (D, 3D) array.
        flat_w = w.flatten()
        w_u_r = flat_w[:D * D * 2].reshape((D, D * 2))
        w_c = flat_w[D * D * 2:].reshape((D, D))

        u_r = act_gate(np.dot(h_p, w_u_r) + g[:, :D * 2])
        u = u_r[:, :D]
        r = u_r[:, D:D * 2]
        r_h_p = r * h_p
        c = act_state(np.dot(r_h_p, w_c) + g[:, D * 2:])
        g = np.hstack((u_r, c))
        if origin_mode:
            h = (1 - u) * c + u * h_p
        else:
            h = u * c + (1 - u) * h_p
        return g, r_h_p, h

    T = sum(lod[0])
    D = weight.shape[0]
    batch_gate = np.zeros((T, 3 * D), dtype=dtype)
    batch_reset_hidden_prev = np.zeros((T, D), dtype=dtype)
    batch_hidden = np.zeros((T, D), dtype=dtype)
    hidden = np.zeros((T, D), dtype=dtype)

    idx_in_seq_list, sorted_seqs = _seq_to_batch(lod, is_reverse)
    # Reorder the initial states longest-first and drop empty sequences so
    # the rows line up with the first time step.
    h_p = h0[[seq for seq in sorted_seqs if lod[0][seq] > 0]]

    max_seq_len = len(idx_in_seq_list)
    end_idx = 0
    for batch_idx, rows in enumerate(idx_in_seq_list):
        x = input[rows]
        g, r_h_p, h = _step(x, h_p, weight, bias, act_state, act_gate)
        if batch_idx < (max_seq_len - 1):
            # Only the sequences still active at the next step carry over.
            h_p = h[:len(idx_in_seq_list[batch_idx + 1])]
        start_idx = end_idx
        end_idx = start_idx + len(rows)
        batch_gate[start_idx:end_idx] = g
        batch_reset_hidden_prev[start_idx:end_idx] = r_h_p
        batch_hidden[start_idx:end_idx] = h
        hidden[rows] = h
    return batch_gate, batch_reset_hidden_prev, batch_hidden, hidden
G
guosheng 已提交
103 104


T
tensor-tang 已提交
105
class TestGRUOp(OpTest):
    """Base test case for the "gru" operator.

    ``setUp`` builds random inputs, computes the expected outputs with the
    NumPy reference ``gru`` and stores them in ``self.inputs``,
    ``self.outputs`` and ``self.attrs``. Subclasses customise a scenario by
    overriding ``set_confs`` and/or ``set_is_test``.
    """

    def set_confs(self):
        """Hook for subclasses to override the defaults chosen in setUp."""
        pass

    def set_is_test(self):
        """Hook that sets the ``is_test`` attribute (False by default)."""
        self.is_test = False

    def setUp(self):
        """Prepare operator inputs, reference outputs and attributes."""
        self.op_type = "gru"

        # Default configuration; set_confs() may change any of these.
        self.lod = [[2, 4, 3]]
        self.D = 40
        self.is_reverse = False
        self.with_h0 = True
        self.with_bias = True
        self.act_state = 'tanh'
        self.act_gate = 'sigmoid'
        self.dtype = 'float64'
        self.origin_mode = False
        self.set_confs()
        self.set_is_test()

        seq_lens = self.lod[0]
        total_steps = sum(seq_lens)
        num_seqs = len(seq_lens)
        gate_width = 3 * self.D

        # Random draws happen in this fixed order: input, weight, bias, h0.
        x_data = np.random.rand(total_steps, gate_width).astype(self.dtype)
        weight = np.random.rand(self.D, gate_width).astype(self.dtype)
        if self.with_bias:
            bias = np.random.rand(1, gate_width).astype(self.dtype)
        else:
            bias = np.zeros((1, gate_width), dtype=self.dtype)
        if self.with_h0:
            h0 = np.random.rand(num_seqs, self.D).astype(self.dtype)
        else:
            h0 = np.zeros((num_seqs, self.D), dtype=self.dtype)

        reference = gru(x_data, self.lod, h0, weight, bias, self.is_reverse,
                        ACTIVATION[self.act_state], ACTIVATION[self.act_gate],
                        self.dtype, self.origin_mode)
        batch_gate, batch_reset_hidden_prev, batch_hidden, hidden = reference

        # Optional inputs are only registered when the scenario uses them.
        self.inputs = {'Input': (x_data, self.lod), 'Weight': weight}
        if self.with_bias:
            self.inputs['Bias'] = bias
        if self.with_h0:
            self.inputs['H0'] = h0

        self.outputs = {
            'Hidden': (hidden, self.lod),
            'BatchGate': batch_gate,
            'BatchResetHiddenPrev': batch_reset_hidden_prev,
            'BatchHidden': batch_hidden,
        }

        self.attrs = {
            'activation': self.act_state,
            'gate_activation': self.act_gate,
            'is_reverse': self.is_reverse,
            'origin_mode': self.origin_mode,
            'is_test': self.is_test,
        }

    def test_check_output(self):
        """Check the operator outputs against the reference values."""
        self.check_output(atol=1e-8, check_dygraph=False)

    def test_check_grad(self):
        """Check the gradients of 'Hidden' w.r.t. every input."""
        grad_inputs = ['Input', 'H0', 'Weight', 'Bias']
        self.check_grad(grad_inputs, ['Hidden'], check_dygraph=False)
G
guosheng 已提交
170 171


Q
Qiao Longfei 已提交
172 173 174 175 176
class TestGRUOriginMode(TestGRUOp):
    """Default scenario with ``origin_mode`` switched on."""

    def set_confs(self):
        """Enable origin mode."""
        self.origin_mode = True


177 178
class TestGRUOp2(TestGRUOp):
    """Default scenario with the dtype set explicitly to float64."""

    def set_confs(self):
        """Pin the data type to float64."""
        self.dtype = 'float64'
180 181


182 183 184
class TestGRUOp2Len0(TestGRUOp):
    """float64 scenario whose middle sequence is empty."""

    def set_confs(self):
        """Use float64 data and a LoD with a zero-length middle sequence."""
        self.dtype = 'float64'
        self.lod = [[2, 0, 4]]
186 187


Q
Qiao Longfei 已提交
188 189
class TestGRUOp2OriginMode(TestGRUOp):
    """float64 scenario with ``origin_mode`` switched on."""

    def set_confs(self):
        """Enable origin mode on float64 data."""
        self.origin_mode = True
        self.dtype = 'float64'


194 195 196
class TestGRUOp2OriginModeLen0(TestGRUOp):
    """Origin-mode float64 scenario whose first sequence is empty."""

    def set_confs(self):
        """Enable origin mode with a zero-length leading sequence."""
        self.origin_mode = True
        self.dtype = 'float64'
        self.lod = [[0, 3, 4]]


class TestGRUOp2OriginModeLastLen0(TestGRUOp):
    """Origin-mode float64 scenario whose first and last sequences are empty."""

    def set_confs(self):
        """Enable origin mode with zero-length first and last sequences."""
        self.origin_mode = True
        self.dtype = 'float64'
        self.lod = [[0, 3, 0]]


G
guosheng 已提交
208
class TestGRUOpNoInitial(TestGRUOp):
    """Scenario that does not feed an initial hidden state ('H0')."""

    def set_confs(self):
        """Leave 'H0' out of the operator inputs."""
        self.with_h0 = False

    def test_check_grad(self):
        """Check gradients for every input except the absent 'H0'."""
        grad_inputs = ['Input', 'Weight', 'Bias']
        self.check_grad(grad_inputs, ['Hidden'], check_dygraph=False)
G
guosheng 已提交
215 216


T
tensor-tang 已提交
217 218 219 220 221
class TestGRUOpNoBias(TestGRUOp):
    """Scenario that does not feed a bias ('Bias')."""

    def set_confs(self):
        """Leave 'Bias' out of the operator inputs."""
        self.with_bias = False

    def test_check_grad(self):
        """Check gradients for every input except the absent 'Bias'."""
        grad_inputs = ['Input', 'H0', 'Weight']
        self.check_grad(grad_inputs, ['Hidden'], check_dygraph=False)
T
tensor-tang 已提交
224 225


G
guosheng 已提交
226 227 228 229 230
class TestGRUOpReverse(TestGRUOp):
    """Default scenario with the sequences processed in reverse."""

    def set_confs(self):
        """Turn on reverse processing."""
        self.is_reverse = True


Q
Qiao Longfei 已提交
231 232 233 234 235 236
class TestGRUOpReverseOriginMode(TestGRUOp):
    """Reverse processing combined with ``origin_mode``."""

    def set_confs(self):
        """Turn on both origin mode and reverse processing."""
        self.origin_mode = True
        self.is_reverse = True


237 238 239 240 241 242 243 244 245 246 247 248 249 250 251
class TestGRUOpInference(TestGRUOp):
    """Scenario that runs the operator with ``is_test`` set to True."""

    def set_is_test(self):
        """Mark the operator as running in test (inference) mode."""
        self.is_test = True

    def test_check_output(self):
        """Compare only the 'Hidden' output against the reference."""
        # Drop every expected output except 'Hidden' before the base check.
        self.outputs = {'Hidden': self.outputs['Hidden']}
        super(TestGRUOpInference, self).test_check_output()

    def test_check_grad(self):
        """Gradient checking is intentionally skipped for this scenario."""
        pass


252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270
class TestGruOpError(unittest.TestCase):
    """Checks that ``fluid.layers.dynamic_gru`` rejects invalid arguments."""

    def test_errors(self):
        """Both invalid calls below are expected to raise TypeError."""
        with program_guard(Program(), Program()):
            # A raw NumPy array is passed as the layer input.
            with self.assertRaises(TypeError):
                raw_input = np.random.random((1, 1536)).astype("float32")
                fluid.layers.dynamic_gru(input=raw_input, size=512)

            # The initial hidden state is declared with an int32 dtype.
            with self.assertRaises(TypeError):
                seq_input = fluid.data(
                    name="input", shape=[None, 1536], dtype="float32")
                init_hidden = fluid.data(
                    name="h", shape=[None, 512], dtype="int32")
                fluid.layers.dynamic_gru(
                    input=seq_input, size=512, h_0=init_hidden)


G
guosheng 已提交
271 272
# Run every test case defined in this module when it is executed as a script.
if __name__ == "__main__":
    unittest.main()