#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from __future__ import print_function

import unittest
import paddle
import paddle.fluid.core as core
import paddle.fluid as fluid
from paddle.fluid.backward import append_backward
import paddle.fluid.framework as framework
from paddle.fluid.framework import Program, switch_main_program
import bisect
import numpy as np

# Pin the random seed of the default startup program to 1 at import time
# (presumably so parameter initialization is reproducible across runs --
# confirm against the fluid Program.random_seed documentation).
fluid.default_startup_program().random_seed = 1


class TestDyRnnStaticInput(unittest.TestCase):
    def setUp(self):
        self._delta = 0.005
        self._max_sequence_len = 3
        self._program = Program()
        switch_main_program(self._program)
        self.output_dim = 10
        self.place = core.CPUPlace()
        self.prepare_x_tensor()
        self.prepare_static_input_tensor()
        self.exe = fluid.Executor(self.place)

    def prepare_x_tensor(self):
        self.x_tensor_dim = 10
44 45
        lod = [[2, 1, 3]]
        shape = [sum(lod[0]), self.x_tensor_dim]
46 47
        self.x_tensor_data = np.random.random(shape).astype('float32')
        self.x_tensor = core.LoDTensor()
48
        self.x_tensor.set_recursive_sequence_lengths(lod)
49 50 51 52
        self.x_tensor.set(self.x_tensor_data, self.place)

    def prepare_static_input_tensor(self):
        self.static_input_tensor_dim = 4
53 54
        lod = [[1, 2, 3]]
        shape = [sum(lod[0]), self.static_input_tensor_dim]
55 56
        self.static_input_data = np.random.random(shape).astype('float32')
        self.static_input_tensor = core.LoDTensor()
57
        self.static_input_tensor.set_recursive_sequence_lengths(lod)
58 59 60 61 62 63 64 65 66 67 68 69
        self.static_input_tensor.set(self.static_input_data, self.place)

    def fetch_value(self, var):
        fetch_outs = self.exe.run(feed={
            'x_tensor': self.x_tensor,
            'static_input_tensor': self.static_input_tensor
        },
                                  fetch_list=[var],
                                  return_numpy=False)
        return self._lodtensor_to_ndarray(fetch_outs[0])

    def _lodtensor_to_ndarray(self, lod_tensor):
Y
yuyang18 已提交
70
        dims = lod_tensor.shape()
71
        ndarray = np.zeros(shape=dims).astype('float32')
72
        for i in range(np.product(dims)):
Y
yuyang18 已提交
73
            ndarray.ravel()[i] = lod_tensor._get_float_element(i)
74
        return ndarray, lod_tensor.recursive_sequence_lengths()
75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118

    def build_graph(self, only_forward=False):
        x_tensor = fluid.layers.data(
            name='x_tensor',
            shape=[self.x_tensor_dim],
            dtype='float32',
            lod_level=1)
        x_tensor.stop_gradient = False

        static_input_tensor = fluid.layers.data(
            name='static_input_tensor',
            shape=[self.static_input_tensor_dim],
            dtype='float32',
            lod_level=1)
        static_input_tensor.stop_gradient = False

        if only_forward:
            static_input_out_array = self._program.global_block().create_var(
                name='static_input_out_array',
                type=core.VarDesc.VarType.LOD_TENSOR_ARRAY,
                dtype='float32')
            static_input_out_array.stop_gradient = True

        rnn = fluid.layers.DynamicRNN()
        with rnn.block():
            step_x = rnn.step_input(x_tensor)
            step_static_input = rnn.static_input(static_input_tensor)
            if only_forward:
                fluid.layers.array_write(
                    x=step_static_input,
                    i=rnn.step_idx,
                    array=static_input_out_array)
            last = fluid.layers.sequence_pool(
                input=step_static_input, pool_type='last')
            projected = fluid.layers.fc(input=[step_x, last],
                                        size=self.output_dim)
            rnn.output(projected)

        if only_forward:
            static_input_step_outs = []
            step_idx = fluid.layers.fill_constant(
                shape=[1], dtype='int64', value=0)
            step_idx.stop_gradient = True

119
            for i in range(self._max_sequence_len):
120 121 122 123 124 125 126 127 128 129
                step_out = fluid.layers.array_read(static_input_out_array,
                                                   step_idx)
                step_out.stop_gradient = True
                static_input_step_outs.append(step_out)
                fluid.layers.increment(x=step_idx, value=1.0, in_place=True)

        if only_forward:
            return static_input_step_outs

        last = fluid.layers.sequence_pool(input=rnn(), pool_type='last')
Y
Yu Yang 已提交
130
        loss = fluid.layers.mean(last)
131 132 133 134 135 136
        append_backward(loss)
        static_input_grad = self._program.global_block().var(
            framework.grad_var_name('static_input_tensor'))
        return static_input_grad, loss

    def get_expected_static_step_outs(self):
137 138
        x_lod = self.x_tensor.recursive_sequence_lengths()
        x_seq_len = x_lod[0]
139 140 141
        x_seq_len_sorted = sorted(x_seq_len)
        x_sorted_indices = np.argsort(x_seq_len)[::-1]

142 143 144
        static_lod = self.static_input_tensor.recursive_sequence_lengths()
        static_sliced = []
        cur_offset = 0
145
        for i in range(len(static_lod[0])):
146 147 148 149
            static_sliced.append(self.static_input_data[cur_offset:(
                cur_offset + static_lod[0][i])])
            cur_offset += static_lod[0][i]
        static_seq_len = static_lod[0]
150
        static_reordered = []
151
        for i in range(len(x_sorted_indices)):
152 153 154
            static_reordered.extend(static_sliced[x_sorted_indices[i]].tolist())
        static_seq_len_reordered = [
            static_seq_len[x_sorted_indices[i]]
155
            for i in range(len(x_sorted_indices))
156 157 158
        ]

        static_step_outs = []
Y
yangyaming 已提交
159
        static_step_lods = []
160

161
        for i in range(self._max_sequence_len):
162
            end = len(x_seq_len) - bisect.bisect_left(x_seq_len_sorted, i + 1)
163 164
            lod = []
            total_len = 0
165
            for i in range(end):
166 167
                lod.append(static_seq_len_reordered[i])
                total_len += lod[-1]
Y
yangyaming 已提交
168
            static_step_lods.append([lod])
169
            end = total_len
170 171 172
            static_step_outs.append(
                np.array(static_reordered[:end]).astype('float32'))

Y
yangyaming 已提交
173
        return static_step_outs, static_step_lods
174 175 176 177

    def test_step_out(self):
        static_step_outs = self.build_graph(only_forward=True)
        self.exe.run(framework.default_startup_program())
Y
yangyaming 已提交
178
        expected_outs, expected_lods = self.get_expected_static_step_outs()
179
        for i in range(self._max_sequence_len):
Y
yangyaming 已提交
180 181 182
            step_out, lod = self.fetch_value(static_step_outs[i])
            self.assertTrue(np.allclose(step_out, expected_outs[i]))
            self.assertTrue(np.allclose(lod, expected_lods[i]))
183 184 185 186 187

    def test_network_gradient(self):
        static_input_grad, loss = self.build_graph()
        self.exe.run(framework.default_startup_program())

Y
yangyaming 已提交
188
        actual_gradients, actual_lod = self.fetch_value(static_input_grad)
189

Y
yuyang18 已提交
190
        static_input_shape = self.static_input_tensor.shape()
191 192 193
        numeric_gradients = np.zeros(shape=static_input_shape).astype('float32')
        # calculate numeric gradients
        tensor_size = np.product(static_input_shape)
194
        for i in range(tensor_size):
Y
yuyang18 已提交
195
            origin = self.static_input_tensor._get_float_element(i)
196
            x_pos = origin + self._delta
Y
yuyang18 已提交
197
            self.static_input_tensor._set_float_element(i, x_pos)
Y
yangyaming 已提交
198
            y_pos = self.fetch_value(loss)[0][0]
199
            x_neg = origin - self._delta
Y
yuyang18 已提交
200
            self.static_input_tensor._set_float_element(i, x_neg)
Y
yangyaming 已提交
201
            y_neg = self.fetch_value(loss)[0][0]
Y
yuyang18 已提交
202
            self.static_input_tensor._set_float_element(i, origin)
203
            numeric_gradients.ravel()[i] = (y_pos - y_neg) / self._delta / 2
Y
yangyaming 已提交
204
        self.assertTrue(np.allclose(actual_gradients, numeric_gradients, 0.001))
205 206 207
        self.assertTrue(
            np.allclose(actual_lod,
                        self.static_input_tensor.recursive_sequence_lengths()))
208 209 210 211


# Run the test case through unittest's command-line runner when this file is
# executed directly rather than imported.
if __name__ == '__main__':
    unittest.main()