#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import sys
import unittest
import numpy as np
from op_test import OpTest
from test_softmax_op import stable_softmax

CUDA_BLOCK_SIZE = 512


class CTCForward(object):
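    """Reference implementation of the CTC forward pass in NumPy.

    The loss of each sequence in the batch is computed in log space,
    following chapter 7.3 of Graves, "Supervised Sequence Labelling with
    Recurrent Neural Networks", and serves as the expected "Loss" output
    of the warpctc operator under test.
    """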
    def __init__(self, softmax, softmax_lod, labels, labels_lod, blank,
                 norm_by_times):
        self.softmax = softmax
        self.softmax_lod = softmax_lod
        assert labels.shape[1] == 1
        self.labels = labels
        self.labels_lod = labels_lod
        self.blank = blank
        self.norm_by_times = norm_by_times

        self.level = 0
        self.num_classes = softmax.shape[1]
        self.batch_size = len(softmax_lod[self.level])
        assert self.batch_size == len(labels_lod[self.level])

        self.loss = np.zeros([self.batch_size, 1], dtype="float32")
        self.gradient = np.zeros(self.softmax.shape, dtype="float32")

        # float64 limits used to clamp values in log space
        self.EXP_MAX = sys.float_info.max
        self.EXP_MIN = sys.float_info.min
        self.LOG_ZERO = np.log(self.EXP_MIN)
        self.LOG_INFINITY = np.log(self.EXP_MAX)

    def safe_exp(self, x):
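        # clamp to avoid overflow/underflow: inputs at or below LOG_ZERO map
        # to 0.0, inputs at or above LOG_INFINITY map to the float maximum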
        if x <= self.LOG_ZERO:
            return 0.0
        if x >= self.LOG_INFINITY:
            return self.EXP_MAX
        return np.exp(x)

    def safe_log(self, x):
        if x <= self.EXP_MIN:
            return self.LOG_ZERO
        return np.log(x)

    # x = lna and y = lnb are in log scale, ln(a / b) = lna - lnb
    def log_div(self, x, y):
        res = x - y
        if res <= self.LOG_ZERO:
            return self.LOG_ZERO
        if res >= self.LOG_INFINITY:
            return self.LOG_INFINITY
        return res

    # x = lna and y = lnb are in log scale, ln(a * b) = lna + lnb
    def log_mul(self, x, y):
        res = x + y
        if res <= self.LOG_ZERO:
            return self.LOG_ZERO
        if res >= self.LOG_INFINITY:
            return self.LOG_INFINITY
        return res

    # x = lna and y = lnb are in log scale,
    # ln(a + b) = lna + ln(1 + exp(lnb - lna)), evaluated with a >= b so that
    # the exponent is non-positive
    def log_add(self, x, y):
        if x < y:
            t = y
            y = x
            x = t
        return x + self.safe_log(1 + self.safe_exp(y - x))

    def segment_range(self, time, total_times, total_segments):
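        # positions in the blank-extended label sequence (of length
        # 2 * num_labels + 1) that can be part of a valid alignment at the
        # given time step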
        start = max(0, total_segments - (2 * (total_times - time)))
        end = min(total_segments, 2 * (time + 1))
        return start, end

    def forward_a_sequence(self, softmax_a_sequence, labels_a_sequence):
        total_times = softmax_a_sequence.shape[0]
        total_segments = labels_a_sequence.shape[0] * 2 + 1

        required_times = labels_a_sequence.shape[0]
        old_label = -1
        for i in range(labels_a_sequence.shape[0]):
            # two contiguous labels with the same value need an extra blank
            # (and hence an extra time step) between them
            if labels_a_sequence[i, 0] == old_label:
                required_times = required_times + 1
            old_label = labels_a_sequence[i, 0]

        if total_times < required_times:
            return 0

        # calculate the forward variables in log space; see chapter 7.3 of
        # Alex Graves, "Supervised Sequence Labelling with Recurrent
        # Neural Networks"
        log_acts = np.zeros([total_times, self.num_classes], dtype="float32")
        for i in range(total_times):
            for j in range(self.num_classes):
                log_acts[i, j] = self.safe_log(softmax_a_sequence[i, j])

        # calculate the forward variables
        forward_vars = np.zeros([total_times, total_segments], dtype="float32")
        for i in range(total_times):
            for j in range(total_segments):
                forward_vars[i, j] = self.LOG_ZERO

        for i in range(total_times):
            # dp initialization at t0
            if i == 0:
                forward_vars[i, 0] = log_acts[0, self.blank]
                if total_segments > 1:
                    forward_vars[i, 1] = log_acts[0, labels_a_sequence[i, 0]]
                continue

            # dp from t1
            start, end = self.segment_range(i, total_times, total_segments)
            for k in range(end - start):
                j = k + start
                if j % 2 == 1:
                    # odd positions in the blank-extended sequence hold labels
                    label_idx = j // 2
                    label_val = labels_a_sequence[label_idx, 0]
                    fv = self.log_add(forward_vars[i - 1, j],
                                      forward_vars[i - 1, j - 1])
                    if j > 1 and label_val != labels_a_sequence[label_idx - 1,
                                                                0]:
                        fv = self.log_add(fv, forward_vars[i - 1, j - 2])
                    fv = self.log_mul(fv, log_acts[i, label_val])
                else:
                    fv = forward_vars[i - 1, j]
                    if j > 0:
                        fv = self.log_add(fv, forward_vars[i - 1, j - 1])
                    fv = self.log_mul(fv, log_acts[i, self.blank])
                forward_vars[i, j] = fv

        # sum the last two forward variables (in probability space) as
        # log_prob
        log_prob = forward_vars[total_times - 1, total_segments - 1]
        if total_segments > 1:
            log_prob = self.log_add(
                log_prob, forward_vars[total_times - 1, total_segments - 2])

        return -log_prob

    def forward(self):
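        # walk over the sequences in the mini-batch via the level-0 LoD
        # offsets and compute each sequence's loss independently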
        softmax_offset = 0
        labels_offset = 0
        for i in range(self.batch_size):
            softmax_start_i = softmax_offset
            softmax_end_i = softmax_offset + self.softmax_lod[self.level][i]
            labels_start_i = labels_offset
            labels_end_i = labels_offset + self.labels_lod[self.level][i]

            softmax_a_sequence = self.softmax[softmax_start_i:softmax_end_i, :]
            labels_a_sequence = self.labels[labels_start_i:labels_end_i, :]
            self.loss[i] = self.forward_a_sequence(softmax_a_sequence,
                                                   labels_a_sequence)
            softmax_offset += self.softmax_lod[self.level][i]
            labels_offset += self.labels_lod[self.level][i]
        return self.loss


class TestWarpCTCOp(OpTest):
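    # checks the operator's "Loss" output against the CTCForward reference
    # and the gradient with respect to "Logits"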
    def config(self):
        self.batch_size = 4
        self.num_classes = 8
        self.logits_lod = [[4, 1, 3, 3]]
        self.labels_lod = [[3, 1, 4, 4]]
        self.blank = self.num_classes - 1
        self.norm_by_times = False

    def setUp(self):
        self.op_type = "warpctc"
        self.config()

        logits = np.random.uniform(
            0.1, 1.0,
            [sum(self.logits_lod[0]), self.num_classes]).astype("float32")
        softmax = np.apply_along_axis(stable_softmax, 1, logits)
        # labels should not be blank
        labels = np.random.randint(
            0,
            self.num_classes - 1, [sum(self.labels_lod[0]), 1],
            dtype="int32")

        ctc = CTCForward(softmax, self.logits_lod, labels, self.labels_lod,
                         self.blank, self.norm_by_times)
        loss = ctc.forward()

        max_sequence_length = 0
        for i in range(self.batch_size):
            max_sequence_length = max(max_sequence_length,
                                      self.logits_lod[0][i])
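        # zero placeholder for the gradient, registered as the expected
        # "WarpCTCGrad" output in test_check_grad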
        self.gradient = np.zeros(
            [max_sequence_length, self.batch_size, self.num_classes],
            dtype="float32")

        self.inputs = {
            "Logits": (logits, self.logits_lod),
            "Label": (labels, self.labels_lod)
        }
        self.outputs = {"Loss": loss}
        self.attrs = {"blank": self.blank, "norm_by_times": self.norm_by_times}

    def test_check_output(self):
        self.check_output()

    def test_check_grad(self):
        self.outputs['WarpCTCGrad'] = self.gradient
        self.check_grad(["Logits"], "Loss", max_relative_error=0.007)


class TestWarpCTCOpCase1(TestWarpCTCOp):
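    # same test with num_classes larger than CUDA_BLOCK_SIZE and blank = 0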
    def config(self):
        self.batch_size = 4
        self.num_classes = CUDA_BLOCK_SIZE + 2
        self.logits_lod = [[4, 1, 3, 3]]
        self.labels_lod = [[3, 1, 4, 4]]
        self.blank = 0
        self.norm_by_times = False


if __name__ == "__main__":
    unittest.main()