import unittest
import numpy as np
from op_test import OpTest
import paddle.v2.framework.core as core
from paddle.v2.framework.op import Operator
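
# This file tests the batch_norm operator: the NumPy reference
# implementation is first checked against itself across the NHWC and NCHW
# layouts, and the operator's forward and backward kernels are then
# checked against that reference on every available place.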


def grad_var_name(var_name):
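    # Gradient variables follow the framework naming convention "<name>@GRAD".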
    return var_name + "@GRAD"


def get_backward_op(scope, op, no_grad_set):
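    # Build the backward op from the forward op, then create every variable
    # the backward op reads or writes in the scope.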
    backward_op = core.Operator.backward(op, no_grad_set)
    for input in backward_op.input_vars():
        var = scope.var(input)
        var.get_tensor()
    for output in backward_op.output_vars():
        var = scope.var(output)
        var.get_tensor()
    return backward_op


def _reference_training(x, scale, offset, epsilon, data_format):
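    # NumPy reference implementation of the batch-norm forward pass,
    # returning (y, batch_mean, batch_variance) as ground truth for the
    # operator under test.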
    if data_format == "NCHW":
        n, c, h, w = x.shape
        x_square = x * x
        x_square_sum = np.sum(x_square, (0, 2, 3))
        x_sum = np.sum(x, axis=(0, 2, 3))
        element_count = np.size(x) / int(np.shape(x)[1])
        mean = x_sum / element_count
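        # biased batch variance via the identity E[x^2] - E[x]^2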
        var = x_square_sum / element_count - mean * mean
        mean_tile = np.reshape(mean, (1, c, 1, 1))
        mean_tile = np.tile(mean_tile, (n, 1, h, w))
        var_tile = np.reshape(var, (1, c, 1, 1))
        var_tile = np.tile(var_tile, (n, 1, h, w))
        normalized = (x - mean_tile) / np.sqrt(var_tile + epsilon)
        scale_tile = np.reshape(scale, (1, c, 1, 1))
        scale_tile = np.tile(scale_tile, (n, 1, h, w))
        offset_tile = np.reshape(offset, (1, c, 1, 1))
        offset_tile = np.tile(offset_tile, (n, 1, h, w))
        y = normalized * scale_tile + offset_tile
        return y, mean, var
    elif data_format == "NHWC":
        x_square = x * x
        x_square_sum = np.sum(x_square, (0, 1, 2))
        x_sum = np.sum(x, axis=(0, 1, 2))
        element_count = np.size(x) / int(np.shape(x)[-1])
        mean = x_sum / element_count
        var = x_square_sum / element_count - mean * mean
        normalized = (x - mean) / np.sqrt(var + epsilon)
        return (normalized * scale + offset), mean, var
    else:
        raise ValueError("Unknown data order.")


def _reference_grad(x, grad_y, scale, mean, var, epsilon, data_format):
    # Use the following formulas to calculate gradients:
    # grad_scale =
    #   sum(grad_y * (x - mean)) * rsqrt(var + epsilon)
    #
    # grad_offset = sum(grad_y)
    #
    # grad_x =
    #   1/N * scale * rsqrt(var + epsilon) * (N * grad_y - sum(grad_y) -
    #   (x - mean) * sum(grad_y * (x - mean)) / (var + epsilon))

    # transpose from (N, C, H, W) to (N, H, W, C) to simplify computation
    if data_format == "NCHW":
        x = np.transpose(x, (0, 2, 3, 1))
        grad_y = np.transpose(grad_y, (0, 2, 3, 1))

    grad_x = scale * (
        grad_y - np.mean(grad_y, axis=(0, 1, 2)) -
        (x - mean) * np.mean(grad_y * (x - mean), axis=(0, 1, 2)) /
        (var + epsilon)) / np.sqrt(var + epsilon)
    grad_scale = np.sum(grad_y * (x - mean) / np.sqrt(var + epsilon),
                        axis=(0, 1, 2))
    grad_offset = np.sum(grad_y, axis=(0, 1, 2))

    # transpose back to (N, C, H, W)
    if data_format == "NCHW":
        grad_x = np.transpose(grad_x, (0, 3, 1, 2))
        x = np.transpose(x, (0, 3, 1, 2))
        grad_y = np.transpose(grad_y, (0, 3, 1, 2))
    return grad_x, grad_scale, grad_offset


def create_or_get_tensor(scope, var_name, var, place):
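    # Look up (or create) `var_name` in the scope; when a NumPy array is
    # given, copy it into the tensor, otherwise leave the tensor for the
    # operator to fill.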
    tensor = scope.var(var_name).get_tensor()
    if var is not None:
        assert isinstance(var, np.ndarray)
        tensor.set_lod([[]])
        tensor.set_dims(var.shape)
        tensor.set(var, place)
    return tensor


def set_output_grad(scope, outputs, place, feed_dict=None):
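    # Create and fill the "@GRAD" tensor of every listed output: use the
    # value from `feed_dict` when provided, otherwise fill with ones.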
    def __set_tensor__(name, data=None):
        out_tensor = scope.find_var(name).get_tensor()
        grad_tensor = scope.var(grad_var_name(name)).get_tensor()
        out_dtype = out_tensor.dtype()
        if data is None:
            if out_dtype == core.DataType.FP64:
                data = np.ones(out_tensor.shape(), dtype=np.float64)
            elif out_dtype == core.DataType.FP32:
                data = np.ones(out_tensor.shape(), dtype=np.float32)
            else:
                raise ValueError("Not supported data type " + str(out_dtype))
        grad_tensor.set(data, place)

    for output in outputs:
        data = None
        if feed_dict is not None and output in feed_dict:
            data = feed_dict[output]
        __set_tensor__(output, data)


class TestBatchNormOp(OpTest):
    def __assert_close(self, tensor, np_array, msg, atol=1e-4):
        self.assertTrue(np.allclose(np.array(tensor), np_array, atol=atol), msg)

    def test_python(self):
        data_format = "NHWC"
        epsilon = 0.00001
        momentum = 0.9

        # N, H, W, C: 2, 3, 4, 2
        n, h, w, c = 2, 3, 4, 2
        x_shape = [n, h, w, c]
        scale_shape = [c]

        x_val = np.random.random_sample(x_shape).astype(np.float32)
        scale_val = np.random.random_sample(scale_shape).astype(np.float32)
        bias_val = np.random.random_sample(scale_shape).astype(np.float32)

        mean = np.zeros(scale_shape).astype(np.float32)
        variance = np.ones(scale_shape).astype(np.float32)

        # run forward
        y_out, saved_mean, var_ref = _reference_training(
            x_val, scale_val, bias_val, epsilon, "NHWC")

        # update moving mean and variance
        mean_out = saved_mean * (1. - momentum) + momentum * mean
        variance_out = var_ref * (1. - momentum) + momentum * variance
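        # the op saves the inverse of the standard deviation, not the raw
        # batch variance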
        saved_variance = 1. / np.sqrt(var_ref + epsilon)

        # running N, C, H, W case
        # should produce the same results
        x_shape2 = [n, c, h, w]
        x_val2 = np.transpose(x_val, (0, 3, 1, 2))
        y_out2, saved_mean2, var_ref2 = _reference_training(
            x_val2, scale_val, bias_val, epsilon, "NCHW")

        self.__assert_close(saved_mean, saved_mean2, "batch mean")
        self.__assert_close(var_ref, var_ref2, "batch variance")

        # transfer (N, C, H, W) back to (N, H, W, C)
        y_out2_trans = np.transpose(y_out2, (0, 2, 3, 1))
        self.__assert_close(y_out, y_out2_trans, "batch output")
        print 'python: NHWC, NCHW, forward checking passed'

        # test backward now
        # NHWC
        self.y_grad = np.random.random_sample(x_shape).astype(np.float32)
        y_grad = self.y_grad
        # y_grad = np.ones(x_shape).astype(np.float32)
        x_grad_ref, scale_grad_ref, bias_grad_ref = _reference_grad(
            x_val, y_grad, scale_val, saved_mean, var_ref, epsilon, "NHWC")

        # NCHW
        y_grad2 = np.transpose(y_grad, (0, 3, 1, 2))
        # y_grad2 = np.ones(x_shape2).astype(np.float32)
        x_grad_ref2, scale_grad_ref2, bias_grad_ref2 = _reference_grad(
            x_val2, y_grad2, scale_val, saved_mean2, var_ref2, epsilon, "NCHW")

        self.__assert_close(scale_grad_ref, scale_grad_ref2, "scale gradient")
        self.__assert_close(bias_grad_ref, bias_grad_ref2, "bias gradient")

        x_grad_transpose = np.transpose(x_grad_ref2, (0, 2, 3, 1))
        self.__assert_close(x_grad_ref, x_grad_transpose, "x gradient")
        print 'python: NHWC, NCHW, backward checking passed'

    def test_forward_backward(self):
        def test_with_place(place, data_format):
            # attr
            epsilon = 0.00001
            momentum = 0.9

            # N, H, W, C: 2, 3, 4, 2
            n, h, w, c = 2, 3, 4, 2

            if data_format == "NHWC":
                x_shape = [n, h, w, c]
            elif data_format == "NCHW":
                x_shape = [n, c, h, w]
            else:
                raise ValueError("Unknown data type.")
            scale_shape = [c]

            x_val = np.random.random_sample(x_shape).astype(np.float32)
            scale_val = np.random.random_sample(scale_shape).astype(np.float32)
            bias_val = np.random.random_sample(scale_shape).astype(np.float32)

            mean = np.zeros(scale_shape).astype(np.float32)
            variance = np.ones(scale_shape).astype(np.float32)

            # run forward
            y_out, saved_mean, var_ref = _reference_training(
                x_val, scale_val, bias_val, epsilon, data_format)

            # update moving mean and variance
            mean_out = saved_mean * (1. - momentum) + momentum * mean
            variance_out = var_ref * (1. - momentum) + momentum * variance
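            # the op saves the inverse of the standard deviation, not the
            # raw batch variance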
            saved_variance = 1. / np.sqrt(var_ref + epsilon)

            # for gradient test: back-propagate a one-hot output gradient
            # y_grad = np.ones(x_shape).astype(np.float32)
            y_grad = np.zeros(x_shape).astype(np.float32)
            y_grad[0, 0, 0, 0] = 1.
            # y_grad = np.random.random_sample(x_shape).astype(np.float32)
            x_grad_ref, scale_grad_ref, bias_grad_ref = _reference_grad(
                x_val, y_grad, scale_val, saved_mean, var_ref, epsilon,
                data_format)

            scope = core.Scope()

            # create input
            x_tensor = create_or_get_tensor(scope, "x_val", x_val, place)
            scale_tensor = create_or_get_tensor(scope, "scale_val", scale_val,
                                                place)
            bias_tensor = create_or_get_tensor(scope, "bias_val", bias_val,
                                               place)
            mean_tensor = create_or_get_tensor(scope, "mean", mean, place)
            variance_tensor = create_or_get_tensor(scope, "variance", variance,
                                                   place)

            # create output
            y_tensor = create_or_get_tensor(scope, "y_out", None, place)
            saved_mean_tensor = create_or_get_tensor(scope, "saved_mean", None,
                                                     place)
            saved_variance_tensor = create_or_get_tensor(
                scope, "saved_variance", None, place)
            mean_out_tensor = mean_tensor
            variance_out_tensor = variance_tensor

            batch_norm_op = Operator(
                "batch_norm",
                # inputs
                X="x_val",
                Scale="scale_val",
                Bias="bias_val",
                Mean="mean",
                Variance="variance",
                # outputs
                Y="y_out",
                MeanOut="mean",
                VarianceOut="variance",
                SavedMean="saved_mean",
                SavedVariance="saved_variance",
                # attrs
                is_test=False,
                tensor_format=data_format,
                momentum=momentum,
                epsilon=epsilon)

            ctx = core.DeviceContext.create(place)
            batch_norm_op.run(scope, ctx)

            # check forward result
            self.__assert_close(y_tensor, y_out, "y_out")
            self.__assert_close(saved_mean_tensor, saved_mean, "saved_mean")
            self.__assert_close(saved_variance_tensor, saved_variance,
                                "saved_variance")
            self.__assert_close(mean_out_tensor, mean_out, "mean_out")
            if isinstance(place, core.GPUPlace):
                atol = 5e-2
            else:
                atol = 1e-4
            self.__assert_close(variance_out_tensor, variance_out,
                                "variance_out", atol)
            print "op test forward passed: ", str(place), tensor_format

            # run backward
            batch_norm_op_grad = get_backward_op(scope, batch_norm_op, set())
            set_output_grad(
                scope,
                ["y_out", "mean", "variance", "saved_mean", "saved_variance"],
                place,
                feed_dict={"y_out": y_grad})
            batch_norm_op_grad.run(scope, ctx)

            x_grad_tensor = create_or_get_tensor(scope,
                                                 grad_var_name("x_val"), None,
                                                 place)
            scale_grad_tensor = create_or_get_tensor(scope,
                                                     grad_var_name("scale_val"),
                                                     None, place)
            bias_grad_tensor = create_or_get_tensor(scope,
                                                    grad_var_name("bias_val"),
                                                    None, place)

            # check gradient output
            self.__assert_close(x_grad_tensor, x_grad_ref, "x_grad")
            self.__assert_close(scale_grad_tensor, scale_grad_ref, "scale_grad")
            self.__assert_close(bias_grad_tensor, bias_grad_ref, "bias_grad")
            print "op test backward passed: ", str(place), tensor_format

        places = [core.CPUPlace()]
        if core.is_compile_gpu() and core.op_support_gpu("batch_norm"):
            places.append(core.GPUPlace(0))
        for place in places:
            for data_format in ["NCHW", "NHWC"]:
                test_with_place(place, data_format)


if __name__ == '__main__':
    unittest.main()