test_fake_quant.py 7.7 KB
Newer Older
1 2 3
# -*- coding: utf-8 -*-
# MegEngine is Licensed under the Apache License, Version 2.0 (the "License")
#
4
# Copyright (c) 2014-2021 Megvii Inc. All rights reserved.
5 6 7 8 9 10 11 12
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
import numpy as np
import pytest

import megengine as mge
M
Megvii Engine Team 已提交
13
import megengine.functional as F
14
from megengine import tensor
M
Megvii Engine Team 已提交
15
from megengine.core.autodiff.grad import Function, Grad
16
from megengine.core.tensor.dtype import QuantDtypeMeta
17
from megengine.core.tensor.utils import make_shape_tuple
18
from megengine.quantization.internal_fake_quant import *
19 20 21 22
from megengine.quantization.utils import (
    QuantMode,
    create_qparams,
    fake_quant_tensor,
M
Megvii Engine Team 已提交
23
    lsq_forward,
24 25
    tqt_forward,
)
26 27


M
Megvii Engine Team 已提交
28
class TQT_numpy:
    """NumPy reference implementation of TQT fake quantization.

    The scale argument is a log2 exponent: the effective step size is
    ``2 ** scale``. ``forward`` must be called before ``backward`` because
    the backward pass reads the tensors saved by the forward pass.
    """

    def __init__(self, lowerbound, upperbound):
        """Store the clipping range applied to the scaled input.

        Args:
            lowerbound: smallest value kept after scaling.
            upperbound: largest value kept after scaling.
        """
        super().__init__()
        self.lowerbound = lowerbound
        self.upperbound = upperbound

    def forward(self, inp, scale):
        """Scale, clip, round and rescale ``inp``.

        Args:
            inp: input array.
            scale: log2 of the quantization step.

        Returns:
            The fake-quantized array ``round(clip(inp / t)) * t`` with
            ``t = 2 ** scale``.
        """
        t = 2 ** scale
        inp_scaled = inp / t
        inp_clipped = np.maximum(
            np.minimum(inp_scaled, self.upperbound), self.lowerbound
        )
        inp_rounded = np.round(inp_clipped)
        inp_flq = inp_rounded * t
        # Kept for backward(): the unclipped scaled input, the rounded
        # (clipped) value and the step size.
        self.saved_tensors = (inp_scaled, inp_rounded, t)
        return inp_flq

    def backward(self, grad_inp_flq):
        """Compute gradients w.r.t. the input and the log2 scale.

        Args:
            grad_inp_flq: gradient flowing into the output of ``forward``.

        Returns:
            Tuple ``(grad_inp, grad_s)``.
        """
        (inp_scaled, inp_rounded, t) = self.saved_tensors
        # Elements whose scaled value lies more than 0.5 outside the range
        # are treated as clipped.
        mask_clip = (inp_scaled < -0.5 + self.lowerbound) | (
            inp_scaled > self.upperbound + 0.5
        )
        # Integer 0/1 mask of the elements that were not clipped.
        mask_quant = np.abs(mask_clip - 1)
        # Gradient contribution of the non-clipped elements.
        grad_quant = grad_inp_flq * mask_quant * (inp_rounded - inp_scaled)
        # Gradient contribution of the clipped elements.
        grad_clip = grad_inp_flq * mask_clip * inp_rounded
        grad_s = grad_clip.sum() + grad_quant.sum()
        # dL/ds = dL/dt * t * ln(2)
        grad_s = grad_s * t * np.log(2)
        # The input gradient passes through only where nothing was clipped.
        grad_inp = grad_inp_flq * mask_quant
        return grad_inp, grad_s


M
Megvii Engine Team 已提交
67
def test_tqt():
    """Check ``tqt_forward`` and its gradients against ``TQT_numpy``."""
    g = []

    def cb(grad):
        # Gradients are collected in callback order; the unpacking below
        # assumes the gradient of x arrives before the gradient of s.
        g.append(grad)

    x = np.random.randint(-128, 128, size=(1, 2, 3, 4)).astype("float32")
    # rand(1) - 1 lies in [-1, 0), so the step 2 ** s lies in [0.5, 1).
    s = np.random.rand(1) - 1
    g_y = np.ones(shape=(1, 2, 3, 4), dtype="float32")

    # Reference values from the NumPy implementation.
    n = TQT_numpy(-127, 127)
    y_np = n.forward(x, s)
    g_x_np, g_s_np = n.backward(g_y)

    # Same computation through MegEngine.
    x = mge.tensor(x, dtype="float32")
    s = mge.tensor(s, dtype="float32")
    g_y = mge.tensor(g_y, dtype="float32")
    grad = Grad().wrt(x, s, callback=cb)
    y = tqt_forward(-127, 127, x, s)
    grad(y, g_y)
    g_x, g_s = g

    np.testing.assert_allclose(y.numpy(), y_np, rtol=1e-5, atol=1e-5)
    np.testing.assert_allclose(g_x.numpy(), g_x_np, rtol=1e-5, atol=1e-5)
    np.testing.assert_allclose(g_s.numpy(), g_s_np, rtol=5e-5, atol=5e-5)
93 94


95
def _save_to(self, name="grad"):
    """Build a callback that stores its argument as an attribute of ``self``.

    Args:
        self: object that receives the attribute.
        name: name of the attribute to set; defaults to ``"grad"``.

    Returns:
        A one-argument function that does ``setattr(self, name, value)``.
    """

    def callback(grad):
        setattr(self, name, grad)

    return callback


class Round(Function):
    """Rounding op whose backward pass returns the incoming gradient as-is."""

    def forward(self, x):
        """Return ``x`` rounded with ``F.round``."""
        rounded = F.round(x)
        return rounded

    def backward(self, output_grads):
        """Pass the upstream gradient through unchanged."""
        return output_grads


def fake_quant_tensor_gt(inp, scale, zero_point, qmin, qmax):
    """Reference fake quantization built from basic functional ops.

    Divides by ``scale``, rounds with ``Round``, shifts by ``zero_point``,
    clamps to ``[qmin, qmax]`` and then undoes the shift and the scaling.
    """
    quantized = Round()(inp / scale) + zero_point
    # Clamp from below first, then from above.
    floored = F.maximum(quantized, qmin)
    clamped = F.minimum(floored, qmax)
    return (clamped - zero_point) * scale


def test_fakequant():
    """Compare ``fake_quant_tensor`` with ``fake_quant_tensor_gt``.

    Forward outputs and input gradients are checked for two parameter
    layouts: shape ``(1,)`` and shape ``(1, 32, 1, 1)``.
    """
    qmin = -126
    qmax = 129
    test_dtype = QuantDtypeMeta("test_qint8", None, "int8", qmin, qmax)

    def run(zero_point, scale):
        qparams = create_qparams(QuantMode.ASYMMERTIC, test_dtype, scale, zero_point)
        inp_data = np.random.uniform(low=-512.0, high=512.0, size=(1, 32, 32, 32))
        inp = tensor(inp_data, dtype=np.float32)
        # test forward
        oup = fake_quant_tensor(inp, qparams).numpy()
        oup_gt = fake_quant_tensor_gt(inp, scale, zero_point, qmin, qmax).numpy()
        assert np.allclose(oup, oup_gt)
        assert oup.shape == oup_gt.shape

        # test backward
        # _save_to(x) stores the received gradient as x.grad.
        x = tensor(inp_data, dtype=np.float32)
        grad = Grad().wrt(x, callback=_save_to(x))
        y = fake_quant_tensor(x, qparams)
        grad(y, tensor(F.ones_like(x)))

        x1 = tensor(inp_data, dtype=np.float32)
        grad = Grad().wrt(x1, callback=_save_to(x1))
        y1 = fake_quant_tensor_gt(x1, scale, zero_point, qmin, qmax)
        grad(y1, tensor(F.ones_like(x1)))

        assert np.allclose(x.grad.numpy(), x1.grad.numpy())
        assert make_shape_tuple(x.grad.shape) == make_shape_tuple(x1.grad.shape)

    # Single-element parameters.
    zero_point = tensor([1.0], dtype=np.float32)
    scale = tensor([4.0], dtype=np.float32)
    run(zero_point, scale)

    # Parameters shaped (1, 32, 1, 1), broadcast against the (1, 32, 32, 32) input.
    zero_point = tensor(1.0 * np.ones((1, 32, 1, 1)), dtype=np.float32)
    scale = tensor(4.0 * np.ones((1, 32, 1, 1)), dtype=np.float32)
    run(zero_point, scale)
M
Megvii Engine Team 已提交
153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196


class LSQ_numpy:
    """NumPy reference implementation of LSQ fake quantization.

    ``forward`` saves the intermediate arrays that ``backward`` needs, so it
    must be called first.
    """

    def __init__(self, lowerbound, upperbound):
        """Remember the clipping range used on the scaled input."""
        super().__init__()
        self.lowerbound, self.upperbound = lowerbound, upperbound

    def forward(self, inp, scale, zero_point, grad_scale):
        """Return ``(floor(clip(inp / scale + zero_point) + 0.5) - zero_point) * scale``.

        ``grad_scale`` is not used here; it is only stored for ``backward``.
        """
        scaled = inp / scale + zero_point
        capped = np.minimum(scaled, self.upperbound)
        clipped = np.maximum(capped, self.lowerbound)
        # Round half up by flooring after adding 0.5.
        rounded = np.floor(clipped + 0.5)
        self.saved_tensors = (scaled, rounded, scale, grad_scale)
        return (rounded - zero_point) * scale

    def backward(self, grad_inp_flq):
        """Return ``(grad_inp, grad_s)`` for the upstream gradient ``grad_inp_flq``."""
        scaled, rounded, _, grad_scale = self.saved_tensors

        below = scaled < self.lowerbound
        above = scaled > self.upperbound
        # Integer 0/1 mask: 1 where the value is neither below nor above.
        inside = np.abs(np.logical_xor(below, above) - 1)

        # Per-element scale gradient: the bound where clipped, the rounding
        # residual elsewhere.
        per_elem = (
            below * self.lowerbound
            + above * self.upperbound
            + inside * (-scaled + rounded)
        )
        grad_s = (per_elem * grad_scale * grad_inp_flq).sum()

        return grad_inp_flq * inside, grad_s


def test_lsq():
    """Check ``lsq_forward`` and its gradients against ``LSQ_numpy``."""
    g = []

    def cb(grad):
        # Gradients are collected in callback order; the unpacking below
        # assumes the gradient of x arrives before the gradient of s.
        g.append(grad)

    # FIXME: use random number when LSQ is fixed
    # x = np.random.randint(-128, 128, size=(1, 2, 3, 4)).astype("float32")
    # s = np.random.rand(1)
    x = np.array(
        [
            [
                [
                    [4.0, 38.0, -121.0, 38.0],
                    [15.0, -115.0, -112.0, 24.0],
                    [23.0, -65.0, 109.0, -115.0],
                ],
                [
                    [-66.0, -90.0, -45.0, -101.0],
                    [68.0, -98.0, 108.0, -79.0],
                    [54.0, 63.0, -10.0, -50.0],
                ],
            ]
        ],
        dtype="float32",
    )
    s = np.array([0.02918224], dtype="float32")
    eps = np.array([1e-5], dtype="float32")
    # Keep the scale positive and at least eps.
    s = np.maximum(np.abs(s), eps)
    zero_point = np.array([1.0], dtype="float32")
    grad_s = np.array([2.0], dtype="float32")

    g_y = np.ones(shape=(1, 2, 3, 4), dtype="float32")

    # Reference values from the NumPy implementation.
    n = LSQ_numpy(-127, 127)
    y_np = n.forward(x, s, zero_point, grad_s)
    g_x_np, g_s_np = n.backward(g_y)

    # Same computation through MegEngine.
    x = mge.tensor(x, dtype="float32")
    s = mge.tensor(s, dtype="float32")
    zero_point = mge.tensor(zero_point, dtype="float32")
    grad_s = mge.tensor(grad_s, dtype="float32")

    g_y = mge.tensor(g_y, dtype="float32")
    grad = Grad().wrt(x, s, callback=cb)
    y = lsq_forward(-127, 127, x, s, zero_point, grad_s)
    grad(y, g_y)
    g_x, g_s = g

    np.testing.assert_allclose(y.numpy(), y_np, rtol=1e-7, atol=1e-7)
    np.testing.assert_allclose(g_x.numpy(), g_x_np, rtol=1e-7, atol=1e-7)
    np.testing.assert_allclose(g_s.numpy(), g_s_np, rtol=5e-7, atol=5e-7)