提交 d4b86b84 编写于 作者: M Megvii Engine Team 提交者: Xinran Xu

feat(mge/dtype): add int2 lowbit support and example

GitOrigin-RevId: 67c14ac959a9f2725360f79cd3838000aa5e35ea
上级 3931099e
......@@ -25,6 +25,9 @@ _metadata_dict = {
"qint32": _QuantDtypeMetadata(
"QuantizedS32", "int32", False, -(2 ** 31), 2 ** 31 - 1,
),
# NOTE: int2 is not supported for model dump yet
"quint2": _QuantDtypeMetadata(None, "uint8", True, 0, 3),
"qint2": _QuantDtypeMetadata(None, "int8", False, -2, 1),
}
......
......@@ -13,6 +13,7 @@ from .qconfig import (
QConfig,
calibration_qconfig,
ema_fakequant_qconfig,
ema_lowbit_fakequant_qconfig,
min_max_fakequant_qconfig,
tqt_quant_qconfig,
)
......
......@@ -92,6 +92,15 @@ ema_fakequant_qconfig = QConfig(
act_fake_quant=partial(FakeQuantize, dtype="qint8", narrow_range=False),
)
# Low-bit fake-quantization preset: weights and activations all use the
# "qint4" dtype with narrow_range=False. Weights are observed with
# MinMaxObserver, activations with ExponentialMovingAverageObserver, and both
# are fake-quantized with FakeQuantize.
# NOTE(review): the commit title mentions int2 support, but this preset is
# configured with "qint4" -- confirm that this is the intended dtype.
ema_lowbit_fakequant_qconfig = QConfig(
weight_observer=partial(MinMaxObserver, dtype="qint4", narrow_range=False),
act_observer=partial(
ExponentialMovingAverageObserver, dtype="qint4", narrow_range=False
),
weight_fake_quant=partial(FakeQuantize, dtype="qint4", narrow_range=False),
act_fake_quant=partial(FakeQuantize, dtype="qint4", narrow_range=False),
)
calibration_qconfig = QConfig(
weight_observer=partial(MinMaxObserver, dtype="qint8", narrow_range=True),
act_observer=partial(HistogramObserver, dtype="qint8", narrow_range=False),
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册