Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MegEngine 天元
MegEngine
提交
33e8879a
MegEngine
项目概览
MegEngine 天元
/
MegEngine
大约 1 年 前同步成功
通知
399
Star
4705
Fork
582
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
MegEngine
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
33e8879a
编写于
11月 16, 2020
作者:
M
Megvii Engine Team
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
feat(mge/quantization): support distributed qat
GitOrigin-RevId: c915c843b865d2c462fbdeb09ec525cd2a73ad90
上级
8e11204a
变更
5
隐藏空白更改
内联
并排
Showing
5 changed files
with
103 additions
and
4 deletions
+103
-4
imperative/python/megengine/quantization/__init__.py
imperative/python/megengine/quantization/__init__.py
+1
-0
imperative/python/megengine/quantization/observer.py
imperative/python/megengine/quantization/observer.py
+39
-0
imperative/python/megengine/quantization/qconfig.py
imperative/python/megengine/quantization/qconfig.py
+11
-0
imperative/python/test/unit/module/test_batchnorm.py
imperative/python/test/unit/module/test_batchnorm.py
+0
-4
imperative/python/test/unit/quantization/test_observer.py
imperative/python/test/unit/quantization/test_observer.py
+52
-0
未找到文件。
imperative/python/megengine/quantization/__init__.py
浏览文件 @
33e8879a
...
...
@@ -15,6 +15,7 @@ from .qconfig import (
ema_fakequant_qconfig
,
ema_lowbit_fakequant_qconfig
,
min_max_fakequant_qconfig
,
sync_ema_fakequant_qconfig
,
tqt_quant_qconfig
,
)
from
.utils
import
QuantMode
imperative/python/megengine/quantization/observer.py
浏览文件 @
33e8879a
...
...
@@ -12,6 +12,8 @@ import numpy as np
from
..
import
functional
as
F
from
..core.tensor.dtype
import
_metadata_dict
,
get_quantized_dtype
from
..distributed
import
WORLD
,
get_rank
,
is_distributed
from
..functional.distributed
import
all_reduce_max
,
all_reduce_min
from
..module
import
Module
from
..tensor
import
Tensor
from
.utils
import
QuantMode
,
Round
,
get_qparam_dict
...
...
@@ -123,6 +125,21 @@ class MinMaxObserver(Observer):
return
x_orig
class SyncMinMaxObserver(MinMaxObserver):
    """Distributed-aware MinMaxObserver.

    When a distributed process group is active, the per-worker min/max of
    the observed tensor is all-reduced across ranks, so every worker ends
    up with identical quantization statistics; otherwise it behaves like a
    plain ``MinMaxObserver``.
    """

    def forward(self, x_orig):
        # BUG FIX: the original tested ``self.enable`` — the bound
        # enable() method, which is always truthy — so the observer could
        # never be switched off.  The sibling sync observer uses the
        # ``self.enabled`` flag; do the same here.
        if self.enabled:
            # Statistics must not contribute gradients.
            x = x_orig.detach()
            if is_distributed():
                min_x = all_reduce_min(x.min(), WORLD)
                max_x = all_reduce_max(x.max(), WORLD)
            else:
                min_x = x.min()
                max_x = x.max()
            # Running min/max only ever widen.
            self.min_val._reset(F.minimum(self.min_val, min_x))
            self.max_val._reset(F.maximum(self.max_val, max_x))
        # Observation is a side effect; the input passes through unchanged.
        return x_orig
class
ExponentialMovingAverageObserver
(
MinMaxObserver
):
def
__init__
(
self
,
...
...
@@ -157,6 +174,28 @@ class ExponentialMovingAverageObserver(MinMaxObserver):
return
x_orig
class SyncExponentialMovingAverageObserver(ExponentialMovingAverageObserver):
    """Distributed-aware EMA observer.

    The per-worker min/max is all-reduced across ranks before being folded
    into the exponential moving average, so all workers keep identical
    quantization statistics.
    """

    def forward(self, x_orig):
        if self.enabled:
            # Statistics must not contribute gradients.
            x = x_orig.detach()
            # BUG FIX: the original wrote ``if is_distributed:`` — testing
            # the function object itself, which is always truthy — so the
            # collective path was taken even in single-process runs, where
            # no process group exists.  Call it, exactly as
            # SyncMinMaxObserver does.
            if is_distributed():
                min_x = all_reduce_min(x.min(), WORLD)
                max_x = all_reduce_max(x.max(), WORLD)
            else:
                min_x = x.min()
                max_x = x.max()
            self.min_val._reset(
                self.min_val * self.runtime_momentum
                + (1 - self.runtime_momentum) * min_x
            )
            self.max_val._reset(
                self.max_val * self.runtime_momentum
                + (1 - self.runtime_momentum) * max_x
            )
            # Switch to the configured momentum after the first observation
            # (runtime_momentum presumably starts at a different value in
            # the parent __init__, which is not shown here — confirm).
            self.runtime_momentum = self.momentum
        return x_orig
class
HistogramObserver
(
MinMaxObserver
):
def
__init__
(
self
,
...
...
imperative/python/megengine/quantization/qconfig.py
浏览文件 @
33e8879a
...
...
@@ -13,6 +13,8 @@ from .observer import (
ExponentialMovingAverageObserver
,
HistogramObserver
,
MinMaxObserver
,
SyncExponentialMovingAverageObserver
,
SyncMinMaxObserver
,
)
...
...
@@ -92,6 +94,15 @@ ema_fakequant_qconfig = QConfig(
act_fake_quant
=
partial
(
FakeQuantize
,
dtype
=
"qint8"
,
narrow_range
=
False
),
)
# QConfig for distributed QAT: mirrors ``ema_fakequant_qconfig`` but swaps
# in the Sync* observers, whose statistics are all-reduced across ranks so
# every worker derives identical quantization ranges.
sync_ema_fakequant_qconfig = QConfig(
    weight_observer=partial(SyncMinMaxObserver, dtype="qint8", narrow_range=True),
    act_observer=partial(
        SyncExponentialMovingAverageObserver, dtype="qint8", narrow_range=False
    ),
    weight_fake_quant=partial(FakeQuantize, dtype="qint8", narrow_range=True),
    act_fake_quant=partial(FakeQuantize, dtype="qint8", narrow_range=False),
)
ema_lowbit_fakequant_qconfig
=
QConfig
(
weight_observer
=
partial
(
MinMaxObserver
,
dtype
=
"qint4"
,
narrow_range
=
False
),
act_observer
=
partial
(
...
...
imperative/python/test/unit/module/test_batchnorm.py
浏览文件 @
33e8879a
...
...
@@ -143,7 +143,6 @@ def test_batchnorm():
@
pytest
.
mark
.
skipif
(
platform
.
system
()
==
"Darwin"
,
reason
=
"do not imp GPU mode at macos now"
)
@
pytest
.
mark
.
isolated_distributed
def
test_syncbn1d
():
nr_chan
=
8
data_shape
=
(
3
,
nr_chan
,
4
)
...
...
@@ -234,7 +233,6 @@ def test_batchnorm2d():
@
pytest
.
mark
.
skipif
(
platform
.
system
()
==
"Darwin"
,
reason
=
"do not imp GPU mode at macos now"
)
@
pytest
.
mark
.
isolated_distributed
def
test_syncbn2d
():
nr_chan
=
8
data_shape
=
(
3
,
nr_chan
,
16
,
16
)
...
...
@@ -305,7 +303,6 @@ def test_batchnorm_no_stats():
@
pytest
.
mark
.
skipif
(
platform
.
system
()
==
"Darwin"
,
reason
=
"do not imp GPU mode at macos now"
)
@
pytest
.
mark
.
isolated_distributed
def
test_syncbn_no_stats
():
nr_chan
=
8
data_shape
=
(
3
,
nr_chan
,
4
)
...
...
@@ -354,7 +351,6 @@ def test_batchnorm2d_no_stats():
@
pytest
.
mark
.
skipif
(
platform
.
system
()
==
"Darwin"
,
reason
=
"do not imp GPU mode at macos now"
)
@
pytest
.
mark
.
isolated_distributed
def
test_syncbn2d_no_stats
():
nr_chan
=
8
data_shape
=
(
3
,
nr_chan
,
16
,
16
)
...
...
imperative/python/test/unit/quantization/test_observer.py
0 → 100644
浏览文件 @
33e8879a
import
multiprocessing
as
mp
import
platform
import
numpy
as
np
import
pytest
import
megengine
as
mge
import
megengine.distributed
as
dist
import
megengine.quantization.observer
as
ob
from
megengine.distributed.helper
import
get_device_count_by_fork
def test_min_max_observer():
    """A MinMaxObserver fed one batch must report that batch's exact
    minimum and maximum."""
    data = np.random.rand(3, 3, 3, 3).astype("float32")
    expected_min = data.min()
    expected_max = data.max()
    observer = ob.MinMaxObserver()
    observer(mge.tensor(data))
    assert observer.min_val == expected_min and observer.max_val == expected_max
@pytest.mark.skipif(
    platform.system() == "Darwin", reason="do not imp GPU mode at macos now"
)
@pytest.mark.skipif(
    platform.system() == "Windows", reason="windows disable MGB_ENABLE_OPR_MM"
)
@pytest.mark.skipif(get_device_count_by_fork("gpu") < 2, reason="need more gpu device")
@pytest.mark.isolated_distributed
def test_sync_min_max_observer():
    """Each rank observes only its own slice, yet SyncMinMaxObserver must
    report the global min/max of the full array on every rank."""
    x = np.random.rand(6, 3, 3, 3).astype("float32")
    np_min, np_max = x.min(), x.max()
    world_size = 2
    port = dist.get_free_ports(1)[0]
    # Keep a reference so the rendezvous server lives as long as the workers.
    server = dist.Server(port)

    def worker(rank, slc):
        dist.init_process_group("localhost", port, world_size, rank, rank)
        observer = ob.SyncMinMaxObserver()
        observer(mge.tensor(x[slc]))
        # The synced statistics must cover the whole array, not the slice.
        assert observer.min_val == np_min and observer.max_val == np_max

    workers = []
    for rank in range(world_size):
        part = slice(rank * 3, (rank + 1) * 3)
        proc = mp.Process(target=worker, args=(rank, part), daemon=True)
        proc.start()
        workers.append(proc)
    for proc in workers:
        proc.join(20)
        # A non-zero exit code means the in-worker assertion failed.
        assert proc.exitcode == 0
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录