MegEngine 天元 / MegEngine

Commit 8be78b11
Authored by Megvii Engine Team on May 18, 2020
test(mge/optimizer): refactor the unittest of optimizer
GitOrigin-RevId: 4754285713d6a8697a31056331b443ad6b1302af
Parent: 01ac8bbd
Showing 1 changed file with 125 additions and 196 deletions.

python_module/test/unit/optimizer/test_optimizer.py  (+125, -196)
@@ -12,249 +12,178 @@ import numpy as np
 from helpers import MLP, graph_mode

 import megengine.functional as F
-from megengine import load, save
+from megengine import load, optimizer, save
 from megengine.core import TensorDict, tensor
 from megengine.jit import trace
-from megengine.optimizer import SGD, Adam
 from megengine.test import assertTensorClose


 def get_input():
-    batch_size = 2
-    input_dim = 28
-    data_shape = (batch_size, input_dim)
-    label_shape = (batch_size,)
-    data = tensor()
-    label = tensor(dtype=np.int32)
+    batch_size, input_dim = 2, 28
+    data_shape, label_shape = (batch_size, input_dim), (batch_size,)
+    data, label = tensor(dtype=np.float32), tensor(dtype=np.int32)
     data.set_value(np.random.random(data_shape).astype(np.float32))
     label.set_value(np.random.randint(0, 10, label_shape))
     return data, data_shape, label, label_shape


-def test_sgd_simple():
-    data, data_shape, label, label_shape = get_input()
-    mlp = MLP()
-    opt = SGD(mlp.parameters(), lr=0.01, weight_decay=0.1)
-    for idx in range(3):
-        data.set_value(np.random.random(data_shape).astype(np.float32))
-        label.set_value(np.random.randint(0, 10, label_shape))
-        pred = mlp(data)
-        loss = F.square_loss(pred, label.reshape(-1, 1))
-        if idx % 2:
-            opt.zero_grad()
-        else:
-            mlp.zero_grad()
-        opt.backward(loss)
-        grads = TensorDict()
-        orig_params = TensorDict()
-        for param in mlp.parameters():
-            grad = F.grad(loss, param, use_virtual_grad=False)
-            assertTensorClose(grad.numpy(), param.grad.numpy())
-            grads[param] = np.copy(grad.numpy())
-            orig_params[param] = np.copy(param.numpy())
-        opt.step()
-        for param in mlp.parameters():
-            assertTensorClose(
-                param.numpy(), orig_params[param] * 0.999 - grads[param] * 0.01
-            )
-
-
-def test_sgd_momentum():
-    data, data_shape, label, label_shape = get_input()
-    mlp = MLP()
-    opt = SGD(mlp.parameters(), lr=0.01, momentum=0.9)
-    slots = TensorDict()
-    for param in mlp.parameters():
-        slots[param] = np.zeros(param.shape).astype(np.float32)
-    for _ in range(3):
-        data.set_value(np.random.random(data_shape).astype(np.float32))
-        label.set_value(np.random.randint(0, 10, label_shape))
-        pred = mlp(data)
-        loss = F.square_loss(pred, label.reshape(-1, 1))
-        opt.zero_grad()
-        opt.backward(loss)
-        orig_params = TensorDict()
-        grads = TensorDict()
-        for param in mlp.parameters():
-            orig_params[param] = np.copy(param.numpy())
-            grads[param] = np.copy(param.grad.numpy())
-        opt.step()
-        for param in mlp.parameters():
-            slot = slots[param]
-            orig_param = orig_params[param]
-            slot *= 0.9
-            slot -= param.grad.numpy() * 0.01
-            assertTensorClose(param.numpy(), orig_param + slot)
-
-
-# TODO: put opt.step() inside trace
-def test_sgd_momentum_static():
-    _, data_shape, _, label_shape = get_input()
-    mlp = MLP()
-    opt = SGD(mlp.parameters(), lr=0.01, momentum=0.9)
-
-    @trace
-    def f(data, label):
-        pred = mlp(data)
-        loss = F.square_loss(pred, label.reshape(-1, 1))
-        opt.zero_grad()
-        opt.backward(loss)
-
-    slots = TensorDict()
-    for param in mlp.parameters():
-        slots[param] = np.zeros(param.shape).astype(np.float32)
-    for _ in range(3):
-        f(
-            np.random.random(data_shape).astype(np.float32),
-            np.random.randint(0, 10, label_shape).astype(np.int32),
-        )
-        orig_params = TensorDict()
-        grads = TensorDict()
-        for param in mlp.parameters():
-            orig_params[param] = np.copy(param.numpy())
-            grads[param] = np.copy(param.grad.numpy())
-        opt.step()
-        for param in mlp.parameters():
-            slot = slots[param]
-            orig_param = orig_params[param]
-            slot *= 0.9
-            slot -= param.grad.numpy() * 0.01
-            assertTensorClose(param.numpy(), orig_param + slot)
-
-
-def test_update_lr():
-    data, data_shape, label, label_shape = get_input()
-    mlp = MLP()
-    opt = SGD(mlp.parameters(), lr=0.01)
-    pred = mlp(data)
-    loss = F.square_loss(pred, label.reshape(-1, 1))
-    opt.zero_grad()
-    opt.backward(loss)
-    opt.step()
-    for group in opt.param_groups:
-        group["lr"] += 0.02
-    for _ in range(3):
-        data.set_value(np.random.random(data_shape).astype(np.float32))
-        label.set_value(np.random.randint(0, 10, label_shape))
-        pred = mlp(data)
-        loss = F.square_loss(pred, label.reshape(-1, 1))
-        opt.zero_grad()
-        opt.backward(loss)
-        for param in mlp.parameters():
-            grad = F.grad(loss, param, use_virtual_grad=False)
-            assertTensorClose(grad.numpy(), param.grad.numpy())
-        orig_params = []
-        for param in mlp.parameters():
-            orig_params.append(np.copy(param.numpy()))
-        opt.step()
-        for param, orig_param in zip(mlp.parameters(), orig_params):
-            assertTensorClose(param.numpy(), orig_param - param.grad.numpy() * 0.03)
-
-
-def test_adam():
-    data, data_shape, label, label_shape = get_input()
-    mlp = MLP()
-    beta0 = 0.8
-    beta1 = 0.9
-    eps = 1e-4
-    opt = Adam(mlp.parameters(), lr=0.01, betas=(beta0, beta1), eps=eps)
-    m_slots = TensorDict()
-    v_slots = TensorDict()
-    for param in mlp.parameters():
-        m_slots[param] = np.zeros(param.shape).astype(np.float32)
-        v_slots[param] = np.zeros(param.shape).astype(np.float32)
-    step_size = 0
-
-    def check_value():
-        for param in mlp.parameters():
-            grad = param.grad.numpy()
-            orig_param = orig_params[param]
-            m = m_slots[param]
-            v = v_slots[param]
-            m *= beta0
-            m += (1 - beta0) * grad
-            v *= beta1
-            v += (1 - beta1) * grad * grad
-            update = (m / (1 - beta0 ** step_size)) / (
-                np.sqrt(v / (1 - beta1 ** step_size)) + eps
-            )
-            assertTensorClose(param.numpy(), orig_param - 0.01 * update)
-
-    # eager
-    for _ in range(3):
-        data.set_value(np.random.random(data_shape).astype(np.float32))
-        label.set_value(np.random.randint(0, 10, label_shape))
-        pred = mlp(data)
-        loss = F.square_loss(pred, label.reshape(-1, 1))
-        opt.zero_grad()
-        grads = opt.backward(loss)
-        orig_params = TensorDict()
-        for param in mlp.parameters():
-            orig_params[param] = np.copy(param.numpy())
-        opt.step()
-        step_size += 1
-        check_value()
-
-    # static
-    @trace
-    def f(data, label):
-        pred = mlp(data)
-        loss = F.square_loss(pred, label.reshape(-1, 1))
-        opt.backward(loss)
-
-    for _ in range(3):
-        opt.zero_grad()
-        orig_params = TensorDict()
-        for param in mlp.parameters():
-            orig_params[param] = np.copy(param.numpy())
-        f(
-            np.random.random(data_shape).astype(np.float32),
-            np.random.randint(0, 10, label_shape).astype(np.int32),
-        )
-        opt.step()
-        step_size += 1
-        check_value()
-
-
-@graph_mode("eager", "static")
-def test_optimizer_serialization():
-    data, data_shape, label, label_shape = get_input()
-    mlp = MLP()
-    opt = SGD(mlp.parameters(), lr=0.01, momentum=0.9)
-    slots = TensorDict()
-    for param in mlp.parameters():
-        slots[param] = np.zeros(param.shape).astype(np.float32)
-    pred = mlp(data)
-    loss = F.square_loss(pred, label.reshape(-1, 1))
-    opt.zero_grad()
-    opt.backward(loss)
-    opt.step()
-    for param in mlp.parameters():
-        slot = slots[param]
-        slot *= 0.9
-        slot -= param.grad.numpy() * 0.01
-
-    with BytesIO() as fout:
-        save(opt.state_dict(), fout)
-        fout.seek(0)
-        state_dict = load(fout)
-        opt1 = SGD(mlp.parameters(), lr=0.02, momentum=0.8)
-        opt1.load_state_dict(state_dict)
-
-        data.set_value(np.random.random(data_shape).astype(np.float32))
-        label.set_value(np.random.randint(0, 10, label_shape))
-        pred = mlp(data)
-        loss = F.square_loss(pred, label.reshape(-1, 1))
-        opt1.zero_grad()
-        opt1.backward(loss)
-        orig_params = TensorDict()
-        for param in mlp.parameters():
-            orig_params[param] = np.copy(param.numpy())
-        opt1.step()
-        for param in mlp.parameters():
-            orig_param = orig_params[param]
-            slot = slots[param]
-            slot *= 0.9
-            slot -= param.grad.numpy() * 0.01
-            assertTensorClose(param.numpy(), orig_param + slot)
+@graph_mode("eager", "static")
+def test_optimizer_serialization():
+    data, data_shape, label, label_shape = get_input()
+    mlp = MLP()
+    opt = optimizer.SGD(mlp.parameters(), lr=0.01, momentum=0.9)
+    slots = TensorDict()
+    for param in mlp.parameters():
+        slots[param] = np.zeros(param.shape).astype(np.float32)
+    pred = mlp(data)
+    loss = F.square_loss(pred, label.reshape(-1, 1))
+    opt.zero_grad()
+    opt.backward(loss)
+    opt.step()
+    for param in mlp.parameters():
+        slots[param] = slots[param] * 0.9 + param.grad.numpy()
+
+    with BytesIO() as fout:
+        save(opt.state_dict(), fout)
+        fout.seek(0)
+        state_dict = load(fout)
+        opt1 = optimizer.SGD(mlp.parameters(), lr=0.02, momentum=0.8)
+        opt1.load_state_dict(state_dict)
+
+        data.set_value(np.random.random(data_shape).astype(np.float32))
+        label.set_value(np.random.randint(0, 10, label_shape))
+        pred = mlp(data)
+        loss = F.square_loss(pred, label.reshape(-1, 1))
+        opt1.zero_grad()
+        opt1.backward(loss)
+        orig_params = TensorDict()
+        for param in mlp.parameters():
+            orig_params[param] = np.copy(param.numpy())
+        opt1.step()
+        for param in mlp.parameters():
+            orig_param = orig_params[param]
+            slots[param] = slots[param] * 0.9 + param.grad.numpy()
+            assertTensorClose(param.numpy(), orig_param - 0.01 * slots[param])
+
+
+def _test_optimizer(opt_str, test_case, check_class, update_lr=False):
+    iter_num = 3
+    data, data_shape, label, label_shape = get_input()
+
+    net = MLP()
+    opt = getattr(optimizer, opt_str)(net.parameters(), **test_case)
+    check_func = check_class(net, **test_case)
+
+    step = 0
+
+    # eager graph
+    for i in range(iter_num):
+        if update_lr and i == 1:  # change learning rate
+            for group in opt.param_groups:
+                group["lr"] += 0.01
+            check_func.lr += 0.01
+        data.set_value(np.random.random(data_shape).astype(np.float32))
+        label.set_value(np.random.randint(0, 10, label_shape))
+        pred = net(data)
+        loss = F.square_loss(pred, label.reshape(-1, 1))
+        opt.zero_grad()
+        opt.backward(loss)
+        ori_params = TensorDict()
+        for param in net.parameters():
+            ori_params[param] = np.copy(param.numpy())
+        opt.step()
+        step += 1
+        check_func(ori_params, net.parameters(), step)
+
+    # static graph
+    @trace
+    def train_func(data, label):
+        pred = net(data)
+        loss = F.square_loss(pred, label.reshape(-1, 1))
+        opt.backward(loss)
+
+    for i in range(iter_num):
+        if update_lr and i == 1:  # change learning rate
+            for group in opt.param_groups:
+                group["lr"] += 0.01
+            check_func.lr += 0.01
+        opt.zero_grad()
+        ori_params = TensorDict()
+        for param in net.parameters():
+            ori_params[param] = np.copy(param.numpy())
+        train_func(
+            np.random.random(data_shape).astype(np.float32),
+            np.random.randint(0, 10, label_shape).astype(np.int32),
+        )
+        opt.step()
+        step += 1
+        check_func(ori_params, net.parameters(), step)
+
+
+def test_sgd():
+    class CheckValue:
+        def __init__(self, net, **kwarg):
+            self.slots = TensorDict()
+            for param in net.parameters():
+                self.slots[param] = np.zeros(param.shape).astype(np.float32)
+            for k, v in kwarg.items():
+                setattr(self, k, v)
+
+        def __call__(self, ori_params, new_params, step):
+            for param in new_params:
+                grad = param.grad.numpy()
+                if hasattr(self, "momentum"):
+                    self.slots[param] = grad + self.slots[param] * self.momentum
+                    delta = -self.lr * self.slots[param]
+                else:
+                    delta = -self.lr * grad
+                assertTensorClose(param.numpy(), ori_params[param] + delta)
+
+    cases = [
+        {"momentum": 0.9, "lr": 0.01},  # SGD with momentum
+        {"lr": 0.01},  # simple SGD
+        {"weight_decay": 0.1, "lr": 0.01},  # with weight_decay
+    ]
+    for case in cases:
+        _test_optimizer("SGD", case, CheckValue)
+        _test_optimizer("SGD", case, CheckValue, update_lr=True)
+
+
+def test_adam():
+    class CheckValue:
+        def __init__(self, net, **kwarg):
+            self.m_slots = TensorDict()
+            self.v_slots = TensorDict()
+            for param in net.parameters():
+                self.m_slots[param] = np.zeros(param.shape).astype(np.float32)
+                self.v_slots[param] = np.zeros(param.shape).astype(np.float32)
+            for k, v in kwarg.items():
+                setattr(self, k, v)
+
+        def __call__(self, ori_params, new_params, step):
+            for param in new_params:
+                grad = param.grad.numpy()
+                m = self.m_slots[param]
+                v = self.v_slots[param]
+                m *= self.betas[0]
+                m += (1 - self.betas[0]) * grad
+                v *= self.betas[1]
+                v += (1 - self.betas[1]) * grad * grad
+                delta = (m / (1 - self.betas[0] ** step)) / (
+                    np.sqrt(v / (1 - self.betas[1] ** step)) + self.eps
+                )
+                assertTensorClose(param.numpy(), ori_params[param] - self.lr * delta)
+
+    cases = [
+        {"betas": (0.8, 0.9), "eps": 1e-04, "lr": 0.01},
+        {
+            "betas": (0.8, 0.9),
+            "eps": 1e-04,
+            "lr": 0.01,
+            "weight_decay": 0.1,
+        },  # with weight_decay
+    ]
+    for case in cases:
+        _test_optimizer("Adam", case, CheckValue)
+        _test_optimizer("Adam", case, CheckValue, update_lr=True)
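The refactored tests check each MegEngine optimizer step against a hand-rolled NumPy reference update. Below is a minimal standalone sketch of those reference updates (NumPy only, independent of MegEngine; the function names and toy values are illustrative and not part of the diff, and the weight_decay cases are not covered here):

import numpy as np

def reference_sgd_momentum_step(param, grad, slot, lr=0.01, momentum=0.9):
    # Momentum buffer as tracked by the CheckValue in test_sgd:
    # slot = momentum * slot + grad, then param moves by -lr * slot.
    slot[:] = momentum * slot + grad
    return param - lr * slot

def reference_adam_step(param, grad, m, v, step, lr=0.01, betas=(0.8, 0.9), eps=1e-4):
    # Bias-corrected first/second moments, matching the CheckValue in test_adam.
    m[:] = betas[0] * m + (1 - betas[0]) * grad
    v[:] = betas[1] * v + (1 - betas[1]) * grad * grad
    delta = (m / (1 - betas[0] ** step)) / (np.sqrt(v / (1 - betas[1] ** step)) + eps)
    return param - lr * delta

# Toy usage: one update step on a 4-element parameter.
param = np.ones(4, dtype=np.float32)
grad = np.full(4, 0.5, dtype=np.float32)
slot = np.zeros(4, dtype=np.float32)
m, v = np.zeros(4, dtype=np.float32), np.zeros(4, dtype=np.float32)
print(reference_sgd_momentum_step(param, grad, slot))  # first step reduces to param - lr * grad
print(reference_adam_step(param, grad, m, v, step=1))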