Commit 30d6b4f6
Authored Feb 02, 2021 by Megvii Engine Team
fix(mge): fix scalar parameter changing to 1-dim parameter after backward and optimize
GitOrigin-RevId: 1794369a71251475cbe8f839cbf35f91a3adee99
Parent: cf27dd64
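In short: a Parameter created from a Python scalar (shape `()`) could come back with shape `(1,)` after a backward pass and an optimizer step. A minimal sketch of the symptom, assuming MegEngine at roughly the version this commit landed in (early 2021); the final shape check is exactly what the new test assertion below verifies:

    import numpy as np
    import megengine as mge
    import megengine.autodiff as ad
    import megengine.optimizer as optim

    # 0-dim ("scalar") parameter -- the case this commit fixes
    a = mge.Parameter(1.23, dtype=np.float32)

    gm = ad.GradManager()
    gm.attach([a])
    opt = optim.SGD([a], lr=0.01)

    with gm:
        loss = a * a
        gm.backward(loss)
    opt.step()

    print(a.shape)  # expected: (), not (1,), once this fix is applied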
Showing 7 changed files with 46 additions and 27 deletions (+46 −27)
imperative/python/megengine/autodiff/grad_manager.py     +2  −0
imperative/python/megengine/functional/inplace.py        +5  −1
imperative/python/megengine/optimizer/adadelta.py        +11 −8
imperative/python/megengine/optimizer/adagrad.py         +10 −8
imperative/python/megengine/optimizer/adam.py            +1  −1
imperative/python/megengine/optimizer/sgd.py             +4  −4
imperative/python/test/integration/test_optimizer.py     +13 −5
imperative/python/megengine/autodiff/grad_manager.py

@@ -279,6 +279,8 @@ class GradManager:
                     tensor.grad = grad
                 else:
                     tensor.grad += grad
+                if tensor.isscalar() and tensor.grad is not None:
+                    tensor.grad.setscalar()
         finally:
             self.release()
             backwarding_grad_manager = cache
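My reading of this hunk, hedged: accumulating with `+=` goes through a regular elementwise add whose result does not carry MegEngine's internal scalar flag, so a 0-dim tensor's gradient could silently become 1-dim once a second gradient was accumulated. The added lines restore the invariant after accumulation (`isscalar`/`setscalar` are the internal flag accessors that appear in the hunk):

    # invariant enforced after accumulation: a scalar tensor's grad stays 0-dim
    if tensor.isscalar() and tensor.grad is not None:
        tensor.grad.setscalar()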
imperative/python/megengine/functional/inplace.py

@@ -12,4 +12,8 @@ from ..core.ops.builtin import InplaceAdd
 def _inplace_add_(dest, delta, alpha, beta):
-    return dest._reset(apply(InplaceAdd(), dest, delta, alpha, beta)[0])
+    isscalar = dest.isscalar()
+    dest._reset(apply(InplaceAdd(), dest, delta, alpha, beta)[0])
+    if isscalar:
+        dest.setscalar()
+    return dest
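For readability, the fixed `_inplace_add_` reassembled from the hunk above. `apply`, `InplaceAdd`, `_reset`, `isscalar`, and `setscalar` are MegEngine internals shown in the diff; the point of the change is that `_reset` replaces `dest`'s contents with the op's output, which drops the scalar flag, so the flag is saved first and restored afterwards:

    def _inplace_add_(dest, delta, alpha, beta):
        isscalar = dest.isscalar()   # remember whether dest was 0-dim
        dest._reset(apply(InplaceAdd(), dest, delta, alpha, beta)[0])
        if isscalar:
            dest.setscalar()         # restore the scalar flag after _reset
        return dest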
imperative/python/megengine/optimizer/adadelta.py

@@ -61,16 +61,19 @@ class Adadelta(Optimizer):
         rho = param_group["rho"]
         eps = param_group["eps"]

+        def make_scalar(val):
+            return tensor(val)
+
         # since `conver_inputs` is disabled for param updates,
         # scalar should be explicitly tansforred to tensor
-        _lr = tensor([lr])
-        _weight_decay = tensor([weight_decay])
-        _rho = tensor([rho])
-        _eps = tensor([eps])
-        c05 = tensor([0.5])
-        c1 = tensor([1.0])
-        c2 = tensor([2.0])
+        _lr = make_scalar(lr)
+        _weight_decay = make_scalar(weight_decay)
+        _rho = make_scalar(rho)
+        _eps = make_scalar(eps)
+        c1, c2, c05 = map(make_scalar, (1.0, 2.0, 0.5))

         for param in param_group["params"]:
             if param.grad is None:
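The optimizer changes all follow from one broadcasting fact: combining a shape-(1,) hyperparameter tensor with a shape-() parameter promotes the update result to shape (1,). A NumPy sketch of the same arithmetic (an analogy, not MegEngine code):

    import numpy as np

    param = np.float32(1.23)   # 0-dim, like a scalar Parameter
    lr_1d = np.array([0.01])   # old style: tensor([lr]) -> shape (1,)
    lr_0d = np.array(0.01)     # new style: tensor(lr)   -> shape ()

    print((param - lr_1d * param).shape)  # (1,) -- rank silently promoted
    print((param - lr_0d * param).shape)  # ()   -- rank preserved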
imperative/python/megengine/optimizer/adagrad.py

@@ -60,16 +60,18 @@ class Adagrad(Optimizer):
         weight_decay = param_group["weight_decay"]
         eps = param_group["eps"]

+        def make_scalar(val):
+            return tensor(val)
+
         # since `conver_inputs` is disabled for param updates,
         # scalar should be explicitly tansforred to tensor
-        _lr = tensor([lr])
-        _lr_decay = tensor([lr_decay])
-        _weight_decay = tensor([weight_decay])
-        _eps = tensor([eps])
-        c05 = tensor([0.5])
-        c1 = tensor([1.0])
-        c2 = tensor([2.0])
+        _lr, _lr_decay = map(make_scalar, (lr, lr_decay))
+        _weight_decay = make_scalar(weight_decay)
+        _eps = make_scalar(eps)
+        c1, c2, c05 = map(make_scalar, (1.0, 2.0, 0.5))

         for param in param_group["params"]:
             if param.grad is None:
imperative/python/megengine/optimizer/adam.py

@@ -61,7 +61,7 @@ class Adam(Optimizer):
         beta0, beta1 = param_group["betas"]

         def make_scalar(val):
-            return tensor([val])
+            return tensor(val)

         # since `conver_inputs` is disabled for param updates,
         # scalar should be explicitly tansforred to tensor
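The adam.py hunk is the whole fix in miniature: `make_scalar` now wraps the Python float directly instead of putting it in a list, which changes the rank of the resulting tensor. A hedged sketch of the difference, assuming standard MegEngine eager-mode tensor construction:

    from megengine import tensor

    print(tensor(0.5).shape)    # expected ()   -- 0-dim scalar (new behavior)
    print(tensor([0.5]).shape)  # expected (1,) -- 1-dim, one element (old behavior)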
imperative/python/megengine/optimizer/sgd.py

@@ -57,13 +57,13 @@ class SGD(Optimizer):
         # since `conver_inputs` is disabled for param updates,
         # scalar should be explicitly tansforred to tensor
-        _lr = tensor([lr])
-        _weight_decay = tensor([weight_decay])
-        _momentum = tensor([momentum])
+        _lr = tensor(lr)
+        _weight_decay = tensor(weight_decay)
+        _momentum = tensor(momentum)

         inplace_mode = int(os.getenv("MEGENGINE_INPLACE_UPDATE", "0"))
         if inplace_mode:
-            _neg_lr = tensor([-lr])
+            _neg_lr = tensor(-lr)
             c1 = tensor([1.0])

         for param in param_group["params"]:
imperative/python/test/integration/test_optimizer.py

@@ -32,7 +32,7 @@ class MLP(Module):
 class Simple(Module):
     def __init__(self):
         super().__init__()
-        self.a = Parameter([1.23], dtype=np.float32)
+        self.a = Parameter(1.23, dtype=np.float32)

     def forward(self, x):
         x = x * self.a

@@ -64,6 +64,7 @@ def _test_optimizer(opt_str, test_case, check_class, update_lr=False):
         ori_params = {}
         for param in net.parameters():
+            assert param._tuple_shape is ()
             ori_params[param] = np.copy(param.numpy())
         opt.step()
         step += 1

@@ -95,6 +96,7 @@ def _test_optimizer(opt_str, test_case, check_class, update_lr=False):
         ori_params = {}
         for param in net.parameters():
+            assert param._tuple_shape is ()
             ori_params[param] = np.copy(param.numpy())
         train_func(

@@ -121,7 +123,9 @@ def test_sgd():
                     delta = -self.lr * self.slots[param]
                 else:
                     delta = -self.lr * grad
-                np.testing.assert_almost_equal(param.numpy(), ori_params[param] + delta)
+                np.testing.assert_almost_equal(
+                    param.numpy(), ori_params[param] + delta, decimal=6
+                )

     cases = [
         {"momentum": 0.9, "lr": 0.01},  # SGD with momentum

@@ -157,7 +161,7 @@ def test_adam():
                     np.sqrt(v / (1 - self.betas[1] ** step)) + self.eps
                 )
                 np.testing.assert_almost_equal(
-                    param.numpy(), ori_params[param] - self.lr * delta
+                    param.numpy(), ori_params[param] - self.lr * delta, decimal=6
                 )

     cases = [

@@ -189,7 +193,9 @@ def test_adagrad():
                 self.s_slots[param] += grad ** 2
                 delta = grad / (self.s_slots[param] + self.eps) ** 0.5
                 delta *= -(self.lr / (1 + (step - 1) * self.lr_decay))
-                np.testing.assert_almost_equal(param.numpy(), ori_params[param] + delta)
+                np.testing.assert_almost_equal(
+                    param.numpy(), ori_params[param] + delta, decimal=6
+                )

     cases = [
         {"lr": 0.01, "eps": 1e-06, "lr_decay": 0.01},

@@ -232,7 +238,9 @@ def test_adadelta():
                     1 - self.rho
                 )
                 delta *= -self.lr
-                np.testing.assert_almost_equal(param.numpy(), ori_params[param] + delta)
+                np.testing.assert_almost_equal(
+                    param.numpy(), ori_params[param] + delta, decimal=6
+                )

     cases = [
         {"lr": 1.0, "eps": 1e-06, "rho": 0.9},
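Two notes on the test changes. The `decimal=6` additions relax the comparisons slightly from `np.testing.assert_almost_equal`'s default of 7 decimal places. And the new `assert param._tuple_shape is ()` uses a MegEngine-internal attribute, with `is ()` relying on CPython interning the empty tuple; a public-API way to state the same check would be something like:

    import numpy as np
    import megengine as mge

    p = mge.Parameter(1.23, dtype=np.float32)
    # public-API form of `assert param._tuple_shape is ()`:
    assert tuple(p.shape) == ()   # parameter is still 0-dim after training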