mindspore (fork of MindSpore / mindspore)

Commit 3c205729
Authored on Jun 19, 2020 by panyifeng

support multi param for tuple grad

Parent: bc4b1c24

Showing 13 changed files with 65 additions and 62 deletions (+65 -62)
mindspore/ccsrc/operator/prim_others.cc                    +32  -11
mindspore/ccsrc/pipeline/action.cc                          +2   -1
mindspore/ccsrc/pipeline/static_analysis/abstract_value.h   +4   -4
mindspore/common/parameter.py                               +5   -5
mindspore/nn/optim/adam.py                                  +1   -1
mindspore/nn/optim/ftrl.py                                  +1   -1
mindspore/nn/optim/lazyadam.py                              +3   -2
mindspore/ops/composite/base.py                             +7  -31
tests/ut/python/nn/optim/test_adam.py                       +2   -1
tests/ut/python/nn/optim/test_adam_with_tuple_grad.py       +2   -2
tests/ut/python/nn/optim/test_ftrl.py                       +2   -1
tests/ut/python/nn/optim/test_lazyadam.py                   +2   -1
tests/ut/python/nn/optim/test_proximal_ada_grad.py          +2   -1
mindspore/ccsrc/operator/prim_others.cc

@@ -59,7 +59,8 @@ class UndeterminedShapeType {
  public:
   explicit UndeterminedShapeType(const std::string &env_str) {
     // param_name indices_shape indices_type values_shape values_type dense_shape
-    // export UNDETERMINED_SPARSE_SHAPE_TYPES="w1:2:Int32:2 1 2:Float32:3 1 2"
+    // export UNDETERMINED_SPARSE_SHAPE_TYPES="sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;sparse_key_w2:2:Int32:2 1
+    // 2:Float32:3 1 2"
     std::vector<string> fields;
     string tmp;
     std::stringstream input(env_str);
@@ -115,6 +116,20 @@ std::vector<int> UndeterminedShapeType::GetShape(const std::string &shape_str) {
 }
 const size_t UndeterminedShapeType::fields_num = 6;
+std::unordered_map<std::string, UndeterminedShapeType> g_undetermined_configs;
+
+void InitUndeterminedFromEnv(const std::string &sparse_shape_types) {
+  if (!g_undetermined_configs.empty()) {
+    return;
+  }
+  std::string tmp;
+  std::stringstream input(sparse_shape_types);
+  while (std::getline(input, tmp, ';')) {
+    auto config = UndeterminedShapeType(tmp);
+    g_undetermined_configs.insert(std::make_pair(config.param_name(), config));
+    MS_LOG(DEBUG) << "Undetermined config from env: " << tmp;
+  }
+}
 
 AbstractBasePtr InferImplEnvGetItem(const AnalysisEnginePtr &, const PrimitivePtr &primitive,
                                     const AbstractBasePtrList &args_spec_list) {
   MS_EXCEPTION_IF_NULL(primitive);
@@ -128,27 +143,33 @@ AbstractBasePtr InferImplEnvGetItem(const AnalysisEnginePtr &, const PrimitivePt
     MS_LOG(EXCEPTION) << "EnvGetItem evaluator args[1] should be a SymbolicKeyInstance but: " << key->ToString();
   }
-  if (key->sparse_grad()) {
+  if (!key->sparse_grad().empty()) {
     // Will be fixed once undetermined type ready
     auto sparse_shape_types = common::GetEnv("UNDETERMINED_SPARSE_SHAPE_TYPES");
     if (sparse_shape_types.empty()) {
-      sparse_shape_types = "w1:2:Int32:2 1 2:Float32:3 1 2";
+      sparse_shape_types = "sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;sparse_key_w2:2:Int32:2 1 2:Float32:3 1 2";
     }
-    MS_LOG(DEBUG) << "EnvGetItem is sparse_grad " << key->ToString() << ", Undetermined shape is "
-                  << sparse_shape_types;
-    auto shape_types = UndeterminedShapeType(sparse_shape_types);
+    InitUndeterminedFromEnv(sparse_shape_types);
+    auto shape_types = g_undetermined_configs.find(key->sparse_grad());
+    if (shape_types == g_undetermined_configs.end()) {
+      MS_LOG(EXCEPTION) << "Param " << key->ToString()
+                        << " has sparse_grad, but shape/type is not configured in env UNDETERMINED_SPARSE_SHAPE_TYPES: "
+                        << sparse_shape_types;
+    }
+    MS_LOG(DEBUG) << "EnvGetItem is sparse_grad " << key->ToString();
     AbstractBasePtrList sparse_list;
     // indices
-    auto indices_ele = std::make_shared<AbstractScalar>(kAnyValue, shape_types.indices_type());
-    auto indices = std::make_shared<AbstractTensor>(indices_ele, std::make_shared<Shape>(shape_types.indices_shape()));
+    auto indices_ele = std::make_shared<AbstractScalar>(kAnyValue, shape_types->second.indices_type());
+    auto indices =
+      std::make_shared<AbstractTensor>(indices_ele, std::make_shared<Shape>(shape_types->second.indices_shape()));
     sparse_list.emplace_back(indices);
     // values
-    auto dout_ele = std::make_shared<AbstractScalar>(kAnyValue, shape_types.values_type());
-    auto dout = std::make_shared<AbstractTensor>(dout_ele, std::make_shared<Shape>(shape_types.values_shape()));
+    auto dout_ele = std::make_shared<AbstractScalar>(kAnyValue, shape_types->second.values_type());
+    auto dout = std::make_shared<AbstractTensor>(dout_ele, std::make_shared<Shape>(shape_types->second.values_shape()));
     sparse_list.emplace_back(dout);
     // dense_shape
-    sparse_list.emplace_back(std::make_shared<AbstractTuple>(shape_types.dense_shape()));
+    sparse_list.emplace_back(std::make_shared<AbstractTuple>(shape_types->second.dense_shape()));
     return std::make_shared<AbstractTuple>(sparse_list);
   }
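For reference, the UNDETERMINED_SPARSE_SHAPE_TYPES string parsed above is a ';'-separated list of configs, each with six ':'-separated fields: param_name, indices shape, indices type, values shape, values type, dense shape. A rough Python sketch of the same parsing, for illustration only (not MindSpore API):

import os

def parse_sparse_shape_types(env_str):
    """Parse 'name:ishape:itype:vshape:vtype:dshape' entries joined by ';'."""
    configs = {}
    for entry in env_str.split(";"):
        fields = entry.split(":")
        if len(fields) != 6:  # mirrors UndeterminedShapeType::fields_num above
            raise ValueError("expected 6 fields, got %d" % len(fields))
        name, ishape, itype, vshape, vtype, dshape = fields
        configs[name] = {
            "indices_shape": [int(x) for x in ishape.split()],
            "indices_type": itype,
            "values_shape": [int(x) for x in vshape.split()],
            "values_type": vtype,
            "dense_shape": [int(x) for x in dshape.split()],
        }
    return configs

# Same default string as in the C++ code above: two parameters, two configs.
cfg = parse_sparse_shape_types(
    "sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;"
    "sparse_key_w2:2:Int32:2 1 2:Float32:3 1 2")
assert sorted(cfg) == ["sparse_key_w1", "sparse_key_w2"]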
mindspore/ccsrc/pipeline/action.cc

@@ -229,7 +229,8 @@ bool AbstractSpecializeAction(const ResourcePtr &res) {
       if (param_node->has_default()) {
         auto param_value = std::dynamic_pointer_cast<ParamValuePy>(param_node->default_param());
         AbstractBasePtr ptr = abstract::FromValue(parse::data_converter::PyDataToValue(param_value->value()), true);
-        auto sparse_grad = py::cast<bool>(parse::python_adapter::GetPyObjAttr(param_value->value(), "sparse_grad"));
+        auto sparse_grad =
+          py::cast<std::string>(parse::python_adapter::GetPyObjAttr(param_value->value(), "sparse_grad"));
         ptr->set_sparse_grad(sparse_grad);
         parallel::ParallelParameterContextRestoreInNoTraining(func_graph, param_node, ptr);
mindspore/ccsrc/pipeline/static_analysis/abstract_value.h

@@ -44,7 +44,7 @@ class AbstractBase : public Base {
  public:
   explicit AbstractBase(const ValuePtr &value = nullptr, const TypePtr &type = kAnyType,
                         const BaseShapePtr &shape = kNoShape)
-      : value_(value), type_(type), shape_(shape), sparse_grad_(false) {}
+      : value_(value), type_(type), shape_(shape), sparse_grad_("") {}
   ~AbstractBase() override = default;
   MS_DECLARE_PARENT(AbstractBase, Base)
@@ -53,13 +53,13 @@ class AbstractBase : public Base {
   virtual bool operator==(const AbstractBase &other) const;
   void set_value(const ValuePtr &value) { value_ = value; }
-  void set_sparse_grad(const bool &sparse_grad) { sparse_grad_ = sparse_grad; }
+  void set_sparse_grad(const std::string &sparse_grad) { sparse_grad_ = sparse_grad; }
   void set_type(const TypePtr &type) { type_ = type; }
   void set_shape(const BaseShapePtr &shape) { shape_ = shape; }
   void set_value_desc(const std::string &desc) { value_desc_ = desc; }
   const std::string &value_desc() const { return value_desc_; }
   ValuePtr GetValueTrack() const { return value_; }
-  bool sparse_grad() const { return sparse_grad_; }
+  const std::string &sparse_grad() const { return sparse_grad_; }
   TypePtr GetTypeTrack() const { return type_; }
   BaseShapePtr GetShapeTrack() const { return shape_; }
@@ -87,7 +87,7 @@ class AbstractBase : public Base {
   TypePtr type_;
   BaseShapePtr shape_;
   std::string value_desc_;  // store initial value description for error report
-  bool sparse_grad_;
+  std::string sparse_grad_;
 };
 
 class AbstractScalar : public AbstractBase {
mindspore/common/parameter.py

@@ -51,9 +51,9 @@ class Parameter:
         requires_grad (bool): True if the parameter requires gradient. Default: True.
         layerwise_parallel (bool): A kind of model parallel mode. When layerwise_parallel is true in paralle mode,
             broadcast and gradients communication would not be applied on parameters. Default: False.
-        sparse_grad (bool): True if the parameter's gradient is sparse. Default: False.
+        sparse_grad (str): Set if the parameter's gradient is sparse. Default: empty.
     """
 
-    def __init__(self, default_input, name, requires_grad=True, layerwise_parallel=False, sparse_grad=False):
+    def __init__(self, default_input, name, requires_grad=True, layerwise_parallel=False, sparse_grad=""):
         self.set_parameter_data(default_input)
         self.name = name
         self.requires_grad = requires_grad
@@ -181,9 +181,9 @@ class Parameter:
         return self._sparse_grad
 
     @sparse_grad.setter
-    def sparse_grad(self, value=True):
-        if not isinstance(value, bool):
-            raise TypeError("`sparse_grad` parameter must be bool type")
+    def sparse_grad(self, value=""):
+        if not isinstance(value, str):
+            raise TypeError("`sparse_grad` parameter must be str type")
         self._sparse_grad = value
 
     @property
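With sparse_grad now a string key rather than a flag, several parameters can each point at their own entry in UNDETERMINED_SPARSE_SHAPE_TYPES. A minimal sketch of the new call, modeled on the updated tests below (import paths assumed from the MindSpore API of this era):

import numpy as np
from mindspore import Tensor
from mindspore.common.parameter import Parameter

# Each sparse parameter names its own config key; dense parameters keep the
# default empty string.
w1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="w1",
               sparse_grad="sparse_key_w1")
w2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="w2")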
mindspore/nn/optim/adam.py

@@ -156,7 +156,7 @@ class Adam(Optimizer):
         To improve parameter groups performance, the customized order of parameters can be supported.
 
         The sparse strategy is applied while the SparseGatherV2 operator being used for forward network and the
-        `sparse_grad` of `Parameter` being set as True. The sparse feature is under continuous development. The sparse
+        `sparse_grad` of `Parameter` being set. The sparse feature is under continuous development. The sparse
         behavior is currently performed on the CPU, weight decay is not supported.
 
     Args:
mindspore/nn/optim/ftrl.py

@@ -72,7 +72,7 @@ class FTRL(Optimizer):
     Note:
         The sparse strategy is applied while the SparseGatherV2 operator being used for forward network and the
-        `sparse_grad` of `Parameter` being set as True. The sparse feature is under continuous development. The sparse
+        `sparse_grad` of `Parameter` being set. The sparse feature is under continuous development. The sparse
         behavior is currently performed on the CPU, weight decay is not supported.
 
     Args:
mindspore/nn/optim/lazyadam.py

@@ -92,9 +92,10 @@ class LazyAdam(Optimizer):
         applied on the parameters if `weight_decay` > 0 and the 'beta' and 'gamma' are not in the name of parameters.
 
         The sparse strategy is applied while the SparseGatherV2 operator being used for forward network and the
-        `sparse_grad` of `Parameter` being set as True. The sparse behavior, to be notice, is not equivalent to the
+        `sparse_grad` of `Parameter` being set. The sparse behavior, to be notice, is not equivalent to the
         original Adam algorithm, as only the current indices parames will be updated. The sparse feature is under
-        continuous development. The sparse behavior is currently performed on the CPU, weight decay is not supported.
+        continuous development. The sparse behavior is currently performed on the CPU, weight decay is
+        not supported.
 
     Args:
         params (Union[list[Parameter], list[dict]]): When the `params` is a list of `Parameter` which will be updated,
mindspore/ops/composite/base.py

@@ -241,6 +241,7 @@ class HyperMap(HyperMap_):
             return func(*args_list)
         return tuple(map(hypermap, *args_list))
 
+
 class Map(Map_):
     """
     Map will apply the set operation on input sequences.
@@ -271,37 +272,12 @@ class Map(Map_):
         Map_.__init__(self)
 
     def __call__(self, *args):
-        func = args[0]
-        count = 0
-        count_max = 1
-        args_list = args[1:]
-        if self.ops is not None:
-            func = self.ops
-            args_list = args
-        for item in args_list:
-            if isinstance(item, (tuple, list)):
-                count_max = len(item)
-                break
-
-        def get_item(x):
-            nonlocal count
-            if isinstance(x, (tuple, list)):
-                return x[count]
-            return x
-
-        for i in range(count_max):
-            true_args = tuple(map(get_item, args_list))
-            func(*true_args)
-            count = i + 1
-        return True
-
-    def register(self, *type_names):
-        """Register a function for the given type string."""
-
-        def deco(fn):
-            self.register_fn(type_names, fn)
-            return fn
-        return deco
+        func = self.ops
+        args_list = args
+        if self.ops is None:
+            func = args[0]
+            args_list = args[1:]
+        return tuple(map(func, *args_list))
 
 
 class _ListAppend(ListAppend_):
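The rewritten Map.__call__ drops the per-index iteration and counter bookkeeping in favor of a plain elementwise map over the argument sequences. A standalone sketch of the new semantics (plain Python stand-in, not the Map_ primitive itself):

def map_call(ops, *args):
    # With a pre-registered op, all positional args are the sequences to map
    # over; otherwise the first arg is the function to apply.
    func = ops
    args_list = args
    if ops is None:
        func = args[0]
        args_list = args[1:]
    return tuple(map(func, *args_list))

# Applies the function elementwise across sequences and returns the results as
# a tuple (the old loop returned True and discarded them).
assert map_call(None, lambda x, y: x + y, (1, 2), (10, 20)) == (11, 22)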
tests/ut/python/nn/optim/test_adam.py

@@ -53,7 +53,8 @@ class NetWithSparseGatherV2(nn.Cell):
     """ NetWithSparseGatherV2 definition """
     def __init__(self):
         super(NetWithSparseGatherV2, self).__init__()
-        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1", sparse_grad=True)
+        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1",
+                                 sparse_grad="sparse_key_w1")
         self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype((np.float32))), name="weight2")
         self.axis = 0
         self.gather = P.SparseGatherV2()
tests/ut/python/nn/optim/test_adam_with_tuple_grad.py

@@ -154,8 +154,8 @@ def test_AdamWeightDecaySparse():
     class NetWithSparseGatherV2(nn.Cell):
         def __init__(self):
             super(NetWithSparseGatherV2, self).__init__()
-            self.w1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="w1", sparse_grad=True)
-            self.w2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="w2")
+            self.w1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="w1", sparse_grad="sparse_key_w1")
+            self.w2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="w2", sparse_grad="sparse_key_w2")
             self.gatherv2 = P.SparseGatherV2()
             self.axis = 0
 
         def construct(self, indices):
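To exercise two sparse parameters at once, each sparse_grad key has to resolve in the env config, e.g. something like the following before running the test, echoing the default shown in prim_others.cc (illustrative only; keys match the parameters above):

import os

# One entry per sparse parameter, ';'-separated, keys matching sparse_grad.
os.environ["UNDETERMINED_SPARSE_SHAPE_TYPES"] = (
    "sparse_key_w1:2:Int32:2 1 2:Float32:3 1 2;"
    "sparse_key_w2:2:Int32:2 1 2:Float32:3 1 2")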
tests/ut/python/nn/optim/test_ftrl.py

@@ -41,7 +41,8 @@ class NetWithSparseGatherV2(nn.Cell):
     """ NetWithSparseGatherV2 definition """
     def __init__(self):
         super(NetWithSparseGatherV2, self).__init__()
-        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1", sparse_grad=True)
+        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1",
+                                 sparse_grad="sparse_key_w1")
         self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype((np.float32))), name="weight2")
         self.axis = 0
         self.gather = P.SparseGatherV2()
tests/ut/python/nn/optim/test_lazyadam.py

@@ -43,7 +43,8 @@ class NetWithSparseGatherV2(nn.Cell):
     """ NetWithSparseGatherV2 definition """
     def __init__(self):
         super(NetWithSparseGatherV2, self).__init__()
-        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1", sparse_grad=True)
+        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1",
+                                 sparse_grad="sparse_key_w1")
         self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype((np.float32))), name="weight2")
         self.axis = 0
         self.gather = P.SparseGatherV2()
tests/ut/python/nn/optim/test_proximal_ada_grad.py

@@ -40,7 +40,8 @@ class NetWithSparseGatherV2(nn.Cell):
     """ NetWithSparseGatherV2 definition """
     def __init__(self):
         super(NetWithSparseGatherV2, self).__init__()
-        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1", sparse_grad=True)
+        self.weight1 = Parameter(Tensor(np.ones([3, 1, 2]).astype(np.float32)), name="weight1",
+                                 sparse_grad="sparse_key_w1")
         self.weight2 = Parameter(Tensor(np.ones([2, 1, 2]).astype(np.float32)), name="weight2")
         self.axis = 0
         self.gather = P.SparseGatherV2()