Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
magicwindyyd
mindspore
提交
6b54a641
M
mindspore
项目概览
magicwindyyd
/
mindspore
与 Fork 源项目一致
Fork自
MindSpore / mindspore
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindspore
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
6b54a641
编写于
5月 15, 2020
作者:
Y
yangzhenzhang
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
ckpt and restore parameter shape
上级
311b7e71
变更
6
隐藏空白更改
内联
并排
Showing 6 changed files with 141 additions and 2 deletions
+141
-2
mindspore/ccsrc/parallel/context.cc
mindspore/ccsrc/parallel/context.cc
+54
-0
mindspore/ccsrc/parallel/context.h
mindspore/ccsrc/parallel/context.h
+11
-0
mindspore/ccsrc/pipeline/action.cc
mindspore/ccsrc/pipeline/action.cc
+6
-0
mindspore/common/api.py
mindspore/common/api.py
+1
-1
tests/ut/python/parallel/test_get_parameter_layout.py
tests/ut/python/parallel/test_get_parameter_layout.py
+1
-1
tests/ut/python/parallel/test_train_and_eval.py
tests/ut/python/parallel/test_train_and_eval.py
+68
-0
未找到文件。
mindspore/ccsrc/parallel/context.cc
浏览文件 @
6b54a641
...
@@ -22,12 +22,15 @@
...
@@ -22,12 +22,15 @@
#include <memory>
#include <memory>
#include <numeric>
#include <numeric>
#include <utility>
#include <utility>
#include <map>
#include "common/utils.h"
#include "common/utils.h"
#include "parallel/device_manager.h"
#include "parallel/device_manager.h"
namespace
mindspore
{
namespace
mindspore
{
namespace
parallel
{
namespace
parallel
{
static
std
::
map
<
std
::
string
,
std
::
vector
<
int
>>
param_shapes
;
std
::
vector
<
std
::
string
>
PARALLEL_MODE_LIST
=
{
STAND_ALONE
,
DATA_PARALLEL
,
HYBRID_PARALLEL
,
SEMI_AUTO_PARALLEL
,
std
::
vector
<
std
::
string
>
PARALLEL_MODE_LIST
=
{
STAND_ALONE
,
DATA_PARALLEL
,
HYBRID_PARALLEL
,
SEMI_AUTO_PARALLEL
,
AUTO_PARALLEL
};
AUTO_PARALLEL
};
std
::
vector
<
std
::
string
>
STRATEGY_SEARCH_MODE_LIST
=
{
DYNAMIC_PROGRAMMING
,
RECURSIVE_PROGRAMMING
};
std
::
vector
<
std
::
string
>
STRATEGY_SEARCH_MODE_LIST
=
{
DYNAMIC_PROGRAMMING
,
RECURSIVE_PROGRAMMING
};
...
@@ -136,5 +139,56 @@ const std::vector<uint32_t> ParallelContext::GetAllReduceFusionSplitSizes(const
...
@@ -136,5 +139,56 @@ const std::vector<uint32_t> ParallelContext::GetAllReduceFusionSplitSizes(const
}
}
return
{};
return
{};
}
}
// Reset the cached parameter-shape table before a training compile in
// auto-parallel / semi-auto-parallel mode; a no-op for any other graph.
void ParallelParameterContextInit(const FuncGraphPtr &func_graph) {
  MS_EXCEPTION_IF_NULL(func_graph);
  const bool parallel_training =
    func_graph->has_flag(AUTO_PARALLEL) && func_graph->has_flag(TRAINING);
  if (parallel_training) {
    param_shapes.clear();
  }
}
// Restore the parameters' shape for evaluation/prediction in auto-parallel or semi-auto-parallel mode
void
ParallelParameterContextRestoreInNoTraining
(
const
FuncGraphPtr
&
func_graph
,
const
ParameterPtr
&
param_node
,
AbstractBasePtr
ptr
)
{
MS_EXCEPTION_IF_NULL
(
func_graph
);
MS_EXCEPTION_IF_NULL
(
param_node
);
MS_EXCEPTION_IF_NULL
(
ptr
);
if
(
!
func_graph
->
has_flag
(
AUTO_PARALLEL
)
||
(
func_graph
->
flags
().
count
(
TRAINING
)
==
0
)
||
func_graph
->
flags
()[
TRAINING
])
{
return
;
}
auto
iter
=
param_shapes
.
find
(
param_node
->
name
());
if
(
iter
==
param_shapes
.
end
())
{
MS_LOG
(
WARNING
)
<<
"Can not found the shape for parameter "
<<
param_node
->
name
();
return
;
}
std
::
vector
<
int
>
shape
=
iter
->
second
;
std
::
shared_ptr
<
abstract
::
BaseShape
>
base_shape
=
std
::
make_shared
<
abstract
::
Shape
>
(
shape
);
ptr
->
set_shape
(
base_shape
);
MS_LOG
(
DEBUG
)
<<
"The parameter name is "
<<
param_node
->
name
()
<<
", the shape is "
<<
shape
;
}
// Checkpoint a parameter's shape during a training compile in auto-parallel or
// semi-auto-parallel mode, so a later non-training compile can restore it.
// Raises (via MS_LOG(EXCEPTION)) if the same parameter name is recorded twice.
void ParallelParameterContextCkptInTraining(const FuncGraphPtr &func_graph, const ParameterPtr &param_node,
                                            const AbstractBasePtr &ptr) {
  MS_EXCEPTION_IF_NULL(func_graph);
  MS_EXCEPTION_IF_NULL(param_node);
  MS_EXCEPTION_IF_NULL(ptr);
  if (!func_graph->has_flag(AUTO_PARALLEL) || !func_graph->has_flag(TRAINING)) {
    return;
  }

  // Fix: the original dereferenced the dyn_cast result unconditionally, which
  // is a null-pointer dereference when the shape track is not an abstract::Shape.
  auto shape_ptr = dyn_cast<abstract::Shape>(ptr->GetShapeTrack());
  MS_EXCEPTION_IF_NULL(shape_ptr);
  std::vector<int> shape = shape_ptr->shape();

  auto ret = param_shapes.try_emplace(param_node->name(), shape);
  if (!ret.second) {
    MS_LOG(EXCEPTION) << "The shape for parameter name " << param_node->name() << " is existed";
    return;
  }

  MS_LOG(DEBUG) << "The parameter name is " << param_node->name() << ", the shape is " << shape;
}
}
// namespace parallel
}
// namespace parallel
}
// namespace mindspore
}
// namespace mindspore
mindspore/ccsrc/parallel/context.h
浏览文件 @
6b54a641
...
@@ -26,6 +26,9 @@
...
@@ -26,6 +26,9 @@
#include "parallel/ops_info/ops_utils.h"
#include "parallel/ops_info/ops_utils.h"
#include "parallel/status.h"
#include "parallel/status.h"
#include "utils/convert_utils.h"
#include "utils/convert_utils.h"
#include "ir/anf.h"
#include "ir/func_graph.h"
#include "debug/info.h"
namespace
mindspore
{
namespace
mindspore
{
namespace
parallel
{
namespace
parallel
{
...
@@ -38,6 +41,8 @@ constexpr char SEMI_AUTO_PARALLEL[] = "semi_auto_parallel";
...
@@ -38,6 +41,8 @@ constexpr char SEMI_AUTO_PARALLEL[] = "semi_auto_parallel";
constexpr
char
DYNAMIC_PROGRAMMING
[]
=
"dynamic_programming"
;
constexpr
char
DYNAMIC_PROGRAMMING
[]
=
"dynamic_programming"
;
constexpr
char
RECURSIVE_PROGRAMMING
[]
=
"recursive_programming"
;
constexpr
char
RECURSIVE_PROGRAMMING
[]
=
"recursive_programming"
;
constexpr
char
TRAINING
[]
=
"training"
;
class
ParallelContext
{
class
ParallelContext
{
public:
public:
~
ParallelContext
()
=
default
;
~
ParallelContext
()
=
default
;
...
@@ -114,6 +119,12 @@ class ParallelContext {
...
@@ -114,6 +119,12 @@ class ParallelContext {
std
::
string
strategy_ckpt_load_file_
;
std
::
string
strategy_ckpt_load_file_
;
std
::
string
strategy_ckpt_save_file_
;
std
::
string
strategy_ckpt_save_file_
;
};
};
void
ParallelParameterContextInit
(
const
FuncGraphPtr
&
func_graph
);
void
ParallelParameterContextRestoreInNoTraining
(
const
FuncGraphPtr
&
func_graph
,
const
ParameterPtr
&
param_node
,
AbstractBasePtr
ptr
);
void
ParallelParameterContextCkptInTraining
(
const
FuncGraphPtr
&
func_graph
,
const
ParameterPtr
&
param_node
,
const
AbstractBasePtr
&
ptr
);
}
// namespace parallel
}
// namespace parallel
}
// namespace mindspore
}
// namespace mindspore
...
...
mindspore/ccsrc/pipeline/action.cc
浏览文件 @
6b54a641
...
@@ -25,6 +25,7 @@
...
@@ -25,6 +25,7 @@
#include "ir/func_graph_cloner.h"
#include "ir/func_graph_cloner.h"
#include "parallel/costmodel_context.h"
#include "parallel/costmodel_context.h"
#include "parallel/context.h"
#include "pipeline/pass.h"
#include "pipeline/pass.h"
#include "pipeline/parse/parse_base.h"
#include "pipeline/parse/parse_base.h"
#include "pipeline/parse/data_converter.h"
#include "pipeline/parse/data_converter.h"
...
@@ -217,6 +218,8 @@ bool AbstractSpecializeAction(const ResourcePtr &res) {
...
@@ -217,6 +218,8 @@ bool AbstractSpecializeAction(const ResourcePtr &res) {
FuncGraphPtr
func_graph
=
res
->
func_graph
();
FuncGraphPtr
func_graph
=
res
->
func_graph
();
abstract
::
AbstractBasePtrList
args_spec
=
res
->
args_spec
();
abstract
::
AbstractBasePtrList
args_spec
=
res
->
args_spec
();
parallel
::
ParallelParameterContextInit
(
func_graph
);
// suppose that there is not KeywordArgument for the top graph
// suppose that there is not KeywordArgument for the top graph
// get the hyper parameter
// get the hyper parameter
for
(
const
auto
&
param
:
func_graph
->
parameters
())
{
for
(
const
auto
&
param
:
func_graph
->
parameters
())
{
...
@@ -224,7 +227,10 @@ bool AbstractSpecializeAction(const ResourcePtr &res) {
...
@@ -224,7 +227,10 @@ bool AbstractSpecializeAction(const ResourcePtr &res) {
if
(
param_node
->
has_default
())
{
if
(
param_node
->
has_default
())
{
AbstractBasePtr
ptr
=
AbstractBasePtr
ptr
=
abstract
::
FromValue
(
parse
::
data_converter
::
PyDataToValue
(
param_node
->
default_param
()),
true
);
abstract
::
FromValue
(
parse
::
data_converter
::
PyDataToValue
(
param_node
->
default_param
()),
true
);
parallel
::
ParallelParameterContextRestoreInNoTraining
(
func_graph
,
param_node
,
ptr
);
args_spec
.
push_back
(
ptr
);
args_spec
.
push_back
(
ptr
);
parallel
::
ParallelParameterContextCkptInTraining
(
func_graph
,
param_node
,
ptr
);
}
}
}
}
// Analyze
// Analyze
...
...
mindspore/common/api.py
浏览文件 @
6b54a641
...
@@ -379,7 +379,7 @@ class _Executor:
...
@@ -379,7 +379,7 @@ class _Executor:
self
.
_params_init_data
(
obj
,
params
)
self
.
_params_init_data
(
obj
,
params
)
if
not
enable_debug_runtime
or
enable_ge
:
if
not
enable_debug_runtime
or
enable_ge
:
if
auto_parallel_mode
:
if
auto_parallel_mode
and
"train"
in
phase
:
obj
.
parameter_layout_dict
=
self
.
_executor
.
get_parameter_layout
(
phase
)
obj
.
parameter_layout_dict
=
self
.
_executor
.
get_parameter_layout
(
phase
)
obj
.
load_parameter_slice
(
params
)
obj
.
load_parameter_slice
(
params
)
...
...
tests/ut/python/parallel/test_get_parameter_layout.py
浏览文件 @
6b54a641
...
@@ -47,7 +47,7 @@ def test_get_parameter_layout():
...
@@ -47,7 +47,7 @@ def test_get_parameter_layout():
net
=
Net
(
strategy1
,
strategy2
,
weight
)
net
=
Net
(
strategy1
,
strategy2
,
weight
)
net
.
set_auto_parallel
()
net
.
set_auto_parallel
()
exe
=
me
.
_executor
exe
=
me
.
_executor
exe
.
compile
(
net
,
x
,
auto_parallel_mode
=
True
)
exe
.
compile
(
net
,
x
,
phase
=
'train'
,
auto_parallel_mode
=
True
)
x_layout
=
[[
2
,
4
],
[
1
,
-
1
],
[
16
,
32
]]
# device_arrangement = [2, 4], tensor_map = [1, -1]
x_layout
=
[[
2
,
4
],
[
1
,
-
1
],
[
16
,
32
]]
# device_arrangement = [2, 4], tensor_map = [1, -1]
weight_layout
=
[[
2
,
4
],
[
0
,
-
1
],
[
16
,
32
]]
# device_arrangement = [2, 4], tensor_map = [0, -1]
weight_layout
=
[[
2
,
4
],
[
0
,
-
1
],
[
16
,
32
]]
# device_arrangement = [2, 4], tensor_map = [0, -1]
expect_dict
=
{
'x'
:
x_layout
,
'w1'
:
weight_layout
}
expect_dict
=
{
'x'
:
x_layout
,
'w1'
:
weight_layout
}
...
...
tests/ut/python/parallel/test_train_and_eval.py
0 → 100644
浏览文件 @
6b54a641
# Copyright 2020 Huawei Technologies Co., Ltd
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
# ============================================================================
import
numpy
as
np
import
mindspore
as
ms
from
mindspore
import
context
,
Tensor
,
Parameter
from
mindspore.nn
import
Cell
,
TrainOneStepCell
,
Momentum
from
mindspore.ops
import
operations
as
P
from
mindspore.common.api
import
_executor
class Net(Cell):
    """Small parallel network: out = -(x * w1), with per-op shard strategies."""

    def __init__(self, mul_weight, strategy1=None, strategy2=None):
        super().__init__()
        # Element-wise multiply and negate, each with an explicit shard strategy.
        self.mul = P.Mul().set_strategy(strategy1)
        self.neg = P.Neg().set_strategy(strategy2)
        self.mul_weight = Parameter(mul_weight, "w1")

    def construct(self, x, b):
        # `b` is accepted but unused; it keeps the signature aligned with EvalNet.
        product = self.mul(x, self.mul_weight)
        return self.neg(product)
class EvalNet(Cell):
    """Evaluation wrapper: ReLU applied to the wrapped network's output."""

    def __init__(self, network, strategy2=None):
        super().__init__()
        self.network = network
        self.relu = P.ReLU().set_strategy(strategy2)

    def construct(self, x, b):
        return self.relu(self.network(x, b))
# Shared 8x8 float32 fixtures for the train/eval compile test below.
_x = Tensor(np.ones([8, 8]), dtype=ms.float32)   # network input
_w1 = Tensor(np.ones([8, 8]), dtype=ms.float32)  # multiply weight
_b = Tensor(np.ones([8, 8]), dtype=ms.float32)   # second input (unused by Net.construct)
def test_train_and_eval():
    """Compile the same semi-auto-parallel net for a 'train' then an 'eval' phase."""
    context.set_context(save_graphs=True, mode=0)
    context.set_auto_parallel_context(parallel_mode="semi_auto_parallel", device_num=16)
    strategy1 = ((4, 4), (4, 4))
    strategy2 = ((4, 4),)

    net = Net(_w1, strategy1, strategy2)
    eval_net = EvalNet(net, strategy2=strategy2)

    # Training compile first (records the parallel parameter layout).
    net.set_train()
    net.set_auto_parallel()
    _executor.compile(net, _x, _b, phase='train', auto_parallel_mode=True)

    # Then a non-training compile of the wrapped evaluation graph.
    eval_net.set_train(mode=False)
    eval_net.set_auto_parallel()
    _executor.compile(eval_net, _x, _b, phase='eval', auto_parallel_mode=True)

    context.reset_auto_parallel_context()
\ No newline at end of file
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录