Commit 0cdb1171 (magicwindyyd/mindspore, a fork of MindSpore/mindspore)
Authored on April 20, 2020 by mindspore-ci-bot; committed by Gitee on April 20, 2020

!87 Take AllToAll as a virtual operator in cost model

Merge pull request !87 from gziyan/dev_alltoall

Parents: c3ec9712, 0d208e00
Showing 7 changed files with 55 additions and 30 deletions (+55 -30)
mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.h                 (+1 -1)
mindspore/ccsrc/parallel/tensor_layout/redistribution_operator_infer.cc  (+23 -10)
mindspore/ccsrc/parallel/tensor_layout/redistribution_operator_infer.h   (+3 -1)
mindspore/ccsrc/parallel/tensor_layout/tensor_redistribution.cc          (+22 -14)
mindspore/ccsrc/parallel/tensor_layout/tensor_redistribution.h           (+3 -1)
tests/ut/python/parallel/test_auto_parallel_resnet.py                    (+2 -2)
tests/ut/python/parallel/test_auto_parallel_two_matmul.py                (+1 -1)
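Taken together, the C++ changes thread a new is_cost_model flag from TensorRedistribution::ComputeCost() down into RedistributionOperatorInfer, so that during cost estimation a dimension permutation is modeled as a single virtual AllToAll (PERMUTE_BY_AXIS) rather than the concrete ConcatByAxis + SplitByAxis pair emitted for execution. The following is an illustrative Python sketch of that control flow only, with simplified names; it is not the MindSpore API:

# Illustrative sketch of the control flow introduced by this commit
# (not MindSpore code; names simplified).
def infer_operator_list(is_cost_model=False):
    # Mirrors InferTensorRedistributionOperatorList(bool is_cost_model).
    if is_cost_model:
        return ["PermuteByAxis"]            # one virtual AllToAll for costing
    return ["ConcatByAxis", "SplitByAxis"]  # concrete operators for execution

def compute_cost():
    # Mirrors ComputeCost(), which now passes true explicitly.
    return infer_operator_list(is_cost_model=True)

print(compute_cost())         # ['PermuteByAxis']
print(infer_operator_list())  # ['ConcatByAxis', 'SplitByAxis']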
mindspore/ccsrc/parallel/auto_parallel/graph_costmodel.h

@@ -34,7 +34,7 @@ namespace parallel {
 #define OPERATOR_TO_OPERATOR_CONNECTOR "-"
 #define DEFAULT_DEVICE_MEMORY_CAPACITY (1024.0 * 1024.0 * 1024.0 * 16.0)
 #define DEFAULT_COST_MODEL_ALPHA 1.0
-#define DEFAULT_COST_MODEL_BETA 260.0
+#define DEFAULT_COST_MODEL_BETA 400.0
 #define DEFAULT_COST_MODEL_GAMMA 0.001
 #define DEFAULT_COST_MODEL_SIMPLIFY_CALCULATION true
 #define DEFAULT_COST_MODEL_COMMUNI_THRESHOLD 2048.0
mindspore/ccsrc/parallel/tensor_layout/redistribution_operator_infer.cc

@@ -23,7 +23,7 @@
 namespace mindspore {
 namespace parallel {
 Status RedistributionOperatorInfer::Init(const TensorLayout& tensor_layout, const Map& out_tensor_map,
-                                         RankList dev_list) {
+                                         RankList dev_list, bool is_cost_model) {
   in_tensor_map_ = tensor_layout.tensor_map();
   dev_mat_ = tensor_layout.device_arrangement();

@@ -51,6 +51,8 @@ Status RedistributionOperatorInfer::Init(const TensorLayout& tensor_layout, cons
   for (int32_t item : map) {
     map_[key++] = item;
   }
+  is_cost_model_ = is_cost_model;
+
   return Status::SUCCESS;
 }

@@ -130,15 +132,26 @@ Status RedistributionOperatorInfer::InferPermuteByAxis() {
         std::any_of(map_.begin(), map_.end(),
                     [out_dim](const RedistributionOperatorMap::value_type& a) { return a.second == out_dim; })) {
       int32_t cat_dim = in_tensor_map_.GetIndexByValue(out_dim);
-      Args args_allconcat = {cat_dim, out_dim, dev_mat_.GetDimByReverseIdx(IntToUint(out_dim))};
-      Args args_allsplit = {dev_mat_.GetDimByReverseIdx(IntToUint(out_dim)), UintToInt(index), out_dim};
-      if (InsertOperator(CONCAT_BY_AXIS, args_allconcat) == Status::FAILED) {
-        MS_LOG(ERROR) << "Insert ConcatByAxis Error!";
-        return Status::FAILED;
-      }
-      if (InsertOperator(SPLIT_BY_AXIS, args_allsplit) == Status::FAILED) {
-        MS_LOG(ERROR) << "Insert SplitByAxis Error!";
-        return Status::FAILED;
+      int32_t dev_num = dev_mat_.GetDimByReverseIdx(IntToUint(out_dim));
+      if (is_cost_model_) {
+        int32_t dev_dim = in_tensor_map_.GetDimByIdx(IntToUint(cat_dim));
+        Args args_alltoall = {dev_mat_.GetDimByReverseIdx(IntToUint(dev_dim)), UintToInt(index), cat_dim, dev_dim,
+                              dev_num};
+        if (InsertOperator(PERMUTE_BY_AXIS, args_alltoall) == Status::FAILED) {
+          MS_LOG(ERROR) << "Insert PermuteByAxis Error!";
+          return Status::FAILED;
+        }
+      } else {
+        Args args_allconcat = {cat_dim, out_dim, dev_num};
+        Args args_allsplit = {dev_num, UintToInt(index), out_dim};
+        if (InsertOperator(CONCAT_BY_AXIS, args_allconcat) == Status::FAILED) {
+          MS_LOG(ERROR) << "Insert ConcatByAxis Error!";
+          return Status::FAILED;
+        }
+        if (InsertOperator(SPLIT_BY_AXIS, args_allsplit) == Status::FAILED) {
+          MS_LOG(ERROR) << "Insert SplitByAxis Error!";
+          return Status::FAILED;
+        }
       }
       (void)map_.erase(iter++);
       map_[IntToSize(cat_dim)] = NONE;
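For readability, here is a hedged Python paraphrase of the new branch in InferPermuteByAxis: in cost-model mode a single PermuteByAxis operator is inserted whose argument tuple (device dimension size, index, cat_dim, dev_dim, dev_num) is exactly what ComputeCost() later unpacks (op.second[2] is the concat dimension, op.second[4] the device count); otherwise the Concat/Split pair is inserted as before. The device-matrix lookups are stubbed with plain dict indexing:

# Python paraphrase of the branch above (illustrative; lookups stubbed).
def infer_permute_by_axis(is_cost_model, cat_dim, out_dim, index,
                          dev_mat, in_tensor_map):
    dev_num = dev_mat[out_dim]            # stands in for GetDimByReverseIdx(out_dim)
    if is_cost_model:
        dev_dim = in_tensor_map[cat_dim]  # stands in for GetDimByIdx(cat_dim)
        # Single virtual AllToAll; args match what ComputeCost() unpacks later.
        return [("PermuteByAxis", (dev_mat[dev_dim], index, cat_dim, dev_dim, dev_num))]
    return [("ConcatByAxis", (cat_dim, out_dim, dev_num)),
            ("SplitByAxis", (dev_num, index, out_dim))]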
mindspore/ccsrc/parallel/tensor_layout/redistribution_operator_infer.h

@@ -40,7 +40,8 @@ class RedistributionOperatorInfer {
  public:
   const int NONE = -1;
   explicit RedistributionOperatorInfer(bool construct_op_flag = true) : construct_op_flag_(construct_op_flag) {}
-  Status Init(const TensorLayout& tensor_layout, const Map& out_tensor_map, RankList dev_list);
+  Status Init(const TensorLayout& tensor_layout, const Map& out_tensor_map, RankList dev_list,
+              bool is_cost_model = false);
   ~RedistributionOperatorInfer() = default;
   OperatorList operator_list() const { return operator_list_; }
   OperatorVector operator_vector() const { return operator_vector_; }

@@ -67,6 +68,7 @@ class RedistributionOperatorInfer {
   ConstructOperator constructor_;
   RankList dev_list_;
   bool construct_op_flag_;
+  bool is_cost_model_;
 };
 }  // namespace parallel
 }  // namespace mindspore
mindspore/ccsrc/parallel/tensor_layout/tensor_redistribution.cc

@@ -40,7 +40,7 @@ Status TensorRedistribution::Init(const TensorLayout& from, const TensorLayout&
   return Status::SUCCESS;
 }

-RedistributionOpListPtr TensorRedistribution::InferTensorRedistributionOperatorList() {
+RedistributionOpListPtr TensorRedistribution::InferTensorRedistributionOperatorList(bool is_cost_model) {
   // Step 1: Match device arrangement between from_ and to_
   RedistributionLayoutTransfer layout_transfer;
   Status status = layout_transfer.Init(from_, to_);

@@ -62,7 +62,7 @@ RedistributionOpListPtr TensorRedistribution::InferTensorRedistributionOperatorL
   MS_LOG(DEBUG) << "reshape to_ " << to_.ToString();
   // Step 2: Infer redistribution and insert operators
   RedistributionOperatorInfer operator_infer(construct_op_flag_);
-  if (operator_infer.Init(from_layout, to_layout.tensor_map(), dev_list_) == Status::FAILED) {
+  if (operator_infer.Init(from_layout, to_layout.tensor_map(), dev_list_, is_cost_model) == Status::FAILED) {
     MS_LOG(ERROR) << "Init operatorInfer failed!";
     return nullptr;
   }

@@ -138,7 +138,7 @@ Status TensorRedistribution::InferReshape(const TensorLayout& from_layout, const
 }

 Status TensorRedistribution::ComputeCost() {
-  RedistributionOpListPtr redistribution_oplist_ptr = InferTensorRedistributionOperatorList();
+  RedistributionOpListPtr redistribution_oplist_ptr = InferTensorRedistributionOperatorList(true);
   if (redistribution_oplist_ptr == nullptr) {
     MS_LOG(ERROR) << "Failure: InferTensorRedistribution failed";
     return Status::FAILED;

@@ -151,14 +151,22 @@ Status TensorRedistribution::ComputeCost() {
         std::accumulate(slice_shape.begin(), slice_shape.end(), static_cast<double>(1.0), std::multiplies<double>());
     std::string str = op.first;
     if (str == PERMUTE_BY_AXIS) {
-      // The shape does not change after PermuteByAxis operation.
-      // communication cost = all_to_all + all_to_all = 2 * slice_shape
-      // computation cost = slice_shape
-      forward_comm_cost_ += prod;
-      backward_comm_cost_ += prod;
-      comm_cost_ += 2.0 * prod;
-      computation_cost_ += prod;
-      memory_cost_ += prod;
+      // Since AlltoAll is a virtual operator, the expanded operators are used here to compute cost.
+      // communication cost = all_gather + reduce_scatter = before_slice_shape + after_slice_shape
+      forward_comm_cost_ += prod * ALLTOALL_SCALE_FACTOR;
+      backward_comm_cost_ += prod * ALLTOALL_SCALE_FACTOR;
+      comm_cost_ += 2.0 * prod * ALLTOALL_SCALE_FACTOR;
+      int32_t concat_dim = op.second[2];
+      if (concat_dim == 0) {
+        // memory cost = all_gather
+        computation_cost_ += prod;
+        memory_cost_ += prod;
+      } else {
+        // memory cost = all_gather + split + concat
+        int32_t dev_num = op.second[4];
+        computation_cost_ += (prod + prod * dev_num + prod * dev_num);
+        memory_cost_ += (prod * dev_num + prod * dev_num + prod);
+      }
     } else if (str == CONCAT_BY_AXIS) {
       // communication cost = all_gather + reduce_scatter = before_slice_shape + after_slice_shape
       // computation cost = before_slice_shape

@@ -168,9 +176,9 @@ Status TensorRedistribution::ComputeCost() {
       }
       double dev_num = op.second[2];
       // here, communication cost = all_gather + reduce_scatter
-      forward_comm_cost_ += prod * dev_num;
-      backward_comm_cost_ += prod;
-      comm_cost_ += prod * (dev_num + 1.0);
+      forward_comm_cost_ += prod * dev_num * ALLGATHER_REDUCESCATTER_SCALE_FACTOR;
+      backward_comm_cost_ += prod * ALLGATHER_REDUCESCATTER_SCALE_FACTOR;
+      comm_cost_ += prod * (dev_num + 1.0) * ALLGATHER_REDUCESCATTER_SCALE_FACTOR;
       int32_t concat_dim = op.second[0];
       if (concat_dim == 0) {
         // computation cost = all_gather
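The arithmetic of the new PERMUTE_BY_AXIS branch is easy to check in isolation. Below is a self-contained Python sketch of the bookkeeping above, where prod is the element count of the slice and the scale factor is the constant introduced in tensor_redistribution.h; it mirrors the diff and is not the MindSpore implementation:

# Standalone sketch of the updated PERMUTE_BY_AXIS costing (mirrors the diff).
ALLTOALL_SCALE_FACTOR = 2.0

def permute_by_axis_cost(prod, concat_dim, dev_num):
    forward_comm = prod * ALLTOALL_SCALE_FACTOR
    backward_comm = prod * ALLTOALL_SCALE_FACTOR
    comm = 2.0 * prod * ALLTOALL_SCALE_FACTOR
    if concat_dim == 0:
        computation = prod                       # memory cost = all_gather
        memory = prod
    else:                                        # all_gather + split + concat
        computation = prod + prod * dev_num + prod * dev_num
        memory = prod * dev_num + prod * dev_num + prod
    return forward_comm, backward_comm, comm, computation, memory

# A 1024-element slice permuted across 8 devices with concat_dim != 0:
print(permute_by_axis_cost(1024.0, concat_dim=1, dev_num=8))
# (2048.0, 2048.0, 4096.0, 17408.0, 17408.0)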
mindspore/ccsrc/parallel/tensor_layout/tensor_redistribution.h

@@ -33,6 +33,8 @@
 namespace mindspore {
 namespace parallel {
+constexpr double ALLTOALL_SCALE_FACTOR = 2.0;
+constexpr double ALLGATHER_REDUCESCATTER_SCALE_FACTOR = 0.5;
 class TensorRedistribution {
  public:
   explicit TensorRedistribution(bool construct_op_flag = true, bool keep_reshape = false)

@@ -46,7 +48,7 @@ class TensorRedistribution {
         keep_reshape_(keep_reshape) {}
   Status Init(const TensorLayout& from, const TensorLayout& to, const RankList& dev_list);
   ~TensorRedistribution() = default;
-  RedistributionOpListPtr InferTensorRedistributionOperatorList();
+  RedistributionOpListPtr InferTensorRedistributionOperatorList(bool is_cost_model = false);
   OperatorList operator_list() const { return operator_list_; }
   bool reshape_flag() const { return reshape_flag_; }
   Status ComputeCost();
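With these constants, the virtual AllToAll is charged twice the slice volume in each direction, while the AllGather/ReduceScatter path of CONCAT_BY_AXIS is scaled down to half its nominal volume. A quick numeric check against the ComputeCost() hunk above (illustrative values only):

# Worked example for the rescaled CONCAT_BY_AXIS communication cost.
ALLGATHER_REDUCESCATTER_SCALE_FACTOR = 0.5

prod, dev_num = 1024.0, 8.0
forward_comm = prod * dev_num * ALLGATHER_REDUCESCATTER_SCALE_FACTOR  # 4096.0
backward_comm = prod * ALLGATHER_REDUCESCATTER_SCALE_FACTOR           # 512.0
comm = prod * (dev_num + 1.0) * ALLGATHER_REDUCESCATTER_SCALE_FACTOR  # 4608.0
assert comm == forward_comm + backward_comm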
tests/ut/python/parallel/test_auto_parallel_resnet.py

@@ -304,7 +304,7 @@ def train_32k_8p(epoch_size=3, batch_size=32, num_classes=32768):
 def test_train_32k_8p_fusion1(epoch_size=3, batch_size=32, num_classes=32768): #1048576 #131072 #32768 #8192
-    cost_model_context.set_cost_model_context(costmodel_gamma=0.001, costmodel_beta=260.0)
+    cost_model_context.set_cost_model_context(costmodel_gamma=0.001, costmodel_beta=400.0)
     cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_algorithm=1)
     cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_times=2)
     cost_model_context.set_cost_model_context(costmodel_allreduce_fusion_tail_percent=0.5)

@@ -651,7 +651,7 @@ def test_train_32k_8p_fusion2(epoch_size=3, batch_size=32, num_classes=32768): #
 def test_train_64k_8p(epoch_size=3, batch_size=32, num_classes=65536): #1048576 #131072 #32768 #8192
     dev_num = 8
     context.set_auto_parallel_context(parallel_mode=ParallelMode.AUTO_PARALLEL, device_num=dev_num)
-    cost_model_context.set_cost_model_context(costmodel_gamma=0.001, costmodel_beta=260.0)
+    cost_model_context.set_cost_model_context(costmodel_gamma=0.001, costmodel_beta=400.0)
     set_algo_parameters(elementwise_op_strategy_follow=True)
     resset_op_id()
     np.random.seed(6)
tests/ut/python/parallel/test_auto_parallel_two_matmul.py

@@ -86,7 +86,7 @@ def test_two_matmul():
     costmodel_alpha = cost_model_context.get_cost_model_context("costmodel_alpha")
     assert costmodel_alpha == 1.0
     costmodel_beta = cost_model_context.get_cost_model_context("costmodel_beta")
-    assert costmodel_beta == 260.0
+    assert costmodel_beta == 400.0
     costmodel_gamma = cost_model_context.get_cost_model_context("costmodel_gamma")
     assert costmodel_gamma == 0.001
     costmodel_communi_threshold = cost_model_context.get_cost_model_context("costmodel_communi_threshold")