Unverified commit e0eb5cf3, authored by ShenLiang, committed by GitHub

[OPT] Set order for hybrid parallel setting (#51781)

* set order for hybridparallel

* fix bug

* fix ->

* fix ->

* fix ->

* add topology

* fix utest
Parent 325feca6
@@ -55,6 +55,7 @@ message HybridConfig {
   optional int32 mp_degree = 2 [ default = 1 ];
   optional int32 pp_degree = 3 [ default = 1 ];
   optional int32 sharding_degree = 4 [ default = 1 ];
+  repeated string order = 5;
 }
 message AMPConfig {
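For reference, a minimal sketch of how the new `repeated string` field behaves from Python once the message is generated. The import path `paddle.distributed.fleet.proto.distributed_strategy_pb2` is an assumption based on conventional generated-module naming, not something confirmed by this diff:

```python
# Hypothetical import path for the generated protobuf module; the
# actual location of the generated file may differ.
from paddle.distributed.fleet.proto import distributed_strategy_pb2 as pb

cfg = pb.HybridConfig()
cfg.dp_degree = 2
cfg.mp_degree = 2
# A repeated string field behaves like a mutable Python sequence:
cfg.order.extend(['dp', 'pp', 'sharding', 'mp'])
print(list(cfg.order))  # ['dp', 'pp', 'sharding', 'mp']
```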
......
@@ -1673,6 +1673,8 @@ class DistributedStrategy:
             **pp_degree(int)**: set number of GPUs in a pipeline parallel group. Default 1
+            **order(list(string))**: set the order of hybrid parallel dimensions, from outermost to innermost. Default ['dp', 'pp', 'sharding', 'mp']
         Examples:
             .. code-block:: python
@@ -1681,7 +1683,8 @@ class DistributedStrategy:
                 strategy.hybrid_configs = {
                     "dp_degree": 1,
                     "mp_degree": 2,
-                    "pp_degree": 1}
+                    "pp_degree": 1,
+                    "order": ['dp', 'pp', 'sharding', 'mp']}
         """
         return get_msg_dict(self.strategy.hybrid_configs)
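To make "from outermost to innermost" concrete, here is a standalone sketch in plain Python (not Paddle internals): the first entry of `order` is the outermost dimension and varies slowest across global ranks, and the last entry is innermost and varies fastest. The degree values are illustrative:

```python
from itertools import product

order = ['dp', 'pp', 'sharding', 'mp']
degrees = [2, 2, 1, 2]  # 2 * 2 * 1 * 2 = 8 ranks in total

# itertools.product varies the last axis fastest, so the first
# entry of `order` is the outermost (slowest-varying) dimension.
for rank, coord in enumerate(product(*(range(d) for d in degrees))):
    print(f"rank {rank}: {dict(zip(order, coord))}")
# rank 0: {'dp': 0, 'pp': 0, 'sharding': 0, 'mp': 0}
# rank 1: {'dp': 0, 'pp': 0, 'sharding': 0, 'mp': 1}
# ...
# rank 7: {'dp': 1, 'pp': 1, 'sharding': 0, 'mp': 1}
```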
......
@@ -405,14 +405,28 @@ class Fleet:
         self.dp_degree = max(self.dp_degree, 1)
+        d_hybrid_degree = {
+            "dp": ["data", self.dp_degree],
+            "pp": ["pipe", self.pp_degree],
+            "sharding": ["sharding", self.sharding_degree],
+            "mp": ["model", self.mp_degree],
+        }
+        order = self.hybrid_configs["order"]
+        if not order:
+            order = ['dp', 'pp', 'sharding', 'mp']
+        # Validate that `order` is a permutation of the supported dims.
+        # sorted() returns new lists, so the comparison is meaningful
+        # (list.sort() sorts in place and returns None).
+        if sorted(order) != sorted(d_hybrid_degree.keys()):
+            assert False, "The order of hybrid_config setting is incorrect."
+        hybrid_group_names = []
+        dims = []
+        for h_name in order:
+            name, degree = d_hybrid_degree[h_name]
+            hybrid_group_names.append(name)
+            dims.append(degree)
         self._topology = tp.CommunicateTopology(
-            hybrid_group_names=["data", "pipe", "sharding", "model"],
-            dims=[
-                self.dp_degree,
-                self.pp_degree,
-                self.sharding_degree,
-                self.mp_degree,
-            ],
+            hybrid_group_names=hybrid_group_names, dims=dims
         )
         self._hcg = tp.HybridCommunicateGroup(self._topology)
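Putting the pieces together, a hedged end-to-end usage sketch: it assumes 4 GPUs launched via `python -m paddle.distributed.launch --gpus 0,1,2,3 train.py`, and the degrees and the non-default order are illustrative values:

```python
import paddle.distributed.fleet as fleet

strategy = fleet.DistributedStrategy()
strategy.hybrid_configs = {
    "dp_degree": 2,
    "mp_degree": 2,
    "pp_degree": 1,
    "sharding_degree": 1,
    # Non-default order: put 'sharding' outermost instead of 'dp'.
    "order": ['sharding', 'dp', 'pp', 'mp'],
}
fleet.init(is_collective=True, strategy=strategy)

# The hybrid communicate group is built from the reordered topology.
hcg = fleet.get_hybrid_communicate_group()
print(hcg.get_model_parallel_world_size())  # 2
print(hcg.get_data_parallel_world_size())   # 2
```

Because 'mp' stays innermost in this order, model-parallel peers receive adjacent global ranks (typically colocated on one node), while the outermost dimension varies slowest across ranks.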
......