Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
70eb435c
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 2 年 前同步成功
通知
2325
Star
20933
Fork
5424
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
70eb435c
编写于
5月 06, 2021
作者:
Z
zhiboniu
提交者:
GitHub
5月 06, 2021
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
update 2.0 public api in distributed (#32695)
上级
28d42a94
变更
76
隐藏空白更改
内联
并排
Showing
76 changed file
with
312 addition
and
120 deletion
+312
-120
python/paddle/distributed/__init__.py
python/paddle/distributed/__init__.py
+56
-40
python/paddle/distributed/cloud_utils.py
python/paddle/distributed/cloud_utils.py
+6
-1
python/paddle/distributed/collective.py
python/paddle/distributed/collective.py
+9
-18
python/paddle/distributed/entry_attr.py
python/paddle/distributed/entry_attr.py
+1
-1
python/paddle/distributed/fleet/__init__.py
python/paddle/distributed/fleet/__init__.py
+27
-14
python/paddle/distributed/fleet/ascend_utils.py
python/paddle/distributed/fleet/ascend_utils.py
+2
-0
python/paddle/distributed/fleet/base/distributed_strategy.py
python/paddle/distributed/fleet/base/distributed_strategy.py
+1
-1
python/paddle/distributed/fleet/base/fleet_base.py
python/paddle/distributed/fleet/base/fleet_base.py
+2
-0
python/paddle/distributed/fleet/base/meta_optimizer_factory.py
...n/paddle/distributed/fleet/base/meta_optimizer_factory.py
+2
-0
python/paddle/distributed/fleet/base/private_helper_function.py
.../paddle/distributed/fleet/base/private_helper_function.py
+2
-0
python/paddle/distributed/fleet/base/role_maker.py
python/paddle/distributed/fleet/base/role_maker.py
+2
-0
python/paddle/distributed/fleet/base/runtime_factory.py
python/paddle/distributed/fleet/base/runtime_factory.py
+2
-0
python/paddle/distributed/fleet/base/strategy_compiler.py
python/paddle/distributed/fleet/base/strategy_compiler.py
+2
-0
python/paddle/distributed/fleet/base/util_factory.py
python/paddle/distributed/fleet/base/util_factory.py
+2
-1
python/paddle/distributed/fleet/cloud_utils.py
python/paddle/distributed/fleet/cloud_utils.py
+2
-0
python/paddle/distributed/fleet/data_generator/__init__.py
python/paddle/distributed/fleet/data_generator/__init__.py
+3
-1
python/paddle/distributed/fleet/data_generator/data_generator.py
...paddle/distributed/fleet/data_generator/data_generator.py
+2
-0
python/paddle/distributed/fleet/dataset/__init__.py
python/paddle/distributed/fleet/dataset/__init__.py
+8
-2
python/paddle/distributed/fleet/dataset/dataset.py
python/paddle/distributed/fleet/dataset/dataset.py
+2
-0
python/paddle/distributed/fleet/dataset/index_dataset.py
python/paddle/distributed/fleet/dataset/index_dataset.py
+2
-0
python/paddle/distributed/fleet/launch.py
python/paddle/distributed/fleet/launch.py
+2
-0
python/paddle/distributed/fleet/meta_optimizers/amp_optimizer.py
...paddle/distributed/fleet/meta_optimizers/amp_optimizer.py
+2
-0
python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py
...tributed/fleet/meta_optimizers/ascend/ascend_optimizer.py
+2
-0
python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py
...distributed/fleet/meta_optimizers/ascend/ascend_parser.py
+2
-0
python/paddle/distributed/fleet/meta_optimizers/common.py
python/paddle/distributed/fleet/meta_optimizers/common.py
+2
-0
python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py
...paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py
+2
-0
python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/__init__.py
...buted/fleet/meta_optimizers/dygraph_optimizer/__init__.py
+2
-0
python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py
...ptimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py
+2
-0
python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py
...optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py
+2
-0
python/paddle/distributed/fleet/meta_optimizers/fp16_allreduce_optimizer.py
...ributed/fleet/meta_optimizers/fp16_allreduce_optimizer.py
+2
-0
python/paddle/distributed/fleet/meta_optimizers/gradient_merge_optimizer.py
...ributed/fleet/meta_optimizers/gradient_merge_optimizer.py
+2
-0
python/paddle/distributed/fleet/meta_optimizers/graph_execution_optimizer.py
...ibuted/fleet/meta_optimizers/graph_execution_optimizer.py
+2
-0
python/paddle/distributed/fleet/meta_optimizers/lamb_optimizer.py
...addle/distributed/fleet/meta_optimizers/lamb_optimizer.py
+2
-0
python/paddle/distributed/fleet/meta_optimizers/lars_optimizer.py
...addle/distributed/fleet/meta_optimizers/lars_optimizer.py
+2
-0
python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py
...e/distributed/fleet/meta_optimizers/localsgd_optimizer.py
+2
-0
python/paddle/distributed/fleet/meta_optimizers/meta_optimizer_base.py
.../distributed/fleet/meta_optimizers/meta_optimizer_base.py
+2
-0
python/paddle/distributed/fleet/meta_optimizers/parameter_server_graph_optimizer.py
...fleet/meta_optimizers/parameter_server_graph_optimizer.py
+2
-0
python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py
...buted/fleet/meta_optimizers/parameter_server_optimizer.py
+2
-0
python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py
...e/distributed/fleet/meta_optimizers/pipeline_optimizer.py
+2
-0
python/paddle/distributed/fleet/meta_optimizers/recompute_optimizer.py
.../distributed/fleet/meta_optimizers/recompute_optimizer.py
+2
-0
python/paddle/distributed/fleet/meta_optimizers/sharding/fp16_helper.py
...distributed/fleet/meta_optimizers/sharding/fp16_helper.py
+2
-0
python/paddle/distributed/fleet/meta_optimizers/sharding/gradient_clip_helper.py
...ed/fleet/meta_optimizers/sharding/gradient_clip_helper.py
+2
-0
python/paddle/distributed/fleet/meta_optimizers/sharding/offload_helper.py
...tributed/fleet/meta_optimizers/sharding/offload_helper.py
+2
-0
python/paddle/distributed/fleet/meta_optimizers/sharding/prune.py
...addle/distributed/fleet/meta_optimizers/sharding/prune.py
+2
-0
python/paddle/distributed/fleet/meta_optimizers/sharding/shard.py
...addle/distributed/fleet/meta_optimizers/sharding/shard.py
+2
-0
python/paddle/distributed/fleet/meta_optimizers/sharding/weight_decay_helper.py
...ted/fleet/meta_optimizers/sharding/weight_decay_helper.py
+2
-0
python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py
...e/distributed/fleet/meta_optimizers/sharding_optimizer.py
+1
-1
python/paddle/distributed/fleet/meta_optimizers/tensor_parallel_optimizer.py
...ibuted/fleet/meta_optimizers/tensor_parallel_optimizer.py
+2
-0
python/paddle/distributed/fleet/meta_parallel/__init__.py
python/paddle/distributed/fleet/meta_parallel/__init__.py
+12
-3
python/paddle/distributed/fleet/meta_parallel/meta_parallel_base.py
...dle/distributed/fleet/meta_parallel/meta_parallel_base.py
+2
-0
python/paddle/distributed/fleet/meta_parallel/model_parallel.py
.../paddle/distributed/fleet/meta_parallel/model_parallel.py
+5
-1
python/paddle/distributed/fleet/meta_parallel/parallel_layers/__init__.py
...stributed/fleet/meta_parallel/parallel_layers/__init__.py
+10
-3
python/paddle/distributed/fleet/meta_parallel/parallel_layers/mp_layers.py
...tributed/fleet/meta_parallel/parallel_layers/mp_layers.py
+1
-3
python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py
...tributed/fleet/meta_parallel/parallel_layers/pp_layers.py
+1
-1
python/paddle/distributed/fleet/meta_parallel/parallel_layers/random.py
...distributed/fleet/meta_parallel/parallel_layers/random.py
+2
-3
python/paddle/distributed/fleet/meta_parallel/pipeline_parallel.py
...ddle/distributed/fleet/meta_parallel/pipeline_parallel.py
+12
-1
python/paddle/distributed/fleet/meta_parallel/pp_utils/__init__.py
...ddle/distributed/fleet/meta_parallel/pp_utils/__init__.py
+3
-1
python/paddle/distributed/fleet/meta_parallel/pp_utils/utils.py
.../paddle/distributed/fleet/meta_parallel/pp_utils/utils.py
+1
-4
python/paddle/distributed/fleet/metrics/__init__.py
python/paddle/distributed/fleet/metrics/__init__.py
+9
-11
python/paddle/distributed/fleet/metrics/metric.py
python/paddle/distributed/fleet/metrics/metric.py
+2
-0
python/paddle/distributed/fleet/runtime/__init__.py
python/paddle/distributed/fleet/runtime/__init__.py
+2
-0
python/paddle/distributed/fleet/runtime/collective_runtime.py
...on/paddle/distributed/fleet/runtime/collective_runtime.py
+2
-0
python/paddle/distributed/fleet/runtime/parameter_server_runtime.py
...dle/distributed/fleet/runtime/parameter_server_runtime.py
+2
-0
python/paddle/distributed/fleet/runtime/the_one_ps.py
python/paddle/distributed/fleet/runtime/the_one_ps.py
+2
-0
python/paddle/distributed/fleet/utils/__init__.py
python/paddle/distributed/fleet/utils/__init__.py
+11
-3
python/paddle/distributed/fleet/utils/fs.py
python/paddle/distributed/fleet/utils/fs.py
+1
-1
python/paddle/distributed/fleet/utils/http_server.py
python/paddle/distributed/fleet/utils/http_server.py
+2
-0
python/paddle/distributed/fleet/utils/hybrid_parallel_util.py
...on/paddle/distributed/fleet/utils/hybrid_parallel_util.py
+2
-0
python/paddle/distributed/fleet/utils/log_util.py
python/paddle/distributed/fleet/utils/log_util.py
+2
-0
python/paddle/distributed/fleet/utils/ps_util.py
python/paddle/distributed/fleet/utils/ps_util.py
+2
-0
python/paddle/distributed/fleet/utils/recompute.py
python/paddle/distributed/fleet/utils/recompute.py
+2
-0
python/paddle/distributed/launch.py
python/paddle/distributed/launch.py
+2
-0
python/paddle/distributed/parallel.py
python/paddle/distributed/parallel.py
+6
-3
python/paddle/distributed/spawn.py
python/paddle/distributed/spawn.py
+5
-1
python/paddle/distributed/utils.py
python/paddle/distributed/utils.py
+18
-0
python/paddle/nn/__init__.py
python/paddle/nn/__init__.py
+1
-1
未找到文件。
python/paddle/distributed/__init__.py
浏览文件 @
70eb435c
...
@@ -12,46 +12,62 @@
...
@@ -12,46 +12,62 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
from
.
import
spawn
from
.spawn
import
spawn
# noqa: F401
from
.spawn
import
spawn
from
.
import
parallel
from
.parallel
import
init_parallel_env
from
.parallel
import
get_rank
from
.parallel
import
get_world_size
from
paddle.fluid.dygraph.parallel
import
ParallelEnv
#DEFINE_ALIAS
from
paddle.distributed.fleet.dataset
import
*
from
.
import
collective
from
.collective
import
*
from
.entry_attr
import
ProbabilityEntry
from
.entry_attr
import
CountFilterEntry
# start multiprocess apis
__all__
=
[
"spawn"
]
# dygraph parallel apis
__all__
+=
[
"init_parallel_env"
,
"get_rank"
,
"get_world_size"
,
"ParallelEnv"
,
"InMemoryDataset"
,
"QueueDataset"
,
]
# dataset reader
from
.parallel
import
init_parallel_env
# noqa: F401
__all__
+=
[
from
.parallel
import
get_rank
# noqa: F401
"InMemoryDataset"
,
from
.parallel
import
get_world_size
# noqa: F401
"QueueDataset"
,
]
# entry for embedding
from
paddle.distributed.fleet.dataset
import
InMemoryDataset
# noqa: F401
__all__
+=
[
from
paddle.distributed.fleet.dataset
import
QueueDataset
# noqa: F401
"ProbabilityEntry"
,
"CountFilterEntry"
,
from
.collective
import
broadcast
# noqa: F401
]
from
.collective
import
all_reduce
# noqa: F401
from
.collective
import
reduce
# noqa: F401
from
.collective
import
all_gather
# noqa: F401
from
.collective
import
scatter
# noqa: F401
from
.collective
import
barrier
# noqa: F401
from
.collective
import
ReduceOp
# noqa: F401
from
.collective
import
split
# noqa: F401
from
.collective
import
new_group
# noqa: F401
from
.collective
import
alltoall
# noqa: F401
from
.collective
import
recv
# noqa: F401
from
.collective
import
get_group
# noqa: F401
from
.collective
import
send
# noqa: F401
from
.collective
import
wait
# noqa: F401
from
.fleet
import
BoxPSDataset
# noqa: F401
# collective apis
from
.entry_attr
import
ProbabilityEntry
# noqa: F401
__all__
+=
collective
.
__all__
from
.entry_attr
import
CountFilterEntry
# noqa: F401
from
paddle.fluid.dygraph.parallel
import
ParallelEnv
# noqa: F401
from
.
import
cloud_utils
# noqa: F401
from
.
import
utils
# noqa: F401
__all__
=
[
#noqa
"spawn"
,
"scatter"
,
"broadcast"
,
"ParallelEnv"
,
"new_group"
,
"init_parallel_env"
,
"QueueDataset"
,
"split"
,
"CountFilterEntry"
,
"get_world_size"
,
"get_group"
,
"all_gather"
,
"InMemoryDataset"
,
"barrier"
,
"all_reduce"
,
"alltoall"
,
"send"
,
"reduce"
,
"recv"
,
"ReduceOp"
,
"wait"
,
"get_rank"
,
"ProbabilityEntry"
]
python/paddle/distributed/cloud_utils.py
浏览文件 @
70eb435c
...
@@ -14,7 +14,12 @@
...
@@ -14,7 +14,12 @@
import
os
import
os
import
paddle
import
paddle
from
paddle.distributed.utils
import
get_cluster
,
logger
,
get_gpus
,
get_cluster_from_args
from
paddle.distributed.utils
import
get_cluster
from
paddle.distributed.utils
import
logger
from
paddle.distributed.utils
import
get_gpus
from
paddle.distributed.utils
import
get_cluster_from_args
__all__
=
[]
def
get_cloud_cluster
(
args_node_ips
,
args_node_ip
,
args_port
,
selected_devices
):
def
get_cloud_cluster
(
args_node_ips
,
args_node_ip
,
args_port
,
selected_devices
):
...
...
python/paddle/distributed/collective.py
浏览文件 @
70eb435c
...
@@ -15,8 +15,14 @@
...
@@ -15,8 +15,14 @@
import
numpy
as
np
import
numpy
as
np
import
os
import
os
from
..fluid.layer_helper
import
LayerHelper
from
..fluid.layer_helper
import
LayerHelper
from
..fluid.framework
import
Variable
,
OpProtoHolder
,
in_dygraph_mode
,
convert_np_dtype_to_dtype_
from
..fluid.framework
import
Variable
from
..fluid.data_feeder
import
convert_dtype
,
check_variable_and_dtype
,
check_type
,
check_dtype
from
..fluid.framework
import
OpProtoHolder
from
..fluid.framework
import
in_dygraph_mode
from
..fluid.framework
import
convert_np_dtype_to_dtype_
from
..fluid.data_feeder
import
convert_dtype
from
..fluid.data_feeder
import
check_variable_and_dtype
from
..fluid.data_feeder
import
check_type
from
..fluid.data_feeder
import
check_dtype
from
..fluid.layers.tensor
import
fill_constant
from
..fluid.layers.tensor
import
fill_constant
from
..fluid.layers
import
utils
from
..fluid.layers
import
utils
from
..fluid.dygraph.parallel
import
prepare_context
from
..fluid.dygraph.parallel
import
prepare_context
...
@@ -25,22 +31,7 @@ from .fleet import fleet
...
@@ -25,22 +31,7 @@ from .fleet import fleet
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
import
paddle.fluid.core
as
core
__all__
=
[
__all__
=
[]
'wait'
,
'new_group'
,
'get_group'
,
'broadcast'
,
'all_reduce'
,
'reduce'
,
'all_gather'
,
'scatter'
,
'barrier'
,
'split'
,
'alltoall'
,
'ReduceOp'
,
'send'
,
'recv'
,
]
class
ReduceOp
:
class
ReduceOp
:
...
...
python/paddle/distributed/entry_attr.py
浏览文件 @
70eb435c
...
@@ -14,7 +14,7 @@
...
@@ -14,7 +14,7 @@
from
__future__
import
print_function
from
__future__
import
print_function
__all__
=
[
'ProbabilityEntry'
,
'CountFilterEntry'
]
__all__
=
[]
class
EntryAttr
(
object
):
class
EntryAttr
(
object
):
...
...
python/paddle/distributed/fleet/__init__.py
浏览文件 @
70eb435c
...
@@ -13,21 +13,34 @@
...
@@ -13,21 +13,34 @@
# limitations under the License.
# limitations under the License.
# TODO: define distributed api under this directory,
# TODO: define distributed api under this directory,
from
.base.role_maker
import
Role
,
UserDefinedRoleMaker
,
PaddleCloudRoleMaker
from
.base.role_maker
import
Role
# noqa: F401
from
.base.distributed_strategy
import
DistributedStrategy
from
.base.role_maker
import
UserDefinedRoleMaker
# noqa: F401
from
.base.fleet_base
import
Fleet
from
.base.role_maker
import
PaddleCloudRoleMaker
# noqa: F401
from
.base.util_factory
import
UtilBase
from
.base.distributed_strategy
import
DistributedStrategy
# noqa: F401
from
.dataset
import
*
from
.base.fleet_base
import
Fleet
# noqa: F401
from
.data_generator
import
MultiSlotDataGenerator
,
MultiSlotStringDataGenerator
from
.base.util_factory
import
UtilBase
# noqa: F401
from
.
import
metrics
from
.dataset
import
DatasetBase
# noqa: F401
from
.base.topology
import
CommunicateTopology
,
HybridCommunicateGroup
from
.dataset
import
InMemoryDataset
# noqa: F401
from
.meta_parallel
import
*
from
.dataset
import
QueueDataset
# noqa: F401
from
.dataset
import
FileInstantDataset
# noqa: F401
from
.dataset
import
BoxPSDataset
# noqa: F401
from
.data_generator.data_generator
import
MultiSlotDataGenerator
# noqa: F401
from
.data_generator.data_generator
import
MultiSlotStringDataGenerator
# noqa: F401
from
.
import
metrics
# noqa: F401
from
.base.topology
import
CommunicateTopology
from
.base.topology
import
HybridCommunicateGroup
# noqa: F401
__all__
=
[
__all__
=
[
#noqa
"DistributedStrategy"
,
"UtilBase"
,
"UserDefinedRoleMaker"
,
"CommunicateTopology"
,
"PaddleCloudRoleMaker"
,
"Fleet"
,
"MultiSlotDataGenerator"
,
"UtilBase"
,
"MultiSlotStringDataGenerator"
,
"Role"
,
"CommunicateTopology"
,
"HybridCommunicateGroup"
,
"HybridCommunicateGroup"
"MultiSlotStringDataGenerator"
,
"UserDefinedRoleMaker"
,
"DistributedStrategy"
,
"Role"
,
"MultiSlotDataGenerator"
,
"PaddleCloudRoleMaker"
,
"Fleet"
]
]
fleet
=
Fleet
()
fleet
=
Fleet
()
...
...
python/paddle/distributed/fleet/ascend_utils.py
浏览文件 @
70eb435c
...
@@ -17,6 +17,8 @@ import json
...
@@ -17,6 +17,8 @@ import json
import
paddle
import
paddle
from
paddle.distributed.fleet.launch_utils
import
get_cluster
,
logger
,
get_host_name_ip
,
DeviceMode
from
paddle.distributed.fleet.launch_utils
import
get_cluster
,
logger
,
get_host_name_ip
,
DeviceMode
__all__
=
[]
def
_get_ascend_rankfile
(
rank_table_file_path
):
def
_get_ascend_rankfile
(
rank_table_file_path
):
"""
"""
...
...
python/paddle/distributed/fleet/base/distributed_strategy.py
浏览文件 @
70eb435c
...
@@ -19,7 +19,7 @@ from paddle.fluid.wrapped_decorator import wrap_decorator
...
@@ -19,7 +19,7 @@ from paddle.fluid.wrapped_decorator import wrap_decorator
import
google.protobuf.text_format
import
google.protobuf.text_format
import
google.protobuf
import
google.protobuf
__all__
=
[
"DistributedStrategy"
]
__all__
=
[]
non_auto_func_called
=
True
non_auto_func_called
=
True
...
...
python/paddle/distributed/fleet/base/fleet_base.py
浏览文件 @
70eb435c
...
@@ -33,6 +33,8 @@ from ..meta_parallel import PipelineParallel
...
@@ -33,6 +33,8 @@ from ..meta_parallel import PipelineParallel
from
..meta_optimizers
import
HybridParallelOptimizer
from
..meta_optimizers
import
HybridParallelOptimizer
from
..meta_optimizers
import
HybridParallelGradScaler
from
..meta_optimizers
import
HybridParallelGradScaler
__all__
=
[]
def
_inited_runtime_handler_
(
func
):
def
_inited_runtime_handler_
(
func
):
def
__impl__
(
*
args
,
**
kwargs
):
def
__impl__
(
*
args
,
**
kwargs
):
...
...
python/paddle/distributed/fleet/base/meta_optimizer_factory.py
浏览文件 @
70eb435c
...
@@ -14,6 +14,8 @@
...
@@ -14,6 +14,8 @@
from
..meta_optimizers
import
*
from
..meta_optimizers
import
*
__all__
=
[]
meta_optimizer_names
=
list
(
meta_optimizer_names
=
list
(
filter
(
lambda
name
:
name
.
endswith
(
"Optimizer"
),
dir
()))
filter
(
lambda
name
:
name
.
endswith
(
"Optimizer"
),
dir
()))
...
...
python/paddle/distributed/fleet/base/private_helper_function.py
浏览文件 @
70eb435c
...
@@ -17,6 +17,8 @@ import socket
...
@@ -17,6 +17,8 @@ import socket
from
contextlib
import
closing
from
contextlib
import
closing
from
six
import
string_types
from
six
import
string_types
__all__
=
[]
def
wait_server_ready
(
endpoints
):
def
wait_server_ready
(
endpoints
):
"""
"""
...
...
python/paddle/distributed/fleet/base/role_maker.py
浏览文件 @
70eb435c
...
@@ -22,6 +22,8 @@ import paddle
...
@@ -22,6 +22,8 @@ import paddle
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
from
paddle.distributed.fleet.base.private_helper_function
import
wait_server_ready
from
paddle.distributed.fleet.base.private_helper_function
import
wait_server_ready
__all__
=
[]
class
Role
:
class
Role
:
WORKER
=
1
WORKER
=
1
...
...
python/paddle/distributed/fleet/base/runtime_factory.py
浏览文件 @
70eb435c
...
@@ -15,6 +15,8 @@ from ..runtime.collective_runtime import CollectiveRuntime
...
@@ -15,6 +15,8 @@ from ..runtime.collective_runtime import CollectiveRuntime
from
..runtime.parameter_server_runtime
import
ParameterServerRuntime
from
..runtime.parameter_server_runtime
import
ParameterServerRuntime
from
..runtime.the_one_ps
import
TheOnePSRuntime
from
..runtime.the_one_ps
import
TheOnePSRuntime
__all__
=
[]
class
RuntimeFactory
(
object
):
class
RuntimeFactory
(
object
):
def
__init__
(
self
):
def
__init__
(
self
):
...
...
python/paddle/distributed/fleet/base/strategy_compiler.py
浏览文件 @
70eb435c
...
@@ -12,6 +12,8 @@
...
@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
__all__
=
[]
def
create_graph
(
optimizer_list
):
def
create_graph
(
optimizer_list
):
nsize
=
len
(
optimizer_list
)
nsize
=
len
(
optimizer_list
)
...
...
python/paddle/distributed/fleet/base/util_factory.py
浏览文件 @
70eb435c
...
@@ -27,7 +27,8 @@ from paddle.fluid import core
...
@@ -27,7 +27,8 @@ from paddle.fluid import core
import
subprocess
import
subprocess
import
os
import
os
import
numpy
as
np
import
numpy
as
np
__all__
=
[
'UtilBase'
]
__all__
=
[]
class
UtilFactory
(
object
):
class
UtilFactory
(
object
):
...
...
python/paddle/distributed/fleet/cloud_utils.py
浏览文件 @
70eb435c
...
@@ -16,6 +16,8 @@ import os
...
@@ -16,6 +16,8 @@ import os
import
paddle
import
paddle
from
paddle.distributed.fleet.launch_utils
import
get_cluster
,
logger
from
paddle.distributed.fleet.launch_utils
import
get_cluster
,
logger
__all__
=
[]
def
get_cloud_cluster
(
args_node_ips
,
def
get_cloud_cluster
(
args_node_ips
,
device_mode
,
device_mode
,
...
...
python/paddle/distributed/fleet/data_generator/__init__.py
浏览文件 @
70eb435c
...
@@ -11,4 +11,6 @@
...
@@ -11,4 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
from
.data_generator
import
*
from
.data_generator
import
DataGenerator
# noqa: F401
__all__
=
[]
python/paddle/distributed/fleet/data_generator/data_generator.py
浏览文件 @
70eb435c
...
@@ -15,6 +15,8 @@
...
@@ -15,6 +15,8 @@
import
os
import
os
import
sys
import
sys
__all__
=
[]
class
DataGenerator
(
object
):
class
DataGenerator
(
object
):
"""
"""
...
...
python/paddle/distributed/fleet/dataset/__init__.py
浏览文件 @
70eb435c
...
@@ -11,5 +11,11 @@
...
@@ -11,5 +11,11 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
from
.dataset
import
*
from
.dataset
import
DatasetBase
# noqa: F401
from
.index_dataset
import
*
from
.dataset
import
InMemoryDataset
# noqa: F401
from
.dataset
import
QueueDataset
# noqa: F401
from
.dataset
import
FileInstantDataset
# noqa: F401
from
.dataset
import
BoxPSDataset
# noqa: F401
from
.index_dataset
import
TreeIndex
# noqa: F401
__all__
=
[]
python/paddle/distributed/fleet/dataset/dataset.py
浏览文件 @
70eb435c
...
@@ -18,6 +18,8 @@ from paddle.fluid.proto import data_feed_pb2
...
@@ -18,6 +18,8 @@ from paddle.fluid.proto import data_feed_pb2
from
google.protobuf
import
text_format
from
google.protobuf
import
text_format
import
paddle.fluid.core
as
core
import
paddle.fluid.core
as
core
__all__
=
[]
class
DatasetBase
(
object
):
class
DatasetBase
(
object
):
""" Base dataset class. """
""" Base dataset class. """
...
...
python/paddle/distributed/fleet/dataset/index_dataset.py
浏览文件 @
70eb435c
...
@@ -13,6 +13,8 @@
...
@@ -13,6 +13,8 @@
# limitations under the License.
# limitations under the License.
from
paddle.fluid
import
core
from
paddle.fluid
import
core
__all__
=
[]
class
Index
(
object
):
class
Index
(
object
):
def
__init__
(
self
,
name
):
def
__init__
(
self
,
name
):
...
...
python/paddle/distributed/fleet/launch.py
浏览文件 @
70eb435c
...
@@ -75,6 +75,8 @@ from paddle.distributed.fleet.launch_utils import *
...
@@ -75,6 +75,8 @@ from paddle.distributed.fleet.launch_utils import *
import
paddle.distributed.fleet.cloud_utils
as
cloud_utils
import
paddle.distributed.fleet.cloud_utils
as
cloud_utils
import
paddle.distributed.fleet.ascend_utils
as
ascend_utils
import
paddle.distributed.fleet.ascend_utils
as
ascend_utils
__all__
=
[]
def
_print_arguments
(
args
):
def
_print_arguments
(
args
):
print
(
"----------- Configuration Arguments -----------"
)
print
(
"----------- Configuration Arguments -----------"
)
...
...
python/paddle/distributed/fleet/meta_optimizers/amp_optimizer.py
浏览文件 @
70eb435c
...
@@ -14,6 +14,8 @@
...
@@ -14,6 +14,8 @@
import
paddle.fluid.contrib.mixed_precision
as
mixed_precision
import
paddle.fluid.contrib.mixed_precision
as
mixed_precision
from
.meta_optimizer_base
import
MetaOptimizerBase
from
.meta_optimizer_base
import
MetaOptimizerBase
__all__
=
[]
class
AMPOptimizer
(
MetaOptimizerBase
):
class
AMPOptimizer
(
MetaOptimizerBase
):
def
__init__
(
self
,
optimizer
):
def
__init__
(
self
,
optimizer
):
...
...
python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_optimizer.py
浏览文件 @
70eb435c
...
@@ -24,6 +24,8 @@ from collections import namedtuple
...
@@ -24,6 +24,8 @@ from collections import namedtuple
HcomGroupConfig
=
namedtuple
(
'HcomGroupConfig'
,
[
'name'
,
'nranks'
,
'rank_ids'
])
HcomGroupConfig
=
namedtuple
(
'HcomGroupConfig'
,
[
'name'
,
'nranks'
,
'rank_ids'
])
__all__
=
[]
class
AscendIRParser
(
object
):
class
AscendIRParser
(
object
):
def
__init__
(
self
,
auto_dp
=
False
,
world_rank_size
=
1
):
def
__init__
(
self
,
auto_dp
=
False
,
world_rank_size
=
1
):
...
...
python/paddle/distributed/fleet/meta_optimizers/ascend/ascend_parser.py
浏览文件 @
70eb435c
...
@@ -18,6 +18,8 @@ import numpy as np
...
@@ -18,6 +18,8 @@ import numpy as np
from
paddle.distributed
import
fleet
from
paddle.distributed
import
fleet
from
functools
import
reduce
from
functools
import
reduce
__all__
=
[]
registerd_op
=
{
## forwards
registerd_op
=
{
## forwards
"elementwise_add"
:
"AddParser"
,
"elementwise_add"
:
"AddParser"
,
"matmul"
:
"MatMulParser"
,
"matmul"
:
"MatMulParser"
,
...
...
python/paddle/distributed/fleet/meta_optimizers/common.py
浏览文件 @
70eb435c
...
@@ -19,6 +19,8 @@ import paddle.fluid as fluid
...
@@ -19,6 +19,8 @@ import paddle.fluid as fluid
from
paddle.fluid
import
core
,
unique_name
from
paddle.fluid
import
core
,
unique_name
from
..base.private_helper_function
import
wait_server_ready
from
..base.private_helper_function
import
wait_server_ready
__all__
=
[]
OpRole
=
core
.
op_proto_and_checker_maker
.
OpRole
OpRole
=
core
.
op_proto_and_checker_maker
.
OpRole
OP_ROLE_KEY
=
core
.
op_proto_and_checker_maker
.
kOpRoleAttrName
()
OP_ROLE_KEY
=
core
.
op_proto_and_checker_maker
.
kOpRoleAttrName
()
...
...
python/paddle/distributed/fleet/meta_optimizers/dgc_optimizer.py
浏览文件 @
70eb435c
...
@@ -15,6 +15,8 @@ from paddle.fluid.optimizer import Momentum, DGCMomentumOptimizer
...
@@ -15,6 +15,8 @@ from paddle.fluid.optimizer import Momentum, DGCMomentumOptimizer
from
.meta_optimizer_base
import
MetaOptimizerBase
from
.meta_optimizer_base
import
MetaOptimizerBase
import
logging
import
logging
__all__
=
[]
class
DGCOptimizer
(
MetaOptimizerBase
):
class
DGCOptimizer
(
MetaOptimizerBase
):
def
__init__
(
self
,
optimizer
):
def
__init__
(
self
,
optimizer
):
...
...
python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/__init__.py
浏览文件 @
70eb435c
...
@@ -12,3 +12,5 @@
...
@@ -12,3 +12,5 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
from
.hybrid_parallel_optimizer
import
HybridParallelOptimizer
from
.hybrid_parallel_optimizer
import
HybridParallelOptimizer
from
.hybrid_parallel_gradscaler
import
HybridParallelGradScaler
from
.hybrid_parallel_gradscaler
import
HybridParallelGradScaler
__all__
=
[]
python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_gradscaler.py
浏览文件 @
70eb435c
...
@@ -23,6 +23,8 @@ import types
...
@@ -23,6 +23,8 @@ import types
from
paddle.fluid
import
core
from
paddle.fluid
import
core
import
paddle
import
paddle
__all__
=
[]
class
HybridParallelGradScaler
:
class
HybridParallelGradScaler
:
def
__init__
(
self
,
scaler
,
hcg
):
def
__init__
(
self
,
scaler
,
hcg
):
...
...
python/paddle/distributed/fleet/meta_optimizers/dygraph_optimizer/hybrid_parallel_optimizer.py
浏览文件 @
70eb435c
...
@@ -23,6 +23,8 @@ from paddle.fluid import framework
...
@@ -23,6 +23,8 @@ from paddle.fluid import framework
from
paddle.fluid.framework
import
Variable
from
paddle.fluid.framework
import
Variable
from
...utils.log_util
import
logger
from
...utils.log_util
import
logger
__all__
=
[]
class
HybridParallelClipGrad
:
class
HybridParallelClipGrad
:
def
__init__
(
self
,
clip
,
hcg
):
def
__init__
(
self
,
clip
,
hcg
):
...
...
python/paddle/distributed/fleet/meta_optimizers/fp16_allreduce_optimizer.py
浏览文件 @
70eb435c
...
@@ -14,6 +14,8 @@
...
@@ -14,6 +14,8 @@
from
paddle.fluid
import
core
,
framework
,
unique_name
from
paddle.fluid
import
core
,
framework
,
unique_name
from
.meta_optimizer_base
import
MetaOptimizerBase
from
.meta_optimizer_base
import
MetaOptimizerBase
__all__
=
[]
class
FP16AllReduceOptimizer
(
MetaOptimizerBase
):
class
FP16AllReduceOptimizer
(
MetaOptimizerBase
):
def
__init__
(
self
,
optimizer
):
def
__init__
(
self
,
optimizer
):
...
...
python/paddle/distributed/fleet/meta_optimizers/gradient_merge_optimizer.py
浏览文件 @
70eb435c
...
@@ -14,6 +14,8 @@
...
@@ -14,6 +14,8 @@
from
paddle.fluid.optimizer
import
GradientMergeOptimizer
as
GM
from
paddle.fluid.optimizer
import
GradientMergeOptimizer
as
GM
from
.meta_optimizer_base
import
MetaOptimizerBase
from
.meta_optimizer_base
import
MetaOptimizerBase
__all__
=
[]
class
GradientMergeOptimizer
(
MetaOptimizerBase
):
class
GradientMergeOptimizer
(
MetaOptimizerBase
):
def
__init__
(
self
,
optimizer
):
def
__init__
(
self
,
optimizer
):
...
...
python/paddle/distributed/fleet/meta_optimizers/graph_execution_optimizer.py
浏览文件 @
70eb435c
...
@@ -19,6 +19,8 @@ from .meta_optimizer_base import MetaOptimizerBase
...
@@ -19,6 +19,8 @@ from .meta_optimizer_base import MetaOptimizerBase
from
..base.private_helper_function
import
wait_server_ready
from
..base.private_helper_function
import
wait_server_ready
import
logging
import
logging
__all__
=
[]
class
GraphExecutionOptimizer
(
MetaOptimizerBase
):
class
GraphExecutionOptimizer
(
MetaOptimizerBase
):
def
__init__
(
self
,
optimizer
):
def
__init__
(
self
,
optimizer
):
...
...
python/paddle/distributed/fleet/meta_optimizers/lamb_optimizer.py
浏览文件 @
70eb435c
...
@@ -16,6 +16,8 @@ from paddle.fluid.optimizer import LambOptimizer as LAMB
...
@@ -16,6 +16,8 @@ from paddle.fluid.optimizer import LambOptimizer as LAMB
from
.meta_optimizer_base
import
MetaOptimizerBase
from
.meta_optimizer_base
import
MetaOptimizerBase
import
logging
import
logging
__all__
=
[]
class
LambOptimizer
(
MetaOptimizerBase
):
class
LambOptimizer
(
MetaOptimizerBase
):
def
__init__
(
self
,
optimizer
):
def
__init__
(
self
,
optimizer
):
...
...
python/paddle/distributed/fleet/meta_optimizers/lars_optimizer.py
浏览文件 @
70eb435c
...
@@ -15,6 +15,8 @@ from paddle.fluid.optimizer import Momentum, LarsMomentumOptimizer
...
@@ -15,6 +15,8 @@ from paddle.fluid.optimizer import Momentum, LarsMomentumOptimizer
from
.meta_optimizer_base
import
MetaOptimizerBase
from
.meta_optimizer_base
import
MetaOptimizerBase
import
logging
import
logging
__all__
=
[]
class
LarsOptimizer
(
MetaOptimizerBase
):
class
LarsOptimizer
(
MetaOptimizerBase
):
def
__init__
(
self
,
optimizer
):
def
__init__
(
self
,
optimizer
):
...
...
python/paddle/distributed/fleet/meta_optimizers/localsgd_optimizer.py
浏览文件 @
70eb435c
...
@@ -19,6 +19,8 @@ from paddle.fluid import program_guard, layers, default_main_program
...
@@ -19,6 +19,8 @@ from paddle.fluid import program_guard, layers, default_main_program
from
.meta_optimizer_base
import
MetaOptimizerBase
from
.meta_optimizer_base
import
MetaOptimizerBase
from
.common
import
OpRole
,
OP_ROLE_KEY
,
CollectiveHelper
,
is_update_op
from
.common
import
OpRole
,
OP_ROLE_KEY
,
CollectiveHelper
,
is_update_op
__all__
=
[]
class
LocalSGDOptimizer
(
MetaOptimizerBase
):
class
LocalSGDOptimizer
(
MetaOptimizerBase
):
def
__init__
(
self
,
optimizer
):
def
__init__
(
self
,
optimizer
):
...
...
python/paddle/distributed/fleet/meta_optimizers/meta_optimizer_base.py
浏览文件 @
70eb435c
...
@@ -14,6 +14,8 @@
...
@@ -14,6 +14,8 @@
from
paddle.fluid.optimizer
import
Optimizer
from
paddle.fluid.optimizer
import
Optimizer
__all__
=
[]
class
MetaOptimizerBase
(
Optimizer
):
class
MetaOptimizerBase
(
Optimizer
):
def
__init__
(
self
,
optimizer
):
def
__init__
(
self
,
optimizer
):
...
...
python/paddle/distributed/fleet/meta_optimizers/parameter_server_graph_optimizer.py
浏览文件 @
70eb435c
...
@@ -15,6 +15,8 @@ from paddle import fluid
...
@@ -15,6 +15,8 @@ from paddle import fluid
from
paddle.fluid
import
compiler
from
paddle.fluid
import
compiler
from
.parameter_server_optimizer
import
ParameterServerOptimizer
from
.parameter_server_optimizer
import
ParameterServerOptimizer
__all__
=
[]
class
ParameterServerGraphOptimizer
(
ParameterServerOptimizer
):
class
ParameterServerGraphOptimizer
(
ParameterServerOptimizer
):
def
__init__
(
self
,
optimizer
):
def
__init__
(
self
,
optimizer
):
...
...
python/paddle/distributed/fleet/meta_optimizers/parameter_server_optimizer.py
浏览文件 @
70eb435c
...
@@ -20,6 +20,8 @@ import os
...
@@ -20,6 +20,8 @@ import os
import
platform
import
platform
from
..base.private_helper_function
import
wait_server_ready
from
..base.private_helper_function
import
wait_server_ready
__all__
=
[]
class
ParameterServerOptimizer
(
MetaOptimizerBase
):
class
ParameterServerOptimizer
(
MetaOptimizerBase
):
def
__init__
(
self
,
optimizer
):
def
__init__
(
self
,
optimizer
):
...
...
python/paddle/distributed/fleet/meta_optimizers/pipeline_optimizer.py
浏览文件 @
70eb435c
...
@@ -22,6 +22,8 @@ from paddle.fluid.optimizer import PipelineOptimizer as PO
...
@@ -22,6 +22,8 @@ from paddle.fluid.optimizer import PipelineOptimizer as PO
from
.meta_optimizer_base
import
MetaOptimizerBase
from
.meta_optimizer_base
import
MetaOptimizerBase
from
.common
import
OpRole
,
OP_ROLE_KEY
,
OP_ROLE_VAR_KEY
,
CollectiveHelper
,
is_loss_grad_op
,
is_backward_op
,
is_optimizer_op
from
.common
import
OpRole
,
OP_ROLE_KEY
,
OP_ROLE_VAR_KEY
,
CollectiveHelper
,
is_loss_grad_op
,
is_backward_op
,
is_optimizer_op
__all__
=
[]
class
PipelineOptimizer
(
MetaOptimizerBase
):
class
PipelineOptimizer
(
MetaOptimizerBase
):
def
__init__
(
self
,
optimizer
):
def
__init__
(
self
,
optimizer
):
...
...
python/paddle/distributed/fleet/meta_optimizers/recompute_optimizer.py
浏览文件 @
70eb435c
...
@@ -14,6 +14,8 @@
...
@@ -14,6 +14,8 @@
from
paddle.fluid.optimizer
import
RecomputeOptimizer
as
RO
from
paddle.fluid.optimizer
import
RecomputeOptimizer
as
RO
from
.meta_optimizer_base
import
MetaOptimizerBase
from
.meta_optimizer_base
import
MetaOptimizerBase
__all__
=
[]
class
RecomputeOptimizer
(
MetaOptimizerBase
):
class
RecomputeOptimizer
(
MetaOptimizerBase
):
def
__init__
(
self
,
optimizer
):
def
__init__
(
self
,
optimizer
):
...
...
python/paddle/distributed/fleet/meta_optimizers/sharding/fp16_helper.py
浏览文件 @
70eb435c
...
@@ -17,6 +17,8 @@ from paddle.distributed.fleet.meta_optimizers.sharding.utils import *
...
@@ -17,6 +17,8 @@ from paddle.distributed.fleet.meta_optimizers.sharding.utils import *
from
paddle.fluid
import
core
from
paddle.fluid
import
core
__all__
=
[]
class
FP16Utils
(
object
):
class
FP16Utils
(
object
):
def
__init__
(
self
):
def
__init__
(
self
):
...
...
python/paddle/distributed/fleet/meta_optimizers/sharding/gradient_clip_helper.py
浏览文件 @
70eb435c
...
@@ -14,6 +14,8 @@
...
@@ -14,6 +14,8 @@
from
paddle.distributed.fleet.meta_optimizers.common
import
OP_ROLE_KEY
,
OpRole
from
paddle.distributed.fleet.meta_optimizers.common
import
OP_ROLE_KEY
,
OpRole
__all__
=
[]
class
GradientClipHelper
(
object
):
class
GradientClipHelper
(
object
):
def
__init__
(
self
,
mp_ring_id
):
def
__init__
(
self
,
mp_ring_id
):
...
...
python/paddle/distributed/fleet/meta_optimizers/sharding/offload_helper.py
浏览文件 @
70eb435c
...
@@ -15,6 +15,8 @@
...
@@ -15,6 +15,8 @@
from
..common
import
is_optimizer_op
,
OP_ROLE_KEY
,
OpRole
from
..common
import
is_optimizer_op
,
OP_ROLE_KEY
,
OpRole
from
paddle.fluid
import
core
,
unique_name
from
paddle.fluid
import
core
,
unique_name
__all__
=
[]
class
OffloadHelper
(
object
):
class
OffloadHelper
(
object
):
cpu_place_type
=
0
cpu_place_type
=
0
...
...
python/paddle/distributed/fleet/meta_optimizers/sharding/prune.py
浏览文件 @
70eb435c
...
@@ -12,6 +12,8 @@
...
@@ -12,6 +12,8 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
__all__
=
[]
class
ProgramDeps
(
object
):
class
ProgramDeps
(
object
):
def
__init__
(
self
,
block
,
start_vars
,
end_vars
):
def
__init__
(
self
,
block
,
start_vars
,
end_vars
):
...
...
python/paddle/distributed/fleet/meta_optimizers/sharding/shard.py
浏览文件 @
70eb435c
...
@@ -16,6 +16,8 @@ from paddle.distributed.fleet.meta_optimizers.common import is_optimizer_op
...
@@ -16,6 +16,8 @@ from paddle.distributed.fleet.meta_optimizers.common import is_optimizer_op
from
paddle.distributed.fleet.meta_optimizers.sharding.utils
import
*
from
paddle.distributed.fleet.meta_optimizers.sharding.utils
import
*
from
paddle.distributed.fleet.meta_optimizers.sharding.fp16_helper
import
FP16Utils
from
paddle.distributed.fleet.meta_optimizers.sharding.fp16_helper
import
FP16Utils
__all__
=
[]
class
Shard
(
object
):
class
Shard
(
object
):
def
__init__
(
self
,
):
def
__init__
(
self
,
):
...
...
python/paddle/distributed/fleet/meta_optimizers/sharding/weight_decay_helper.py
浏览文件 @
70eb435c
...
@@ -14,6 +14,8 @@
...
@@ -14,6 +14,8 @@
from
paddle.distributed.fleet.meta_optimizers.common
import
OP_ROLE_VAR_KEY
from
paddle.distributed.fleet.meta_optimizers.common
import
OP_ROLE_VAR_KEY
__all__
=
[]
class
WeightDecayHelper
(
object
):
class
WeightDecayHelper
(
object
):
def
__init__
(
self
):
def
__init__
(
self
):
...
...
python/paddle/distributed/fleet/meta_optimizers/sharding_optimizer.py
浏览文件 @
70eb435c
...
@@ -37,7 +37,7 @@ ch.setFormatter(formatter)
...
@@ -37,7 +37,7 @@ ch.setFormatter(formatter)
logger
.
addHandler
(
ch
)
logger
.
addHandler
(
ch
)
from
functools
import
reduce
from
functools
import
reduce
__all__
=
[
"ShardingOptimizer"
]
__all__
=
[]
class
ShardingOptimizer
(
MetaOptimizerBase
):
class
ShardingOptimizer
(
MetaOptimizerBase
):
...
...
python/paddle/distributed/fleet/meta_optimizers/tensor_parallel_optimizer.py
浏览文件 @
70eb435c
...
@@ -19,6 +19,8 @@ from paddle.fluid import core, unique_name
...
@@ -19,6 +19,8 @@ from paddle.fluid import core, unique_name
from
.meta_optimizer_base
import
MetaOptimizerBase
from
.meta_optimizer_base
import
MetaOptimizerBase
from
.common
import
OpRole
,
OP_ROLE_KEY
,
OP_ROLE_VAR_KEY
,
CollectiveHelper
,
is_update_op
,
is_loss_grad_op
,
is_backward_op
,
is_optimizer_op
from
.common
import
OpRole
,
OP_ROLE_KEY
,
OP_ROLE_VAR_KEY
,
CollectiveHelper
,
is_update_op
,
is_loss_grad_op
,
is_backward_op
,
is_optimizer_op
__all__
=
[]
class
TensorParallelOptimizer
(
MetaOptimizerBase
):
class
TensorParallelOptimizer
(
MetaOptimizerBase
):
def
__init__
(
self
,
optimizer
):
def
__init__
(
self
,
optimizer
):
...
...
python/paddle/distributed/fleet/meta_parallel/__init__.py
浏览文件 @
70eb435c
...
@@ -12,6 +12,15 @@
...
@@ -12,6 +12,15 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
from
.parallel_layers
import
*
from
.parallel_layers
import
VocabParallelEmbedding
# noqa: F401
from
.model_parallel
import
ModelParallel
from
.parallel_layers
import
ColumnParallelLinear
# noqa: F401
from
.pipeline_parallel
import
PipelineParallel
from
.parallel_layers
import
RowParallelLinear
# noqa: F401
from
.parallel_layers
import
LayerDesc
# noqa: F401
from
.parallel_layers
import
PipelineLayer
# noqa: F401
from
.parallel_layers
import
RNGStatesTracker
# noqa: F401
from
.parallel_layers
import
model_parallel_random_seed
# noqa: F401
from
.parallel_layers
import
get_rng_state_tracker
# noqa: F401
from
.model_parallel
import
ModelParallel
# noqa: F401
from
.pipeline_parallel
import
PipelineParallel
# noqa: F401
__all__
=
[]
python/paddle/distributed/fleet/meta_parallel/meta_parallel_base.py
浏览文件 @
70eb435c
...
@@ -14,6 +14,8 @@
...
@@ -14,6 +14,8 @@
from
paddle.fluid.dygraph.layers
import
Layer
from
paddle.fluid.dygraph.layers
import
Layer
__all__
=
[]
class
MetaParallelBase
(
Layer
):
class
MetaParallelBase
(
Layer
):
def
__init__
(
self
,
layers
,
hcg
,
strategy
):
def
__init__
(
self
,
layers
,
hcg
,
strategy
):
...
...
python/paddle/distributed/fleet/meta_parallel/model_parallel.py
浏览文件 @
70eb435c
...
@@ -14,9 +14,13 @@
...
@@ -14,9 +14,13 @@
from
paddle.fluid.dygraph.layers
import
Layer
from
paddle.fluid.dygraph.layers
import
Layer
from
.meta_parallel_base
import
MetaParallelBase
from
.meta_parallel_base
import
MetaParallelBase
from
..utils.hybrid_parallel_util
import
*
from
..utils.hybrid_parallel_util
import
broadcast_dp_parameters
from
..utils.hybrid_parallel_util
import
broadcast_input_data
from
..utils.hybrid_parallel_util
import
broadcast_mp_parameters
from
..utils.log_util
import
logger
from
..utils.log_util
import
logger
__all__
=
[]
class
ModelParallel
(
MetaParallelBase
):
class
ModelParallel
(
MetaParallelBase
):
def
__init__
(
self
,
layers
,
hcg
,
**
kwargs
):
def
__init__
(
self
,
layers
,
hcg
,
**
kwargs
):
...
...
python/paddle/distributed/fleet/meta_parallel/parallel_layers/__init__.py
浏览文件 @
70eb435c
...
@@ -12,6 +12,13 @@
...
@@ -12,6 +12,13 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
from
.mp_layers
import
*
from
.mp_layers
import
VocabParallelEmbedding
# noqa: F401
from
.pp_layers
import
*
from
.mp_layers
import
ColumnParallelLinear
# noqa: F401
from
.random
import
*
from
.mp_layers
import
RowParallelLinear
# noqa: F401
from
.pp_layers
import
LayerDesc
# noqa: F401
from
.pp_layers
import
PipelineLayer
# noqa: F401
from
.random
import
RNGStatesTracker
# noqa: F401
from
.random
import
model_parallel_random_seed
# noqa: F401
from
.random
import
get_rng_state_tracker
# noqa: F401
__all__
=
[]
python/paddle/distributed/fleet/meta_parallel/parallel_layers/mp_layers.py
浏览文件 @
70eb435c
...
@@ -19,9 +19,7 @@ from paddle.nn import functional as F
...
@@ -19,9 +19,7 @@ from paddle.nn import functional as F
from
paddle
import
framework
from
paddle
import
framework
from
...base
import
topology
as
tp
from
...base
import
topology
as
tp
__all__
=
[
__all__
=
[]
'VocabParallelEmbedding'
,
'ColumnParallelLinear'
,
'RowParallelLinear'
]
# Follow this paper to achieve the file:
# Follow this paper to achieve the file:
# Shoeybi M, Patwary M, Puri R, et al. Megatron-lm: Training multi-billion parameter
# Shoeybi M, Patwary M, Puri R, et al. Megatron-lm: Training multi-billion parameter
...
...
python/paddle/distributed/fleet/meta_parallel/parallel_layers/pp_layers.py
浏览文件 @
70eb435c
...
@@ -16,7 +16,7 @@ import paddle
...
@@ -16,7 +16,7 @@ import paddle
from
paddle.fluid.dygraph.layers
import
Layer
from
paddle.fluid.dygraph.layers
import
Layer
from
...utils.log_util
import
logger
,
layer_to_str
from
...utils.log_util
import
logger
,
layer_to_str
__all__
=
[
'LayerDesc'
,
'PipelineLayer'
]
__all__
=
[]
class
SegmentLayers
(
object
):
class
SegmentLayers
(
object
):
...
...
python/paddle/distributed/fleet/meta_parallel/parallel_layers/random.py
浏览文件 @
70eb435c
...
@@ -14,9 +14,8 @@
...
@@ -14,9 +14,8 @@
import
paddle
import
paddle
import
contextlib
import
contextlib
__all__
=
[
'RNGStatesTracker'
,
'model_parallel_random_seed'
,
'get_rng_state_tracker'
__all__
=
[]
]
MODEL_PARALLEL_RNG
=
'model_parallel_rng'
MODEL_PARALLEL_RNG
=
'model_parallel_rng'
...
...
python/paddle/distributed/fleet/meta_parallel/pipeline_parallel.py
浏览文件 @
70eb435c
...
@@ -25,9 +25,20 @@ from .meta_parallel_base import MetaParallelBase
...
@@ -25,9 +25,20 @@ from .meta_parallel_base import MetaParallelBase
from
.pp_utils.utils
import
get_tensor_bytes
,
is_float_tensor
from
.pp_utils.utils
import
get_tensor_bytes
,
is_float_tensor
from
.pp_utils
import
utils
from
.pp_utils
import
utils
from
.parallel_layers.pp_layers
import
PipelineLayer
from
.parallel_layers.pp_layers
import
PipelineLayer
from
..utils.hybrid_parallel_util
import
*
from
..utils.hybrid_parallel_util
import
broadcast_mp_parameters
from
..utils.hybrid_parallel_util
import
broadcast_dp_parameters
from
..utils.hybrid_parallel_util
import
fused_allreduce_gradients
from
..utils.log_util
import
logger
from
..utils.log_util
import
logger
__all__
=
[]
FLOAT_TYPES
=
[
paddle
.
float16
,
paddle
.
float32
,
paddle
.
float64
,
]
class
PipelineParallel
(
MetaParallelBase
):
class
PipelineParallel
(
MetaParallelBase
):
def
__init__
(
self
,
layers
,
hcg
,
strategy
):
def
__init__
(
self
,
layers
,
hcg
,
strategy
):
...
...
python/paddle/distributed/fleet/meta_parallel/pp_utils/__init__.py
浏览文件 @
70eb435c
...
@@ -12,4 +12,6 @@
...
@@ -12,4 +12,6 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
from
.utils
import
*
from
.utils
import
get_tensor_bytes
__all__
=
[]
python/paddle/distributed/fleet/meta_parallel/pp_utils/utils.py
浏览文件 @
70eb435c
...
@@ -16,10 +16,7 @@ import abc
...
@@ -16,10 +16,7 @@ import abc
import
paddle
import
paddle
from
...utils
import
hybrid_parallel_util
as
hp_util
from
...utils
import
hybrid_parallel_util
as
hp_util
__all__
=
[
__all__
=
[]
'get_tensor_bytes'
,
'is_float_tensor'
,
]
FLOAT_TYPES
=
[
FLOAT_TYPES
=
[
paddle
.
float16
,
paddle
.
float16
,
...
...
python/paddle/distributed/fleet/metrics/__init__.py
浏览文件 @
70eb435c
...
@@ -12,15 +12,13 @@
...
@@ -12,15 +12,13 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
from
.metric
import
*
from
.metric
import
acc
# noqa: F401
from
.metric
import
auc
# noqa: F401
from
.metric
import
mae
# noqa: F401
from
.metric
import
max
# noqa: F401
from
.metric
import
min
# noqa: F401
from
.metric
import
mse
# noqa: F401
from
.metric
import
rmse
# noqa: F401
from
.metric
import
sum
# noqa: F401
__all__
=
[
__all__
=
[]
"sum"
,
"max"
,
"min"
,
"auc"
,
"mae"
,
"rmse"
,
"mse"
,
"acc"
,
]
python/paddle/distributed/fleet/metrics/metric.py
浏览文件 @
70eb435c
...
@@ -18,6 +18,8 @@ import numpy as np
...
@@ -18,6 +18,8 @@ import numpy as np
from
paddle.static
import
Variable
from
paddle.static
import
Variable
import
paddle
import
paddle
__all__
=
[]
def
sum
(
input
,
scope
=
None
,
util
=
None
):
def
sum
(
input
,
scope
=
None
,
util
=
None
):
"""
"""
...
...
python/paddle/distributed/fleet/runtime/__init__.py
浏览文件 @
70eb435c
...
@@ -15,3 +15,5 @@
...
@@ -15,3 +15,5 @@
from
.collective_runtime
import
CollectiveRuntime
from
.collective_runtime
import
CollectiveRuntime
from
.parameter_server_runtime
import
ParameterServerRuntime
from
.parameter_server_runtime
import
ParameterServerRuntime
from
.the_one_ps
import
TheOnePSRuntime
from
.the_one_ps
import
TheOnePSRuntime
__all__
=
[]
python/paddle/distributed/fleet/runtime/collective_runtime.py
浏览文件 @
70eb435c
...
@@ -15,6 +15,8 @@
...
@@ -15,6 +15,8 @@
from
.runtime_base
import
RuntimeBase
from
.runtime_base
import
RuntimeBase
import
logging
import
logging
__all__
=
[]
class
CollectiveRuntime
(
RuntimeBase
):
class
CollectiveRuntime
(
RuntimeBase
):
def
__init__
(
self
):
def
__init__
(
self
):
...
...
python/paddle/distributed/fleet/runtime/parameter_server_runtime.py
浏览文件 @
70eb435c
...
@@ -26,6 +26,8 @@ from paddle.fluid.framework import Variable, Parameter
...
@@ -26,6 +26,8 @@ from paddle.fluid.framework import Variable, Parameter
from
.runtime_base
import
RuntimeBase
from
.runtime_base
import
RuntimeBase
from
..base.private_helper_function
import
wait_server_ready
from
..base.private_helper_function
import
wait_server_ready
__all__
=
[]
class
ParameterServerRuntime
(
RuntimeBase
):
class
ParameterServerRuntime
(
RuntimeBase
):
def
__init__
(
self
):
def
__init__
(
self
):
...
...
python/paddle/distributed/fleet/runtime/the_one_ps.py
浏览文件 @
70eb435c
...
@@ -25,6 +25,8 @@ from paddle.fluid.framework import Variable, Parameter
...
@@ -25,6 +25,8 @@ from paddle.fluid.framework import Variable, Parameter
from
.runtime_base
import
RuntimeBase
from
.runtime_base
import
RuntimeBase
from
..base.private_helper_function
import
wait_server_ready
from
..base.private_helper_function
import
wait_server_ready
__all__
=
[]
def
conv_indent
(
indent
):
def
conv_indent
(
indent
):
return
""
.
join
([
" "
]
*
indent
)
return
""
.
join
([
" "
]
*
indent
)
...
...
python/paddle/distributed/fleet/utils/__init__.py
浏览文件 @
70eb435c
...
@@ -12,6 +12,14 @@
...
@@ -12,6 +12,14 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
from
.fs
import
LocalFS
,
HDFSClient
from
.fs
import
LocalFS
# noqa: F401
from
.ps_util
import
DistributedInfer
from
.fs
import
HDFSClient
# noqa: F401
from
.recompute
import
recompute
from
.ps_util
import
DistributedInfer
# noqa: F401
from
.recompute
import
recompute
# noqa: F401
from
.
import
log_util
# noqa: F401
from
.
import
hybrid_parallel_util
# noqa: F401
__all__
=
[
#noqa
"LocalFS"
,
"recompute"
,
"DistributedInfer"
,
"HDFSClient"
]
python/paddle/distributed/fleet/utils/fs.py
浏览文件 @
70eb435c
...
@@ -31,7 +31,7 @@ import functools
...
@@ -31,7 +31,7 @@ import functools
import
shutil
import
shutil
__all__
=
[
'LocalFS'
,
'HDFSClient'
]
__all__
=
[]
class
ExecuteError
(
Exception
):
class
ExecuteError
(
Exception
):
...
...
python/paddle/distributed/fleet/utils/http_server.py
浏览文件 @
70eb435c
...
@@ -28,6 +28,8 @@ import time
...
@@ -28,6 +28,8 @@ import time
import
threading
import
threading
import
socket
import
socket
__all__
=
[]
def
get_logger
(
name
,
level
,
fmt
):
def
get_logger
(
name
,
level
,
fmt
):
logger
=
logging
.
getLogger
(
name
)
logger
=
logging
.
getLogger
(
name
)
...
...
python/paddle/distributed/fleet/utils/hybrid_parallel_util.py
浏览文件 @
70eb435c
...
@@ -23,6 +23,8 @@ from paddle.fluid.dygraph.parallel import _split_tensors, sync_params_buffers, b
...
@@ -23,6 +23,8 @@ from paddle.fluid.dygraph.parallel import _split_tensors, sync_params_buffers, b
from
collections
import
OrderedDict
from
collections
import
OrderedDict
from
.log_util
import
logger
from
.log_util
import
logger
__all__
=
[]
def
_apply_collective_grads
(
parameters
,
comm_group
):
def
_apply_collective_grads
(
parameters
,
comm_group
):
grad_var_set
=
set
()
grad_var_set
=
set
()
...
...
python/paddle/distributed/fleet/utils/log_util.py
浏览文件 @
70eb435c
...
@@ -15,6 +15,8 @@
...
@@ -15,6 +15,8 @@
import
logging
import
logging
import
sys
import
sys
__all__
=
[]
class
LoggerFactory
:
class
LoggerFactory
:
@
staticmethod
@
staticmethod
...
...
python/paddle/distributed/fleet/utils/ps_util.py
浏览文件 @
70eb435c
...
@@ -18,6 +18,8 @@ import os
...
@@ -18,6 +18,8 @@ import os
import
paddle
import
paddle
import
warnings
import
warnings
__all__
=
[]
class
DistributedInfer
:
class
DistributedInfer
:
"""
"""
...
...
python/paddle/distributed/fleet/utils/recompute.py
浏览文件 @
70eb435c
...
@@ -26,6 +26,8 @@ ch = logging.StreamHandler()
...
@@ -26,6 +26,8 @@ ch = logging.StreamHandler()
ch
.
setFormatter
(
formatter
)
ch
.
setFormatter
(
formatter
)
logger
.
addHandler
(
ch
)
logger
.
addHandler
(
ch
)
__all__
=
[]
def
detach_variable
(
inputs
):
def
detach_variable
(
inputs
):
out
=
[]
out
=
[]
...
...
python/paddle/distributed/launch.py
浏览文件 @
70eb435c
...
@@ -14,3 +14,5 @@
...
@@ -14,3 +14,5 @@
from
paddle.distributed.fleet
import
launch
from
paddle.distributed.fleet
import
launch
launch
.
launch
()
launch
.
launch
()
__all__
=
[]
python/paddle/distributed/parallel.py
浏览文件 @
70eb435c
...
@@ -15,7 +15,8 @@
...
@@ -15,7 +15,8 @@
import
os
import
os
import
six
import
six
import
warnings
import
warnings
from
multiprocessing
import
Process
,
Manager
from
multiprocessing
import
Process
# noqa: F401
from
multiprocessing
import
Manager
# noqa: F401
import
time
import
time
import
sys
import
sys
...
@@ -26,9 +27,11 @@ from paddle.fluid import core
...
@@ -26,9 +27,11 @@ from paddle.fluid import core
from
paddle.fluid.framework
import
_set_expected_place
from
paddle.fluid.framework
import
_set_expected_place
from
paddle.fluid.dygraph
import
parallel_helper
from
paddle.fluid.dygraph
import
parallel_helper
from
paddle.fluid.dygraph.parallel
import
ParallelEnv
from
paddle.fluid.dygraph.parallel
import
ParallelEnv
from
paddle.distributed.fleet.base.private_helper_function
import
wait_server_ready
from
paddle.distributed.fleet.base.private_helper_function
import
wait_server_ready
# noqa: F401
__all__
=
[
"init_parallel_env"
]
__all__
=
[
#noqa
"init_parallel_env"
]
ParallelStrategy
=
core
.
ParallelStrategy
ParallelStrategy
=
core
.
ParallelStrategy
...
...
python/paddle/distributed/spawn.py
浏览文件 @
70eb435c
...
@@ -21,7 +21,9 @@ import six
...
@@ -21,7 +21,9 @@ import six
import
sys
import
sys
import
warnings
import
warnings
from
paddle.distributed.utils
import
_print_arguments
,
_prepare_trainer_env
,
get_host_name_ip
from
paddle.distributed.utils
import
_print_arguments
from
paddle.distributed.utils
import
_prepare_trainer_env
from
paddle.distributed.utils
import
get_host_name_ip
from
paddle.distributed.cloud_utils
import
get_cluster_and_pod
from
paddle.distributed.cloud_utils
import
get_cluster_and_pod
from
paddle.distributed.fleet.cloud_utils
import
use_paddlecloud
from
paddle.distributed.fleet.cloud_utils
import
use_paddlecloud
from
paddle.device
import
get_device
from
paddle.device
import
get_device
...
@@ -30,6 +32,8 @@ from paddle.device import get_device
...
@@ -30,6 +32,8 @@ from paddle.device import get_device
from
paddle.fluid
import
core
from
paddle.fluid
import
core
from
paddle.fluid.framework
import
_cpu_num
,
set_flags
from
paddle.fluid.framework
import
_cpu_num
,
set_flags
__all__
=
[]
class
ParallelEnvArgs
(
object
):
class
ParallelEnvArgs
(
object
):
def
__init__
(
self
):
def
__init__
(
self
):
...
...
python/paddle/distributed/utils.py
浏览文件 @
70eb435c
...
@@ -26,6 +26,24 @@ from contextlib import closing
...
@@ -26,6 +26,24 @@ from contextlib import closing
import
socket
import
socket
from
paddle.fluid
import
core
from
paddle.fluid
import
core
__all__
=
[
#noqa
'get_host_name_ip'
,
'Trainer'
,
'get_cluster'
,
'start_local_trainers'
,
'watch_local_trainers'
,
'find_free_ports'
,
'JobServer'
,
'Cluster'
,
'Pod'
,
'Hdfs'
,
'add_arguments'
,
'terminate_local_procs'
,
'TrainerProc'
,
'get_logger'
,
'pull_worker_log'
]
logger
=
logging
.
getLogger
(
"root"
)
logger
=
logging
.
getLogger
(
"root"
)
logger
.
propagate
=
False
logger
.
propagate
=
False
...
...
python/paddle/nn/__init__.py
浏览文件 @
70eb435c
...
@@ -203,7 +203,7 @@ __all__ = [ #noqa
...
@@ -203,7 +203,7 @@ __all__ = [ #noqa
'Dropout3D'
,
'Dropout3D'
,
'Bilinear'
,
'Bilinear'
,
'AlphaDropout'
,
'AlphaDropout'
,
'Unfold'
'Unfold'
,
'RNNCellBase'
,
'RNNCellBase'
,
'SimpleRNNCell'
,
'SimpleRNNCell'
,
'LSTMCell'
,
'LSTMCell'
,
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录