Commit 4d094b0c (unverified)
Authored Aug 07, 2023 by co63oc; committed by GitHub on Aug 07, 2023

Fix typos (#56008)

Parent: c1913a5f
Showing 41 changed files with 68 additions and 68 deletions (+68 -68)
test/collective/fleet/hybrid_parallel_mp_amp.py  +2 -2
test/collective/fleet/hybrid_parallel_mp_bf16.py  +2 -2
test/collective/fleet/hybrid_parallel_mp_broadcast_obj.py  +2 -2
test/collective/fleet/hybrid_parallel_mp_clip_grad.py  +2 -2
test/collective/fleet/hybrid_parallel_mp_fp16.py  +2 -2
test/collective/fleet/hybrid_parallel_mp_layers.py  +1 -1
test/collective/fleet/hybrid_parallel_mp_model.py  +3 -3
test/collective/fleet/hybrid_parallel_mp_model_with_sequence_parallel.py  +3 -3
test/collective/fleet/hybrid_parallel_mp_random.py  +1 -1
test/collective/fleet/hybrid_parallel_pp_amp.py  +1 -1
test/collective/fleet/hybrid_parallel_pp_bf16.py  +1 -1
test/collective/fleet/hybrid_parallel_pp_clip_grad.py  +3 -3
test/collective/fleet/hybrid_parallel_pp_embedding.py  +1 -1
test/collective/fleet/hybrid_parallel_pp_fp16.py  +1 -1
test/collective/fleet/hybrid_parallel_pp_recompute.py  +1 -1
test/collective/fleet/hybrid_parallel_pp_save_load.py  +1 -1
test/collective/fleet/hybrid_parallel_pp_save_load_with_virtual_stage.py  +1 -1
test/collective/fleet/hybrid_parallel_pp_transformer.py  +1 -1
test/collective/fleet/hybrid_parallel_pp_transformer_save.py  +1 -1
test/collective/fleet/hybrid_parallel_pp_transformer_save_with_virtual_stage.py  +1 -1
test/collective/fleet/hybrid_parallel_pp_transformer_unbalanced_data.py  +2 -2
test/collective/fleet/hybrid_parallel_pp_transformer_with_virtual_stage.py  +1 -1
test/collective/fleet/hybrid_parallel_qat.py  +1 -1
test/collective/fleet/hybrid_parallel_sharding_model.py  +1 -1
test/collective/fleet/hybrid_parallel_shared_weight.py  +1 -1
test/collective/fleet/parallel_dygraph_no_sync_gradient_check.py  +1 -1
test/collective/fleet/test_fleet_static_mp_layers.py  +1 -1
test/legacy_test/auto_parallel_gpt_model.py  +5 -5
test/legacy_test/benchmark_sum_op.py  +2 -2
test/legacy_test/dist_fleet_ctr.py  +2 -2
test/legacy_test/dist_hapi_mnist_dynamic.py  +1 -1
test/legacy_test/dist_hapi_mnist_static.py  +1 -1
test/legacy_test/dist_hapi_pure_fp16_static.py  +2 -2
test/legacy_test/gradient_checker.py  +7 -7
test/legacy_test/hybrid_parallel_pp_alexnet.py  +3 -3
test/legacy_test/parallel_dygraph_dataparallel_with_pylayer.py  +1 -1
test/legacy_test/parallel_dygraph_gradient_check.py  +1 -1
test/legacy_test/parallel_dygraph_gradient_check_in_eager_mode.py  +1 -1
test/xpu/parallel_dygraph_dataparallel_with_pylayer.py  +1 -1
test/xpu/parallel_dygraph_gradient_check.py  +1 -1
test/xpu/parallel_dygraph_gradient_check_in_eager_mode.py  +1 -1
test/collective/fleet/hybrid_parallel_mp_amp.py
@@ -14,13 +14,13 @@
 import unittest
 
-from hybrid_parallel_mp_model import TestDistMPTraning
+from hybrid_parallel_mp_model import TestDistMPTraining
 
 import paddle
 from paddle.distributed import fleet
 
 
-class TestMPClipGrad(TestDistMPTraning):
+class TestMPClipGrad(TestDistMPTraining):
     def build_optimizer(self, model):
         grad_clip = paddle.nn.ClipGradByGlobalNorm(2.0)
         scheduler = paddle.optimizer.lr.ExponentialDecay(
test/collective/fleet/hybrid_parallel_mp_bf16.py
@@ -14,14 +14,14 @@
 import unittest
 
-from hybrid_parallel_mp_model import TestDistMPTraning
+from hybrid_parallel_mp_model import TestDistMPTraining
 
 import paddle
 from paddle.distributed import fleet
 from paddle.distributed.utils.nccl_utils import check_nccl_version_for_bf16
 
 
-class TestMPFP16(TestDistMPTraning):
+class TestMPFP16(TestDistMPTraining):
     def build_optimizer(self, model):
         grad_clip = paddle.nn.ClipGradByGlobalNorm(1.0)
         scheduler = paddle.optimizer.lr.ExponentialDecay(
test/collective/fleet/hybrid_parallel_mp_broadcast_obj.py
@@ -19,7 +19,7 @@ import numpy as np
 from hybrid_parallel_mp_model import (
     SimpleDPNet,
     SimpleMPNet,
-    TestDistMPTraning,
+    TestDistMPTraining,
     parallel_matmul,
     set_random_seed,
 )
@@ -58,7 +58,7 @@ class SimpleDPMultimodalNet(SimpleDPNet):
         return x
 
 
-class TestMPBroadcastObj(TestDistMPTraning):
+class TestMPBroadcastObj(TestDistMPTraining):
     def build_model_optimizer(self):
         hcg = fleet.get_hybrid_communicate_group()
         word_size = hcg.get_model_parallel_world_size()
test/collective/fleet/hybrid_parallel_mp_clip_grad.py
@@ -14,7 +14,7 @@
 import unittest
 
-from hybrid_parallel_mp_model import TestDistMPTraning
+from hybrid_parallel_mp_model import TestDistMPTraining
 
 import paddle
@@ -22,7 +22,7 @@ import paddle
 # log.setLevel(logging.WARNING)
 
 
-class TestMPClipGrad(TestDistMPTraning):
+class TestMPClipGrad(TestDistMPTraining):
     def build_optimizer(self, model):
         grad_clip = paddle.nn.ClipGradByGlobalNorm(2.0)
         scheduler = paddle.optimizer.lr.ExponentialDecay(
test/collective/fleet/hybrid_parallel_mp_fp16.py
@@ -14,13 +14,13 @@
 import unittest
 
-from hybrid_parallel_mp_model import TestDistMPTraning
+from hybrid_parallel_mp_model import TestDistMPTraining
 
 import paddle
 from paddle.distributed import fleet
 
 
-class TestMPFP16(TestDistMPTraning):
+class TestMPFP16(TestDistMPTraining):
     def build_optimizer(self, model):
         grad_clip = paddle.nn.ClipGradByGlobalNorm(1.0)
         scheduler = paddle.optimizer.lr.ExponentialDecay(
test/collective/fleet/hybrid_parallel_mp_layers.py
@@ -115,7 +115,7 @@ class SimpleEmbedding(paddle.nn.Layer):
         return output
 
 
-class TestDistTraning(unittest.TestCase):
+class TestDistTraining(unittest.TestCase):
     def setUp(self):
         strategy = fleet.DistributedStrategy()
         self.model_parallel_size = 2
test/collective/fleet/hybrid_parallel_mp_model.py
@@ -180,7 +180,7 @@ class SimpleDPNet(paddle.nn.Layer):
         return x
 
 
-class TestDistMPSyncTraning(unittest.TestCase):
+class TestDistMPSyncTraining(unittest.TestCase):
     def setUp(self):
         strategy = fleet.DistributedStrategy()
         self.model_parallel_size = 2
@@ -348,7 +348,7 @@ class TestDistMPSyncTraning(unittest.TestCase):
         )
 
 
-class TestDistMPSyncModelTraning(TestDistMPSyncTraning):
+class TestDistMPSyncModelTraining(TestDistMPSyncTraining):
     def setUp(self):
         strategy = fleet.DistributedStrategy()
         self.model_parallel_size = 2
@@ -368,7 +368,7 @@ class TestDistMPSyncModelTraning(TestDistMPSyncTraning):
         fleet.init(is_collective=True, strategy=strategy)
 
 
-class TestDistMPTraning(unittest.TestCase):
+class TestDistMPTraining(unittest.TestCase):
     def setUp(self):
         strategy = fleet.DistributedStrategy()
         self.model_parallel_size = 2
test/collective/fleet/hybrid_parallel_mp_model_with_sequence_parallel.py
@@ -200,7 +200,7 @@ class SimpleDPNet(paddle.nn.Layer):
         return x
 
 
-class TestDistSPSyncTraning(unittest.TestCase):
+class TestDistSPSyncTraining(unittest.TestCase):
     def setUp(self):
         strategy = fleet.DistributedStrategy()
         self.model_parallel_size = 2
@@ -370,7 +370,7 @@ class TestDistSPSyncTraning(unittest.TestCase):
         )
 
 
-class TestDistSPSyncModelTraning(TestDistSPSyncTraning):
+class TestDistSPSyncModelTraining(TestDistSPSyncTraining):
     def setUp(self):
         strategy = fleet.DistributedStrategy()
         self.model_parallel_size = 2
@@ -390,7 +390,7 @@ class TestDistSPSyncModelTraning(TestDistSPSyncTraning):
         fleet.init(is_collective=True, strategy=strategy)
 
 
-class TestDistSPTraning(unittest.TestCase):
+class TestDistSPTraining(unittest.TestCase):
     def setUp(self):
         strategy = fleet.DistributedStrategy()
         self.model_parallel_size = 2
test/collective/fleet/hybrid_parallel_mp_random.py
@@ -20,7 +20,7 @@ import paddle
 from paddle.distributed import fleet
 
 
-class TestDistTraning(unittest.TestCase):
+class TestDistTraining(unittest.TestCase):
     def setUp(self):
         strategy = fleet.DistributedStrategy()
         self.model_parallel_size = 2
test/collective/fleet/hybrid_parallel_pp_amp.py
@@ -34,7 +34,7 @@ batch_size = 4
 micro_batch_size = 2
 
 
-class TestDistPPTraning(unittest.TestCase):
+class TestDistPPTraining(unittest.TestCase):
     def setUp(self):
         strategy = fleet.DistributedStrategy()
         self.model_parallel_size = 1
test/collective/fleet/hybrid_parallel_pp_bf16.py
@@ -35,7 +35,7 @@ batch_size = 4
 micro_batch_size = 2
 
 
-class TestDistPPTraning(unittest.TestCase):
+class TestDistPPTraining(unittest.TestCase):
     def setUp(self):
         strategy = fleet.DistributedStrategy()
         self.model_parallel_size = 1
test/collective/fleet/hybrid_parallel_pp_clip_grad.py
@@ -17,12 +17,12 @@ import unittest
 sys.path.append("../../legacy_test")
 
-from hybrid_parallel_pp_alexnet import TestDistPPTraning
+from hybrid_parallel_pp_alexnet import TestDistPPTraining
 
 import paddle
 
 
-class TestPPClipGrad(TestDistPPTraning):
+class TestPPClipGrad(TestDistPPTraining):
     def build_optimizer(self, model):
         grad_clip = paddle.nn.ClipGradByGlobalNorm(0.5)
         scheduler = paddle.optimizer.lr.PiecewiseDecay(
@@ -36,7 +36,7 @@ class TestPPClipGrad(TestDistPPTraning):
         return scheduler, optimizer
 
 
-class TestPPClipGradParamGroup(TestDistPPTraning):
+class TestPPClipGradParamGroup(TestDistPPTraining):
     def build_optimizer(self, model):
         grad_clip = paddle.nn.ClipGradByGlobalNorm(0.5)
         scheduler = paddle.optimizer.lr.PiecewiseDecay(
test/collective/fleet/hybrid_parallel_pp_embedding.py
@@ -120,7 +120,7 @@ class SimpleNetPipe(Layer):
         return feat
 
 
-class TestDistEmbeddingTraning(unittest.TestCase):
+class TestDistEmbeddingTraining(unittest.TestCase):
     def setUp(self):
         strategy = fleet.DistributedStrategy()
         self.model_parallel_size = 1
test/collective/fleet/hybrid_parallel_pp_fp16.py
@@ -38,7 +38,7 @@ batch_size = 4
 micro_batch_size = 2
 
 
-class TestDistPPTraning(unittest.TestCase):
+class TestDistPPTraining(unittest.TestCase):
     def setUp(self):
         strategy = fleet.DistributedStrategy()
         self.model_parallel_size = 1
test/collective/fleet/hybrid_parallel_pp_recompute.py
@@ -138,7 +138,7 @@ class ModelPipe(PipelineLayer):
         )
 
 
-class TestDistPPTraning(unittest.TestCase):
+class TestDistPPTraining(unittest.TestCase):
     def setUp(self):
         strategy = fleet.DistributedStrategy()
         self.model_parallel_size = 1
test/collective/fleet/hybrid_parallel_pp_save_load.py
@@ -30,7 +30,7 @@ micro_batch_size = 2
 vocab_size = 128
 
 
-class TestDistPPSaveLoadTraning(unittest.TestCase):
+class TestDistPPSaveLoadTraining(unittest.TestCase):
     def setUp(self):
         strategy = fleet.DistributedStrategy()
         self.model_parallel_size = 1
test/collective/fleet/hybrid_parallel_pp_save_load_with_virtual_stage.py
@@ -33,7 +33,7 @@ micro_batch_size = 2
 vocab_size = 128
 
 
-class TestDistPPSaveLoadTraning(unittest.TestCase):
+class TestDistPPSaveLoadTraining(unittest.TestCase):
     def setUp(self):
         strategy = fleet.DistributedStrategy()
         self.model_parallel_size = 1
test/collective/fleet/hybrid_parallel_pp_transformer.py
@@ -138,7 +138,7 @@ class ModelPipe(PipelineLayer):
         )
 
 
-class TestDistPPTraning(unittest.TestCase):
+class TestDistPPTraining(unittest.TestCase):
     def setUp(self):
         strategy = fleet.DistributedStrategy()
         self.model_parallel_size = 1
test/collective/fleet/hybrid_parallel_pp_transformer_save.py
@@ -29,7 +29,7 @@ vocab_size = 128
 transformer_layer_num = 8
 
 
-class TestDistPPSaveTraning(unittest.TestCase):
+class TestDistPPSaveTraining(unittest.TestCase):
     def setUp(self):
         strategy = fleet.DistributedStrategy()
         self.model_parallel_size = 1
test/collective/fleet/hybrid_parallel_pp_transformer_save_with_virtual_stage.py
@@ -33,7 +33,7 @@ vocab_size = 128
 transformer_layer_num = 8
 
 
-class TestDistPPSaveTraning(unittest.TestCase):
+class TestDistPPSaveTraining(unittest.TestCase):
     def setUp(self):
         strategy = fleet.DistributedStrategy()
         self.model_parallel_size = 1
test/collective/fleet/hybrid_parallel_pp_transformer_unbalanced_data.py
@@ -17,7 +17,7 @@ import unittest
 import numpy as np
 from hybrid_parallel_pp_transformer import (
     ModelPipe,
-    TestDistPPTraning,
+    TestDistPPTraining,
     batch_size,
     length,
     micro_batch_size,
@@ -30,7 +30,7 @@ import paddle.distributed as dist
 from paddle.distributed import fleet
 
 
-class TestDistPPTraningUnbalancedData(TestDistPPTraning):
+class TestDistPPTrainingUnbalancedData(TestDistPPTraining):
     def test_pp_model(self):
         hcg = fleet.get_hybrid_communicate_group()
         word_size = hcg.get_model_parallel_world_size()
test/collective/fleet/hybrid_parallel_pp_transformer_with_virtual_stage.py
@@ -137,7 +137,7 @@ class ModelPipe(PipelineLayer):
         )
 
 
-class TestDistPPTraning(unittest.TestCase):
+class TestDistPPTraining(unittest.TestCase):
     def setUp(self):
         strategy = fleet.DistributedStrategy()
         self.model_parallel_size = 1
test/collective/fleet/hybrid_parallel_qat.py
@@ -235,7 +235,7 @@ class SimpleDPNet(nn.Layer):
         return x
 
 
-class TestDistMPTraning(unittest.TestCase):
+class TestDistMPTraining(unittest.TestCase):
     def setUp(self):
         strategy = fleet.DistributedStrategy()
         self.model_parallel_size = 2
test/collective/fleet/hybrid_parallel_sharding_model.py
@@ -180,7 +180,7 @@ class SimpleDPNet(paddle.nn.Layer):
         return x
 
 
-class TestDistMPTraning(unittest.TestCase):
+class TestDistMPTraining(unittest.TestCase):
     def setUp(self):
         random.seed(2021)
         np.random.seed(2021)
test/collective/fleet/hybrid_parallel_shared_weight.py
@@ -152,7 +152,7 @@ class SimpleNetPipe(PipelineLayer):
         super().__init__(layers=self.descs, loss_fn=LossNet(), **kwargs)
 
 
-class TestDistEmbeddingTraning(unittest.TestCase):
+class TestDistEmbeddingTraining(unittest.TestCase):
     def setUp(self):
         strategy = fleet.DistributedStrategy()
         self.model_parallel_size = 1
test/collective/fleet/parallel_dygraph_no_sync_gradient_check.py
@@ -64,7 +64,7 @@ class SimpleNet(paddle.nn.Layer):
         return self.share_net(tmp)
 
 
-class TestDistTraning(unittest.TestCase):
+class TestDistTraining(unittest.TestCase):
     def test_multiple_gpus(self):
         self.trainer_id = dist.get_rank()
         dist.init_parallel_env()
test/collective/fleet/test_fleet_static_mp_layers.py
@@ -66,7 +66,7 @@ class EmbeddingNet(paddle.nn.Layer):
         return output
 
 
-class TestDistTraning(unittest.TestCase):
+class TestDistTraining(unittest.TestCase):
     def setUp(self):
         os.environ["PADDLE_TRAINER_ID"] = "2"
         os.environ[
test/legacy_test/auto_parallel_gpt_model.py
@@ -35,7 +35,7 @@ def init_global():
 
 class MultiHeadAttention(nn.Layer):
     """
-    Attention mapps queries and a set of key-value pairs to outputs, and
+    Attention maps queries and a set of key-value pairs to outputs, and
     Multi-Head Attention performs multiple parallel attention to jointly attending
     to information from different representation subspaces.
     """
@@ -114,7 +114,7 @@ class MultiHeadAttention(nn.Layer):
 
     def _prepare_qkv(self, query, key, value, use_cache=False, cache=None):
         """
-        Prapares linear projected queries, keys and values for usage of subsequnt
+        Prepares linear projected queries, keys and values for usage of subsequent
         multiple parallel attention. If `cache` is not None, using cached results
         to reduce redundant calculations.
         """
@@ -203,7 +203,7 @@ class MultiHeadAttention(nn.Layer):
 
     def gen_cache(self, key, value=None, type=Cache):
         """
-        Generates cache for `forward` usage in inference accroding to arguments.
+        Generates cache for `forward` usage in inference according to arguments.
         The generated cache is an instance of `MultiHeadAttention.Cache` or an
         instance of `MultiHeadAttention.StaticCache`.
         """
@@ -573,7 +573,7 @@ class GPTEmbeddings(nn.Layer):
         ones = paddle.ones_like(input_ids, dtype="int64")
         seq_length = paddle.cumsum(ones, axis=-1)
         position_ids = seq_length - ones
-        input_embedings = self.word_embeddings(input_ids)
+        input_embeddings = self.word_embeddings(input_ids)
         if _global_parallel_strategy == "mp":
             auto.shard_tensor(
                 self.word_embeddings.weight, _global_process_mesh, ["x", None]
@@ -592,7 +592,7 @@ class GPTEmbeddings(nn.Layer):
             )
         position_embeddings = self.position_embeddings(position_ids)
-        embeddings = input_embedings + position_embeddings
+        embeddings = input_embeddings + position_embeddings
         embeddings = self.dropout(embeddings)
         return embeddings
test/legacy_test/benchmark_sum_op.py
@@ -59,7 +59,7 @@ class TestSumOp(BenchmarkSuite):
 
     def test_timeit_output(self):
         """
-        perf the op, time cost will be averged in iters.
+        perf the op, time cost will be averaged in iters.
         output example
         >>> One pass of (sum_op) at CPUPlace cost 0.000461330413818
         >>> One pass of (sum_op) at CUDAPlace(0) cost 0.000556070804596
@@ -68,7 +68,7 @@ class TestSumOp(BenchmarkSuite):
 
     def test_timeit_grad(self):
         """
-        perf the op gradient, time cost will be averged in iters.
+        perf the op gradient, time cost will be averaged in iters.
         output example
         >>> One pass of (sum_grad_op) at CPUPlace cost 0.00279935121536
         >>> One pass of (sum_grad_op) at CUDAPlace(0) cost 0.00500632047653
test/legacy_test/dist_fleet_ctr.py
@@ -129,7 +129,7 @@ class TestDistCTR2x2(FleetDistRunnerBase):
         dnn_out = fc
 
         # build lr model
-        lr_embbding = paddle.static.nn.embedding(
+        lr_embedding = paddle.static.nn.embedding(
             is_distributed=False,
             input=lr_data,
             size=[lr_input_dim, 1],
@@ -141,7 +141,7 @@ class TestDistCTR2x2(FleetDistRunnerBase):
             padding_idx=0,
         )
         lr_pool = paddle.static.nn.sequence_lod.sequence_pool(
-            input=lr_embbding.squeeze(-2), pool_type="sum"
+            input=lr_embedding.squeeze(-2), pool_type="sum"
         )
         merge_layer = paddle.concat([dnn_out, lr_pool], axis=1)
test/legacy_test/dist_hapi_mnist_dynamic.py
@@ -52,7 +52,7 @@ def compute_accuracy(pred, gt):
 @unittest.skipIf(
     not fluid.is_compiled_with_cuda(), 'CPU testing is not supported'
 )
-class TestDistTraning(unittest.TestCase):
+class TestDistTraining(unittest.TestCase):
     def test_dynamic_multiple_gpus(self):
         device = set_device('gpu')
test/legacy_test/dist_hapi_mnist_static.py
@@ -52,7 +52,7 @@ def compute_accuracy(pred, gt):
 @unittest.skipIf(
     not fluid.is_compiled_with_cuda(), 'CPU testing is not supported'
 )
-class TestDistTraning(unittest.TestCase):
+class TestDistTraining(unittest.TestCase):
     def test_static_multiple_gpus(self):
         paddle.enable_static()
         device = set_device('gpu')
test/legacy_test/dist_hapi_pure_fp16_static.py
@@ -26,10 +26,10 @@ from paddle.vision.models import LeNet
 @unittest.skipIf(
     not fluid.is_compiled_with_cuda(), 'CPU testing is not supported'
 )
-class TestDistTraningWithPureFP16(unittest.TestCase):
+class TestDistTrainingWithPureFP16(unittest.TestCase):
     def test_amp_training_purefp16(self):
         if not fluid.is_compiled_with_cuda():
-            self.skipTest('module not tested when ONLY_CPU compling')
+            self.skipTest('module not tested when ONLY_CPU compiling')
         data = np.random.random(size=(4, 1, 28, 28)).astype(np.float32)
         label = np.random.randint(0, 10, size=(4, 1)).astype(np.int64)
test/legacy_test/gradient_checker.py
@@ -269,7 +269,7 @@ def grad_check(
     if program is None:
         program = fluid.default_main_program()
 
-    # init variable in strtup program
+    # init variable in startup program
     scope = fluid.executor.global_scope()
    exe = fluid.Executor(place)
     exe.run(fluid.default_startup_program())
@@ -595,7 +595,7 @@ def get_static_double_grad(
     if program is None:
         program = fluid.default_main_program()
 
-    # init variable in strtup program
+    # init variable in startup program
     scope = fluid.executor.global_scope()
     exe = fluid.Executor(place)
     exe.run(fluid.default_startup_program())
@@ -657,7 +657,7 @@ def get_eager_double_grad(
         the second order derivative and the inputs of second order derivative's calculation
         will be returned for higher order derivative's calculation.
         If 'return_mid_result' set False.
-        A list of numpy array that stores second derivative result calulated by dygraph.
+        A list of numpy array that stores second derivative result calculated by dygraph.
     """
     if isinstance(place, fluid.CPUPlace):
         paddle.set_device("cpu")
@@ -684,7 +684,7 @@ def get_eager_double_grad(
     )
     d_inputs = [d_input for d_input in d_inputs if d_input is not None]
 
-    # calcluate second derivative
+    # calculate second derivative
     inputs = inputs + dys
     ddys = []
     if return_mid_result:
@@ -808,7 +808,7 @@ def get_static_triple_grad(
         program (Program|None): a Program with forward pass.
             If None, use fluid.default_main_program().
     Returns:
-        A list of numpy array that stores third derivative result calulated by static graph.
+        A list of numpy array that stores third derivative result calculated by static graph.
     """
     if program is None:
         program = fluid.default_main_program()
@@ -858,13 +858,13 @@ def get_eager_triple_grad(
         place (fluid.CPUPlace or fluid.CUDAPlace): the device.
        return_mid_result (list[Tensor], list[Tensor]): If set True, the
     Returns:
-        A list of numpy array that stores second derivative result calulated by dygraph
+        A list of numpy array that stores second derivative result calculated by dygraph
     """
     dd_y, dd_x = get_eager_double_grad(
         func, x_init, dy_init, place, return_mid_result=True
     )
 
-    # calcluate third derivative
+    # calculate third derivative
     dddys = []
     for dd_yi in dd_y:
         dd_yi.stop_gradient = False
test/legacy_test/hybrid_parallel_pp_alexnet.py
@@ -41,7 +41,7 @@ batch_size = 4
 micro_batch_size = 2
 
 
-class TestDistPPTraning(unittest.TestCase):
+class TestDistPPTraining(unittest.TestCase):
     def setUp(self):
         strategy = fleet.DistributedStrategy()
         self.model_parallel_size = 1
@@ -136,7 +136,7 @@ class TestDistPPTraning(unittest.TestCase):
         )
 
 
-class TestDistPPDelayScaleLoss(TestDistPPTraning):
+class TestDistPPDelayScaleLoss(TestDistPPTraining):
     def setUp(self):
         strategy = fleet.DistributedStrategy()
         self.model_parallel_size = 1
@@ -158,7 +158,7 @@ class TestDistPPDelayScaleLoss(TestDistPPTraning):
         fleet.init(is_collective=True, strategy=strategy)
 
 
-class TestDistPPMainGrad(TestDistPPTraning):
+class TestDistPPMainGrad(TestDistPPTraining):
     def wrapper_mix_precision(self, model, optimizer):
         model = MixPrecisionLayer(model, dtype="float16")
         optimizer = MixPrecisionOptimizer(optimizer)
test/legacy_test/parallel_dygraph_dataparallel_with_pylayer.py
@@ -62,7 +62,7 @@ class SimpleNet(paddle.nn.Layer):
         return self.linear(inputs)
 
 
-class TestDistTraning(unittest.TestCase):
+class TestDistTraining(unittest.TestCase):
     def test_multiple_gpus(self):
         self.trainer_id = dist.get_rank()
         dist.init_parallel_env()
test/legacy_test/parallel_dygraph_gradient_check.py
@@ -64,7 +64,7 @@ class SimpleNet(paddle.nn.Layer):
         return self.share_net(tmp)
 
 
-class TestDistTraning(unittest.TestCase):
+class TestDistTraining(unittest.TestCase):
     def test_multiple_gpus(self):
         dist.init_parallel_env()
         self.trainer_id = dist.get_rank()
test/legacy_test/parallel_dygraph_gradient_check_in_eager_mode.py
@@ -64,7 +64,7 @@ class SimpleNet(paddle.nn.Layer):
         return self.share_net(tmp)
 
 
-class TestDistTraning(unittest.TestCase):
+class TestDistTraining(unittest.TestCase):
     def test_multiple_gpus(self):
         self.trainer_id = dist.get_rank()
         self.pg = dist.init_parallel_env()
test/xpu/parallel_dygraph_dataparallel_with_pylayer.py
@@ -62,7 +62,7 @@ class SimpleNet(paddle.nn.Layer):
         return self.linear(inputs)
 
 
-class TestDistTraning(unittest.TestCase):
+class TestDistTraining(unittest.TestCase):
     def test_multiple_xpus(self):
         self.trainer_id = dist.get_rank()
         dist.init_parallel_env()
test/xpu/parallel_dygraph_gradient_check.py
@@ -64,7 +64,7 @@ class SimpleNet(paddle.nn.Layer):
         return self.share_net(tmp)
 
 
-class TestDistTraning(unittest.TestCase):
+class TestDistTraining(unittest.TestCase):
     def test_multiple_xpus(self):
         dist.init_parallel_env()
         self.trainer_id = dist.get_rank()
test/xpu/parallel_dygraph_gradient_check_in_eager_mode.py
@@ -64,7 +64,7 @@ class SimpleNet(paddle.nn.Layer):
         return self.share_net(tmp)
 
 
-class TestDistTraning(unittest.TestCase):
+class TestDistTraining(unittest.TestCase):
     def test_multiple_xpus(self):
         self.trainer_id = dist.get_rank()
         self.pg = dist.init_parallel_env()