Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
2c8739e8
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
2c8739e8
编写于
6月 09, 2022
作者:
Z
zhaoyingli
提交者:
GitHub
6月 09, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
use tempfile to place temporary files (#43316)
上级
00ce09e6
变更
11
隐藏空白更改
内联
并排
Showing
11 changed files
with
146 additions
and
121 deletions
+146
-121
python/paddle/fluid/tests/unittests/auto_parallel/engine_api.py
.../paddle/fluid/tests/unittests/auto_parallel/engine_api.py
+5
-2
python/paddle/fluid/tests/unittests/auto_parallel/test_auto_parallel_relaunch.py
...ts/unittests/auto_parallel/test_auto_parallel_relaunch.py
+36
-15
python/paddle/fluid/tests/unittests/auto_parallel/test_cluster.py
...addle/fluid/tests/unittests/auto_parallel/test_cluster.py
+12
-12
python/paddle/fluid/tests/unittests/auto_parallel/test_comm_cost.py
...dle/fluid/tests/unittests/auto_parallel/test_comm_cost.py
+11
-12
python/paddle/fluid/tests/unittests/auto_parallel/test_engine_api.py
...le/fluid/tests/unittests/auto_parallel/test_engine_api.py
+5
-11
python/paddle/fluid/tests/unittests/auto_parallel/test_new_cost_model.py
...luid/tests/unittests/auto_parallel/test_new_cost_model.py
+9
-6
python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_gpt_planner.py
...unittests/auto_parallel/test_relaunch_with_gpt_planner.py
+23
-20
python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_planner.py
...sts/unittests/auto_parallel/test_relaunch_with_planner.py
+23
-16
python/paddle/fluid/tests/unittests/test_auto_parallel_cluster.py
...addle/fluid/tests/unittests/test_auto_parallel_cluster.py
+11
-4
python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py
...paddle/fluid/tests/unittests/test_auto_parallel_mapper.py
+11
-4
python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner_gpt.py
...uid/tests/unittests/test_auto_parallel_partitioner_gpt.py
+0
-19
未找到文件。
python/paddle/fluid/tests/unittests/auto_parallel/engine_api.py
浏览文件 @
2c8739e8
...
...
@@ -14,7 +14,7 @@
import
unittest
import
time
import
paddle.fluid
as
fluid
import
tempfile
import
copy
import
os
import
numpy
as
np
...
...
@@ -145,7 +145,10 @@ def train():
engine
.
predict
(
test_dataset
,
batch_size
,
fetch_list
=
[
'label'
])
# save
engine
.
save
(
'./mlp_inf'
,
training
=
False
,
mode
=
'predict'
)
temp_dir
=
tempfile
.
TemporaryDirectory
()
model_filename
=
os
.
path
.
join
(
temp_dir
.
name
,
'mlp_inf'
)
engine
.
save
(
model_filename
,
training
=
False
,
mode
=
'predict'
)
temp_dir
.
cleanup
()
if
__name__
==
"__main__"
:
...
...
python/paddle/fluid/tests/unittests/auto_parallel/test_auto_parallel_relaunch.py
浏览文件 @
2c8739e8
...
...
@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import
tempfile
import
unittest
import
os
import
sys
...
...
@@ -77,16 +78,45 @@ cluster_json = """
}
"""
mapping_josn
=
"""
[
{
"hostname": "machine1",
"addr": "127.0.0.1",
"port": "768",
"ranks":
{
"0": [1],
"1": [0]
}
}
]
"""
class
TestAutoParallelReLaunch
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
temp_dir
=
tempfile
.
TemporaryDirectory
()
def
tearDown
(
self
):
self
.
temp_dir
.
cleanup
()
def
test_relaunch
(
self
):
file_dir
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
cluster_json_path
=
os
.
path
.
join
(
file_dir
,
"auto_parallel_cluster.json"
)
cluster_json_path
=
os
.
path
.
join
(
self
.
temp_dir
.
name
,
"auto_parallel_cluster.json"
)
mapping_json_path
=
os
.
path
.
join
(
self
.
temp_dir
.
name
,
"auto_parallel_rank_mapping.json"
)
cluster_json_object
=
json
.
loads
(
cluster_json
)
with
open
(
cluster_json_path
,
"w"
)
as
cluster_json_file
:
json
.
dump
(
cluster_json_object
,
cluster_json_file
)
mapping_josn_object
=
json
.
loads
(
mapping_josn
)
with
open
(
mapping_json_path
,
"w"
)
as
mapping_josn_file
:
json
.
dump
(
mapping_josn_object
,
mapping_josn_file
)
file_dir
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
launch_model_path
=
os
.
path
.
join
(
file_dir
,
"auto_parallel_relaunch_model.py"
)
...
...
@@ -96,24 +126,15 @@ class TestAutoParallelReLaunch(unittest.TestCase):
coverage_args
=
[]
cmd
=
[
sys
.
executable
,
"-u"
]
+
coverage_args
+
[
"-m"
,
"launch"
,
"--cluster_topo_path"
,
cluster_json_path
,
"--enable_auto_mapping"
,
"True"
,
launch_model_path
"-m"
,
"launch"
,
"--log_dir"
,
self
.
temp_dir
.
name
,
"--cluster_topo_path"
,
cluster_json_path
,
"--rank_mapping_path"
,
mapping_json_path
,
"--enable_auto_mapping"
,
"True"
,
launch_model_path
]
process
=
subprocess
.
Popen
(
cmd
)
process
.
wait
()
self
.
assertEqual
(
process
.
returncode
,
0
)
# Remove unnecessary files
if
os
.
path
.
exists
(
cluster_json_path
):
os
.
remove
(
cluster_json_path
)
rank_mapping_json_path
=
os
.
path
.
join
(
file_dir
,
"auto_parallel_rank_mapping.json"
)
if
os
.
path
.
exists
(
rank_mapping_json_path
):
os
.
remove
(
rank_mapping_json_path
)
log_path
=
os
.
path
.
join
(
file_dir
,
"log"
)
if
os
.
path
.
exists
(
log_path
):
shutil
.
rmtree
(
log_path
)
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/auto_parallel/test_cluster.py
浏览文件 @
2c8739e8
...
...
@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import
tempfile
import
unittest
import
os
import
json
...
...
@@ -1968,10 +1969,17 @@ multi_cluster_json = """{
class
TestCluster
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
temp_dir
=
tempfile
.
TemporaryDirectory
()
def
tearDown
(
self
):
self
.
temp_dir
.
cleanup
()
def
test_single_machine
(
self
):
# Build cluster
file_dir
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
cluster_json_path
=
os
.
path
.
join
(
file_dir
,
"auto_parallel_cluster.json"
)
cluster_json_path
=
os
.
path
.
join
(
self
.
temp_dir
.
name
,
"auto_parallel_cluster_single.json"
)
cluster_json_object
=
json
.
loads
(
cluster_json
)
with
open
(
cluster_json_path
,
"w"
)
as
cluster_json_file
:
json
.
dump
(
cluster_json_object
,
cluster_json_file
)
...
...
@@ -1989,14 +1997,10 @@ class TestCluster(unittest.TestCase):
self
.
assertTrue
(
devices
==
[
0
,
1
,
2
,
3
])
self
.
assertTrue
(
involved_machine_count
==
1
)
# Remove unnecessary files
if
os
.
path
.
exists
(
cluster_json_path
):
os
.
remove
(
cluster_json_path
)
def
test_multi_machine
(
self
):
# Build cluster
file_dir
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
cluster_json_path
=
os
.
path
.
join
(
file_dir
,
"auto_parallel_cluster.json"
)
cluster_json_path
=
os
.
path
.
join
(
self
.
temp_dir
.
name
,
"auto_parallel_cluster_multi.json"
)
cluster_json_object
=
json
.
loads
(
multi_cluster_json
)
with
open
(
cluster_json_path
,
"w"
)
as
cluster_json_file
:
json
.
dump
(
cluster_json_object
,
cluster_json_file
)
...
...
@@ -2014,10 +2018,6 @@ class TestCluster(unittest.TestCase):
self
.
assertTrue
(
devices
==
[
5
,
6
,
7
,
10
])
self
.
assertTrue
(
involved_machine_count
==
2
)
# Remove unnecessary files
if
os
.
path
.
exists
(
cluster_json_path
):
os
.
remove
(
cluster_json_path
)
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/auto_parallel/test_comm_cost.py
浏览文件 @
2c8739e8
...
...
@@ -15,6 +15,7 @@
import
unittest
import
os
import
json
import
tempfile
import
paddle
from
paddle.distributed.auto_parallel.cluster
import
Cluster
...
...
@@ -32,10 +33,16 @@ from test_cluster import cluster_json, multi_cluster_json
class
TestCommOpCost
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
temp_dir
=
tempfile
.
TemporaryDirectory
()
def
tearDown
(
self
):
self
.
temp_dir
.
cleanup
()
def
test_comm_cost
(
self
):
# Build cluster
file_dir
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
cluster_json_path
=
os
.
path
.
join
(
file_dir
,
"auto_parallel_cluster.json"
)
cluster_json_path
=
os
.
path
.
join
(
self
.
temp_dir
.
name
,
"auto_parallel_cluster0.json"
)
cluster_json_object
=
json
.
loads
(
cluster_json
)
with
open
(
cluster_json_path
,
"w"
)
as
cluster_json_file
:
json
.
dump
(
cluster_json_object
,
cluster_json_file
)
...
...
@@ -92,14 +99,10 @@ class TestCommOpCost(unittest.TestCase):
comm_context
=
comm_context
)
self
.
assertTrue
(
identity_op_cost
.
time
>=
0
)
# Remove unnecessary files
if
os
.
path
.
exists
(
cluster_json_path
):
os
.
remove
(
cluster_json_path
)
def
test_cross_machine_comm_cost
(
self
):
# Build cluster
file_dir
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
cluster_json_path
=
os
.
path
.
join
(
file_dir
,
"auto_parallel_cluster.json"
)
cluster_json_path
=
os
.
path
.
join
(
self
.
temp_dir
.
name
,
"auto_parallel_cluster1.json"
)
cluster_json_object
=
json
.
loads
(
multi_cluster_json
)
with
open
(
cluster_json_path
,
"w"
)
as
cluster_json_file
:
json
.
dump
(
cluster_json_object
,
cluster_json_file
)
...
...
@@ -151,10 +154,6 @@ class TestCommOpCost(unittest.TestCase):
comm_context
=
comm_context
)
self
.
assertTrue
(
recv_op_cost
.
time
>
0
)
# Remove unnecessary files
if
os
.
path
.
exists
(
cluster_json_path
):
os
.
remove
(
cluster_json_path
)
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/auto_parallel/test_engine_api.py
浏览文件 @
2c8739e8
...
...
@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import
tempfile
import
unittest
import
os
import
sys
...
...
@@ -31,24 +32,17 @@ class TestEngineAPI(unittest.TestCase):
else
:
coverage_args
=
[]
tmp_dir
=
tempfile
.
TemporaryDirectory
()
cmd
=
[
sys
.
executable
,
"-u"
]
+
coverage_args
+
[
"-m"
,
"launch"
,
"--gpus"
,
"0,1"
,
launch_model_path
"-m"
,
"launch"
,
"--gpus"
,
"0,1"
,
"--log_dir"
,
tmp_dir
.
name
,
launch_model_path
]
process
=
subprocess
.
Popen
(
cmd
)
process
.
wait
()
self
.
assertEqual
(
process
.
returncode
,
0
)
# Remove unnecessary files
log_path
=
os
.
path
.
join
(
file_dir
,
"log"
)
if
os
.
path
.
exists
(
log_path
):
shutil
.
rmtree
(
log_path
)
files_path
=
[
path
for
path
in
os
.
listdir
(
'.'
)
if
'.pd'
in
path
]
for
path
in
files_path
:
if
os
.
path
.
exists
(
path
):
os
.
remove
(
path
)
if
os
.
path
.
exists
(
'rank_mapping.csv'
):
os
.
remove
(
'rank_mapping.csv'
)
tmp_dir
.
cleanup
()
if
__name__
==
"__main__"
:
...
...
python/paddle/fluid/tests/unittests/auto_parallel/test_new_cost_model.py
浏览文件 @
2c8739e8
...
...
@@ -15,6 +15,7 @@
import
unittest
import
os
import
json
import
tempfile
import
paddle
import
paddle.distributed.auto_parallel.cost
as
cost_model
...
...
@@ -36,6 +37,12 @@ def check_cost(cost):
class
TestCost
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
temp_dir
=
tempfile
.
TemporaryDirectory
()
def
tearDown
(
self
):
self
.
temp_dir
.
cleanup
()
def
test_base_cost
(
self
):
cost
=
cost_model
.
Cost
(
memory
=
100
,
flops
=
200
,
time
=
0.5
)
self
.
assertTrue
(
check_cost
(
cost
))
...
...
@@ -65,8 +72,8 @@ class TestCost(unittest.TestCase):
def
test_comm_cost
(
self
):
# Build cluster
file_dir
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
cluster_json_path
=
os
.
path
.
join
(
file_dir
,
"auto_parallel_cluster.json"
)
cluster_json_path
=
os
.
path
.
join
(
self
.
temp_dir
.
name
,
"auto_parallel_cluster.json"
)
cluster_json_object
=
json
.
loads
(
cluster_json
)
with
open
(
cluster_json_path
,
"w"
)
as
cluster_json_file
:
json
.
dump
(
cluster_json_object
,
cluster_json_file
)
...
...
@@ -85,10 +92,6 @@ class TestCost(unittest.TestCase):
op_desc
=
desc
,
comm_context
=
CommContext
(
cluster
))
self
.
assertTrue
(
check_cost
(
allreduce_cost
.
cost
))
# Remove unnecessary files
if
os
.
path
.
exists
(
cluster_json_path
):
os
.
remove
(
cluster_json_path
)
def
test_cost_estimator
(
self
):
train_program
=
paddle
.
static
.
Program
()
cost_estimator
=
cost_model
.
CostEstimator
(
train_program
)
...
...
python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_gpt_planner.py
浏览文件 @
2c8739e8
...
...
@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import
tempfile
import
unittest
import
os
import
sys
...
...
@@ -23,14 +24,29 @@ from paddle.distributed.fleet.launch_utils import run_with_coverage
class
TestPlannerReLaunch
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
temp_dir
=
tempfile
.
TemporaryDirectory
()
def
tearDown
(
self
):
self
.
temp_dir
.
cleanup
()
def
test_relaunch_with_planner
(
self
):
from
test_auto_parallel_relaunch
import
cluster_json
file_dir
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
cluster_json_path
=
os
.
path
.
join
(
file_dir
,
"auto_parallel_cluster.json"
)
from
test_auto_parallel_relaunch
import
cluster_json
,
mapping_josn
cluster_json_path
=
os
.
path
.
join
(
self
.
temp_dir
.
name
,
"auto_parallel_cluster.json"
)
mapping_json_path
=
os
.
path
.
join
(
self
.
temp_dir
.
name
,
"auto_parallel_rank_mapping.json"
)
cluster_json_object
=
json
.
loads
(
cluster_json
)
with
open
(
cluster_json_path
,
"w"
)
as
cluster_json_file
:
json
.
dump
(
cluster_json_object
,
cluster_json_file
)
mapping_json_object
=
json
.
loads
(
mapping_josn
)
with
open
(
mapping_json_path
,
"w"
)
as
mapping_json_file
:
json
.
dump
(
mapping_json_object
,
mapping_json_file
)
file_dir
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
launch_model_path
=
os
.
path
.
join
(
file_dir
,
"auto_parallel_relaunch_with_gpt_planner.py"
)
...
...
@@ -40,28 +56,15 @@ class TestPlannerReLaunch(unittest.TestCase):
coverage_args
=
[]
cmd
=
[
sys
.
executable
,
"-u"
]
+
coverage_args
+
[
"-m"
,
"launch"
,
"--cluster_topo_path"
,
cluster_json_path
,
"--enable_auto_mapping"
,
"True"
,
launch_model_path
"-m"
,
"launch"
,
"--log_dir"
,
self
.
temp_dir
.
name
,
"--cluster_topo_path"
,
cluster_json_path
,
"--rank_mapping_path"
,
mapping_json_path
,
"--enable_auto_mapping"
,
"True"
,
launch_model_path
]
process
=
subprocess
.
Popen
(
cmd
)
process
.
wait
()
self
.
assertEqual
(
process
.
returncode
,
0
)
# Remove unnecessary files
if
os
.
path
.
exists
(
cluster_json_path
):
os
.
remove
(
cluster_json_path
)
rank_mapping_json_path
=
os
.
path
.
join
(
file_dir
,
"auto_parallel_rank_mapping.json"
)
if
os
.
path
.
exists
(
rank_mapping_json_path
):
os
.
remove
(
rank_mapping_json_path
)
files_path
=
[
path
for
path
in
os
.
listdir
(
'.'
)
if
'.pkl'
in
path
]
for
path
in
files_path
:
if
os
.
path
.
exists
(
path
):
os
.
remove
(
path
)
log_path
=
os
.
path
.
join
(
file_dir
,
"log"
)
if
os
.
path
.
exists
(
log_path
):
shutil
.
rmtree
(
log_path
)
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/auto_parallel/test_relaunch_with_planner.py
浏览文件 @
2c8739e8
...
...
@@ -12,6 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import
tempfile
import
unittest
import
os
import
sys
...
...
@@ -23,14 +24,29 @@ from paddle.distributed.fleet.launch_utils import run_with_coverage
class
TestPlannerReLaunch
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
temp_dir
=
tempfile
.
TemporaryDirectory
()
def
tearDown
(
self
):
self
.
temp_dir
.
cleanup
()
def
test_relaunch_with_planner
(
self
):
from
test_auto_parallel_relaunch
import
cluster_json
file_dir
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
cluster_json_path
=
os
.
path
.
join
(
file_dir
,
"auto_parallel_cluster.json"
)
from
test_auto_parallel_relaunch
import
cluster_json
,
mapping_josn
cluster_json_path
=
os
.
path
.
join
(
self
.
temp_dir
.
name
,
"auto_parallel_cluster.json"
)
mapping_json_path
=
os
.
path
.
join
(
self
.
temp_dir
.
name
,
"auto_parallel_rank_mapping.json"
)
cluster_json_object
=
json
.
loads
(
cluster_json
)
with
open
(
cluster_json_path
,
"w"
)
as
cluster_json_file
:
json
.
dump
(
cluster_json_object
,
cluster_json_file
)
mapping_json_object
=
json
.
loads
(
mapping_josn
)
with
open
(
mapping_json_path
,
"w"
)
as
mapping_json_file
:
json
.
dump
(
mapping_json_object
,
mapping_json_file
)
file_dir
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
launch_model_path
=
os
.
path
.
join
(
file_dir
,
"auto_parallel_relaunch_with_planner.py"
)
...
...
@@ -40,24 +56,15 @@ class TestPlannerReLaunch(unittest.TestCase):
coverage_args
=
[]
cmd
=
[
sys
.
executable
,
"-u"
]
+
coverage_args
+
[
"-m"
,
"launch"
,
"--cluster_topo_path"
,
cluster_json_path
,
"--enable_auto_mapping"
,
"True"
,
launch_model_path
"-m"
,
"launch"
,
"--log_dir"
,
self
.
temp_dir
.
name
,
"--cluster_topo_path"
,
cluster_json_path
,
"--rank_mapping_path"
,
mapping_json_path
,
"--enable_auto_mapping"
,
"True"
,
launch_model_path
]
process
=
subprocess
.
Popen
(
cmd
)
process
.
wait
()
self
.
assertEqual
(
process
.
returncode
,
0
)
# Remove unnecessary files
if
os
.
path
.
exists
(
cluster_json_path
):
os
.
remove
(
cluster_json_path
)
rank_mapping_json_path
=
os
.
path
.
join
(
file_dir
,
"auto_parallel_rank_mapping.json"
)
if
os
.
path
.
exists
(
rank_mapping_json_path
):
os
.
remove
(
rank_mapping_json_path
)
log_path
=
os
.
path
.
join
(
file_dir
,
"log"
)
if
os
.
path
.
exists
(
log_path
):
shutil
.
rmtree
(
log_path
)
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_auto_parallel_cluster.py
浏览文件 @
2c8739e8
...
...
@@ -14,6 +14,7 @@
from
__future__
import
print_function
import
tempfile
import
unittest
import
os
import
json
...
...
@@ -201,15 +202,21 @@ cluster_json = """
class
TestAutoParallelCluster
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
temp_dir
=
tempfile
.
TemporaryDirectory
()
def
tearDown
(
self
):
self
.
temp_dir
.
cleanup
()
def
test_cluster
(
self
):
cluster_json_file
=
""
cluster_json_path
=
os
.
path
.
join
(
self
.
temp_dir
.
name
,
"auto_parallel_cluster.json"
)
cluster_json_object
=
json
.
loads
(
cluster_json
)
with
open
(
"./auto_parallel_cluster.json"
,
"w"
)
as
cluster_json_file
:
with
open
(
cluster_json_path
,
"w"
)
as
cluster_json_file
:
json
.
dump
(
cluster_json_object
,
cluster_json_file
)
cluster
=
Cluster
()
cluster
.
build_from_file
(
"./auto_parallel_cluster.json"
)
os
.
remove
(
"./auto_parallel_cluster.json"
)
cluster
.
build_from_file
(
cluster_json_path
)
self
.
assertEqual
(
len
(
cluster
.
get_all_devices
(
"GPU"
)),
4
)
self
.
assertEqual
(
len
(
cluster
.
get_all_devices
(
"CPU"
)),
2
)
...
...
python/paddle/fluid/tests/unittests/test_auto_parallel_mapper.py
浏览文件 @
2c8739e8
...
...
@@ -14,6 +14,7 @@
from
__future__
import
print_function
import
tempfile
import
unittest
import
os
import
json
...
...
@@ -527,14 +528,20 @@ def get_device_local_ids(machine):
class
TestAutoParallelMapper
(
unittest
.
TestCase
):
def
setUp
(
self
):
self
.
temp_dir
=
tempfile
.
TemporaryDirectory
()
def
tearDown
(
self
):
self
.
temp_dir
.
cleanup
()
def
test_mapper_dp_mp_pp
(
self
):
cluster_json_file
=
""
cluster_json_path
=
os
.
path
.
join
(
self
.
temp_dir
.
name
,
"auto_parallel_cluster.json"
)
cluster_json_object
=
json
.
loads
(
cluster_json
)
with
open
(
"./auto_parallel_cluster.json"
,
"w"
)
as
cluster_json_file
:
with
open
(
cluster_json_path
,
"w"
)
as
cluster_json_file
:
json
.
dump
(
cluster_json_object
,
cluster_json_file
)
cluster
=
Cluster
()
cluster
.
build_from_file
(
"./auto_parallel_cluster.json"
)
os
.
remove
(
"./auto_parallel_cluster.json"
)
cluster
.
build_from_file
(
cluster_json_path
)
global
_global_parallel_strategy
_global_parallel_strategy
=
"dp_mp_pp"
...
...
python/paddle/fluid/tests/unittests/test_auto_parallel_partitioner_gpt.py
浏览文件 @
2c8739e8
...
...
@@ -892,25 +892,6 @@ class TestGPTPartitioner(unittest.TestCase):
auto_parallel_main_prog
,
auto_parallel_startup_prog
,
params_grads
=
partitioner
.
partition
(
complete_train_program
,
startup_program
,
params_grads
)
with
open
(
"./test_auto_parallel_partitioner_serial_main_new.txt"
,
"w"
)
as
fw
:
fw
.
write
(
str
(
train_program
))
with
open
(
"./test_auto_parallel_partitioner_serial_startup_new.txt"
,
"w"
)
as
fw
:
fw
.
write
(
str
(
startup_program
))
from
paddle.distributed.auto_parallel.dist_context
import
set_default_distributed_context
set_default_distributed_context
(
dist_context
)
with
open
(
"./test_auto_parallel_partitioner_main_new.txt1"
,
"w"
)
as
fw
:
fw
.
write
(
str
(
auto_parallel_main_prog
))
with
open
(
"./test_auto_parallel_partitioner_startup_new.txt1"
,
"w"
)
as
fw
:
fw
.
write
(
str
(
auto_parallel_startup_prog
))
# with open("./test_auto_parallel_partitioner_main_completed.txt", "w") as fw:
# from paddle.distributed.auto_parallel.completion import Completer
# completer = Completer()
# completer.complete_forward_annotation(auto_parallel_main_prog)
# fw.write(str(auto_parallel_main_prog))
nrank
=
4
# col parallel
weights
=
[
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录