Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
2c5edb4f
P
Paddle
项目概览
PaddlePaddle
/
Paddle
大约 1 年 前同步成功
通知
2298
Star
20931
Fork
5422
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
2c5edb4f
编写于
3月 15, 2022
作者:
Y
Yulong Ao
提交者:
GitHub
3月 15, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[Auto Parallel] Add the recorder and trial class for the tuner (#40555)
Add the recorder
上级
0c703fe7
变更
5
隐藏空白更改
内联
并排
Showing
5 changed files
with
538 additions
and
0 deletions
+538
-0
python/paddle/distributed/auto_parallel/tuner/recorder.py
python/paddle/distributed/auto_parallel/tuner/recorder.py
+214
-0
python/paddle/distributed/auto_parallel/tuner/trial.py
python/paddle/distributed/auto_parallel/tuner/trial.py
+114
-0
python/paddle/fluid/tests/unittests/auto_parallel/CMakeLists.txt
...paddle/fluid/tests/unittests/auto_parallel/CMakeLists.txt
+5
-0
python/paddle/fluid/tests/unittests/auto_parallel/test_recorder.py
...ddle/fluid/tests/unittests/auto_parallel/test_recorder.py
+152
-0
python/paddle/fluid/tests/unittests/auto_parallel/test_trial.py
.../paddle/fluid/tests/unittests/auto_parallel/test_trial.py
+53
-0
未找到文件。
python/paddle/distributed/auto_parallel/tuner/recorder.py
0 → 100644
浏览文件 @
2c5edb4f
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
numpy
as
np
class MetricRecord(object):
    """
    A single observation of one metric together with the execution step
    at which it was taken.
    """

    def __init__(self, value, step):
        self._value = value
        self._step = step

    @property
    def value(self):
        """The stored metric value."""
        return self._value

    @value.setter
    def value(self, new_value):
        self._value = new_value

    @property
    def step(self):
        """The execution step this record belongs to."""
        return self._step

    @step.setter
    def step(self, new_step):
        self._step = new_step

    def mean(self):
        """Return ``np.mean`` applied to the stored value."""
        return np.mean(self.value)

    def get_state(self):
        """Return a dict with the "value" and "step" of this record."""
        state = {}
        state["value"] = self.value
        state["step"] = self.step
        return state

    @classmethod
    def from_state(cls, state):
        """Build a record by passing the entries of ``state`` as keywords."""
        return cls(**state)

    def __eq__(self, other):
        # Only another MetricRecord can compare equal; both fields must match.
        if isinstance(other, MetricRecord):
            return other.value == self.value and other.step == self.step
        return False

    def __repr__(self):
        template = "MetricRecord(value={}, step={})"
        return template.format(self.value, self.step)
class MetricRecords(object):
    """
    Records of a single metric across different executions.

    Records are stored keyed by step, so each step holds at most one
    ``MetricRecord``. ``direction`` tells whether the smallest ("min") or
    the largest ("max") value is treated as the best one.
    """

    def __init__(self, direction="min"):
        """
        Args:
            direction: Either "min" or "max".

        Raises:
            ValueError: If ``direction`` is neither "min" nor "max".
        """
        if direction not in {"min", "max"}:
            # The literal braces are doubled: a bare "{min, max}" would be
            # parsed by str.format as a replacement field and raise KeyError
            # instead of the intended ValueError.
            raise ValueError(
                "direction should be one of {{min, max}}, but got: {}.".format(
                    direction))
        self._direction = direction
        self._records = {}

    @property
    def records(self):
        """All stored records as a list sorted by ascending step."""
        return sorted(self._records.values(), key=lambda r: r.step)

    @records.setter
    def records(self, records):
        # Each incoming record is merged through update(); records already
        # stored for other steps are left untouched.
        for r in records:
            self.update(r.value, step=r.step)

    @property
    def direction(self):
        """The optimization direction given at construction or set later."""
        return self._direction

    @direction.setter
    def direction(self, direction):
        self._direction = direction

    def update(self, value, step=0):
        """
        Store ``value`` for ``step``.

        If a record already exists for ``step`` its value is overwritten,
        otherwise a new ``MetricRecord`` is created.
        """
        if step in self._records:
            # MetricRecord exposes ``value`` as a property with a setter;
            # it has no ``set_value`` method.
            self._records[step].value = value
        else:
            self._records[step] = MetricRecord(value, step=step)

    def get_best_value(self):
        """
        Return the best record mean according to ``direction``, ignoring
        NaNs, or ``None`` when there are no records.
        """
        values = [r.mean() for r in self._records.values()]
        if not values:
            return None
        if self._direction == "min":
            return np.nanmin(values)
        return np.nanmax(values)

    def get_best_step(self):
        """
        Return the step of the first stored record whose mean equals the
        best value, or ``None`` when there are no records or no record
        compares equal to the best value.
        """
        best_value = self.get_best_value()
        if best_value is None:
            return None
        for r in self._records.values():
            if r.mean() == best_value:
                return r.step
        return None

    def get_statistics(self):
        """
        Return NaN-ignoring summary statistics of the record means as plain
        floats ("min", "max", "mean", "median", "var", "std"), or an empty
        dict when there are no records.
        """
        records_values = [r.mean() for r in self.records]
        if not records_values:
            return {}
        return {
            "min": float(np.nanmin(records_values)),
            "max": float(np.nanmax(records_values)),
            "mean": float(np.nanmean(records_values)),
            "median": float(np.nanmedian(records_values)),
            "var": float(np.nanvar(records_values)),
            "std": float(np.nanstd(records_values)),
        }

    def get_state(self):
        """Return a dict with the direction and the state of every record."""
        state = {}
        state["direction"] = self._direction
        state["records"] = [r.get_state() for r in self.records]
        return state

    @classmethod
    def from_state(cls, state):
        """Rebuild a ``MetricRecords`` from a dict made by ``get_state``."""
        records = cls(state["direction"])
        records.records = [MetricRecord.from_state(r) for r in state["records"]]
        return records
class MetricsRecorder(object):
    """
    Keeps one ``MetricRecords`` collection per metric name.
    """

    def __init__(self, metrics=None):
        self._records = {}
        self.register_metrics(metrics)

    @property
    def records(self):
        """The mapping from metric name to its records collection."""
        return self._records

    def exists(self, name):
        """Tell whether a metric called ``name`` has been registered."""
        return name in self._records

    def register_metrics(self, metrics=None):
        """Register every item of ``metrics`` under its ``name`` attribute."""
        for metric in metrics or []:
            self.register(metric.name)

    def register(self, name, direction=None):
        """
        Create an empty records collection for ``name``.

        ``direction`` falls back to "min" when it is None. A ValueError is
        raised when ``name`` is already registered.
        """
        if self.exists(name):
            raise ValueError("Metric {} have been registered.".format(name))
        chosen = "min" if direction is None else direction
        self._records[name] = MetricRecords(chosen)

    def update(self, name, value, step=0):
        """
        Record ``value`` (converted to float) for ``name`` at ``step``,
        registering the metric first if needed.

        Returns whether the best value differs from the one before the
        update.
        """
        value = float(value)
        if not self.exists(name):
            self.register(name)
        best_before = self._records[name].get_best_value()
        self._records[name].update(value, step=step)
        best_after = self._records[name].get_best_value()
        return best_after != best_before

    def get_records(self, name):
        """Return the ``records`` of the metric ``name``."""
        return self._records[name].records

    def set_records(self, name, records):
        """Assign ``records`` to the metric ``name``, registering it if needed."""
        if not self.exists(name):
            self.register(name)
        self._records[name].records = records

    def get_best_value(self, name):
        """Delegate to ``get_best_value`` of the metric ``name``."""
        return self._records[name].get_best_value()

    def get_best_step(self, name):
        """Delegate to ``get_best_step`` of the metric ``name``."""
        return self._records[name].get_best_step()

    def get_statistics(self, name):
        """Delegate to ``get_statistics`` of the metric ``name``."""
        return self._records[name].get_statistics()

    def get_state(self):
        """Return ``{"metrics": {name: state, ...}}`` for all metrics."""
        metrics_state = {}
        for name, metric_records in self._records.items():
            metrics_state[name] = metric_records.get_state()
        return {"metrics": metrics_state}

    @classmethod
    def from_state(cls, state):
        """Rebuild a recorder from a dict produced by ``get_state``."""
        recorder = cls()
        restored = {}
        for name, metric_records in state["metrics"].items():
            restored[name] = MetricRecords.from_state(metric_records)
        recorder._records = restored
        return recorder
python/paddle/distributed/auto_parallel/tuner/trial.py
0 → 100644
浏览文件 @
2c5edb4f
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
hashlib
import
random
import
time
from
enum
import
Enum
from
.storable
import
Storable
from
.recorder
import
MetricsRecorder
from
.tunable_space
import
TunableSpace
class TrialStatus:
    """String constants used as the status of a trial."""
    # Default status passed to Trial.__init__ in this file.
    RUNNING = "RUNNING"
    COMPLETED = "COMPLETED"
    STOPPED = "STOPPED"
    INVALID = "INVALID"
class Trial(Storable):
    """
    A single tuning trial: an id, a tunable space, a metrics recorder, and
    the score, best step and status attached to it.
    """

    def __init__(self,
                 tunable_space,
                 trial_id=None,
                 status=TrialStatus.RUNNING):
        # A fresh id is generated only when the caller does not supply one.
        if trial_id is None:
            self._id = _generate_trial_id()
        else:
            self._id = trial_id
        self._space = tunable_space
        self._recorder = MetricsRecorder()
        self._score = None
        self._best_step = None
        self._status = status

    @property
    def id(self):
        """Identifier of this trial."""
        return self._id

    @property
    def space(self):
        """The tunable space given to this trial."""
        return self._space

    @property
    def recorder(self):
        """The MetricsRecorder owned by this trial."""
        return self._recorder

    @property
    def score(self):
        """Score of the trial; None until it is set."""
        return self._score

    @score.setter
    def score(self, new_score):
        self._score = new_score

    @property
    def best_step(self):
        """Best step of the trial; None until it is set."""
        return self._best_step

    @best_step.setter
    def best_step(self, new_best_step):
        self._best_step = new_best_step

    @property
    def status(self):
        """Current status of the trial."""
        return self._status

    @status.setter
    def status(self, new_status):
        self._status = new_status

    def summary(self):
        """Print the tunable values and, when one is set, the score."""
        print("Tunable space:")
        if self.space.values:
            for name, current in self.space.values.items():
                print(name + ":", current)

        if self.score is not None:
            print("Score: {}".format(self.score))

    def get_state(self):
        """Return the trial as a dict, nesting the space and recorder states."""
        state = {}
        state["id"] = self.id
        state["space"] = self.space.get_state()
        state["recorder"] = self.recorder.get_state()
        state["score"] = self.score
        state["best_step"] = self.best_step
        state["status"] = self.status
        return state

    def set_state(self, state):
        """Overwrite every field of this trial from a ``get_state`` dict."""
        self._id = state["id"]
        self._space = TunableSpace.from_state(state["space"])
        self._recorder = MetricsRecorder.from_state(state["recorder"])
        self._score = state["score"]
        self._best_step = state["best_step"]
        self._status = state["status"]

    @classmethod
    def from_state(cls, state):
        """Create a trial and populate it from a ``get_state`` dict."""
        # The placeholder space is replaced by set_state().
        restored = cls(tunable_space=None)
        restored.set_state(state)
        return restored
def _generate_trial_id():
    """
    Return a 32-character hex id: the first 32 characters of the SHA-256
    hex digest of the current time concatenated with a random integer.
    """
    timestamp = str(time.time())
    salt = str(random.randint(1, int(1e7)))
    seed = timestamp + salt
    digest = hashlib.sha256(seed.encode("utf-8")).hexdigest()
    return digest[:32]
python/paddle/fluid/tests/unittests/auto_parallel/CMakeLists.txt
浏览文件 @
2c5edb4f
...
...
@@ -11,4 +11,9 @@ if(WITH_DISTRIBUTE AND WITH_GPU)
set_tests_properties
(
test_engine_api PROPERTIES LABELS
"RUN_TYPE=EXCLUSIVE"
TIMEOUT 80
)
py_test_modules
(
test_converter MODULES test_converter ENVS
${
dist_ENVS
}
)
set_tests_properties
(
test_converter PROPERTIES LABELS
"RUN_TYPE=EXCLUSIVE"
TIMEOUT 50
)
py_test_modules
(
test_tunable_variable MODULES test_tunable_variable ENVS
${
dist_ENVS
}
)
py_test_modules
(
test_tunable_space MODULES test_tunable_space ENVS
${
dist_ENVS
}
)
py_test_modules
(
test_recorder MODULES test_recorder ENVS
${
dist_ENVS
}
)
py_test_modules
(
test_trial MODULES test_trial ENVS
${
dist_ENVS
}
)
endif
()
python/paddle/fluid/tests/unittests/auto_parallel/test_recorder.py
0 → 100644
浏览文件 @
2c5edb4f
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
unittest
import
numpy
as
np
from
paddle.distributed.auto_parallel.tuner
import
recorder
as
rd
class TestRecorder(unittest.TestCase):
    """Unit tests for MetricsRecorder and the record classes it uses."""

    @staticmethod
    def _make_records():
        """Return four fresh records with values 1..4 at steps 0..3."""
        return [rd.MetricRecord(step + 1, step) for step in range(4)]

    def test_register(self):
        recorder = rd.MetricsRecorder()
        recorder.register("metric")
        self.assertEqual(set(recorder.records.keys()), {"metric"})
        # No direction given, so the default "min" is expected.
        self.assertEqual(recorder.records["metric"].direction, "min")

    def test_exists(self):
        recorder = rd.MetricsRecorder()
        recorder.register("metric", direction="max")
        self.assertTrue(recorder.exists("metric"))

    def test_update(self):
        recorder = rd.MetricsRecorder()
        # Updating an unknown metric registers it on the fly.
        recorder.update("metric", 4, 1000)
        self.assertEqual(recorder.records["metric"].direction, "min")
        self.assertEqual(
            recorder.get_records("metric"), [rd.MetricRecord(4, 1000)])

    def test_get_records(self):
        recorder = rd.MetricsRecorder()
        for step in range(4):
            recorder.update("metric", step + 1, step=step)
        self.assertEqual(recorder.get_records("metric"), self._make_records())

    def test_set_records(self):
        recorder = rd.MetricsRecorder()
        recorder.set_records("metric", self._make_records())
        self.assertEqual(recorder.get_records("metric"), self._make_records())

    def test_get_best_value(self):
        recorder = rd.MetricsRecorder()
        recorder.register("metric_min", "min")
        recorder.register("metric_max", "max")

        recorder.set_records("metric_min", self._make_records())
        self.assertEqual(recorder.get_best_value("metric_min"), 1)

        recorder.set_records("metric_max", self._make_records())
        self.assertEqual(recorder.get_best_value("metric_max"), 4)

    def test_get_best_step(self):
        recorder = rd.MetricsRecorder()

        recorder.register("metric_min", "min")
        recorder.set_records("metric_min", self._make_records())
        self.assertEqual(recorder.get_best_step("metric_min"), 0)

        recorder.register("metric_max", "max")
        recorder.set_records("metric_max", self._make_records())
        self.assertEqual(recorder.get_best_step("metric_max"), 3)

    def test_get_statistics(self):
        recorder = rd.MetricsRecorder()
        records = [rd.MetricRecord(np.random.random(), i) for i in range(14)]
        recorder.set_records("metric", records)
        stats = recorder.get_statistics("metric")
        values = [r.value for r in records]
        # The statistics are floats computed through a different numpy code
        # path (nan-aware functions), so compare with a tolerance instead of
        # requiring bit-identical results.
        self.assertAlmostEqual(stats["min"], np.min(values))
        self.assertAlmostEqual(stats["max"], np.max(values))
        self.assertAlmostEqual(stats["mean"], np.mean(values))
        self.assertAlmostEqual(stats["median"], np.median(values))
        self.assertAlmostEqual(stats["var"], np.var(values))
        self.assertAlmostEqual(stats["std"], np.std(values))

    def test_serialization(self):
        recorder = rd.MetricsRecorder()
        recorder.register("metric")
        recorder.set_records("metric", self._make_records())
        new_recorder = rd.MetricsRecorder.from_state(recorder.get_state())
        self.assertEqual(new_recorder.records.keys(), recorder.records.keys())
        # The round trip must preserve the content, not only the names.
        self.assertEqual(new_recorder.records["metric"].direction,
                         recorder.records["metric"].direction)
        self.assertEqual(
            new_recorder.get_records("metric"), recorder.get_records("metric"))
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
python/paddle/fluid/tests/unittests/auto_parallel/test_trial.py
0 → 100644
浏览文件 @
2c5edb4f
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
unittest
from
paddle.distributed.auto_parallel.tuner
import
tunable_space
as
ts
from
paddle.distributed.auto_parallel.tuner
import
trial
as
tr
class TestTiral(unittest.TestCase):
    """Unit tests for the Trial class: basic accessors and state round trip."""

    def test_trial(self):
        space = ts.TunableSpace()
        space.choice("choice", [0, 1, 2, 3], default=2)

        trial = tr.Trial(space, trial_id="trial-1")
        trial.recorder.register("latency", direction="min")
        for step, latency in enumerate([0.1, 0.2]):
            trial.recorder.update("latency", latency, step=step)
        trial.best_step = 0

        self.assertEqual(trial.id, "trial-1")
        self.assertEqual(trial.space.get_value("choice"), 2)
        self.assertEqual(trial.best_step, 0)
        # No status was passed, so the default is expected.
        self.assertEqual(trial.status, "RUNNING")

    def test_serialization(self):
        space = ts.TunableSpace()
        space.int_range("int_range", start=1, stop=4, default=2)

        trial = tr.Trial(space, trial_id="trial-2", status="COMPLETED")
        trial.recorder.register("latency", direction="min")
        for step, latency in enumerate([0.1, 0.2]):
            trial.recorder.update("latency", latency, step=step)
        trial.best_step = 0

        # Rebuild the trial from its own state and check the copied fields.
        state = trial.get_state()
        new_trial = tr.Trial.from_state(state)
        self.assertEqual(new_trial.id, "trial-2")
        self.assertEqual(new_trial.space.get_value("int_range"), 2)
        self.assertEqual(new_trial.best_step, 0)
        self.assertEqual(new_trial.status, "COMPLETED")
# Run the tests in this module when the file is executed as a script.
if __name__ == "__main__":
    unittest.main()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录