Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
2c5edb4f
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
2c5edb4f
编写于
3月 15, 2022
作者:
Y
Yulong Ao
提交者:
GitHub
3月 15, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
[Auto Parallel] Add the recorder and trial class for the tuner (#40555)
Add the recorder
上级
0c703fe7
变更
5
隐藏空白更改
内联
并排
Showing
5 changed files
with
538 additions
and
0 deletions
+538
-0
python/paddle/distributed/auto_parallel/tuner/recorder.py
python/paddle/distributed/auto_parallel/tuner/recorder.py
+214
-0
python/paddle/distributed/auto_parallel/tuner/trial.py
python/paddle/distributed/auto_parallel/tuner/trial.py
+114
-0
python/paddle/fluid/tests/unittests/auto_parallel/CMakeLists.txt
...paddle/fluid/tests/unittests/auto_parallel/CMakeLists.txt
+5
-0
python/paddle/fluid/tests/unittests/auto_parallel/test_recorder.py
...ddle/fluid/tests/unittests/auto_parallel/test_recorder.py
+152
-0
python/paddle/fluid/tests/unittests/auto_parallel/test_trial.py
.../paddle/fluid/tests/unittests/auto_parallel/test_trial.py
+53
-0
未找到文件。
python/paddle/distributed/auto_parallel/tuner/recorder.py
0 → 100644
浏览文件 @
2c5edb4f
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
numpy
as
np
class MetricRecord(object):
    """
    A single observation of one metric, tagged with the step it belongs to.

    Both ``value`` and ``step`` are stored exactly as given and can be read
    or reassigned through the properties of the same name.
    """

    def __init__(self, value, step):
        self._value, self._step = value, step

    @property
    def value(self):
        """The recorded value."""
        return self._value

    @value.setter
    def value(self, value):
        self._value = value

    @property
    def step(self):
        """The step this value was recorded at."""
        return self._step

    @step.setter
    def step(self, step):
        self._step = step

    def mean(self):
        """Return ``np.mean`` of the recorded value."""
        return np.mean(self.value)

    def get_state(self):
        """Return a dict with the ``"value"`` and ``"step"`` of this record."""
        return dict(value=self.value, step=self.step)

    @classmethod
    def from_state(cls, state):
        """Build a record from a dict as produced by ``get_state``."""
        return cls(**state)

    def __eq__(self, other):
        # Only another MetricRecord with equal value and step compares equal.
        if isinstance(other, MetricRecord):
            return other.value == self.value and other.step == self.step
        return False

    def __repr__(self):
        template = "MetricRecord(value={}, step={})"
        return template.format(self.value, self.step)
class MetricRecords(object):
    """
    Records of a single metric across different executions.

    Records are stored in a dict keyed by step, so each step holds at most
    one record. ``direction`` ("min" or "max") selects whether the smallest
    or the largest recorded mean counts as the best value.
    """

    def __init__(self, direction="min"):
        """
        Args:
            direction: Either "min" or "max".

        Raises:
            ValueError: If ``direction`` is neither "min" nor "max".
        """
        if direction not in {"min", "max"}:
            # The literal braces are doubled: an unescaped "{min, max}" is
            # parsed by str.format as a replacement field and raises
            # KeyError instead of the intended ValueError.
            raise ValueError(
                "direction should be one of {{min, max}}, but got: {}.".format(
                    direction))
        self._direction = direction
        self._records = {}

    @property
    def records(self):
        """The stored records as a list sorted by step."""
        return sorted(self._records.values(), key=lambda r: r.step)

    @records.setter
    def records(self, records):
        # Records are merged through update(): entries for steps not listed
        # in ``records`` are kept, entries for listed steps are overwritten.
        for r in records:
            self.update(r.value, step=r.step)

    @property
    def direction(self):
        """The optimization direction given at construction or set later."""
        return self._direction

    @direction.setter
    def direction(self, direction):
        self._direction = direction

    def update(self, value, step=0):
        """Store ``value`` for ``step``, overwriting any value already there."""
        if step in self._records:
            # MetricRecord exposes ``value`` as a property; it has no
            # set_value() method, so assign through the property.
            self._records[step].value = value
        else:
            self._records[step] = MetricRecord(value, step=step)

    def get_best_value(self):
        """
        Return the best per-record mean according to ``direction``.

        Returns None when no record has been stored. NaN means are ignored
        by the nan-aware NumPy reductions.
        """
        values = [r.mean() for r in self._records.values()]
        if not values:
            return None
        if self._direction == "min":
            return np.nanmin(values)
        return np.nanmax(values)

    def get_best_step(self):
        """
        Return the step of the first stored record whose mean equals the
        best value, or None when there is no such record.
        """
        best_value = self.get_best_value()
        if best_value is None:
            return None
        for r in self._records.values():
            if r.mean() == best_value:
                return r.step
        return None

    def get_statistics(self):
        """
        Return min/max/mean/median/var/std of the per-record means as floats.

        Returns an empty dict when no record has been stored.
        """
        records_values = [r.mean() for r in self.records]
        if not records_values:
            return {}
        return {
            "min": float(np.nanmin(records_values)),
            "max": float(np.nanmax(records_values)),
            "mean": float(np.nanmean(records_values)),
            "median": float(np.nanmedian(records_values)),
            "var": float(np.nanvar(records_values)),
            "std": float(np.nanstd(records_values)),
        }

    def get_state(self):
        """Return a dict with the direction and the step-sorted record states."""
        return {
            "direction": self._direction,
            "records": [r.get_state() for r in self.records],
        }

    @classmethod
    def from_state(cls, state):
        """Rebuild an instance from a dict as produced by ``get_state``."""
        records = cls(state["direction"])
        records.records = [MetricRecord.from_state(r) for r in state["records"]]
        return records
class MetricsRecorder(object):
    """
    Keeps one ``MetricRecords`` collection per metric name.
    """

    def __init__(self, metrics=None):
        self._records = {}
        self.register_metrics(metrics)

    @property
    def records(self):
        """The dict mapping each metric name to its records collection."""
        return self._records

    def exists(self, name):
        """Return whether a metric called ``name`` has been registered."""
        return name in self._records

    def register_metrics(self, metrics=None):
        """Register every item of ``metrics`` under its ``name`` attribute."""
        for metric in (metrics or []):
            self.register(metric.name)

    def register(self, name, direction=None):
        """
        Create an empty records collection for ``name``.

        ``direction`` falls back to "min" when it is None.

        Raises:
            ValueError: If ``name`` is already registered.
        """
        if self.exists(name):
            raise ValueError("Metric {} have been registered.".format(name))
        chosen = "min" if direction is None else direction
        self._records[name] = MetricRecords(chosen)

    def update(self, name, value, step=0):
        """
        Record ``float(value)`` for ``name`` at ``step``.

        An unknown metric is registered first. Returns whether the best
        value of the metric differs from what it was before this call.
        """
        value = float(value)
        if not self.exists(name):
            self.register(name)
        metric_records = self._records[name]
        best_before = metric_records.get_best_value()
        metric_records.update(value, step=step)
        best_after = metric_records.get_best_value()
        return best_after != best_before

    def get_records(self, name):
        """Return the records stored for ``name``."""
        return self._records[name].records

    def set_records(self, name, records):
        """Assign ``records`` to ``name``, registering the metric if needed."""
        if not self.exists(name):
            self.register(name)
        self._records[name].records = records

    def get_best_value(self, name):
        """Return the best value recorded for ``name``."""
        return self._records[name].get_best_value()

    def get_best_step(self, name):
        """Return the step at which ``name`` reached its best value."""
        return self._records[name].get_best_step()

    def get_statistics(self, name):
        """Return the summary statistics of ``name``."""
        return self._records[name].get_statistics()

    def get_state(self):
        """Return ``{"metrics": {name: state}}`` covering every metric."""
        per_metric = {}
        for name, metric_records in self._records.items():
            per_metric[name] = metric_records.get_state()
        return {"metrics": per_metric}

    @classmethod
    def from_state(cls, state):
        """Rebuild a recorder from a dict as produced by ``get_state``."""
        recorder = cls()
        restored = {}
        for name, metric_records in state["metrics"].items():
            restored[name] = MetricRecords.from_state(metric_records)
        recorder._records = restored
        return recorder
python/paddle/distributed/auto_parallel/tuner/trial.py
0 → 100644
浏览文件 @
2c5edb4f
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
hashlib
import
random
import
time
from
enum
import
Enum
from
.storable
import
Storable
from
.recorder
import
MetricsRecorder
from
.tunable_space
import
TunableSpace
class TrialStatus:
    """String constants for trial states; each value equals its own name."""

    RUNNING = "RUNNING"
    COMPLETED = "COMPLETED"
    STOPPED = "STOPPED"
    INVALID = "INVALID"
class Trial(Storable):
    """
    One tuning attempt: a tunable space together with a metrics recorder,
    a score, a best step and a status.
    """

    def __init__(self, tunable_space, trial_id=None,
                 status=TrialStatus.RUNNING):
        # A fresh id is generated only when the caller does not supply one.
        if trial_id is None:
            trial_id = _generate_trial_id()
        self._id = trial_id
        self._space = tunable_space
        self._recorder = MetricsRecorder()
        self._score = None
        self._best_step = None
        self._status = status

    @property
    def id(self):
        """The identifier of this trial."""
        return self._id

    @property
    def space(self):
        """The tunable space attached to this trial."""
        return self._space

    @property
    def recorder(self):
        """The MetricsRecorder holding this trial's metrics."""
        return self._recorder

    @property
    def score(self):
        """The score of this trial, or None when it has not been set."""
        return self._score

    @score.setter
    def score(self, score):
        self._score = score

    @property
    def best_step(self):
        """The best step of this trial, or None when it has not been set."""
        return self._best_step

    @best_step.setter
    def best_step(self, best_step):
        self._best_step = best_step

    @property
    def status(self):
        """The current status of this trial."""
        return self._status

    @status.setter
    def status(self, status):
        self._status = status

    def summary(self):
        """Print the tunable-space values and, when set, the score."""
        print("Tunable space:")
        if self.space.values:
            for tv, value in self.space.values.items():
                print(tv + ":", value)
        if self.score is None:
            return
        print("Score: {}".format(self.score))

    def get_state(self):
        """Return a dict describing this trial, including nested states."""
        state = {}
        state["id"] = self.id
        state["space"] = self.space.get_state()
        state["recorder"] = self.recorder.get_state()
        state["score"] = self.score
        state["best_step"] = self.best_step
        state["status"] = self.status
        return state

    def set_state(self, state):
        """Overwrite every field from a dict as produced by ``get_state``."""
        self._id = state["id"]
        self._space = TunableSpace.from_state(state["space"])
        self._recorder = MetricsRecorder.from_state(state["recorder"])
        self._score = state["score"]
        self._best_step = state["best_step"]
        self._status = state["status"]

    @classmethod
    def from_state(cls, state):
        """Create a trial and populate it from ``state``."""
        # The placeholder space is replaced immediately by set_state().
        trial = cls(tunable_space=None)
        trial.set_state(state)
        return trial
def _generate_trial_id():
    """
    Return a 32-character hexadecimal id.

    The id is the first 32 hex digits of the SHA-256 digest of the current
    time concatenated with a random integer between 1 and 10,000,000.
    """
    timestamp = str(time.time())
    salt = str(random.randint(1, int(1e7)))
    seed = (timestamp + salt).encode("utf-8")
    digest = hashlib.sha256(seed).hexdigest()
    return digest[:32]
python/paddle/fluid/tests/unittests/auto_parallel/CMakeLists.txt
浏览文件 @
2c5edb4f
...
...
@@ -11,4 +11,9 @@ if(WITH_DISTRIBUTE AND WITH_GPU)
set_tests_properties
(
test_engine_api PROPERTIES LABELS
"RUN_TYPE=EXCLUSIVE"
TIMEOUT 80
)
py_test_modules
(
test_converter MODULES test_converter ENVS
${
dist_ENVS
}
)
set_tests_properties
(
test_converter PROPERTIES LABELS
"RUN_TYPE=EXCLUSIVE"
TIMEOUT 50
)
py_test_modules
(
test_tunable_variable MODULES test_tunable_variable ENVS
${
dist_ENVS
}
)
py_test_modules
(
test_tunable_space MODULES test_tunable_space ENVS
${
dist_ENVS
}
)
py_test_modules
(
test_recorder MODULES test_recorder ENVS
${
dist_ENVS
}
)
py_test_modules
(
test_trial MODULES test_trial ENVS
${
dist_ENVS
}
)
endif
()
python/paddle/fluid/tests/unittests/auto_parallel/test_recorder.py
0 → 100644
浏览文件 @
2c5edb4f
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
unittest
import
numpy
as
np
from
paddle.distributed.auto_parallel.tuner
import
recorder
as
rd
class TestRecorder(unittest.TestCase):
    """Unit tests for ``MetricsRecorder`` and the record classes it uses."""

    @staticmethod
    def _make_records():
        """Build four records holding the values 1..4 at steps 0..3."""
        return [
            rd.MetricRecord(1, 0),
            rd.MetricRecord(2, 1),
            rd.MetricRecord(3, 2),
            rd.MetricRecord(4, 3),
        ]

    def test_register(self):
        recorder = rd.MetricsRecorder()
        recorder.register("metric")
        self.assertEqual(set(recorder.records.keys()), {"metric"})
        self.assertEqual(recorder.records["metric"].direction, "min")

    def test_exists(self):
        recorder = rd.MetricsRecorder()
        recorder.register("metric", direction="max")
        self.assertTrue(recorder.exists("metric"))

    def test_update(self):
        recorder = rd.MetricsRecorder()
        # Updating an unknown metric registers it with the default direction.
        recorder.update("metric", 4, 1000)
        self.assertEqual(recorder.records["metric"].direction, "min")
        self.assertEqual(
            recorder.get_records("metric"), [rd.MetricRecord(4, 1000)])

    def test_get_records(self):
        recorder = rd.MetricsRecorder()
        recorder.update("metric", 1, step=0)
        recorder.update("metric", 2, step=1)
        recorder.update("metric", 3, step=2)
        recorder.update("metric", 4, step=3)
        self.assertEqual(recorder.get_records("metric"), self._make_records())

    def test_set_records(self):
        recorder = rd.MetricsRecorder()
        recorder.set_records("metric", self._make_records())
        self.assertEqual(recorder.get_records("metric"), self._make_records())

    def test_get_best_value(self):
        recorder = rd.MetricsRecorder()
        recorder.register("metric_min", "min")
        recorder.register("metric_max", "max")

        recorder.set_records("metric_min", self._make_records())
        self.assertEqual(recorder.get_best_value("metric_min"), 1)

        recorder.set_records("metric_max", self._make_records())
        self.assertEqual(recorder.get_best_value("metric_max"), 4)

    def test_get_best_step(self):
        recorder = rd.MetricsRecorder()

        recorder.register("metric_min", "min")
        recorder.set_records("metric_min", self._make_records())
        self.assertEqual(recorder.get_best_step("metric_min"), 0)

        recorder.register("metric_max", "max")
        recorder.set_records("metric_max", self._make_records())
        self.assertEqual(recorder.get_best_step("metric_max"), 3)

    def test_get_statistics(self):
        recorder = rd.MetricsRecorder()
        records = [rd.MetricRecord(np.random.random(), i) for i in range(14)]
        recorder.set_records("metric", records)
        stats = recorder.get_statistics("metric")
        values = [r.value for r in records]
        expected = {
            "min": np.min(values),
            "max": np.max(values),
            "mean": np.mean(values),
            "median": np.median(values),
            "var": np.var(values),
            "std": np.std(values),
        }
        # The recorder computes these with the nan-aware reductions, so
        # compare with a tolerance instead of requiring bit-identical floats.
        for key, value in expected.items():
            self.assertAlmostEqual(stats[key], value)

    def test_serialization(self):
        recorder = rd.MetricsRecorder()
        recorder.register("metric")
        recorder.set_records("metric", self._make_records())
        new_recorder = rd.MetricsRecorder.from_state(recorder.get_state())
        self.assertEqual(new_recorder.records.keys(), recorder.records.keys())
        # A round trip must also preserve the direction and the records
        # themselves, not just the metric names.
        self.assertEqual(new_recorder.records["metric"].direction,
                         recorder.records["metric"].direction)
        self.assertEqual(new_recorder.get_records("metric"),
                         recorder.get_records("metric"))
# Run the tests of this module when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
python/paddle/fluid/tests/unittests/auto_parallel/test_trial.py
0 → 100644
浏览文件 @
2c5edb4f
# Copyright (c) 2022 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
unittest
from
paddle.distributed.auto_parallel.tuner
import
tunable_space
as
ts
from
paddle.distributed.auto_parallel.tuner
import
trial
as
tr
class TestTiral(unittest.TestCase):
    """Unit tests for ``Trial``: attribute access and state round trip."""

    def test_trial(self):
        tunable_space = ts.TunableSpace()
        tunable_space.choice("choice", [0, 1, 2, 3], default=2)

        trial = tr.Trial(tunable_space, trial_id="trial-1")
        metrics = trial.recorder
        metrics.register("latency", direction="min")
        metrics.update("latency", 0.1, step=0)
        metrics.update("latency", 0.2, step=1)
        trial.best_step = 0

        self.assertEqual(trial.id, "trial-1")
        self.assertEqual(trial.space.get_value("choice"), 2)
        self.assertEqual(trial.best_step, 0)
        # No status was passed, so the trial keeps the default one.
        self.assertEqual(trial.status, "RUNNING")

    def test_serialization(self):
        tunable_space = ts.TunableSpace()
        tunable_space.int_range("int_range", start=1, stop=4, default=2)

        trial = tr.Trial(tunable_space, trial_id="trial-2", status="COMPLETED")
        metrics = trial.recorder
        metrics.register("latency", direction="min")
        metrics.update("latency", 0.1, step=0)
        metrics.update("latency", 0.2, step=1)
        trial.best_step = 0

        # Round trip through the state dict and check what survives.
        new_trial = tr.Trial.from_state(trial.get_state())
        self.assertEqual(new_trial.id, "trial-2")
        self.assertEqual(new_trial.space.get_value("int_range"), 2)
        self.assertEqual(new_trial.best_step, 0)
        self.assertEqual(new_trial.status, "COMPLETED")
# Run the tests of this module when it is executed directly as a script.
if __name__ == "__main__":
    unittest.main()
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录