Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
MindSpore
mindinsight
提交
660a8dbf
M
mindinsight
项目概览
MindSpore
/
mindinsight
通知
7
Star
3
Fork
2
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
M
mindinsight
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
660a8dbf
编写于
4月 26, 2020
作者:
L
luopengting
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
optimize parsing for out-of-order events
上级
09fb9f16
变更
6
隐藏空白更改
内联
并排
Showing
6 changed file
with
114 addition
and
26 deletion
+114
-26
mindinsight/datavisual/data_transform/events_data.py
mindinsight/datavisual/data_transform/events_data.py
+8
-6
mindinsight/datavisual/data_transform/ms_data_loader.py
mindinsight/datavisual/data_transform/ms_data_loader.py
+10
-5
mindinsight/datavisual/data_transform/reservoir.py
mindinsight/datavisual/data_transform/reservoir.py
+33
-5
tests/ut/datavisual/data_transform/test_events_data.py
tests/ut/datavisual/data_transform/test_events_data.py
+56
-5
tests/ut/datavisual/data_transform/test_reservoir.py
tests/ut/datavisual/data_transform/test_reservoir.py
+4
-0
tests/ut/datavisual/processors/test_images_processor.py
tests/ut/datavisual/processors/test_images_processor.py
+3
-5
未找到文件。
mindinsight/datavisual/data_transform/events_data.py
浏览文件 @
660a8dbf
...
...
@@ -23,9 +23,9 @@ from mindinsight.conf import settings
# Type of the tensor event from external component
_Tensor
=
collections
.
namedtuple
(
'_Tensor'
,
[
'wall_time'
,
'step'
,
'value'
])
_Tensor
=
collections
.
namedtuple
(
'_Tensor'
,
[
'wall_time'
,
'step'
,
'value'
,
'filename'
])
TensorEvent
=
collections
.
namedtuple
(
'TensorEvent'
,
[
'wall_time'
,
'step'
,
'tag'
,
'plugin_name'
,
'value'
])
'TensorEvent'
,
[
'wall_time'
,
'step'
,
'tag'
,
'plugin_name'
,
'value'
,
'filename'
])
# config for `EventsData`
_DEFAULT_STEP_SIZES_PER_TAG
=
settings
.
DEFAULT_STEP_SIZES_PER_TAG
...
...
@@ -99,10 +99,11 @@ class EventsData:
tensor
=
_Tensor
(
wall_time
=
tensor_event
.
wall_time
,
step
=
tensor_event
.
step
,
value
=
tensor_event
.
value
)
value
=
tensor_event
.
value
,
filename
=
tensor_event
.
filename
)
if
self
.
_is_out_of_order_step
(
tensor_event
.
step
,
tensor_event
.
tag
):
self
.
purge_reservoir_data
(
tensor_event
.
step
,
self
.
_reservoir_by_tag
[
tag
])
self
.
purge_reservoir_data
(
tensor_event
.
filename
,
tensor_event
.
step
,
self
.
_reservoir_by_tag
[
tag
])
self
.
_reservoir_by_tag
[
tag
].
add_sample
(
tensor
)
...
...
@@ -176,7 +177,7 @@ class EventsData:
return
False
@
staticmethod
def
purge_reservoir_data
(
start_step
,
tensor_reservoir
):
def
purge_reservoir_data
(
filename
,
start_step
,
tensor_reservoir
):
"""
Purge all tensor event that are out-of-order step after the given start step.
...
...
@@ -188,7 +189,8 @@ class EventsData:
Returns:
int, the number of items removed.
"""
cnt_out_of_order
=
tensor_reservoir
.
remove_sample
(
lambda
x
:
x
.
step
<
start_step
)
cnt_out_of_order
=
tensor_reservoir
.
remove_sample
(
lambda
x
:
x
.
step
<
start_step
or
(
x
.
step
>
start_step
and
x
.
filename
==
filename
))
return
cnt_out_of_order
...
...
mindinsight/datavisual/data_transform/ms_data_loader.py
浏览文件 @
660a8dbf
...
...
@@ -223,7 +223,8 @@ class MSDataLoader:
step
=
event
.
step
,
tag
=
tag
,
plugin_name
=
PluginNameEnum
.
SCALAR
.
value
,
value
=
value
.
scalar_value
)
value
=
value
.
scalar_value
,
filename
=
self
.
_latest_summary_filename
)
self
.
_events_data
.
add_tensor_event
(
tensor_event
)
if
value
.
HasField
(
'image'
):
...
...
@@ -232,7 +233,8 @@ class MSDataLoader:
step
=
event
.
step
,
tag
=
tag
,
plugin_name
=
PluginNameEnum
.
IMAGE
.
value
,
value
=
value
.
image
)
value
=
value
.
image
,
filename
=
self
.
_latest_summary_filename
)
self
.
_events_data
.
add_tensor_event
(
tensor_event
)
if
value
.
HasField
(
'histogram'
):
...
...
@@ -242,7 +244,8 @@ class MSDataLoader:
step
=
event
.
step
,
tag
=
tag
,
plugin_name
=
PluginNameEnum
.
HISTOGRAM
.
value
,
value
=
histogram_msg
)
value
=
histogram_msg
,
filename
=
self
.
_latest_summary_filename
)
self
.
_events_data
.
add_tensor_event
(
tensor_event
)
if
event
.
HasField
(
'graph_def'
):
...
...
@@ -253,7 +256,8 @@ class MSDataLoader:
step
=
event
.
step
,
tag
=
self
.
_latest_summary_filename
,
plugin_name
=
PluginNameEnum
.
GRAPH
.
value
,
value
=
graph
)
value
=
graph
,
filename
=
self
.
_latest_summary_filename
)
try
:
graph_tags
=
self
.
_events_data
.
list_tags_by_plugin
(
PluginNameEnum
.
GRAPH
.
value
)
...
...
@@ -436,7 +440,8 @@ class _PbParser:
step
=
0
,
tag
=
filename
,
plugin_name
=
PluginNameEnum
.
GRAPH
.
value
,
value
=
graph
)
value
=
graph
,
filename
=
filename
)
logger
.
info
(
"Build graph success, file path: %s."
,
file_path
)
return
tensor_event
mindinsight/datavisual/data_transform/reservoir.py
浏览文件 @
660a8dbf
...
...
@@ -23,6 +23,24 @@ from mindinsight.utils.exceptions import ParamValueError
from
mindinsight.datavisual.utils.utils
import
calc_histogram_bins
def
binary_search
(
samples
,
target
):
"""Binary search target in samples."""
left
=
0
right
=
len
(
samples
)
-
1
while
left
<=
right
:
mid
=
(
left
+
right
)
//
2
if
target
<
samples
[
mid
].
step
:
right
=
mid
-
1
elif
target
>
samples
[
mid
].
step
:
left
=
mid
+
1
else
:
return
mid
# if right is -1, it is less than the first one.
# if list is [1, 2, 4], target is 3, right will be 1, so wo will insert by 2.
return
right
+
1
class
Reservoir
:
"""
A container based on Reservoir Sampling algorithm.
...
...
@@ -68,18 +86,28 @@ class Reservoir:
"""
with
self
.
_mutex
:
if
len
(
self
.
_samples
)
<
self
.
_samples_max_size
or
self
.
_samples_max_size
==
0
:
self
.
_
samples
.
append
(
sample
)
self
.
_
add_sample
(
sample
)
else
:
# Use the Reservoir Sampling algorithm to replace the old sample.
rand_int
=
self
.
_sample_selector
.
randint
(
0
,
self
.
_sample_counter
)
rand_int
=
self
.
_sample_selector
.
randint
(
0
,
self
.
_sample_counter
)
if
rand_int
<
self
.
_samples_max_size
:
self
.
_samples
.
pop
(
rand_int
)
self
.
_samples
.
append
(
sample
)
else
:
self
.
_samples
[
-
1
]
=
sample
self
.
_samples
=
self
.
_samples
[:
-
1
]
self
.
_add_sample
(
sample
)
self
.
_sample_counter
+=
1
def
_add_sample
(
self
,
sample
):
"""Search the index and add sample."""
if
not
self
.
_samples
or
sample
.
step
>
self
.
_samples
[
-
1
].
step
:
self
.
_samples
.
append
(
sample
)
return
index
=
binary_search
(
self
.
_samples
,
sample
.
step
)
if
index
==
len
(
self
.
_samples
):
self
.
_samples
.
append
(
sample
)
else
:
self
.
_samples
.
insert
(
index
,
sample
)
def
remove_sample
(
self
,
filter_fun
):
"""
Remove the samples from Reservoir that do not meet the filter criteria.
...
...
tests/ut/datavisual/data_transform/test_events_data.py
浏览文件 @
660a8dbf
...
...
@@ -36,9 +36,9 @@ class MockReservoir:
def
__init__
(
self
,
size
):
self
.
size
=
size
self
.
_samples
=
[
_Tensor
(
'wall_time1'
,
1
,
'value1'
),
_Tensor
(
'wall_time2'
,
2
,
'value2'
),
_Tensor
(
'wall_time3'
,
3
,
'value3'
)
_Tensor
(
'wall_time1'
,
1
,
'value1'
,
'filename1'
),
_Tensor
(
'wall_time2'
,
2
,
'value2'
,
'filename2'
),
_Tensor
(
'wall_time3'
,
3
,
'value3'
,
'filename3'
)
]
def
samples
(
self
):
...
...
@@ -107,7 +107,8 @@ class TestEventsData:
"""Test add_tensor_event success."""
ev_data
=
self
.
get_ev_data
()
t_event
=
TensorEvent
(
wall_time
=
1
,
step
=
4
,
tag
=
'new_tag'
,
plugin_name
=
'plugin_name1'
,
value
=
'value1'
)
t_event
=
TensorEvent
(
wall_time
=
1
,
step
=
4
,
tag
=
'new_tag'
,
plugin_name
=
'plugin_name1'
,
value
=
'value1'
,
filename
=
'filename'
)
ev_data
.
add_tensor_event
(
t_event
)
assert
'tag0'
not
in
ev_data
.
_tags
...
...
@@ -116,4 +117,54 @@ class TestEventsData:
assert
'tag0'
not
in
ev_data
.
_reservoir_by_tag
assert
'new_tag'
in
ev_data
.
_tags_by_plugin
[
'plugin_name1'
]
assert
ev_data
.
_reservoir_by_tag
[
'new_tag'
].
samples
()[
-
1
]
==
_Tensor
(
t_event
.
wall_time
,
t_event
.
step
,
t_event
.
value
)
t_event
.
value
,
'filename'
)
def
test_add_tensor_event_out_of_order
(
self
):
"""Test add_tensor_event success for out_of_order summaries."""
wall_time
=
1
value
=
'1'
tag
=
'tag'
plugin_name
=
'scalar'
file1
=
'file1'
ev_data
=
EventsData
()
steps
=
[
i
for
i
in
range
(
2
,
10
)]
for
step
in
steps
:
t_event
=
TensorEvent
(
wall_time
=
1
,
step
=
step
,
tag
=
tag
,
plugin_name
=
plugin_name
,
value
=
value
,
filename
=
file1
)
ev_data
.
add_tensor_event
(
t_event
)
t_event
=
TensorEvent
(
wall_time
=
1
,
step
=
1
,
tag
=
tag
,
plugin_name
=
plugin_name
,
value
=
value
,
filename
=
file1
)
ev_data
.
add_tensor_event
(
t_event
)
# Current steps should be: [1, 2, 3, 4, 5, 6, 7, 8, 9]
assert
len
(
ev_data
.
_reservoir_by_tag
[
tag
].
samples
())
==
len
(
steps
)
+
1
file2
=
'file2'
new_steps_1
=
[
5
,
10
]
for
step
in
new_steps_1
:
t_event
=
TensorEvent
(
wall_time
=
1
,
step
=
step
,
tag
=
tag
,
plugin_name
=
plugin_name
,
value
=
value
,
filename
=
file2
)
ev_data
.
add_tensor_event
(
t_event
)
assert
ev_data
.
_reservoir_by_tag
[
tag
].
samples
()[
-
1
]
==
_Tensor
(
wall_time
,
step
,
value
,
file2
)
# Current steps should be: [1, 2, 3, 4, 5, 10]
steps
=
[
1
,
2
,
3
,
4
,
5
,
10
]
samples
=
ev_data
.
_reservoir_by_tag
[
tag
].
samples
()
for
i
in
range
(
len
(
samples
)):
filename
=
file1
if
samples
[
i
].
step
<
5
else
file2
assert
samples
[
i
]
==
_Tensor
(
wall_time
,
steps
[
i
],
value
,
filename
)
new_steps_2
=
[
7
,
11
,
3
]
for
step
in
new_steps_2
:
t_event
=
TensorEvent
(
wall_time
=
1
,
step
=
step
,
tag
=
tag
,
plugin_name
=
plugin_name
,
value
=
value
,
filename
=
file2
)
ev_data
.
add_tensor_event
(
t_event
)
# Current steps should be: [1, 2, 3, 5, 7, 10, 11], file2: [3, 5, 7, 10, 11]
steps
=
[
1
,
2
,
3
,
5
,
7
,
10
,
11
]
new_steps_2
.
extend
(
new_steps_1
)
samples
=
ev_data
.
_reservoir_by_tag
[
tag
].
samples
()
for
i
in
range
(
len
(
samples
)):
filename
=
file2
if
samples
[
i
].
step
in
new_steps_2
else
file1
assert
samples
[
i
]
==
_Tensor
(
wall_time
,
steps
[
i
],
value
,
filename
)
tests/ut/datavisual/data_transform/test_reservoir.py
浏览文件 @
660a8dbf
...
...
@@ -27,10 +27,14 @@ class TestHistogramReservoir:
sample1
.
value
.
count
=
1
sample1
.
value
.
max
=
102
sample1
.
value
.
min
=
101
sample1
.
step
=
2
sample1
.
filename
=
'filename'
sample2
=
mock
.
MagicMock
()
sample2
.
value
.
count
=
2
sample2
.
value
.
max
=
102
sample2
.
value
.
min
=
101
sample2
.
step
=
1
sample2
.
filename
=
'filename'
my_reservoir
.
add_sample
(
sample1
)
my_reservoir
.
add_sample
(
sample2
)
samples
=
my_reservoir
.
samples
()
...
...
tests/ut/datavisual/processors/test_images_processor.py
浏览文件 @
660a8dbf
...
...
@@ -216,9 +216,8 @@ class TestImagesProcessor:
"""
Test removing sample in reservoir.
If step list is [1, 3, 5, 7, 9, 2, 3, 4, 15],
and then [3, 5, 7, 9] will be deleted.
Results will be [1, 2, 3, 4, 15].
If step list is [1, 3, 5, 7, 9, 2, 3, 4, 15] in one summary,
Results will be [1, 2, 3, 4, 5, 7, 9, 15].
"""
test_tag_name
=
self
.
_complete_tag_name
...
...
@@ -237,5 +236,4 @@ class TestImagesProcessor:
except
ImageNotExistError
:
not_found_step_list
.
append
(
test_step
)
assert
current_step_list
==
[
1
,
2
,
3
,
4
,
15
]
assert
not_found_step_list
==
[
5
,
7
,
9
]
assert
current_step_list
==
[
1
,
2
,
3
,
4
,
5
,
7
,
9
,
15
]
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录