MindSpore / mindinsight

Commit 008bdf56
Authored on Jul 20, 2020 by mindspore-ci-bot; committed by Gitee on Jul 20, 2020.

!437 Use multiple processes to calc events

Merge pull request !437 from wangshuide/wsd_multiple_processes_for_file_parsing

Parents: 4211e0ec 7877f33b
Showing 9 changed files with 148 additions and 81 deletions (+148 -81):
mindinsight/backend/run.py                                  +5    -6
mindinsight/conf/constants.py                               +2    -0
mindinsight/datavisual/data_transform/data_loader.py        +8    -3
mindinsight/datavisual/data_transform/data_manager.py       +4    -4
mindinsight/datavisual/data_transform/events_data.py        +1    -0
mindinsight/datavisual/data_transform/ms_data_loader.py     +101  -48
mindinsight/datavisual/data_transform/tensor_container.py   +2    -3
mindinsight/datavisual/processors/tensor_processor.py       +3    -3
mindinsight/scripts/stop.py                                 +22   -14
mindinsight/backend/run.py

@@ -236,9 +236,10 @@ def start():
     process = subprocess.Popen(
         shlex.split(cmd),
         shell=False,
-        stdin=subprocess.PIPE,
-        stdout=subprocess.PIPE,
-        stderr=subprocess.PIPE
+        # Change stdout to DEVNULL to prevent broken pipe error when creating new processes.
+        stdin=subprocess.DEVNULL,
+        stdout=subprocess.DEVNULL,
+        stderr=subprocess.STDOUT
     )
     # sleep 1 second for gunicorn appplication to load modules
@@ -246,9 +247,7 @@ def start():
     # check if gunicorn application is running
     if process.poll() is not None:
-        _, stderr = process.communicate()
-        for line in stderr.decode().split('\n'):
-            console.error(line)
+        console.error("Start MindInsight failed. See log for details.")
     else:
         state_result = _check_server_start_stat(errorlog_abspath, log_size)
         # print gunicorn start state to stdout
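An editorial aside, not part of the commit: the first hunk matters because a detached child whose stdout/stderr are PIPEs that nobody drains can block on a full pipe buffer or hit a broken pipe once the parent exits. A minimal sketch of the pattern the new code adopts (the command string is illustrative):

    import shlex
    import subprocess

    cmd = "python -c 'import time; time.sleep(60)'"  # hypothetical daemon command
    process = subprocess.Popen(
        shlex.split(cmd),
        shell=False,
        stdin=subprocess.DEVNULL,   # the daemon reads nothing from us
        stdout=subprocess.DEVNULL,  # discard output instead of buffering a pipe
        stderr=subprocess.STDOUT,   # fold stderr into the same sink
    )
    print(process.poll())           # None while the child is still running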
mindinsight/conf/constants.py

@@ -14,6 +14,7 @@
 # ============================================================================
 """Constants module for mindinsight settings."""
 import logging
+import os

 ####################################
 # Global default settings.
@@ -48,6 +49,7 @@ API_PREFIX = '/v1/mindinsight'
 # Datavisual default settings.
 ####################################
 MAX_THREADS_COUNT = 15
+MAX_PROCESSES_COUNT = max(os.cpu_count() or 0, 15)

 MAX_TAG_SIZE_PER_EVENTS_DATA = 300
 DEFAULT_STEP_SIZES_PER_TAG = 500
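A quick editorial sanity check of the new constant: os.cpu_count() may return None, and the `or 0` guard turns that into a floor of 15.

    def max_processes_count(cpu_count):
        # mirrors MAX_PROCESSES_COUNT = max(os.cpu_count() or 0, 15)
        return max(cpu_count or 0, 15)

    assert max_processes_count(None) == 15  # cpu count unknown -> fall back to 15
    assert max_processes_count(8) == 15     # small machines still allow 15
    assert max_processes_count(64) == 64    # large machines scale with the cores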
mindinsight/datavisual/data_transform/data_loader.py

@@ -34,8 +34,13 @@ class DataLoader:
         self._summary_dir = summary_dir
         self._loader = None

-    def load(self):
-        """Load the data when loader is exist."""
+    def load(self, workers_count=1):
+        """Load the data when loader is exist.
+
+        Args:
+            workers_count (int): The count of workers. Default value is 1.
+        """
         if self._loader is None:
             ms_dataloader = MSDataLoader(self._summary_dir)
             loaders = [ms_dataloader]
@@ -48,7 +53,7 @@ class DataLoader:
             logger.warning("No valid files can be loaded, summary_dir: %s.", self._summary_dir)
             raise exceptions.SummaryLogPathInvalid()

-        self._loader.load()
+        self._loader.load(workers_count)

     def get_events_data(self):
         """
mindinsight/datavisual/data_transform/data_manager.py

@@ -510,7 +510,7 @@ class _DetailCacheManager(_BaseCacheManager):
             logger.debug("delete loader %s", loader_id)
             self._loader_pool.pop(loader_id)

-    def _execute_loader(self, loader_id):
+    def _execute_loader(self, loader_id, workers_count):
         """
         Load data form data_loader.
@@ -518,7 +518,7 @@ class _DetailCacheManager(_BaseCacheManager):
         Args:
             loader_id (str): An ID for `Loader`.
+            workers_count (int): The count of workers.
         """
         try:
             with self._loader_pool_mutex:
@@ -527,7 +527,7 @@ class _DetailCacheManager(_BaseCacheManager):
                 logger.debug("Loader %r has been deleted, will not load data.", loader_id)
                 return
-            loader.data_loader.load()
+            loader.data_loader.load(workers_count)

             # Update loader cache status to CACHED.
             # Loader with cache status CACHED should remain the same cache status.
@@ -584,7 +584,7 @@ class _DetailCacheManager(_BaseCacheManager):
         futures = []
         loader_pool = self._get_snapshot_loader_pool()
         for loader_id in loader_pool:
-            future = executor.submit(self._execute_loader, loader_id)
+            future = executor.submit(self._execute_loader, loader_id, threads_count)
             futures.append(future)
         wait(futures, return_when=ALL_COMPLETED)
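The last hunk forwards the executor's own size into each job. A hedged, self-contained sketch of that pattern (`load_one` and `THREADS` are illustrative; the commit's `threads_count` comes from surrounding code the hunk does not show):

    from concurrent.futures import ALL_COMPLETED, ThreadPoolExecutor, wait

    THREADS = 4

    def load_one(loader_id, workers_count):
        # Each job learns how many sibling jobs share the CPU budget,
        # so it can size its own process pool accordingly.
        return "loader %s running with %s co-workers" % (loader_id, workers_count)

    with ThreadPoolExecutor(max_workers=THREADS) as executor:
        futures = [executor.submit(load_one, i, THREADS) for i in range(8)]
        wait(futures, return_when=ALL_COMPLETED)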
mindinsight/datavisual/data_transform/events_data.py

@@ -85,6 +85,7 @@ class EventsData:
         deleted_tag = self._check_tag_out_of_spec(plugin_name)
         if deleted_tag is not None:
             if tag in self._deleted_tags:
+                logger.debug("Tag is in deleted tags: %s.", tag)
                 return
             self.delete_tensor_event(deleted_tag)
mindinsight/datavisual/data_transform/ms_data_loader.py

@@ -19,12 +19,17 @@ This module is used to load the MindSpore training log file.
 Each instance will read an entire run, a run can contain one or
 more log file.
 """
+import concurrent.futures as futures
+import math
+import os
 import re
 import struct
+import threading

 from google.protobuf.message import DecodeError
 from google.protobuf.text_format import ParseError

+from mindinsight.conf import settings
 from mindinsight.datavisual.common import exceptions
 from mindinsight.datavisual.common.enums import PluginNameEnum
 from mindinsight.datavisual.common.log import logger
@@ -32,13 +37,13 @@ from mindinsight.datavisual.data_access.file_handler import FileHandler
 from mindinsight.datavisual.data_transform.events_data import EventsData
 from mindinsight.datavisual.data_transform.events_data import TensorEvent
 from mindinsight.datavisual.data_transform.graph import MSGraph
-from mindinsight.datavisual.proto_files import mindinsight_summary_pb2 as summary_pb2
-from mindinsight.datavisual.proto_files import mindinsight_anf_ir_pb2 as anf_ir_pb2
-from mindinsight.datavisual.utils import crc32
-from mindinsight.utils.exceptions import UnknownError
 from mindinsight.datavisual.data_transform.histogram import Histogram
 from mindinsight.datavisual.data_transform.histogram_container import HistogramContainer
 from mindinsight.datavisual.data_transform.tensor_container import TensorContainer
+from mindinsight.datavisual.proto_files import mindinsight_anf_ir_pb2 as anf_ir_pb2
+from mindinsight.datavisual.proto_files import mindinsight_summary_pb2 as summary_pb2
+from mindinsight.datavisual.utils import crc32
+from mindinsight.utils.exceptions import UnknownError

 HEADER_SIZE = 8
 CRC_STR_SIZE = 4
@@ -79,11 +84,14 @@ class MSDataLoader:
                            "we will reload all files in path %s.", self._summary_dir)
             self.__init__(self._summary_dir)

-    def load(self):
+    def load(self, workers_count=1):
         """
         Load all log valid files.

         When the file is reloaded, it will continue to load from where it left off.
+
+        Args:
+            workers_count (int): The count of workers. Default value is 1.
         """
         logger.debug("Start to load data in ms data loader.")
         filenames = self.filter_valid_files()
@@ -95,7 +103,7 @@ class MSDataLoader:
         self._check_files_deleted(filenames, old_filenames)

         for parser in self._parser_list:
-            parser.parse_files(filenames, events_data=self._events_data)
+            parser.parse_files(workers_count, filenames, events_data=self._events_data)

     def filter_valid_files(self):
         """
@@ -125,11 +133,12 @@ class _Parser:
         self._latest_mtime = 0
         self._summary_dir = summary_dir

-    def parse_files(self, filenames, events_data):
+    def parse_files(self, workers_count, filenames, events_data):
         """
         Load files and parse files content.

         Args:
+            workers_count (int): The count of workers.
             filenames (list[str]): File name list.
             events_data (EventsData): The container of event data.
         """
@@ -177,7 +186,7 @@
 class _PbParser(_Parser):
     """This class is used to parse pb file."""

-    def parse_files(self, filenames, events_data):
+    def parse_files(self, workers_count, filenames, events_data):
         pb_filenames = self.filter_files(filenames)
         pb_filenames = self.sort_files(pb_filenames)
         for filename in pb_filenames:
@@ -255,11 +264,12 @@ class _SummaryParser(_Parser):
         self._summary_file_handler = None
         self._events_data = None

-    def parse_files(self, filenames, events_data):
+    def parse_files(self, workers_count, filenames, events_data):
         """
         Load summary file and parse file content.

         Args:
+            workers_count (int): The count of workers.
             filenames (list[str]): File name list.
             events_data (EventsData): The container of event data.
         """
@@ -285,7 +295,7 @@ class _SummaryParser(_Parser):
                     self._latest_file_size = new_size
                     try:
-                        self._load_single_file(self._summary_file_handler)
+                        self._load_single_file(self._summary_file_handler, workers_count)
                     except UnknownError as ex:
                         logger.warning("Parse summary file failed, detail: %r,"
                                        "file path: %s.", str(ex), file_path)
@@ -304,14 +314,28 @@ class _SummaryParser(_Parser):
                 lambda filename: (re.search(r'summary\.\d+', filename)
                                   and not filename.endswith("_lineage")), filenames))

-    def _load_single_file(self, file_handler):
+    def _load_single_file(self, file_handler, workers_count):
         """
         Load a log file data.

         Args:
             file_handler (FileHandler): A file handler.
+            workers_count (int): The count of workers.
         """
-        logger.debug("Load single summary file, file path: %s.", file_handler.file_path)
-        while True:
-            start_offset = file_handler.offset
-            try:
+        default_concurrency = 1
+        cpu_count = os.cpu_count()
+        if cpu_count is None:
+            concurrency = default_concurrency
+        else:
+            concurrency = min(math.floor(cpu_count / workers_count),
+                              math.floor(settings.MAX_PROCESSES_COUNT / workers_count))
+        if concurrency <= 0:
+            concurrency = default_concurrency
+        logger.debug("Load single summary file, file path: %s, concurrency: %s.",
+                     file_handler.file_path, concurrency)
+        semaphore = threading.Semaphore(value=concurrency)
+        with futures.ProcessPoolExecutor(max_workers=concurrency) as executor:
+            while True:
+                start_offset = file_handler.offset
+                try:
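A worked instance of the concurrency formula above, with illustrative numbers:

    import math

    MAX_PROCESSES_COUNT = 15          # settings value on a small machine
    cpu_count, workers_count = 8, 3   # hypothetical host and thread pool size
    concurrency = min(math.floor(cpu_count / workers_count),
                      math.floor(MAX_PROCESSES_COUNT / workers_count))
    assert concurrency == 2           # min(floor(8/3), floor(15/3)) = min(2, 5)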
@@ -320,8 +344,33 @@ class _SummaryParser(_Parser):
                         file_handler.reset_offset(start_offset)
                         break
-                event = summary_pb2.Event.FromString(event_str)
-                self._event_parse(event)
-            except exceptions.CRCFailedError:
-                file_handler.reset_offset(start_offset)
-                logger.warning("Check crc faild and ignore this file, file_path=%s, "
+                    # Make sure we have at most concurrency tasks not finished to save memory.
+                    semaphore.acquire()
+                    future = executor.submit(self._event_parse, event_str, self._latest_filename)

+                    def _add_tensor_event_callback(future_value):
+                        try:
+                            tensor_values = future_value.result()
+                            for tensor_value in tensor_values:
+                                if tensor_value.plugin_name == PluginNameEnum.GRAPH.value:
+                                    try:
+                                        graph_tags = self._events_data.list_tags_by_plugin(PluginNameEnum.GRAPH.value)
+                                    except KeyError:
+                                        graph_tags = []
+                                    summary_tags = self.filter_files(graph_tags)
+                                    for tag in summary_tags:
+                                        self._events_data.delete_tensor_event(tag)
+                                self._events_data.add_tensor_event(tensor_value)
+                        except Exception as exc:
+                            # Log exception for debugging.
+                            logger.exception(exc)
+                            raise
+                        finally:
+                            semaphore.release()

+                    future.add_done_callback(_add_tensor_event_callback)
+                except exceptions.CRCFailedError:
+                    file_handler.reset_offset(start_offset)
+                    logger.warning("Check crc faild and ignore this file, file_path=%s, "
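The acquire/submit/callback dance above implements bounded in-flight submission: the semaphore caps how many parsed-but-unconsumed events can exist at once, so event strings do not pile up in memory faster than the process pool drains them. A standalone editorial sketch of the same pattern (the `parse` task is hypothetical, not the MindInsight parser):

    import concurrent.futures as futures
    import threading

    def parse(payload):
        """Stand-in for the real per-event parsing work."""
        return payload * 2

    def run():
        concurrency = 2
        semaphore = threading.Semaphore(value=concurrency)
        results = []
        with futures.ProcessPoolExecutor(max_workers=concurrency) as executor:
            for payload in range(10):
                semaphore.acquire()          # blocks while `concurrency` tasks are in flight
                future = executor.submit(parse, payload)

                def _on_done(future_value):
                    try:
                        results.append(future_value.result())
                    finally:
                        semaphore.release()  # free a slot even if the task failed

                future.add_done_callback(_on_done)
        return results

    if __name__ == "__main__":               # ProcessPoolExecutor needs an import guard
        print(sorted(run()))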
@@ -381,20 +430,29 @@ class _SummaryParser(_Parser):
         return event_str

-    def _event_parse(self, event):
+    @staticmethod
+    def _event_parse(event_str, latest_file_name):
         """
         Transform `Event` data to tensor_event and update it to EventsData.

+        This method is static to avoid sending unnecessary objects to other processes.
+
         Args:
-            event (Event): Message event in summary proto, data read from file handler.
+            event (str): Message event string in summary proto, data read from file handler.
+            latest_file_name (str): Latest file name.
         """
         plugins = {
             'scalar_value': PluginNameEnum.SCALAR,
             'image': PluginNameEnum.IMAGE,
             'histogram': PluginNameEnum.HISTOGRAM,
             'tensor': PluginNameEnum.TENSOR
         }
+        logger.debug("Start to parse event string. Event string len: %s.", len(event_str))
+        event = summary_pb2.Event.FromString(event_str)
+        logger.debug("Deserialize event string completed.")
+
+        ret_tensor_events = []
         if event.HasField('summary'):
             for value in event.summary.value:
                 for plugin in plugins:
@@ -402,6 +460,7 @@ class _SummaryParser(_Parser):
                         continue

                     plugin_name_enum = plugins[plugin]
                     tensor_event_value = getattr(value, plugin)
+                    logger.debug("Processing plugin value: %s.", plugin_name_enum)

                     if plugin == 'histogram':
                         tensor_event_value = HistogramContainer(tensor_event_value)
@@ -419,29 +478,23 @@ class _SummaryParser(_Parser):
                                                tag='{}/{}'.format(value.tag, plugin_name_enum.value),
                                                plugin_name=plugin_name_enum.value,
                                                value=tensor_event_value,
-                                               filename=self._latest_filename)
-                    self._events_data.add_tensor_event(tensor_event)
+                                               filename=latest_file_name)
+                    logger.debug("Tensor event generated, plugin is %s, tag is %s, step is %s.",
+                                 plugin_name_enum, value.tag, event.step)
+                    ret_tensor_events.append(tensor_event)

         elif event.HasField('graph_def'):
             graph = MSGraph()
             graph.build_graph(event.graph_def)
             tensor_event = TensorEvent(wall_time=event.wall_time,
                                        step=event.step,
-                                       tag=self._latest_filename,
+                                       tag=latest_file_name,
                                        plugin_name=PluginNameEnum.GRAPH.value,
                                        value=graph,
-                                       filename=self._latest_filename)
-
-            try:
-                graph_tags = self._events_data.list_tags_by_plugin(PluginNameEnum.GRAPH.value)
-            except KeyError:
-                graph_tags = []
-
-            summary_tags = self.filter_files(graph_tags)
-            for tag in summary_tags:
-                self._events_data.delete_tensor_event(tag)
-
-            self._events_data.add_tensor_event(tensor_event)
+                                       filename=latest_file_name)
+            ret_tensor_events.append(tensor_event)
+
+        return ret_tensor_events

     @staticmethod
     def _compare_summary_file(current_file, dst_file):
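Making `_event_parse` static and handing it the raw event string means only bytes and a file name cross the process boundary; the parser object, with its file handles and caches, is never pickled, and the results come back through the done-callback shown earlier. A minimal editorial illustration (using `pickle` as a stand-in for protobuf deserialization):

    import concurrent.futures as futures
    import pickle

    def parse_event(event_str, latest_file_name):
        """Mirrors the new _event_parse shape: bytes in, parsed values out."""
        event = pickle.loads(event_str)   # stand-in for summary_pb2.Event.FromString
        return latest_file_name, event

    if __name__ == "__main__":
        payload = pickle.dumps({"step": 1})
        with futures.ProcessPoolExecutor(max_workers=1) as executor:
            print(executor.submit(parse_event, payload, "events.summary").result())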
mindinsight/datavisual/data_transform/tensor_container.py

@@ -199,8 +199,8 @@ class TensorContainer:
     def __init__(self, tensor_message):
         self._lock = threading.Lock
-        self._msg = tensor_message
-        self._dims = tensor_message.dims
+        # Original dims can not be pickled to transfer to other process, so tuple is used.
+        self._dims = tuple(tensor_message.dims)
         self._data_type = tensor_message.data_type
         self._np_array = None
         self._data = _get_data_from_tensor(tensor_message)
@@ -265,5 +265,4 @@ class TensorContainer:
             logger.error("Reshape array fail, detail: %r", str(ex))
             return

-        self._msg = None
         self._np_array = ndarray
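Editorial note: a protobuf repeated field is a live view tied to its parent message and, per the commit's own comment, does not pickle cleanly, so the container snapshots it as a plain tuple before the object is shipped to another process. That also explains the next file: `value.dims` is already a tuple, so the processors drop their now-redundant `tuple()` calls. A tiny stand-in demonstration with an ordinary sequence:

    import pickle

    dims_field = [2, 3, 4]      # stands in for tensor_message.dims
    dims = tuple(dims_field)    # detached, immutable, trivially picklable
    assert pickle.loads(pickle.dumps(dims)) == (2, 3, 4)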
mindinsight/datavisual/processors/tensor_processor.py

@@ -245,7 +245,7 @@ class TensorProcessor(BaseProcessor):
             # This value is an instance of TensorContainer
             value = tensor.value
             value_dict = {
-                "dims": tuple(value.dims),
+                "dims": value.dims,
                 "data_type": anf_ir_pb2.DataType.Name(value.data_type)
             }
             if detail and detail == 'stats':
@@ -313,7 +313,7 @@ class TensorProcessor(BaseProcessor):
                 "wall_time": tensor.wall_time,
                 "step": tensor.step,
                 "value": {
-                    "dims": tuple(value.dims),
+                    "dims": value.dims,
                     "data_type": anf_ir_pb2.DataType.Name(value.data_type),
                     "data": res_data.tolist(),
                     "statistics": get_statistics_dict(value, flatten_data)
@@ -362,7 +362,7 @@ class TensorProcessor(BaseProcessor):
                 "wall_time": tensor.wall_time,
                 "step": tensor.step,
                 "value": {
-                    "dims": tuple(value.dims),
+                    "dims": value.dims,
                     "data_type": anf_ir_pb2.DataType.Name(value.data_type),
                     "histogram_buckets": buckets,
                     "statistics": get_statistics_dict(value, None)
mindinsight/scripts/stop.py

@@ -103,21 +103,17 @@ class Command(BaseCommand):
         self.logfile.info('Stop mindinsight with port %s and pid %s.', port, pid)
         process = psutil.Process(pid)
-        child_pids = [child.pid for child in process.children()]
+        processes_to_kill = [process]
+        # Set recursive to True to kill grand children processes.
+        for child in process.children(recursive=True):
+            processes_to_kill.append(child)

         # kill gunicorn master process
-        try:
-            os.kill(pid, signal.SIGKILL)
-        except PermissionError:
-            self.console.info('kill pid %s failed due to permission error', pid)
-            sys.exit(1)
-
-        # cleanup gunicorn worker processes
-        for child_pid in child_pids:
-            try:
-                os.kill(child_pid, signal.SIGKILL)
-            except ProcessLookupError:
-                pass
+        for proc in processes_to_kill:
+            self.logfile.info('Stopping mindinsight process %s.', proc.pid)
+            try:
+                proc.send_signal(signal.SIGKILL)
+            except psutil.Error as ex:
+                self.logfile.warning("Stop process %s failed. Detail: %s.", proc.pid, str(ex))

         for hook in HookUtils.instance().hooks():
             hook.on_shutdown(self.logfile)
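Now that workers can fork their own process pools, a plain kill of the master plus its direct children would orphan the grandchildren. A minimal editorial sketch of the recursive shutdown pattern this hunk adopts (assumes psutil, which the commit already uses; SIGKILL is POSIX-only):

    import signal

    import psutil

    def stop_tree(pid):
        """Collect the process and all descendants first, then signal each one."""
        process = psutil.Process(pid)
        processes_to_kill = [process] + process.children(recursive=True)
        for proc in processes_to_kill:
            try:
                proc.send_signal(signal.SIGKILL)  # POSIX-only signal
            except psutil.Error:
                pass  # already gone or not ours; the commit logs a warning instead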
@@ -154,7 +150,19 @@ class Command(BaseCommand):
             if user != process.username():
                 continue

-            pid = process.pid if process.ppid() == 1 else process.ppid()
+            gunicorn_master_process = process
+            # The gunicorn master process might have grand children (eg forked by process pool).
+            while True:
+                parent_process = gunicorn_master_process.parent()
+                if parent_process is None or parent_process.pid == 1:
+                    break
+                parent_cmd = parent_process.cmdline()
+                if ' '.join(parent_cmd).find(self.cmd_regex) == -1:
+                    break
+                gunicorn_master_process = parent_process
+
+            pid = gunicorn_master_process.pid

             for open_file in process.open_files():
                 if open_file.path.endswith(self.access_log_path):
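The same process-pool forking is why the lookup above walks upward instead of assuming the parent is the master: starting from any matching process, it climbs while the parent's command line still looks like the server. A hedged sketch of that walk (`cmd_marker` is illustrative, standing in for the commit's `self.cmd_regex`):

    import psutil

    def find_master(process, cmd_marker="gunicorn"):
        """Climb the tree while the parent still looks like the same server."""
        master = process
        while True:
            parent = master.parent()
            if parent is None or parent.pid == 1:
                break
            if " ".join(parent.cmdline()).find(cmd_marker) == -1:
                break
            master = parent
        return master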