BaiXuePrincess / PaddleRec (forked from PaddlePaddle / PaddleRec)

Commit abc50489, authored June 08, 2020 by xiexionghang

fix code style

Parent: a5d3f512
Changes: 3 changed files with 23 additions and 20 deletions (+23, -20)
core/utils/dataset_holder.py    +13 -12
core/utils/envs.py              +6 -6
core/utils/util.py              +4 -2
core/utils/dataset_holder.py
@@ -66,6 +66,7 @@ class TimeSplitDatasetHolder(DatasetHolder):
    """
    Dataset with time split dir. root_path/$DAY/$HOUR
    """

    def __init__(self, config):
        """
        init data root_path, time_split_interval, data_path_format
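The docstring above only pins down the layout as root_path/$DAY/$HOUR. As a rough, hypothetical illustration (the root path and data_path_format below are made-up values, not read from this commit), one slice path of such a time-split dataset could be produced with a strftime pattern:

    # Hypothetical sketch of a root_path/$DAY/$HOUR slice path; the real
    # root_path and data_path_format come from the holder's config.
    import datetime

    root_path = "afs://example/user/paddle/data"   # assumed example value
    data_path_format = "%Y%m%d/%H"                 # assumed example value

    slice_time = datetime.datetime(2020, 6, 8, 12)
    print(root_path + "/" + slice_time.strftime(data_path_format))
    # -> afs://example/user/paddle/data/20200608/12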
@@ -112,8 +113,8 @@ class TimeSplitDatasetHolder(DatasetHolder):
            True/False
        """
        is_ready = True
        data_time, windows_mins = self._format_data_time(daytime_str,
                                                         time_window_mins)
        while time_window_mins > 0:
            file_path = self._path_generator.generate_path(
                'donefile_path', {'time_format': data_time})
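The loop above walks the requested window slice by slice and resolves a 'donefile_path' for each slice, so readiness amounts to every slice having its done marker. A minimal sketch of that idea under stated assumptions (the helper, the marker name, and the fixed 60-minute slice are hypothetical, not the project's _format_data_time):

    # Hypothetical readiness check: the window is ready only if every
    # slice inside it has a "donefile" marker on disk.
    import datetime
    import os

    def window_is_ready(root, start, window_mins, slice_mins=60):
        t, remaining = start, window_mins
        while remaining > 0:
            marker = os.path.join(root, t.strftime("%Y%m%d/%H"), "donefile")
            if not os.path.exists(marker):
                return False
            t += datetime.timedelta(minutes=slice_mins)
            remaining -= slice_mins
        return True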
@@ -141,19 +142,19 @@ class TimeSplitDatasetHolder(DatasetHolder):
            list, data_shard[node_idx]
        """
        data_file_list = []
        data_time, windows_mins = self._format_data_time(daytime_str,
                                                         time_window_mins)
        while time_window_mins > 0:
            file_path = self._path_generator.generate_path(
                'data_path', {'time_format': data_time})
            sub_file_list = self._data_file_handler.ls(file_path)
            for sub_file in sub_file_list:
                sub_file_name = self._data_file_handler.get_file_name(sub_file)
                if not sub_file_name.startswith(
                        self._config['filename_prefix']):
                    continue
                postfix = sub_file_name.split(
                    self._config['filename_prefix'])[1]
                if postfix.isdigit():
                    if int(postfix) % node_num == node_idx:
                        data_file_list.append(sub_file)
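The filter at the end of this hunk shards a slice's files across trainer nodes: a file named <filename_prefix><N> goes to the node where N % node_num == node_idx, so each node reads a disjoint subset. A self-contained sketch of the same modulo sharding (the file names are made up):

    # Standalone sketch of modulo-based file sharding across nodes.
    def shard_files(file_names, prefix, node_num, node_idx):
        picked = []
        for name in file_names:
            if not name.startswith(prefix):
                continue
            postfix = name.split(prefix)[1]
            if postfix.isdigit() and int(postfix) % node_num == node_idx:
                picked.append(name)
        return picked

    files = ["part-0", "part-1", "part-2", "part-3", "_SUCCESS"]
    print(shard_files(files, "part-", node_num=2, node_idx=0))
    # -> ['part-0', 'part-2']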
@@ -167,8 +168,8 @@ class TimeSplitDatasetHolder(DatasetHolder):
    def _alloc_dataset(self, file_list):
        """ """
        dataset = fluid.DatasetFactory().create_dataset(
            self._config['dataset_type'])
        dataset.set_batch_size(self._config['batch_size'])
        dataset.set_thread(self._config['load_thread'])
        dataset.set_hdfs_config(self._config['fs_name'],
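_alloc_dataset wires the holder's config into a paddle.fluid dataset object. A hedged sketch of the same setters used outside the class, assuming Paddle 1.x fluid; every config value below, and the fs_ugi key, is an illustrative assumption rather than something read from this diff:

    # Illustrative allocation of an in-memory dataset with the setters
    # that appear in the hunk above; values are placeholders.
    import paddle.fluid as fluid

    config = {
        "dataset_type": "InMemoryDataset",   # assumed
        "batch_size": 32,                    # assumed
        "load_thread": 10,                   # assumed
        "fs_name": "afs://example:9902",     # assumed
        "fs_ugi": "user,password",           # assumed key and value
    }

    dataset = fluid.DatasetFactory().create_dataset(config["dataset_type"])
    dataset.set_batch_size(config["batch_size"])
    dataset.set_thread(config["load_thread"])
    dataset.set_hdfs_config(config["fs_name"], config["fs_ugi"])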
@@ -207,8 +208,8 @@ class TimeSplitDatasetHolder(DatasetHolder):
                                                  params['node_num'],
                                                  params['node_idx'])
                self._datasets[begin_time] = self._alloc_dataset(file_list)
                self._datasets[begin_time].preload_into_memory(
                    self._config['preload_thread'])
                return True
        return False
core/utils/envs.py
@@ -70,8 +70,8 @@ def set_global_envs(envs):
                nests = copy.deepcopy(namespace_nests)
                nests.append(k)
                fatten_env_namespace(nests, v)
            elif (k == "dataset" or k == "phase" or
                  k == "runner") and isinstance(v, list):
                for i in v:
                    if i.get("name") is None:
                        raise ValueError("name must be in dataset list ", v)
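set_global_envs recurses into nested dict values through fatten_env_namespace, pushing the current key onto the namespace list, while the top-level dataset, phase and runner lists are only validated to carry a name field. A hedged sketch of the flattening idea; the dotted-key output format is an assumption about what the helper produces, not a quote of it:

    # Hypothetical flattening of a nested config into dotted keys, mirroring
    # the recursion pattern above (deepcopy the prefix, append the key).
    import copy

    def flatten(namespace_nests, cfg, out):
        for k, v in cfg.items():
            if isinstance(v, dict):
                nests = copy.deepcopy(namespace_nests)
                nests.append(k)
                flatten(nests, v, out)
            else:
                out[".".join(namespace_nests + [k])] = v

    flat = {}
    flatten([], {"train": {"epochs": 4, "optimizer": {"lr": 0.001}}}, flat)
    print(flat)  # {'train.epochs': 4, 'train.optimizer.lr': 0.001}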
@@ -169,8 +169,8 @@ def pretty_print_envs(envs, header=None):
def lazy_instance_by_package(package, class_name):
    try:
        model_package = __import__(package, globals(), locals(),
                                   package.split("."))
        instance = getattr(model_package, class_name)
        return instance
    except Exception, err:
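lazy_instance_by_package imports a package by name at call time and pulls a class off it with getattr. The except Exception, err: form is Python 2 only syntax, so the module as written targets Python 2; an equivalent Python 3 illustration using importlib (not the project's implementation) would be:

    # Hedged Python 3 sketch of lazily resolving a class from a package path.
    import importlib

    def lazy_instance_by_package_py3(package, class_name):
        try:
            model_package = importlib.import_module(package)
            return getattr(model_package, class_name)
        except Exception as err:
            print("load {}.{} failed: {}".format(package, class_name, err))
            return None

    # Example: lazy_instance_by_package_py3("collections", "OrderedDict")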
@@ -185,8 +185,8 @@ def lazy_instance_by_fliename(abs, class_name):
        sys.path.append(dirname)
        package = os.path.splitext(os.path.basename(abs))[0]
        model_package = __import__(package, globals(), locals(),
                                   package.split("."))
        instance = getattr(model_package, class_name)
        return instance
    except Exception, err:
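lazy_instance_by_fliename does the same for a model given as an absolute file path: the file's directory is appended to sys.path, the module is imported by its basename, and the class is fetched with getattr. A hypothetical usage sketch (the path, the class name, and the import path of envs are all assumptions):

    # Hypothetical usage: load class "Model" from /workspace/my_model.py.
    # The call appends /workspace to sys.path, imports module "my_model",
    # and returns getattr(my_model, "Model").
    from core.utils import envs   # import path is an assumption

    model_class = envs.lazy_instance_by_fliename("/workspace/my_model.py", "Model")
    model = model_class()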
core/utils/util.py
@@ -175,6 +175,7 @@ class CostPrinter(object):
    """
    For count cost time && print cost log
    """

    def __init__(self, callback, callback_params):
        """R
        """
@@ -210,6 +211,7 @@ class PathGenerator(object):
    """
    generate path with template & runtime variables
    """

    def __init__(self, config):
        """R
        """

@@ -230,8 +232,8 @@ class PathGenerator(object):
        """
        if template_name in self._templates:
            if 'time_format' in param:
                str = param['time_format'].strftime(
                    self._templates[template_name])
                return str.format(**param)
            return self._templates[template_name].format(**param)
        else:
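generate_path first expands the chosen template with strftime when a 'time_format' datetime is passed, then fills the remaining placeholders via str.format (the hunk also shadows the built-in name str, which the sketch below avoids). A self-contained sketch of that two-stage expansion with a made-up template:

    # Standalone sketch of the strftime-then-format path expansion.
    import datetime

    templates = {"data_path": "/data/{job}/%Y%m%d/%H"}   # made-up template

    def generate_path(template_name, param):
        template = templates[template_name]
        if 'time_format' in param:
            expanded = param['time_format'].strftime(template)
            return expanded.format(**param)
        return template.format(**param)

    print(generate_path("data_path",
                        {"job": "ctr_dnn",
                         "time_format": datetime.datetime(2020, 6, 8, 12)}))
    # -> /data/ctr_dnn/20200608/12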