Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleRec
提交
457f693c
P
PaddleRec
项目概览
PaddlePaddle
/
PaddleRec
通知
68
Star
12
Fork
5
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
27
列表
看板
标记
里程碑
合并请求
10
Wiki
1
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleRec
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
27
Issue
27
列表
看板
标记
里程碑
合并请求
10
合并请求
10
Pages
分析
分析
仓库分析
DevOps
Wiki
1
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
457f693c
编写于
8月 31, 2020
作者:
L
liuyuhui
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix a bug , transfer from print() to logging
上级
2caf374d
变更
19
显示空白变更内容
内联
并排
Showing
19 changed file
with
189 addition
and
89 deletion
+189
-89
core/engine/cluster/cluster.py
core/engine/cluster/cluster.py
+7
-1
core/engine/local_cluster.py
core/engine/local_cluster.py
+8
-2
core/engine/local_mpi.py
core/engine/local_mpi.py
+6
-1
core/factory.py
core/factory.py
+6
-1
core/trainer.py
core/trainer.py
+15
-9
core/trainers/finetuning_trainer.py
core/trainers/finetuning_trainer.py
+1
-1
core/trainers/framework/dataset.py
core/trainers/framework/dataset.py
+9
-3
core/trainers/framework/instance.py
core/trainers/framework/instance.py
+9
-4
core/trainers/framework/network.py
core/trainers/framework/network.py
+10
-5
core/trainers/framework/runner.py
core/trainers/framework/runner.py
+21
-16
core/trainers/framework/startup.py
core/trainers/framework/startup.py
+14
-9
core/trainers/framework/terminal.py
core/trainers/framework/terminal.py
+7
-2
core/trainers/general_trainer.py
core/trainers/general_trainer.py
+6
-1
core/utils/dataloader_instance.py
core/utils/dataloader_instance.py
+9
-4
core/utils/dataset_holder.py
core/utils/dataset_holder.py
+6
-1
core/utils/envs.py
core/utils/envs.py
+9
-3
core/utils/fs.py
core/utils/fs.py
+7
-1
core/utils/util.py
core/utils/util.py
+7
-2
core/utils/validation.py
core/utils/validation.py
+32
-23
未找到文件。
core/engine/cluster/cluster.py
浏览文件 @
457f693c
...
@@ -19,11 +19,16 @@ import copy
...
@@ -19,11 +19,16 @@ import copy
import
os
import
os
import
subprocess
import
subprocess
import
warnings
import
warnings
import
logging
from
paddlerec.core.engine.engine
import
Engine
from
paddlerec.core.engine.engine
import
Engine
from
paddlerec.core.factory
import
TrainerFactory
from
paddlerec.core.factory
import
TrainerFactory
from
paddlerec.core.utils
import
envs
from
paddlerec.core.utils
import
envs
logging
.
basicConfig
(
format
=
"%(asctime)s - %(levelname)s - %(message)s"
)
logger
=
logging
.
getLogger
()
logger
.
setLevel
(
logging
.
INFO
)
class
ClusterEngine
(
Engine
):
class
ClusterEngine
(
Engine
):
def
__init_impl__
(
self
):
def
__init_impl__
(
self
):
...
@@ -220,7 +225,8 @@ class ClusterEnvBase(object):
...
@@ -220,7 +225,8 @@ class ClusterEnvBase(object):
def
env_set
(
self
):
def
env_set
(
self
):
envs
.
set_runtime_environs
(
self
.
cluster_env
)
envs
.
set_runtime_environs
(
self
.
cluster_env
)
flattens
=
envs
.
flatten_environs
(
self
.
cluster_env
)
flattens
=
envs
.
flatten_environs
(
self
.
cluster_env
)
print
(
envs
.
pretty_print_envs
(
flattens
,
(
"Cluster Envs"
,
"Value"
)))
logger
.
info
(
envs
.
pretty_print_envs
(
flattens
,
(
"Cluster Envs"
,
"Value"
)))
class
PaddleCloudMpiEnv
(
ClusterEnvBase
):
class
PaddleCloudMpiEnv
(
ClusterEnvBase
):
...
...
core/engine/local_cluster.py
浏览文件 @
457f693c
...
@@ -19,10 +19,15 @@ import copy
...
@@ -19,10 +19,15 @@ import copy
import
os
import
os
import
sys
import
sys
import
subprocess
import
subprocess
import
logging
from
paddlerec.core.engine.engine
import
Engine
from
paddlerec.core.engine.engine
import
Engine
from
paddlerec.core.utils
import
envs
from
paddlerec.core.utils
import
envs
logging
.
basicConfig
(
format
=
"%(asctime)s - %(levelname)s - %(message)s"
)
logger
=
logging
.
getLogger
()
logger
.
setLevel
(
logging
.
INFO
)
class
LocalClusterEngine
(
Engine
):
class
LocalClusterEngine
(
Engine
):
def
start_procs
(
self
):
def
start_procs
(
self
):
...
@@ -57,7 +62,8 @@ class LocalClusterEngine(Engine):
...
@@ -57,7 +62,8 @@ class LocalClusterEngine(Engine):
]
]
factory
=
"paddlerec.core.factory"
factory
=
"paddlerec.core.factory"
cmd
=
[
sys
.
executable
,
"-u"
,
"-m"
,
factory
,
self
.
trainer
]
cmd
=
[
sys
.
executable
,
"-u"
,
"-m"
,
factory
,
self
.
trainer
]
#problems
for
i
in
range
(
server_num
):
for
i
in
range
(
server_num
):
current_env
.
update
({
current_env
.
update
({
...
@@ -145,7 +151,7 @@ class LocalClusterEngine(Engine):
...
@@ -145,7 +151,7 @@ class LocalClusterEngine(Engine):
if
len
(
log_fns
)
>
0
:
if
len
(
log_fns
)
>
0
:
log_fns
[
i
].
close
()
log_fns
[
i
].
close
()
procs
[
i
].
terminate
()
procs
[
i
].
terminate
()
print
(
logger
.
info
(
"all workers already completed, you can view logs under the `{}` directory"
.
"all workers already completed, you can view logs under the `{}` directory"
.
format
(
logs_dir
),
format
(
logs_dir
),
file
=
sys
.
stderr
)
file
=
sys
.
stderr
)
...
...
core/engine/local_mpi.py
浏览文件 @
457f693c
...
@@ -19,9 +19,14 @@ import copy
...
@@ -19,9 +19,14 @@ import copy
import
os
import
os
import
sys
import
sys
import
subprocess
import
subprocess
import
logging
from
paddlerec.core.engine.engine
import
Engine
from
paddlerec.core.engine.engine
import
Engine
logging
.
basicConfig
(
format
=
"%(asctime)s - %(levelname)s - %(message)s"
)
logger
=
logging
.
getLogger
()
logger
.
setLevel
(
logging
.
INFO
)
class
LocalMPIEngine
(
Engine
):
class
LocalMPIEngine
(
Engine
):
def
start_procs
(
self
):
def
start_procs
(
self
):
...
@@ -51,7 +56,7 @@ class LocalMPIEngine(Engine):
...
@@ -51,7 +56,7 @@ class LocalMPIEngine(Engine):
if
len
(
log_fns
)
>
0
:
if
len
(
log_fns
)
>
0
:
log_fns
[
i
].
close
()
log_fns
[
i
].
close
()
procs
[
i
].
wait
()
procs
[
i
].
wait
()
print
(
logger
.
info
(
"all workers and parameter servers already completed"
,
"all workers and parameter servers already completed"
,
file
=
sys
.
stderr
)
file
=
sys
.
stderr
)
...
...
core/factory.py
浏览文件 @
457f693c
...
@@ -14,8 +14,13 @@
...
@@ -14,8 +14,13 @@
import
os
import
os
import
sys
import
sys
import
logging
from
paddlerec.core.utils
import
envs
from
paddlerec.core.utils
import
envs
logging
.
basicConfig
(
format
=
"%(asctime)s - %(levelname)s - %(message)s"
)
logger
=
logging
.
getLogger
(
"fluid"
)
logger
.
setLevel
(
logging
.
INFO
)
trainer_abs
=
os
.
path
.
join
(
trainer_abs
=
os
.
path
.
join
(
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
)),
"trainers"
)
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
)),
"trainers"
)
trainers
=
{}
trainers
=
{}
...
@@ -53,7 +58,7 @@ class TrainerFactory(object):
...
@@ -53,7 +58,7 @@ class TrainerFactory(object):
@
staticmethod
@
staticmethod
def
_build_trainer
(
yaml_path
):
def
_build_trainer
(
yaml_path
):
print
(
envs
.
pretty_print_envs
(
envs
.
get_global_envs
()))
logger
.
info
(
envs
.
pretty_print_envs
(
envs
.
get_global_envs
()))
train_mode
=
envs
.
get_trainer
()
train_mode
=
envs
.
get_trainer
()
trainer_abs
=
trainers
.
get
(
train_mode
,
None
)
trainer_abs
=
trainers
.
get
(
train_mode
,
None
)
...
...
core/trainer.py
浏览文件 @
457f693c
...
@@ -17,11 +17,16 @@ import os
...
@@ -17,11 +17,16 @@ import os
import
time
import
time
import
sys
import
sys
import
traceback
import
traceback
import
logging
from
paddle
import
fluid
from
paddle
import
fluid
from
paddlerec.core.utils
import
envs
from
paddlerec.core.utils
import
envs
logging
.
basicConfig
(
format
=
"%(asctime)s - %(levelname)s - %(message)s"
)
logger
=
logging
.
getLogger
(
"fluid"
)
logger
.
setLevel
(
logging
.
INFO
)
class
EngineMode
:
class
EngineMode
:
"""
"""
...
@@ -88,7 +93,7 @@ class Trainer(object):
...
@@ -88,7 +93,7 @@ class Trainer(object):
phases
.
append
(
phase
)
phases
.
append
(
phase
)
self
.
_context
[
"phases"
]
=
phases
self
.
_context
[
"phases"
]
=
phases
print
(
"PaddleRec: Runner {} Begin"
.
format
(
self
.
_runner_name
))
logger
.
info
(
"PaddleRec: Runner {} Begin"
.
format
(
self
.
_runner_name
))
self
.
which_engine
()
self
.
which_engine
()
self
.
which_device
()
self
.
which_device
()
self
.
which_fleet_mode
()
self
.
which_fleet_mode
()
...
@@ -107,7 +112,7 @@ class Trainer(object):
...
@@ -107,7 +112,7 @@ class Trainer(object):
self
.
device
=
Device
.
GPU
self
.
device
=
Device
.
GPU
gpu_id
=
int
(
os
.
environ
.
get
(
'FLAGS_selected_gpus'
,
0
))
gpu_id
=
int
(
os
.
environ
.
get
(
'FLAGS_selected_gpus'
,
0
))
self
.
_place
=
fluid
.
CUDAPlace
(
gpu_id
)
self
.
_place
=
fluid
.
CUDAPlace
(
gpu_id
)
print
(
"PaddleRec run on device GPU: {}"
.
format
(
gpu_id
))
logger
.
info
(
"PaddleRec run on device GPU: {}"
.
format
(
gpu_id
))
self
.
_exe
=
fluid
.
Executor
(
self
.
_place
)
self
.
_exe
=
fluid
.
Executor
(
self
.
_place
)
elif
device
==
"CPU"
:
elif
device
==
"CPU"
:
self
.
device
=
Device
.
CPU
self
.
device
=
Device
.
CPU
...
@@ -169,7 +174,7 @@ class Trainer(object):
...
@@ -169,7 +174,7 @@ class Trainer(object):
def
which_cluster_type
(
self
):
def
which_cluster_type
(
self
):
cluster_type
=
os
.
getenv
(
"PADDLEREC_CLUSTER_TYPE"
,
"MPI"
)
cluster_type
=
os
.
getenv
(
"PADDLEREC_CLUSTER_TYPE"
,
"MPI"
)
print
(
"PADDLEREC_CLUSTER_TYPE: {}"
.
format
(
cluster_type
))
logger
.
info
(
"PADDLEREC_CLUSTER_TYPE: {}"
.
format
(
cluster_type
))
if
cluster_type
and
cluster_type
.
upper
()
==
"K8S"
:
if
cluster_type
and
cluster_type
.
upper
()
==
"K8S"
:
self
.
_context
[
"cluster_type"
]
=
"K8S"
self
.
_context
[
"cluster_type"
]
=
"K8S"
else
:
else
:
...
@@ -184,7 +189,7 @@ class Trainer(object):
...
@@ -184,7 +189,7 @@ class Trainer(object):
self
.
is_infer
=
False
self
.
is_infer
=
False
else
:
else
:
self
.
is_infer
=
True
self
.
is_infer
=
True
print
(
"Executor Mode: {}"
.
format
(
executor_mode
))
logger
.
info
(
"Executor Mode: {}"
.
format
(
executor_mode
))
self
.
_context
[
"is_infer"
]
=
self
.
is_infer
self
.
_context
[
"is_infer"
]
=
self
.
is_infer
def
legality_check
(
self
):
def
legality_check
(
self
):
...
@@ -224,7 +229,7 @@ class Trainer(object):
...
@@ -224,7 +229,7 @@ class Trainer(object):
Return:
Return:
None, just sleep in base
None, just sleep in base
"""
"""
print
(
'unknow context_status:%s, do nothing'
%
context
[
'status'
])
logger
.
info
(
'unknow context_status:%s, do nothing'
%
context
[
'status'
])
time
.
sleep
(
60
)
time
.
sleep
(
60
)
def
handle_processor_exception
(
self
,
context
,
exception
):
def
handle_processor_exception
(
self
,
context
,
exception
):
...
@@ -233,9 +238,10 @@ class Trainer(object):
...
@@ -233,9 +238,10 @@ class Trainer(object):
Return:
Return:
bool exit_app or not
bool exit_app or not
"""
"""
print
(
"
\n
--------------------------------
\n
PaddleRec Error Message "
logger
.
info
(
"
\n
--------------------------------
\n
PaddleRec Error Message "
"Summary:
\n
--------------------------------
\n
"
)
"Summary:
\n
--------------------------------
\n
"
)
print
(
logger
.
info
(
'Exit PaddleRec. catch exception in precoss status: [%s], except: %s'
'Exit PaddleRec. catch exception in precoss status: [%s], except: %s'
%
(
context
[
'status'
],
str
(
exception
)))
%
(
context
[
'status'
],
str
(
exception
)))
return
True
return
True
...
@@ -258,7 +264,7 @@ class Trainer(object):
...
@@ -258,7 +264,7 @@ class Trainer(object):
break
break
except
Exception
as
err
:
except
Exception
as
err
:
traceback
.
print_exc
()
traceback
.
print_exc
()
print
(
'Catch Exception:%s'
%
str
(
err
))
logger
.
info
(
'Catch Exception:%s'
%
str
(
err
))
sys
.
stdout
.
flush
()
sys
.
stdout
.
flush
()
self
.
handle_processor_exception
(
self
.
_context
,
err
)
self
.
handle_processor_exception
(
self
.
_context
,
err
)
sys
.
exit
(
type
(
err
).
__name__
)
sys
.
exit
(
type
(
err
).
__name__
)
core/trainers/finetuning_trainer.py
浏览文件 @
457f693c
...
@@ -34,7 +34,7 @@ class FineTuningTrainer(Trainer):
...
@@ -34,7 +34,7 @@ class FineTuningTrainer(Trainer):
self
.
runner_env_name
=
"runner."
+
self
.
_context
[
"runner_name"
]
self
.
runner_env_name
=
"runner."
+
self
.
_context
[
"runner_name"
]
def
processor_register
(
self
):
def
processor_register
(
self
):
print
(
"processor_register begin"
)
logger
.
info
(
"processor_register begin"
)
self
.
regist_context_processor
(
'uninit'
,
self
.
instance
)
self
.
regist_context_processor
(
'uninit'
,
self
.
instance
)
self
.
regist_context_processor
(
'network_pass'
,
self
.
network
)
self
.
regist_context_processor
(
'network_pass'
,
self
.
network
)
self
.
regist_context_processor
(
'startup_pass'
,
self
.
startup
)
self
.
regist_context_processor
(
'startup_pass'
,
self
.
startup
)
...
...
core/trainers/framework/dataset.py
浏览文件 @
457f693c
...
@@ -15,6 +15,7 @@
...
@@ -15,6 +15,7 @@
from
__future__
import
print_function
from
__future__
import
print_function
import
os
import
os
import
logging
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
from
paddlerec.core.utils
import
envs
from
paddlerec.core.utils
import
envs
...
@@ -25,6 +26,10 @@ from paddlerec.core.utils.util import split_files, check_filelist
...
@@ -25,6 +26,10 @@ from paddlerec.core.utils.util import split_files, check_filelist
__all__
=
[
"DatasetBase"
,
"DataLoader"
,
"QueueDataset"
]
__all__
=
[
"DatasetBase"
,
"DataLoader"
,
"QueueDataset"
]
logging
.
basicConfig
(
format
=
"%(asctime)s - %(levelname)s - %(message)s"
)
logger
=
logging
.
getLogger
(
"fluid"
)
logger
.
setLevel
(
logging
.
INFO
)
class
DatasetBase
(
object
):
class
DatasetBase
(
object
):
"""R
"""R
...
@@ -83,7 +88,8 @@ class QueueDataset(DatasetBase):
...
@@ -83,7 +88,8 @@ class QueueDataset(DatasetBase):
name
=
"dataset."
+
dataset_name
+
"."
name
=
"dataset."
+
dataset_name
+
"."
type_name
=
envs
.
get_global_env
(
name
+
"type"
)
type_name
=
envs
.
get_global_env
(
name
+
"type"
)
if
envs
.
get_platform
()
!=
"LINUX"
:
if
envs
.
get_platform
()
!=
"LINUX"
:
print
(
"platform "
,
envs
.
get_platform
(),
"Reader To Dataloader"
)
logger
.
info
(
"platform "
,
envs
.
get_platform
(),
"Reader To Dataloader"
)
type_name
=
"DataLoader"
type_name
=
"DataLoader"
if
type_name
==
"DataLoader"
:
if
type_name
==
"DataLoader"
:
...
@@ -126,7 +132,7 @@ class QueueDataset(DatasetBase):
...
@@ -126,7 +132,7 @@ class QueueDataset(DatasetBase):
data_file_list
=
[],
data_file_list
=
[],
train_data_path
=
train_data_path
)
train_data_path
=
train_data_path
)
if
(
hidden_file_list
is
not
None
):
if
(
hidden_file_list
is
not
None
):
print
(
logger
.
info
(
"Warning:please make sure there are no hidden files in the dataset folder and check these hidden files:{}"
.
"Warning:please make sure there are no hidden files in the dataset folder and check these hidden files:{}"
.
format
(
hidden_file_list
))
format
(
hidden_file_list
))
...
@@ -143,7 +149,7 @@ class QueueDataset(DatasetBase):
...
@@ -143,7 +149,7 @@ class QueueDataset(DatasetBase):
if
need_split_files
:
if
need_split_files
:
file_list
=
split_files
(
file_list
,
context
[
"fleet"
].
worker_index
(),
file_list
=
split_files
(
file_list
,
context
[
"fleet"
].
worker_index
(),
context
[
"fleet"
].
worker_num
())
context
[
"fleet"
].
worker_num
())
print
(
"File_list: {}"
.
format
(
file_list
))
logger
.
info
(
"File_list: {}"
.
format
(
file_list
))
dataset
.
set_filelist
(
file_list
)
dataset
.
set_filelist
(
file_list
)
for
model_dict
in
context
[
"phases"
]:
for
model_dict
in
context
[
"phases"
]:
...
...
core/trainers/framework/instance.py
浏览文件 @
457f693c
...
@@ -15,6 +15,7 @@
...
@@ -15,6 +15,7 @@
from
__future__
import
print_function
from
__future__
import
print_function
import
warnings
import
warnings
import
logging
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
from
paddlerec.core.utils
import
envs
from
paddlerec.core.utils
import
envs
...
@@ -24,6 +25,10 @@ __all__ = [
...
@@ -24,6 +25,10 @@ __all__ = [
"CollectiveInstance"
"CollectiveInstance"
]
]
logging
.
basicConfig
(
format
=
"%(asctime)s - %(levelname)s - %(message)s"
)
logger
=
logging
.
getLogger
(
"fluid"
)
logger
.
setLevel
(
logging
.
INFO
)
class
InstanceBase
(
object
):
class
InstanceBase
(
object
):
"""R
"""R
...
@@ -38,7 +43,7 @@ class InstanceBase(object):
...
@@ -38,7 +43,7 @@ class InstanceBase(object):
class
SingleInstance
(
InstanceBase
):
class
SingleInstance
(
InstanceBase
):
def
__init__
(
self
,
context
):
def
__init__
(
self
,
context
):
print
(
"Running SingleInstance."
)
logger
.
info
(
"Running SingleInstance."
)
pass
pass
def
instance
(
self
,
context
):
def
instance
(
self
,
context
):
...
@@ -47,7 +52,7 @@ class SingleInstance(InstanceBase):
...
@@ -47,7 +52,7 @@ class SingleInstance(InstanceBase):
class
PSInstance
(
InstanceBase
):
class
PSInstance
(
InstanceBase
):
def
__init__
(
self
,
context
):
def
__init__
(
self
,
context
):
print
(
"Running PSInstance."
)
logger
.
info
(
"Running PSInstance."
)
pass
pass
def
instance
(
self
,
context
):
def
instance
(
self
,
context
):
...
@@ -61,7 +66,7 @@ class PSInstance(InstanceBase):
...
@@ -61,7 +66,7 @@ class PSInstance(InstanceBase):
class
PslibInstance
(
InstanceBase
):
class
PslibInstance
(
InstanceBase
):
def
__init__
(
self
,
context
):
def
__init__
(
self
,
context
):
print
(
"Running PslibInstance."
)
logger
.
info
(
"Running PslibInstance."
)
pass
pass
def
instance
(
self
,
context
):
def
instance
(
self
,
context
):
...
@@ -73,7 +78,7 @@ class PslibInstance(InstanceBase):
...
@@ -73,7 +78,7 @@ class PslibInstance(InstanceBase):
class
CollectiveInstance
(
InstanceBase
):
class
CollectiveInstance
(
InstanceBase
):
def
__init__
(
self
,
context
):
def
__init__
(
self
,
context
):
print
(
"Running CollectiveInstance."
)
logger
.
info
(
"Running CollectiveInstance."
)
pass
pass
def
instance
(
self
,
context
):
def
instance
(
self
,
context
):
...
...
core/trainers/framework/network.py
浏览文件 @
457f693c
...
@@ -16,6 +16,7 @@ from __future__ import print_function
...
@@ -16,6 +16,7 @@ from __future__ import print_function
import
os
import
os
import
warnings
import
warnings
import
logging
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
from
paddlerec.core.utils
import
envs
from
paddlerec.core.utils
import
envs
...
@@ -26,6 +27,10 @@ __all__ = [
...
@@ -26,6 +27,10 @@ __all__ = [
"CollectiveNetwork"
,
"FineTuningNetwork"
"CollectiveNetwork"
,
"FineTuningNetwork"
]
]
logging
.
basicConfig
(
format
=
"%(asctime)s - %(levelname)s - %(message)s"
)
logger
=
logging
.
getLogger
(
"fluid"
)
logger
.
setLevel
(
logging
.
INFO
)
class
NetworkBase
(
object
):
class
NetworkBase
(
object
):
"""R
"""R
...
@@ -43,7 +48,7 @@ class SingleNetwork(NetworkBase):
...
@@ -43,7 +48,7 @@ class SingleNetwork(NetworkBase):
"""
"""
def
__init__
(
self
,
context
):
def
__init__
(
self
,
context
):
print
(
"Running SingleNetwork."
)
logger
.
info
(
"Running SingleNetwork."
)
pass
pass
def
build_network
(
self
,
context
):
def
build_network
(
self
,
context
):
...
@@ -114,7 +119,7 @@ class FineTuningNetwork(NetworkBase):
...
@@ -114,7 +119,7 @@ class FineTuningNetwork(NetworkBase):
"""
"""
def
__init__
(
self
,
context
):
def
__init__
(
self
,
context
):
print
(
"Running FineTuningNetwork."
)
logger
.
info
(
"Running FineTuningNetwork."
)
def
build_network
(
self
,
context
):
def
build_network
(
self
,
context
):
context
[
"model"
]
=
{}
context
[
"model"
]
=
{}
...
@@ -193,7 +198,7 @@ class FineTuningNetwork(NetworkBase):
...
@@ -193,7 +198,7 @@ class FineTuningNetwork(NetworkBase):
class
PSNetwork
(
NetworkBase
):
class
PSNetwork
(
NetworkBase
):
def
__init__
(
self
,
context
):
def
__init__
(
self
,
context
):
print
(
"Running PSNetwork."
)
logger
.
info
(
"Running PSNetwork."
)
pass
pass
def
build_network
(
self
,
context
):
def
build_network
(
self
,
context
):
...
@@ -285,7 +290,7 @@ class PSNetwork(NetworkBase):
...
@@ -285,7 +290,7 @@ class PSNetwork(NetworkBase):
class
PslibNetwork
(
NetworkBase
):
class
PslibNetwork
(
NetworkBase
):
def
__init__
(
self
,
context
):
def
__init__
(
self
,
context
):
print
(
"Running PslibNetwork."
)
logger
.
info
(
"Running PslibNetwork."
)
pass
pass
def
build_network
(
self
,
context
):
def
build_network
(
self
,
context
):
...
@@ -357,7 +362,7 @@ class PslibNetwork(NetworkBase):
...
@@ -357,7 +362,7 @@ class PslibNetwork(NetworkBase):
class
CollectiveNetwork
(
NetworkBase
):
class
CollectiveNetwork
(
NetworkBase
):
def
__init__
(
self
,
context
):
def
__init__
(
self
,
context
):
print
(
"Running CollectiveNetwork."
)
logger
.
info
(
"Running CollectiveNetwork."
)
pass
pass
def
build_network
(
self
,
context
):
def
build_network
(
self
,
context
):
...
...
core/trainers/framework/runner.py
浏览文件 @
457f693c
...
@@ -17,6 +17,7 @@ from __future__ import print_function
...
@@ -17,6 +17,7 @@ from __future__ import print_function
import
os
import
os
import
time
import
time
import
warnings
import
warnings
import
logging
import
numpy
as
np
import
numpy
as
np
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
...
@@ -27,6 +28,10 @@ __all__ = [
...
@@ -27,6 +28,10 @@ __all__ = [
"RunnerBase"
,
"SingleRunner"
,
"PSRunner"
,
"CollectiveRunner"
,
"PslibRunner"
"RunnerBase"
,
"SingleRunner"
,
"PSRunner"
,
"CollectiveRunner"
,
"PslibRunner"
]
]
logging
.
basicConfig
(
format
=
"%(asctime)s - %(levelname)s - %(message)s"
)
logger
=
logging
.
getLogger
(
"fluid"
)
logger
.
setLevel
(
logging
.
INFO
)
def
as_numpy
(
tensor
):
def
as_numpy
(
tensor
):
"""
"""
...
@@ -169,7 +174,7 @@ class RunnerBase(object):
...
@@ -169,7 +174,7 @@ class RunnerBase(object):
metrics
.
extend
(
metrics_rets
)
metrics
.
extend
(
metrics_rets
)
if
batch_id
%
fetch_period
==
0
and
batch_id
!=
0
:
if
batch_id
%
fetch_period
==
0
and
batch_id
!=
0
:
print
(
metrics_format
.
format
(
*
metrics
))
logger
.
info
(
metrics_format
.
format
(
*
metrics
))
batch_id
+=
1
batch_id
+=
1
except
fluid
.
core
.
EOFException
:
except
fluid
.
core
.
EOFException
:
reader
.
reset
()
reader
.
reset
()
...
@@ -365,7 +370,7 @@ class SingleRunner(RunnerBase):
...
@@ -365,7 +370,7 @@ class SingleRunner(RunnerBase):
"""
"""
def
__init__
(
self
,
context
):
def
__init__
(
self
,
context
):
print
(
"Running SingleRunner."
)
logger
.
info
(
"Running SingleRunner."
)
pass
pass
def
run
(
self
,
context
):
def
run
(
self
,
context
):
...
@@ -381,7 +386,7 @@ class SingleRunner(RunnerBase):
...
@@ -381,7 +386,7 @@ class SingleRunner(RunnerBase):
result
=
self
.
_run
(
context
,
model_dict
)
result
=
self
.
_run
(
context
,
model_dict
)
end_time
=
time
.
time
()
end_time
=
time
.
time
()
seconds
=
end_time
-
begin_time
seconds
=
end_time
-
begin_time
message
=
"epoch {} done, use time: {}"
.
format
(
epoch
,
seconds
)
message
=
"epoch {} done, use time: {}
s
"
.
format
(
epoch
,
seconds
)
metrics_result
=
[]
metrics_result
=
[]
for
key
in
metrics
:
for
key
in
metrics
:
if
isinstance
(
metrics
[
key
],
Metric
):
if
isinstance
(
metrics
[
key
],
Metric
):
...
@@ -394,7 +399,7 @@ class SingleRunner(RunnerBase):
...
@@ -394,7 +399,7 @@ class SingleRunner(RunnerBase):
metrics_result
.
append
(
_str
)
metrics_result
.
append
(
_str
)
if
len
(
metrics_result
)
>
0
:
if
len
(
metrics_result
)
>
0
:
message
+=
", global metrics: "
+
", "
.
join
(
metrics_result
)
message
+=
", global metrics: "
+
", "
.
join
(
metrics_result
)
print
(
message
)
logger
.
info
(
message
)
with
fluid
.
scope_guard
(
context
[
"model"
][
model_dict
[
"name"
]][
with
fluid
.
scope_guard
(
context
[
"model"
][
model_dict
[
"name"
]][
"scope"
]):
"scope"
]):
...
@@ -409,7 +414,7 @@ class SingleRunner(RunnerBase):
...
@@ -409,7 +414,7 @@ class SingleRunner(RunnerBase):
class
PSRunner
(
RunnerBase
):
class
PSRunner
(
RunnerBase
):
def
__init__
(
self
,
context
):
def
__init__
(
self
,
context
):
print
(
"Running PSRunner."
)
logger
.
info
(
"Running PSRunner."
)
pass
pass
def
run
(
self
,
context
):
def
run
(
self
,
context
):
...
@@ -424,7 +429,7 @@ class PSRunner(RunnerBase):
...
@@ -424,7 +429,7 @@ class PSRunner(RunnerBase):
result
=
self
.
_run
(
context
,
model_dict
)
result
=
self
.
_run
(
context
,
model_dict
)
end_time
=
time
.
time
()
end_time
=
time
.
time
()
seconds
=
end_time
-
begin_time
seconds
=
end_time
-
begin_time
message
=
"epoch {} done, use time: {}"
.
format
(
epoch
,
seconds
)
message
=
"epoch {} done, use time: {}
s
"
.
format
(
epoch
,
seconds
)
# TODO, wait for PaddleCloudRoleMaker supports gloo
# TODO, wait for PaddleCloudRoleMaker supports gloo
from
paddle.fluid.incubate.fleet.base.role_maker
import
GeneralRoleMaker
from
paddle.fluid.incubate.fleet.base.role_maker
import
GeneralRoleMaker
...
@@ -442,7 +447,7 @@ class PSRunner(RunnerBase):
...
@@ -442,7 +447,7 @@ class PSRunner(RunnerBase):
metrics_result
.
append
(
_str
)
metrics_result
.
append
(
_str
)
if
len
(
metrics_result
)
>
0
:
if
len
(
metrics_result
)
>
0
:
message
+=
", global metrics: "
+
", "
.
join
(
metrics_result
)
message
+=
", global metrics: "
+
", "
.
join
(
metrics_result
)
print
(
message
)
logger
.
info
(
message
)
with
fluid
.
scope_guard
(
context
[
"model"
][
model_dict
[
"name"
]][
with
fluid
.
scope_guard
(
context
[
"model"
][
model_dict
[
"name"
]][
"scope"
]):
"scope"
]):
train_prog
=
context
[
"model"
][
model_dict
[
"name"
]][
train_prog
=
context
[
"model"
][
model_dict
[
"name"
]][
...
@@ -456,7 +461,7 @@ class PSRunner(RunnerBase):
...
@@ -456,7 +461,7 @@ class PSRunner(RunnerBase):
class
CollectiveRunner
(
RunnerBase
):
class
CollectiveRunner
(
RunnerBase
):
def
__init__
(
self
,
context
):
def
__init__
(
self
,
context
):
print
(
"Running CollectiveRunner."
)
logger
.
info
(
"Running CollectiveRunner."
)
pass
pass
def
run
(
self
,
context
):
def
run
(
self
,
context
):
...
@@ -469,7 +474,7 @@ class CollectiveRunner(RunnerBase):
...
@@ -469,7 +474,7 @@ class CollectiveRunner(RunnerBase):
self
.
_run
(
context
,
model_dict
)
self
.
_run
(
context
,
model_dict
)
end_time
=
time
.
time
()
end_time
=
time
.
time
()
seconds
=
end_time
-
begin_time
seconds
=
end_time
-
begin_time
print
(
"epoch {} done, use time: {}
"
.
format
(
epoch
,
seconds
))
logger
.
info
(
"epoch {} done, use time: {}s
"
.
format
(
epoch
,
seconds
))
with
fluid
.
scope_guard
(
context
[
"model"
][
model_dict
[
"name"
]][
with
fluid
.
scope_guard
(
context
[
"model"
][
model_dict
[
"name"
]][
"scope"
]):
"scope"
]):
train_prog
=
context
[
"model"
][
model_dict
[
"name"
]][
train_prog
=
context
[
"model"
][
model_dict
[
"name"
]][
...
@@ -483,7 +488,7 @@ class CollectiveRunner(RunnerBase):
...
@@ -483,7 +488,7 @@ class CollectiveRunner(RunnerBase):
class
PslibRunner
(
RunnerBase
):
class
PslibRunner
(
RunnerBase
):
def
__init__
(
self
,
context
):
def
__init__
(
self
,
context
):
print
(
"Running PSRunner."
)
logger
.
info
(
"Running PSRunner."
)
pass
pass
def
run
(
self
,
context
):
def
run
(
self
,
context
):
...
@@ -497,7 +502,7 @@ class PslibRunner(RunnerBase):
...
@@ -497,7 +502,7 @@ class PslibRunner(RunnerBase):
self
.
_run
(
context
,
model_dict
)
self
.
_run
(
context
,
model_dict
)
end_time
=
time
.
time
()
end_time
=
time
.
time
()
seconds
=
end_time
-
begin_time
seconds
=
end_time
-
begin_time
print
(
"epoch {} done, use time: {}
"
.
format
(
epoch
,
seconds
))
logger
.
info
(
"epoch {} done, use time: {}s
"
.
format
(
epoch
,
seconds
))
"""
"""
# online Training Can do more, As shown below:
# online Training Can do more, As shown below:
...
@@ -527,7 +532,7 @@ class PslibRunner(RunnerBase):
...
@@ -527,7 +532,7 @@ class PslibRunner(RunnerBase):
self._run(context, model_dict)
self._run(context, model_dict)
end_time = time.time()
end_time = time.time()
seconds = end_time - begin_time
seconds = end_time - begin_time
print
("epoch {} done, use time: {}".format(epoch, seconds))
logger.info
("epoch {} done, use time: {}".format(epoch, seconds))
with fluid.scope_guard(context["model"][model_dict["name"]]
with fluid.scope_guard(context["model"][model_dict["name"]]
["scope"]):
["scope"]):
train_prog = context["model"][model_dict["name"]][
train_prog = context["model"][model_dict["name"]][
...
@@ -543,7 +548,7 @@ class PslibRunner(RunnerBase):
...
@@ -543,7 +548,7 @@ class PslibRunner(RunnerBase):
class
SingleInferRunner
(
RunnerBase
):
class
SingleInferRunner
(
RunnerBase
):
def
__init__
(
self
,
context
):
def
__init__
(
self
,
context
):
print
(
"Running SingleInferRunner."
)
logger
.
info
(
"Running SingleInferRunner."
)
pass
pass
def
run
(
self
,
context
):
def
run
(
self
,
context
):
...
@@ -559,7 +564,7 @@ class SingleInferRunner(RunnerBase):
...
@@ -559,7 +564,7 @@ class SingleInferRunner(RunnerBase):
result
=
self
.
_run
(
context
,
model_dict
)
result
=
self
.
_run
(
context
,
model_dict
)
end_time
=
time
.
time
()
end_time
=
time
.
time
()
seconds
=
end_time
-
begin_time
seconds
=
end_time
-
begin_time
message
=
"Infer {} of epoch {} done, use time: {}"
.
format
(
message
=
"Infer {} of epoch {} done, use time: {}
s
"
.
format
(
model_dict
[
"name"
],
epoch_name
,
seconds
)
model_dict
[
"name"
],
epoch_name
,
seconds
)
metrics_result
=
[]
metrics_result
=
[]
for
key
in
metrics
:
for
key
in
metrics
:
...
@@ -573,14 +578,14 @@ class SingleInferRunner(RunnerBase):
...
@@ -573,14 +578,14 @@ class SingleInferRunner(RunnerBase):
metrics_result
.
append
(
_str
)
metrics_result
.
append
(
_str
)
if
len
(
metrics_result
)
>
0
:
if
len
(
metrics_result
)
>
0
:
message
+=
", global metrics: "
+
", "
.
join
(
metrics_result
)
message
+=
", global metrics: "
+
", "
.
join
(
metrics_result
)
print
(
message
)
logger
.
info
(
message
)
context
[
"status"
]
=
"terminal_pass"
context
[
"status"
]
=
"terminal_pass"
def
_load
(
self
,
context
,
model_dict
,
model_path
):
def
_load
(
self
,
context
,
model_dict
,
model_path
):
if
model_path
is
None
or
model_path
==
""
:
if
model_path
is
None
or
model_path
==
""
:
return
return
print
(
"load persistables from"
,
model_path
)
logger
.
info
(
"load persistables from"
,
model_path
)
with
fluid
.
scope_guard
(
context
[
"model"
][
model_dict
[
"name"
]][
"scope"
]):
with
fluid
.
scope_guard
(
context
[
"model"
][
model_dict
[
"name"
]][
"scope"
]):
train_prog
=
context
[
"model"
][
model_dict
[
"name"
]][
"main_program"
]
train_prog
=
context
[
"model"
][
model_dict
[
"name"
]][
"main_program"
]
...
...
core/trainers/framework/startup.py
浏览文件 @
457f693c
...
@@ -15,6 +15,7 @@
...
@@ -15,6 +15,7 @@
from
__future__
import
print_function
from
__future__
import
print_function
import
warnings
import
warnings
import
logging
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
import
paddle.fluid.core
as
core
...
@@ -25,6 +26,10 @@ __all__ = [
...
@@ -25,6 +26,10 @@ __all__ = [
"FineTuningStartup"
"FineTuningStartup"
]
]
logging
.
basicConfig
(
format
=
"%(asctime)s - %(levelname)s - %(message)s"
)
logger
=
logging
.
getLogger
(
"fluid"
)
logger
.
setLevel
(
logging
.
INFO
)
class
StartupBase
(
object
):
class
StartupBase
(
object
):
"""R
"""R
...
@@ -41,10 +46,10 @@ class StartupBase(object):
...
@@ -41,10 +46,10 @@ class StartupBase(object):
"runner."
+
context
[
"runner_name"
]
+
".init_model_path"
,
None
)
"runner."
+
context
[
"runner_name"
]
+
".init_model_path"
,
None
)
if
dirname
is
None
or
dirname
==
""
:
if
dirname
is
None
or
dirname
==
""
:
return
return
print
(
"going to load "
,
dirname
)
logger
.
info
(
"going to load "
,
dirname
)
fluid
.
io
.
load_persistables
(
fluid
.
io
.
load_persistables
(
context
[
"exe"
],
dirname
,
main_program
=
main_program
)
context
[
"exe"
],
dirname
,
main_program
=
main_program
)
print
(
"load from {} success"
.
format
(
dirname
))
logger
.
info
(
"load from {} success"
.
format
(
dirname
))
class
SingleStartup
(
StartupBase
):
class
SingleStartup
(
StartupBase
):
...
@@ -52,7 +57,7 @@ class SingleStartup(StartupBase):
...
@@ -52,7 +57,7 @@ class SingleStartup(StartupBase):
"""
"""
def
__init__
(
self
,
context
):
def
__init__
(
self
,
context
):
print
(
"Running SingleStartup."
)
logger
.
info
(
"Running SingleStartup."
)
pass
pass
def
startup
(
self
,
context
):
def
startup
(
self
,
context
):
...
@@ -79,7 +84,7 @@ class FineTuningStartup(StartupBase):
...
@@ -79,7 +84,7 @@ class FineTuningStartup(StartupBase):
self
.
self
.
op_role_var_attr_name
=
core
.
op_proto_and_checker_maker
.
kOpRoleVarAttrName
(
self
.
self
.
op_role_var_attr_name
=
core
.
op_proto_and_checker_maker
.
kOpRoleVarAttrName
(
)
)
print
(
"Running SingleStartup."
)
logger
.
info
(
"Running SingleStartup."
)
def
_is_opt_role_op
(
self
,
op
):
def
_is_opt_role_op
(
self
,
op
):
# NOTE: depend on oprole to find out whether this op is for
# NOTE: depend on oprole to find out whether this op is for
...
@@ -155,7 +160,7 @@ class FineTuningStartup(StartupBase):
...
@@ -155,7 +160,7 @@ class FineTuningStartup(StartupBase):
"runner."
+
context
[
"runner_name"
]
+
".init_model_path"
,
None
)
"runner."
+
context
[
"runner_name"
]
+
".init_model_path"
,
None
)
if
dirname
is
None
or
dirname
==
""
:
if
dirname
is
None
or
dirname
==
""
:
return
return
print
(
"going to load "
,
dirname
)
logger
.
info
(
"going to load "
,
dirname
)
params_grads
=
self
.
_get_params_grads
(
main_program
)
params_grads
=
self
.
_get_params_grads
(
main_program
)
update_params
=
[
p
for
p
,
_
in
params_grads
]
update_params
=
[
p
for
p
,
_
in
params_grads
]
...
@@ -169,7 +174,7 @@ class FineTuningStartup(StartupBase):
...
@@ -169,7 +174,7 @@ class FineTuningStartup(StartupBase):
fluid
.
io
.
load_vars
(
context
[
"exe"
],
dirname
,
main_program
,
fluid
.
io
.
load_vars
(
context
[
"exe"
],
dirname
,
main_program
,
need_load_vars
)
need_load_vars
)
print
(
"load from {} success"
.
format
(
dirname
))
logger
.
info
(
"load from {} success"
.
format
(
dirname
))
def
startup
(
self
,
context
):
def
startup
(
self
,
context
):
for
model_dict
in
context
[
"phases"
]:
for
model_dict
in
context
[
"phases"
]:
...
@@ -187,7 +192,7 @@ class FineTuningStartup(StartupBase):
...
@@ -187,7 +192,7 @@ class FineTuningStartup(StartupBase):
class
PSStartup
(
StartupBase
):
class
PSStartup
(
StartupBase
):
def
__init__
(
self
,
context
):
def
__init__
(
self
,
context
):
print
(
"Running PSStartup."
)
logger
.
info
(
"Running PSStartup."
)
pass
pass
def
startup
(
self
,
context
):
def
startup
(
self
,
context
):
...
@@ -204,7 +209,7 @@ class PSStartup(StartupBase):
...
@@ -204,7 +209,7 @@ class PSStartup(StartupBase):
class
CollectiveStartup
(
StartupBase
):
class
CollectiveStartup
(
StartupBase
):
def
__init__
(
self
,
context
):
def
__init__
(
self
,
context
):
print
(
"Running CollectiveStartup."
)
logger
.
info
(
"Running CollectiveStartup."
)
pass
pass
def
startup
(
self
,
context
):
def
startup
(
self
,
context
):
...
@@ -222,7 +227,7 @@ class CollectiveStartup(StartupBase):
...
@@ -222,7 +227,7 @@ class CollectiveStartup(StartupBase):
class
SingleInferStartup
(
StartupBase
):
class
SingleInferStartup
(
StartupBase
):
def
__init__
(
self
,
context
):
def
__init__
(
self
,
context
):
print
(
"Running SingleInferStartup."
)
logger
.
info
(
"Running SingleInferStartup."
)
pass
pass
def
startup
(
self
,
context
):
def
startup
(
self
,
context
):
...
...
core/trainers/framework/terminal.py
浏览文件 @
457f693c
...
@@ -15,12 +15,17 @@
...
@@ -15,12 +15,17 @@
from
__future__
import
print_function
from
__future__
import
print_function
import
warnings
import
warnings
import
logging
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
from
paddlerec.core.utils
import
envs
from
paddlerec.core.utils
import
envs
__all__
=
[
"TerminalBase"
,
"PSTerminalBase"
]
__all__
=
[
"TerminalBase"
,
"PSTerminalBase"
]
logging
.
basicConfig
(
format
=
"%(asctime)s - %(levelname)s - %(message)s"
)
logger
=
logging
.
getLogger
(
"fluid"
)
logger
.
setLevel
(
logging
.
INFO
)
class
TerminalBase
(
object
):
class
TerminalBase
(
object
):
"""R
"""R
...
@@ -30,7 +35,7 @@ class TerminalBase(object):
...
@@ -30,7 +35,7 @@ class TerminalBase(object):
pass
pass
def
terminal
(
self
,
context
):
def
terminal
(
self
,
context
):
print
(
"PaddleRec Finish"
)
logger
.
info
(
"PaddleRec Finish"
)
class
PSTerminal
(
TerminalBase
):
class
PSTerminal
(
TerminalBase
):
...
@@ -42,4 +47,4 @@ class PSTerminal(TerminalBase):
...
@@ -42,4 +47,4 @@ class PSTerminal(TerminalBase):
def
terminal
(
self
,
context
):
def
terminal
(
self
,
context
):
context
[
"fleet"
].
stop_worker
()
context
[
"fleet"
].
stop_worker
()
print
(
"PaddleRec Finish"
)
logger
.
info
(
"PaddleRec Finish"
)
core/trainers/general_trainer.py
浏览文件 @
457f693c
...
@@ -17,10 +17,15 @@ General Trainer, applicable to many situations: Single/Cluster/Local_Cluster + P
...
@@ -17,10 +17,15 @@ General Trainer, applicable to many situations: Single/Cluster/Local_Cluster + P
from
__future__
import
print_function
from
__future__
import
print_function
import
os
import
os
import
logging
from
paddlerec.core.utils
import
envs
from
paddlerec.core.utils
import
envs
from
paddlerec.core.trainer
import
Trainer
,
EngineMode
,
FleetMode
from
paddlerec.core.trainer
import
Trainer
,
EngineMode
,
FleetMode
logging
.
basicConfig
(
format
=
"%(asctime)s - %(levelname)s - %(message)s"
)
logger
=
logging
.
getLogger
()
logger
.
setLevel
(
logging
.
INFO
)
class
GeneralTrainer
(
Trainer
):
class
GeneralTrainer
(
Trainer
):
"""
"""
...
@@ -34,7 +39,7 @@ class GeneralTrainer(Trainer):
...
@@ -34,7 +39,7 @@ class GeneralTrainer(Trainer):
self
.
runner_env_name
=
"runner."
+
self
.
_context
[
"runner_name"
]
self
.
runner_env_name
=
"runner."
+
self
.
_context
[
"runner_name"
]
def
processor_register
(
self
):
def
processor_register
(
self
):
print
(
"processor_register begin"
)
logger
.
info
(
"processor_register begin"
)
self
.
regist_context_processor
(
'uninit'
,
self
.
instance
)
self
.
regist_context_processor
(
'uninit'
,
self
.
instance
)
self
.
regist_context_processor
(
'network_pass'
,
self
.
network
)
self
.
regist_context_processor
(
'network_pass'
,
self
.
network
)
self
.
regist_context_processor
(
'startup_pass'
,
self
.
startup
)
self
.
regist_context_processor
(
'startup_pass'
,
self
.
startup
)
...
...
core/utils/dataloader_instance.py
浏览文件 @
457f693c
...
@@ -14,6 +14,7 @@
...
@@ -14,6 +14,7 @@
from
__future__
import
print_function
from
__future__
import
print_function
import
os
import
os
import
logging
from
paddlerec.core.utils.envs
import
lazy_instance_by_fliename
from
paddlerec.core.utils.envs
import
lazy_instance_by_fliename
from
paddlerec.core.utils.envs
import
get_global_env
from
paddlerec.core.utils.envs
import
get_global_env
from
paddlerec.core.utils.envs
import
get_runtime_environ
from
paddlerec.core.utils.envs
import
get_runtime_environ
...
@@ -21,6 +22,10 @@ from paddlerec.core.reader import SlotReader
...
@@ -21,6 +22,10 @@ from paddlerec.core.reader import SlotReader
from
paddlerec.core.trainer
import
EngineMode
from
paddlerec.core.trainer
import
EngineMode
from
paddlerec.core.utils.util
import
split_files
,
check_filelist
from
paddlerec.core.utils.util
import
split_files
,
check_filelist
logging
.
basicConfig
(
format
=
"%(asctime)s - %(levelname)s - %(message)s"
)
logger
=
logging
.
getLogger
()
logger
.
setLevel
(
logging
.
INFO
)
def
dataloader_by_name
(
readerclass
,
def
dataloader_by_name
(
readerclass
,
dataset_name
,
dataset_name
,
...
@@ -41,7 +46,7 @@ def dataloader_by_name(readerclass,
...
@@ -41,7 +46,7 @@ def dataloader_by_name(readerclass,
hidden_file_list
,
files
=
check_filelist
(
hidden_file_list
,
files
=
check_filelist
(
hidden_file_list
=
[],
data_file_list
=
[],
train_data_path
=
data_path
)
hidden_file_list
=
[],
data_file_list
=
[],
train_data_path
=
data_path
)
if
(
hidden_file_list
is
not
None
):
if
(
hidden_file_list
is
not
None
):
print
(
logger
.
info
(
"Warning:please make sure there are no hidden files in the dataset folder and check these hidden files:{}"
.
"Warning:please make sure there are no hidden files in the dataset folder and check these hidden files:{}"
.
format
(
hidden_file_list
))
format
(
hidden_file_list
))
...
@@ -55,7 +60,7 @@ def dataloader_by_name(readerclass,
...
@@ -55,7 +60,7 @@ def dataloader_by_name(readerclass,
"cluster_type"
]
==
"K8S"
:
"cluster_type"
]
==
"K8S"
:
# for k8s mount mode, split files for every node
# for k8s mount mode, split files for every node
need_split_files
=
True
need_split_files
=
True
print
(
"need_split_files: {}"
.
format
(
need_split_files
))
logger
.
info
(
"need_split_files: {}"
.
format
(
need_split_files
))
if
need_split_files
:
if
need_split_files
:
files
=
split_files
(
files
,
context
[
"fleet"
].
worker_index
(),
files
=
split_files
(
files
,
context
[
"fleet"
].
worker_index
(),
context
[
"fleet"
].
worker_num
())
context
[
"fleet"
].
worker_num
())
...
@@ -103,7 +108,7 @@ def slotdataloader_by_name(readerclass, dataset_name, yaml_file, context):
...
@@ -103,7 +108,7 @@ def slotdataloader_by_name(readerclass, dataset_name, yaml_file, context):
hidden_file_list
,
files
=
check_filelist
(
hidden_file_list
,
files
=
check_filelist
(
hidden_file_list
=
[],
data_file_list
=
[],
train_data_path
=
data_path
)
hidden_file_list
=
[],
data_file_list
=
[],
train_data_path
=
data_path
)
if
(
hidden_file_list
is
not
None
):
if
(
hidden_file_list
is
not
None
):
print
(
logger
.
info
(
"Warning:please make sure there are no hidden files in the dataset folder and check these hidden files:{}"
.
"Warning:please make sure there are no hidden files in the dataset folder and check these hidden files:{}"
.
format
(
hidden_file_list
))
format
(
hidden_file_list
))
...
@@ -173,7 +178,7 @@ def slotdataloader(readerclass, train, yaml_file, context):
...
@@ -173,7 +178,7 @@ def slotdataloader(readerclass, train, yaml_file, context):
hidden_file_list
,
files
=
check_filelist
(
hidden_file_list
,
files
=
check_filelist
(
hidden_file_list
=
[],
data_file_list
=
[],
train_data_path
=
data_path
)
hidden_file_list
=
[],
data_file_list
=
[],
train_data_path
=
data_path
)
if
(
hidden_file_list
is
not
None
):
if
(
hidden_file_list
is
not
None
):
print
(
logger
.
info
(
"Warning:please make sure there are no hidden files in the dataset folder and check these hidden files:{}"
.
"Warning:please make sure there are no hidden files in the dataset folder and check these hidden files:{}"
.
format
(
hidden_file_list
))
format
(
hidden_file_list
))
...
...
core/utils/dataset_holder.py
浏览文件 @
457f693c
...
@@ -15,12 +15,17 @@
...
@@ -15,12 +15,17 @@
import
abc
import
abc
import
datetime
import
datetime
import
time
import
time
import
logging
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
from
paddlerec.core.utils
import
fs
as
fs
from
paddlerec.core.utils
import
fs
as
fs
from
paddlerec.core.utils
import
util
as
util
from
paddlerec.core.utils
import
util
as
util
logging
.
basicConfig
(
format
=
"%(asctime)s - %(levelname)s - %(message)s"
)
logger
=
logging
.
getLogger
(
"fluid"
)
logger
.
setLevel
(
logging
.
INFO
)
class
DatasetHolder
(
object
):
class
DatasetHolder
(
object
):
"""
"""
...
@@ -187,7 +192,7 @@ class TimeSplitDatasetHolder(DatasetHolder):
...
@@ -187,7 +192,7 @@ class TimeSplitDatasetHolder(DatasetHolder):
windown_min
=
params
[
'time_window_min'
]
windown_min
=
params
[
'time_window_min'
]
if
begin_time
not
in
self
.
_datasets
:
if
begin_time
not
in
self
.
_datasets
:
while
self
.
check_ready
(
begin_time
,
windown_min
)
==
False
:
while
self
.
check_ready
(
begin_time
,
windown_min
)
==
False
:
print
(
"dataset not ready, time:"
+
begin_time
)
logger
.
info
(
"dataset not ready, time:"
+
begin_time
)
time
.
sleep
(
30
)
time
.
sleep
(
30
)
file_list
=
self
.
get_file_list
(
begin_time
,
windown_min
,
file_list
=
self
.
get_file_list
(
begin_time
,
windown_min
,
params
[
'node_num'
],
params
[
'node_num'
],
...
...
core/utils/envs.py
浏览文件 @
457f693c
...
@@ -21,6 +21,12 @@ import sys
...
@@ -21,6 +21,12 @@ import sys
import
six
import
six
import
traceback
import
traceback
import
six
import
six
import
time
import
logging
logging
.
basicConfig
(
format
=
"%(asctime)s - %(levelname)s - %(message)s"
)
logger
=
logging
.
getLogger
()
logger
.
setLevel
(
logging
.
INFO
)
global_envs
=
{}
global_envs
=
{}
global_envs_flatten
=
{}
global_envs_flatten
=
{}
...
@@ -104,7 +110,7 @@ def set_global_envs(envs):
...
@@ -104,7 +110,7 @@ def set_global_envs(envs):
global_envs
[
name
]
=
"DataLoader"
global_envs
[
name
]
=
"DataLoader"
if
get_platform
()
==
"LINUX"
and
six
.
PY3
:
if
get_platform
()
==
"LINUX"
and
six
.
PY3
:
print
(
"QueueDataset can not support PY3, change to DataLoader"
)
logger
.
info
(
"QueueDataset can not support PY3, change to DataLoader"
)
for
dataset
in
envs
[
"dataset"
]:
for
dataset
in
envs
[
"dataset"
]:
name
=
"."
.
join
([
"dataset"
,
dataset
[
"name"
],
"type"
])
name
=
"."
.
join
([
"dataset"
,
dataset
[
"name"
],
"type"
])
global_envs
[
name
]
=
"DataLoader"
global_envs
[
name
]
=
"DataLoader"
...
@@ -207,7 +213,7 @@ def lazy_instance_by_package(package, class_name):
...
@@ -207,7 +213,7 @@ def lazy_instance_by_package(package, class_name):
return
instance
return
instance
except
Exception
as
err
:
except
Exception
as
err
:
traceback
.
print_exc
()
traceback
.
print_exc
()
print
(
'Catch Exception:%s'
%
str
(
err
))
logger
.
info
(
'Catch Exception:%s'
%
str
(
err
))
return
None
return
None
...
@@ -223,7 +229,7 @@ def lazy_instance_by_fliename(abs, class_name):
...
@@ -223,7 +229,7 @@ def lazy_instance_by_fliename(abs, class_name):
return
instance
return
instance
except
Exception
as
err
:
except
Exception
as
err
:
traceback
.
print_exc
()
traceback
.
print_exc
()
print
(
'Catch Exception:%s'
%
str
(
err
))
logger
.
info
(
'Catch Exception:%s'
%
str
(
err
))
return
None
return
None
...
...
core/utils/fs.py
浏览文件 @
457f693c
...
@@ -13,9 +13,15 @@
...
@@ -13,9 +13,15 @@
# limitations under the License.
# limitations under the License.
import
os
import
os
import
time
import
logging
from
paddle.fluid.incubate.fleet.utils.hdfs
import
HDFSClient
from
paddle.fluid.incubate.fleet.utils.hdfs
import
HDFSClient
logging
.
basicConfig
(
format
=
"%(asctime)s - %(levelname)s - %(message)s"
)
logger
=
logging
.
getLogger
()
logger
.
setLevel
(
logging
.
INFO
)
def
is_afs_path
(
path
):
def
is_afs_path
(
path
):
"""is_afs_path
"""is_afs_path
...
@@ -177,4 +183,4 @@ class FileHandler(object):
...
@@ -177,4 +183,4 @@ class FileHandler(object):
return
self
.
_hdfs_client
.
upload
(
dest_path
,
org_path
)
return
self
.
_hdfs_client
.
upload
(
dest_path
,
org_path
)
if
org_is_afs
and
not
dest_is_afs
:
if
org_is_afs
and
not
dest_is_afs
:
return
self
.
_hdfs_client
.
download
(
org_path
,
dest_path
)
return
self
.
_hdfs_client
.
download
(
org_path
,
dest_path
)
print
(
"Not Suppor hdfs cp currently"
)
logger
.
info
(
"Not Suppor hdfs cp currently"
)
core/utils/util.py
浏览文件 @
457f693c
...
@@ -16,9 +16,14 @@ import datetime
...
@@ -16,9 +16,14 @@ import datetime
import
os
import
os
import
sys
import
sys
import
time
import
time
import
logging
import
numpy
as
np
import
numpy
as
np
from
paddle
import
fluid
from
paddle
import
fluid
logging
.
basicConfig
(
format
=
"%(asctime)s - %(levelname)s - %(message)s"
)
logger
=
logging
.
getLogger
(
"fluid"
)
logger
.
setLevel
(
logging
.
INFO
)
def
save_program_proto
(
path
,
program
=
None
):
def
save_program_proto
(
path
,
program
=
None
):
if
program
is
None
:
if
program
is
None
:
...
@@ -146,9 +151,9 @@ def print_log(log_str, params):
...
@@ -146,9 +151,9 @@ def print_log(log_str, params):
log_str
=
time_str
+
" "
+
log_str
log_str
=
time_str
+
" "
+
log_str
if
'master'
in
params
and
params
[
'master'
]:
if
'master'
in
params
and
params
[
'master'
]:
if
'index'
in
params
and
params
[
'index'
]
==
0
:
if
'index'
in
params
and
params
[
'index'
]
==
0
:
print
(
log_str
)
logger
.
info
(
log_str
)
else
:
else
:
print
(
log_str
)
logger
.
info
(
log_str
)
sys
.
stdout
.
flush
()
sys
.
stdout
.
flush
()
if
'stdout'
in
params
:
if
'stdout'
in
params
:
params
[
'stdout'
]
+=
log_str
+
'
\n
'
params
[
'stdout'
]
+=
log_str
+
'
\n
'
...
...
core/utils/validation.py
浏览文件 @
457f693c
...
@@ -12,8 +12,14 @@
...
@@ -12,8 +12,14 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
import
time
import
logging
from
paddlerec.core.utils
import
envs
from
paddlerec.core.utils
import
envs
logging
.
basicConfig
(
format
=
"%(asctime)s - %(levelname)s - %(message)s"
)
logger
=
logging
.
getLogger
()
logger
.
setLevel
(
logging
.
INFO
)
class
ValueFormat
:
class
ValueFormat
:
def
__init__
(
self
,
value_type
,
value
,
value_handler
,
required
=
False
):
def
__init__
(
self
,
value_type
,
value
,
value_handler
,
required
=
False
):
...
@@ -41,64 +47,67 @@ class ValueFormat:
...
@@ -41,64 +47,67 @@ class ValueFormat:
def
is_type_valid
(
self
,
name
,
value
):
def
is_type_valid
(
self
,
name
,
value
):
if
self
.
value_type
==
"int"
:
if
self
.
value_type
==
"int"
:
if
not
isinstance
(
value
,
int
):
if
not
isinstance
(
value
,
int
):
print
(
"
\n
attr {} should be int, but {} now
\n
"
.
format
(
logger
.
info
(
"
\n
attr {} should be int, but {} now
\n
"
.
format
(
name
,
type
(
value
)))
name
,
type
(
value
)))
return
False
return
False
return
True
return
True
elif
self
.
value_type
==
"str"
:
elif
self
.
value_type
==
"str"
:
if
not
isinstance
(
value
,
str
):
if
not
isinstance
(
value
,
str
):
print
(
"
\n
attr {} should be str, but {} now
\n
"
.
format
(
logger
.
info
(
"
\n
attr {} should be str, but {} now
\n
"
.
format
(
name
,
type
(
value
)))
name
,
type
(
value
)))
return
False
return
False
return
True
return
True
elif
self
.
value_type
==
"strs"
:
elif
self
.
value_type
==
"strs"
:
if
not
isinstance
(
value
,
list
):
if
not
isinstance
(
value
,
list
):
print
(
"
\n
attr {} should be list(str), but {} now
\n
"
.
format
(
logger
.
info
(
"
\n
attr {} should be list(str), but {} now
\n
"
.
name
,
type
(
value
)))
format
(
name
,
type
(
value
)))
return
False
return
False
for
v
in
value
:
for
v
in
value
:
if
not
isinstance
(
v
,
str
):
if
not
isinstance
(
v
,
str
):
print
(
"
\n
attr {} should be list(str), but list({}) now
\n
"
.
logger
.
info
(
"
\n
attr {} should be list(str), but list({}) now
\n
"
.
format
(
name
,
type
(
v
)))
format
(
name
,
type
(
v
)))
return
False
return
False
return
True
return
True
elif
self
.
value_type
==
"dict"
:
elif
self
.
value_type
==
"dict"
:
if
not
isinstance
(
value
,
dict
):
if
not
isinstance
(
value
,
dict
):
print
(
"
\n
attr {} should be str, but {} now
\n
"
.
format
(
logger
.
info
(
"
\n
attr {} should be str, but {} now
\n
"
.
format
(
name
,
type
(
value
)))
name
,
type
(
value
)))
return
False
return
False
return
True
return
True
elif
self
.
value_type
==
"dicts"
:
elif
self
.
value_type
==
"dicts"
:
if
not
isinstance
(
value
,
list
):
if
not
isinstance
(
value
,
list
):
print
(
"
\n
attr {} should be list(dist), but {} now
\n
"
.
format
(
logger
.
info
(
"
\n
attr {} should be list(dist), but {} now
\n
"
.
name
,
type
(
value
)))
format
(
name
,
type
(
value
)))
return
False
return
False
for
v
in
value
:
for
v
in
value
:
if
not
isinstance
(
v
,
dict
):
if
not
isinstance
(
v
,
dict
):
print
(
"
\n
attr {} should be list(dist), but list({}) now
\n
"
.
logger
.
info
(
"
\n
attr {} should be list(dist), but list({}) now
\n
"
.
format
(
name
,
type
(
v
)))
format
(
name
,
type
(
v
)))
return
False
return
False
return
True
return
True
elif
self
.
value_type
==
"ints"
:
elif
self
.
value_type
==
"ints"
:
if
not
isinstance
(
value
,
list
):
if
not
isinstance
(
value
,
list
):
print
(
"
\n
attr {} should be list(int), but {} now
\n
"
.
format
(
logger
.
info
(
"
\n
attr {} should be list(int), but {} now
\n
"
.
name
,
type
(
value
)))
format
(
name
,
type
(
value
)))
return
False
return
False
for
v
in
value
:
for
v
in
value
:
if
not
isinstance
(
v
,
int
):
if
not
isinstance
(
v
,
int
):
print
(
"
\n
attr {} should be list(int), but list({}) now
\n
"
.
logger
.
info
(
"
\n
attr {} should be list(int), but list({}) now
\n
"
.
format
(
name
,
type
(
v
)))
format
(
name
,
type
(
v
)))
return
False
return
False
return
True
return
True
else
:
else
:
print
(
"
\n
attr {}'s type is {}, can not be supported now
\n
"
.
format
(
logger
.
info
(
"
\n
attr {}'s type is {}, can not be supported now
\n
"
.
name
,
type
(
value
)))
format
(
name
,
type
(
value
)))
return
False
return
False
def
is_value_valid
(
self
,
name
,
value
):
def
is_value_valid
(
self
,
name
,
value
):
...
@@ -108,7 +117,7 @@ class ValueFormat:
...
@@ -108,7 +117,7 @@ class ValueFormat:
def
in_value_handler
(
name
,
value
,
values
):
def
in_value_handler
(
name
,
value
,
values
):
if
value
not
in
values
:
if
value
not
in
values
:
print
(
"
\n
attr {}'s value is {}, but {} is expected
\n
"
.
format
(
logger
.
info
(
"
\n
attr {}'s value is {}, but {} is expected
\n
"
.
format
(
name
,
value
,
values
))
name
,
value
,
values
))
return
False
return
False
return
True
return
True
...
@@ -116,7 +125,7 @@ def in_value_handler(name, value, values):
...
@@ -116,7 +125,7 @@ def in_value_handler(name, value, values):
def
eq_value_handler
(
name
,
value
,
values
):
def
eq_value_handler
(
name
,
value
,
values
):
if
value
!=
values
:
if
value
!=
values
:
print
(
"
\n
attr {}'s value is {}, but == {} is expected
\n
"
.
format
(
logger
.
info
(
"
\n
attr {}'s value is {}, but == {} is expected
\n
"
.
format
(
name
,
value
,
values
))
name
,
value
,
values
))
return
False
return
False
return
True
return
True
...
@@ -124,7 +133,7 @@ def eq_value_handler(name, value, values):
...
@@ -124,7 +133,7 @@ def eq_value_handler(name, value, values):
def
ge_value_handler
(
name
,
value
,
values
):
def
ge_value_handler
(
name
,
value
,
values
):
if
value
<
values
:
if
value
<
values
:
print
(
"
\n
attr {}'s value is {}, but >= {} is expected
\n
"
.
format
(
logger
.
info
(
"
\n
attr {}'s value is {}, but >= {} is expected
\n
"
.
format
(
name
,
value
,
values
))
name
,
value
,
values
))
return
False
return
False
return
True
return
True
...
@@ -132,7 +141,7 @@ def ge_value_handler(name, value, values):
...
@@ -132,7 +141,7 @@ def ge_value_handler(name, value, values):
def
le_value_handler
(
name
,
value
,
values
):
def
le_value_handler
(
name
,
value
,
values
):
if
value
>
values
:
if
value
>
values
:
print
(
"
\n
attr {}'s value is {}, but <= {} is expected
\n
"
.
format
(
logger
.
info
(
"
\n
attr {}'s value is {}, but <= {} is expected
\n
"
.
format
(
name
,
value
,
values
))
name
,
value
,
values
))
return
False
return
False
return
True
return
True
...
@@ -160,8 +169,8 @@ def yaml_validation(config):
...
@@ -160,8 +169,8 @@ def yaml_validation(config):
for
required
in
require_checkers
:
for
required
in
require_checkers
:
if
required
not
in
_config
.
keys
():
if
required
not
in
_config
.
keys
():
print
(
"
\n
can not find {} in yaml, which is required
\n
"
.
format
(
logger
.
info
(
"
\n
can not find {} in yaml, which is required
\n
"
.
required
))
format
(
required
))
return
False
return
False
for
name
,
value
in
_config
.
items
():
for
name
,
value
in
_config
.
items
():
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录