Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
PaddleRec
提交
d14f44da
P
PaddleRec
项目概览
BaiXuePrincess
/
PaddleRec
与 Fork 源项目一致
Fork自
PaddlePaddle / PaddleRec
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleRec
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
d14f44da
编写于
6月 03, 2020
作者:
W
wuzhihua
提交者:
GitHub
6月 03, 2020
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #37 from xjqbest/fixtt1
add format checker, fix reduce
上级
74625fc4
3cfb57a9
变更
13
隐藏空白更改
内联
并排
Showing
13 changed file
with
196 addition
and
92 deletion
+196
-92
core/factory.py
core/factory.py
+1
-10
core/reader.py
core/reader.py
+7
-14
core/trainer.py
core/trainer.py
+2
-10
core/utils/envs.py
core/utils/envs.py
+23
-0
core/utils/validation.py
core/utils/validation.py
+151
-0
doc/design.md
doc/design.md
+1
-7
models/rank/dcn/data/get_slot_data.py
models/rank/dcn/data/get_slot_data.py
+0
-13
models/rank/deepfm/data/get_slot_data.py
models/rank/deepfm/data/get_slot_data.py
+1
-10
models/rank/dnn/model.py
models/rank/dnn/model.py
+1
-1
models/rank/wide_deep/data/get_slot_data.py
models/rank/wide_deep/data/get_slot_data.py
+1
-10
models/rank/xdeepfm/data/get_slot_data.py
models/rank/xdeepfm/data/get_slot_data.py
+1
-8
run.py
run.py
+6
-8
setup.py
setup.py
+1
-1
未找到文件。
core/factory.py
浏览文件 @
d14f44da
...
...
@@ -14,9 +14,7 @@
import
os
import
sys
import
yaml
from
paddlerec.core.utils
import
envs
trainer_abs
=
os
.
path
.
join
(
...
...
@@ -66,16 +64,9 @@ class TrainerFactory(object):
@
staticmethod
def
create
(
config
):
_config
=
None
if
os
.
path
.
isfile
(
config
):
with
open
(
config
,
'r'
)
as
rb
:
_config
=
yaml
.
load
(
rb
.
read
(),
Loader
=
yaml
.
FullLoader
)
else
:
raise
ValueError
(
"paddlerec's config only support yaml"
)
_config
=
envs
.
load_yaml
(
config
)
envs
.
set_global_envs
(
_config
)
envs
.
update_workspace
()
trainer
=
TrainerFactory
.
_build_trainer
(
config
)
return
trainer
...
...
core/reader.py
浏览文件 @
d14f44da
...
...
@@ -13,13 +13,11 @@
# limitations under the License.
from
__future__
import
print_function
import
abc
import
os
from
functools
import
reduce
import
paddle.fluid.incubate.data_generator
as
dg
import
yaml
from
paddlerec.core.utils
import
envs
...
...
@@ -28,12 +26,9 @@ class Reader(dg.MultiSlotDataGenerator):
def
__init__
(
self
,
config
):
dg
.
MultiSlotDataGenerator
.
__init__
(
self
)
if
os
.
path
.
isfile
(
config
):
with
open
(
config
,
'r'
)
as
rb
:
_config
=
yaml
.
load
(
rb
.
read
(),
Loader
=
yaml
.
FullLoader
)
else
:
raise
ValueError
(
"reader config only support yaml"
)
_config
=
envs
.
load_yaml
(
config
)
envs
.
set_global_envs
(
_config
)
envs
.
update_workspace
()
@
abc
.
abstractmethod
def
init
(
self
):
...
...
@@ -50,11 +45,9 @@ class SlotReader(dg.MultiSlotDataGenerator):
def
__init__
(
self
,
config
):
dg
.
MultiSlotDataGenerator
.
__init__
(
self
)
if
os
.
path
.
isfile
(
config
):
with
open
(
config
,
'r'
)
as
rb
:
_config
=
yaml
.
load
(
rb
.
read
(),
Loader
=
yaml
.
FullLoader
)
else
:
raise
ValueError
(
"reader config only support yaml"
)
_config
=
envs
.
load_yaml
(
config
)
envs
.
set_global_envs
(
_config
)
envs
.
update_workspace
()
def
init
(
self
,
sparse_slots
,
dense_slots
,
padding
=
0
):
from
operator
import
mul
...
...
core/trainer.py
浏览文件 @
d14f44da
...
...
@@ -30,16 +30,12 @@ class Trainer(object):
def
__init__
(
self
,
config
=
None
):
self
.
_status_processor
=
{}
self
.
_place
=
fluid
.
CPUPlace
()
self
.
_exe
=
fluid
.
Executor
(
self
.
_place
)
self
.
_exector_context
=
{}
self
.
_context
=
{
'status'
:
'uninit'
,
'is_exit'
:
False
}
self
.
_config_yaml
=
config
with
open
(
config
,
'r'
)
as
rb
:
self
.
_config
=
yaml
.
load
(
rb
.
read
(),
Loader
=
yaml
.
FullLoader
)
self
.
_config
=
envs
.
load_yaml
(
config
)
def
regist_context_processor
(
self
,
status_name
,
processor
):
"""
...
...
@@ -87,12 +83,8 @@ class Trainer(object):
def
user_define_engine
(
engine_yaml
):
with
open
(
engine_yaml
,
'r'
)
as
rb
:
_config
=
yaml
.
load
(
rb
.
read
(),
Loader
=
yaml
.
FullLoader
)
assert
_config
is
not
None
_config
=
envs
.
load_yaml
(
engine_yaml
)
envs
.
set_runtime_environs
(
_config
)
train_location
=
envs
.
get_global_env
(
"engine.file"
)
train_dirname
=
os
.
path
.
dirname
(
train_location
)
base_name
=
os
.
path
.
splitext
(
os
.
path
.
basename
(
train_location
))[
0
]
...
...
core/utils/envs.py
浏览文件 @
d14f44da
...
...
@@ -203,3 +203,26 @@ def find_free_port():
new_port
=
__free_port
()
return
new_port
def load_yaml(config):
    """Load the yaml file at ``config`` and return its parsed content.

    Args:
        config: path of the yaml file to read.

    Returns:
        The object produced by ``yaml.load`` for the file content.

    Raises:
        ValueError: if ``config`` is not an existing regular file.
    """
    # Reject bad paths first so the caller gets the documented ValueError
    # before any yaml-specific work is attempted.
    if not os.path.isfile(config):
        raise ValueError("config {} can not be supported".format(config))

    # Feature-detect FullLoader instead of parsing yaml.__version__ with
    # int(): version strings with non-numeric parts (e.g. "5.1b1") would
    # raise ValueError there. FullLoader is what the >= 5.1 branch wanted.
    full_loader = getattr(yaml, "FullLoader", None)

    with open(config, 'r') as rb:
        content = rb.read()

    if full_loader is not None:
        return yaml.load(content, Loader=full_loader)

    # NOTE(review): on PyYAML releases without FullLoader this falls back to
    # the default loader, which can construct arbitrary Python objects. Only
    # load trusted config files here, or switch to yaml.safe_load.
    return yaml.load(content)
core/utils/validation.py
0 → 100644
浏览文件 @
d14f44da
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
paddlerec.core.utils
import
envs
class ValueFormat:
    """Expected format of one yaml attribute: a type tag plus a value check."""

    def __init__(self, type, value, value_handler):
        """Remember how an attribute has to be validated.

        Args:
            type: type tag, one of "int", "str", "ints" (list of int) or
                "strs" (list of str). Any other tag fails is_type_valid.
            value: reference value passed to ``value_handler`` as its third
                argument (allowed values, a bound, ...).
            value_handler: callable ``(name, value, values) -> bool``.
        """
        self.type = type
        self.value = value
        self.value_handler = value_handler
        # There is no ``help`` parameter; the attribute used to capture the
        # interpreter's builtin ``help`` by accident. Keep the attribute so
        # existing readers do not break, but store nothing in it.
        self.help = None

    def is_valid(self, name, value):
        """Return True when ``value`` passes the type check and value check.

        The value handler is only consulted when the type check succeeded.
        """
        if not self.is_type_valid(name, value):
            return False
        return self.is_value_valid(name, value)

    def _is_scalar(self, name, value, expected, label):
        """Check that ``value`` is an instance of ``expected``; report if not."""
        if isinstance(value, expected):
            return True
        # Report the type that was actually found, not the expected tag.
        print("\nattr {} should be {}, but {} now\n".format(name, label,
                                                            type(value)))
        return False

    def _is_list_of(self, name, value, expected, label):
        """Check that ``value`` is a list whose items are all ``expected``."""
        if not isinstance(value, list):
            print("\nattr {} should be list({}), but {} now\n".format(
                name, label, type(value)))
            return False
        for v in value:
            if not isinstance(v, expected):
                print("\nattr {} should be list({}), but list({}) now\n".
                      format(name, label, type(v)))
                return False
        return True

    def is_type_valid(self, name, value):
        """Return True when ``value`` matches the type tag of this format.

        A message is printed and False is returned on a mismatch or when the
        type tag itself is not one of the supported ones.
        """
        if self.type == "int":
            return self._is_scalar(name, value, int, "int")
        if self.type == "str":
            return self._is_scalar(name, value, str, "str")
        if self.type == "strs":
            return self._is_list_of(name, value, str, "str")
        if self.type == "ints":
            return self._is_list_of(name, value, int, "int")

        print("\nattr {}'s type is {}, can not be supported now\n".format(
            name, type(value)))
        return False

    def is_value_valid(self, name, value):
        """Return the verdict of the value handler for ``value``.

        Handlers take ``(name, value, values)``; the reference value stored
        at construction time is forwarded as ``values``.
        """
        return self.value_handler(name, value, self.value)
def in_value_handler(name, value, values):
    """Return True when ``value`` is contained in ``values``.

    Otherwise print a message naming the attribute and return False.
    """
    if value in values:
        return True

    message = "\nattr {}'s value is {}, but {} is expected\n".format(
        name, value, values)
    print(message)
    return False
def eq_value_handler(name, value, values):
    """Return True when ``value`` equals the reference ``values``.

    A reference of ``None`` means "no constraint": register() uses it for
    ``train.workspace``, where any string is acceptable. Without this guard
    every real value would compare unequal to ``None`` and be rejected.

    On a mismatch a message naming the attribute is printed and False is
    returned.
    """
    if values is None:
        return True

    if value != values:
        print("\nattr {}'s value is {}, but == {} is expected\n".format(
            name, value, values))
        return False
    return True
def ge_value_handler(name, value, values):
    """Return False (after printing a message) when ``value < values``.

    Any value that is not below the bound is accepted.
    """
    below_bound = value < values
    if below_bound:
        message = "\nattr {}'s value is {}, but >= {} is expected\n".format(
            name, value, values)
        print(message)
    return not below_bound
def le_value_handler(name, value, values):
    """Return False (after printing a message) when ``value > values``.

    Any value that is not above the bound is accepted.
    """
    above_bound = value > values
    if above_bound:
        message = "\nattr {}'s value is {}, but <= {} is expected\n".format(
            name, value, values)
        print(message)
    return not above_bound
def register():
    """Build the table of attribute checkers and the list of required keys.

    Returns:
        A ``(validations, requires)`` pair: ``validations`` maps a dotted
        attribute name to its ValueFormat, ``requires`` lists the attribute
        names that yaml_validation insists on finding.
    """
    allowed_devices = ["cpu", "gpu"]
    allowed_engines = ["single", "local_cluster", "cluster"]

    validations = {
        "train.workspace": ValueFormat("str", None, eq_value_handler),
        "train.device": ValueFormat("str", allowed_devices, in_value_handler),
        "train.epochs": ValueFormat("int", 1, ge_value_handler),
        "train.engine": ValueFormat("str", allowed_engines, in_value_handler),
    }

    # NOTE(review): "train.namespace" is required but has no checker, while
    # "train.workspace" has a checker but is not required -- confirm whether
    # these two names were meant to be the same key.
    requires = [
        "train.namespace",
        "train.device",
        "train.epochs",
        "train.engine",
    ]
    return validations, requires
def yaml_validation(config):
    """Validate the yaml file at ``config`` against the registered checkers.

    Args:
        config: path of the yaml file, handed to ``envs.load_yaml``.

    Returns:
        True when every required attribute is present and every attribute
        that has a registered checker passes it; False otherwise. The reason
        for a failure is printed by this function or by the checker.
    """
    all_checkers, require_checkers = register()

    _config = envs.load_yaml(config)
    # Presumably a dict of dotted names ("train.device") to leaf values --
    # that is the key shape register() uses; verify in envs.
    flattens = envs.flatten_environs(_config)

    for required in require_checkers:
        if required not in flattens:
            print("\ncan not find {} in yaml, which is required\n".format(
                required))
            return False

    for name, flatten in flattens.items():
        checker = all_checkers.get(name)
        if not checker:
            # Attributes without a registered checker are accepted as-is.
            continue

        # Validate this attribute's own value; passing the whole flattened
        # mapping made every type check fail.
        if not checker.is_valid(name, flatten):
            return False

    return True
doc/design.md
浏览文件 @
d14f44da
...
...
@@ -197,13 +197,7 @@ class Reader(dg.MultiSlotDataGenerator):
def
__init__
(
self
,
config
):
dg
.
MultiSlotDataGenerator
.
__init__
(
self
)
if
os
.
path
.
isfile
(
config
):
with
open
(
config
,
'r'
)
as
rb
:
_config
=
yaml
.
load
(
rb
.
read
(),
Loader
=
yaml
.
FullLoader
)
else
:
raise
ValueError
(
"reader config only support yaml"
)
_config
=
envs
.
load_yaml
(
config
)
envs
.
set_global_envs
(
_config
)
envs
.
update_workspace
()
...
...
models/rank/dcn/data/get_slot_data.py
浏览文件 @
d14f44da
...
...
@@ -12,18 +12,11 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import
math
import
sys
import
yaml
from
paddlerec.core.reader
import
Reader
from
paddlerec.core.utils
import
envs
import
math
import
os
try
:
import
cPickle
as
pickle
except
ImportError
:
import
pickle
from
collections
import
Counter
import
os
import
paddle.fluid.incubate.data_generator
as
dg
...
...
@@ -31,12 +24,6 @@ class TrainReader(dg.MultiSlotDataGenerator):
def
__init__
(
self
,
config
):
dg
.
MultiSlotDataGenerator
.
__init__
(
self
)
if
os
.
path
.
isfile
(
config
):
with
open
(
config
,
'r'
)
as
rb
:
_config
=
yaml
.
load
(
rb
.
read
(),
Loader
=
yaml
.
FullLoader
)
else
:
raise
ValueError
(
"reader config only support yaml"
)
def
init
(
self
):
self
.
cont_min_
=
[
0
,
-
3
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
]
self
.
cont_max_
=
[
...
...
models/rank/deepfm/data/get_slot_data.py
浏览文件 @
d14f44da
...
...
@@ -12,10 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import
yaml
,
os
from
paddlerec.core.reader
import
Reader
from
paddlerec.core.utils
import
envs
import
os
import
paddle.fluid.incubate.data_generator
as
dg
try
:
import
cPickle
as
pickle
...
...
@@ -27,12 +24,6 @@ class TrainReader(dg.MultiSlotDataGenerator):
def
__init__
(
self
,
config
):
dg
.
MultiSlotDataGenerator
.
__init__
(
self
)
if
os
.
path
.
isfile
(
config
):
with
open
(
config
,
'r'
)
as
rb
:
_config
=
yaml
.
load
(
rb
.
read
(),
Loader
=
yaml
.
FullLoader
)
else
:
raise
ValueError
(
"reader config only support yaml"
)
def
init
(
self
):
self
.
cont_min_
=
[
0
,
-
3
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
,
0
]
self
.
cont_max_
=
[
...
...
models/rank/dnn/model.py
浏览文件 @
d14f44da
...
...
@@ -32,7 +32,7 @@ class Model(ModelBase):
self
.
sparse_feature_dim
=
envs
.
get_global_env
(
"hyper_parameters.sparse_feature_dim"
)
self
.
learning_rate
=
envs
.
get_global_env
(
"hyper_parameters.learning_rate"
)
"hyper_parameters.
optimizer.
learning_rate"
)
def
net
(
self
,
input
,
is_infer
=
False
):
self
.
sparse_inputs
=
self
.
_sparse_data_var
[
1
:]
...
...
models/rank/wide_deep/data/get_slot_data.py
浏览文件 @
d14f44da
...
...
@@ -11,10 +11,7 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
yaml
,
os
from
paddlerec.core.reader
import
Reader
from
paddlerec.core.utils
import
envs
import
os
try
:
import
cPickle
as
pickle
except
ImportError
:
...
...
@@ -26,12 +23,6 @@ class TrainReader(dg.MultiSlotDataGenerator):
def
__init__
(
self
,
config
):
dg
.
MultiSlotDataGenerator
.
__init__
(
self
)
if
os
.
path
.
isfile
(
config
):
with
open
(
config
,
'r'
)
as
rb
:
_config
=
yaml
.
load
(
rb
.
read
(),
Loader
=
yaml
.
FullLoader
)
else
:
raise
ValueError
(
"reader config only support yaml"
)
def
init
(
self
):
pass
...
...
models/rank/xdeepfm/data/get_slot_data.py
浏览文件 @
d14f44da
...
...
@@ -12,9 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import
yaml
,
os
from
paddlerec.core.reader
import
Reader
from
paddlerec.core.utils
import
envs
import
os
try
:
import
cPickle
as
pickle
except
ImportError
:
...
...
@@ -25,11 +23,6 @@ import paddle.fluid.incubate.data_generator as dg
class
TrainReader
(
dg
.
MultiSlotDataGenerator
):
def
__init__
(
self
,
config
):
dg
.
MultiSlotDataGenerator
.
__init__
(
self
)
if
os
.
path
.
isfile
(
config
):
with
open
(
config
,
'r'
)
as
rb
:
_config
=
yaml
.
load
(
rb
.
read
(),
Loader
=
yaml
.
FullLoader
)
else
:
raise
ValueError
(
"reader config only support yaml"
)
def
init
(
self
):
pass
...
...
run.py
浏览文件 @
d14f44da
...
...
@@ -14,7 +14,7 @@
import
os
import
subprocess
import
sys
import
argparse
import
tempfile
import
yaml
...
...
@@ -22,6 +22,7 @@ import copy
from
paddlerec.core.factory
import
TrainerFactory
from
paddlerec.core.utils
import
envs
from
paddlerec.core.utils
import
util
from
paddlerec.core.utils
import
validation
engines
=
{}
device
=
[
"CPU"
,
"GPU"
]
...
...
@@ -48,9 +49,7 @@ def engine_registry():
def
get_inters_from_yaml
(
file
,
filters
):
with
open
(
file
,
'r'
)
as
rb
:
_envs
=
yaml
.
load
(
rb
.
read
(),
Loader
=
yaml
.
FullLoader
)
_envs
=
envs
.
load_yaml
(
file
)
flattens
=
envs
.
flatten_environs
(
_envs
)
inters
=
{}
for
k
,
v
in
flattens
.
items
():
...
...
@@ -197,9 +196,7 @@ def cluster_engine(args):
def
master
():
role
=
"MASTER"
from
paddlerec.core.engine.cluster.cluster
import
ClusterEngine
with
open
(
args
.
backend
,
'r'
)
as
rb
:
_envs
=
yaml
.
load
(
rb
.
read
(),
Loader
=
yaml
.
FullLoader
)
_envs
=
envs
.
load_yaml
(
args
.
backend
)
flattens
=
envs
.
flatten_environs
(
_envs
,
"_"
)
flattens
[
"engine_role"
]
=
role
flattens
[
"engine_run_config"
]
=
args
.
model
...
...
@@ -322,8 +319,9 @@ if __name__ == "__main__":
model_name
=
args
.
model
.
split
(
'.'
)[
-
1
]
args
.
model
=
get_abs_model
(
args
.
model
)
if
not
validation
.
yaml_validation
(
args
.
model
):
sys
.
exit
(
-
1
)
engine_registry
()
which_engine
=
get_engine
(
args
)
engine
=
which_engine
(
args
)
engine
.
run
()
setup.py
浏览文件 @
d14f44da
...
...
@@ -21,7 +21,7 @@ from setuptools import setup, find_packages
import
shutil
import
tempfile
requires
=
[
"paddlepaddle == 1.7.2"
,
"
pyyaml
>= 5.1.1"
]
requires
=
[
"paddlepaddle == 1.7.2"
,
"
PyYAML
>= 5.1.1"
]
about
=
{}
about
[
"__title__"
]
=
"paddle-rec"
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录