Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleRec
提交
c68f2694
P
PaddleRec
项目概览
PaddlePaddle
/
PaddleRec
通知
68
Star
12
Fork
5
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
27
列表
看板
标记
里程碑
合并请求
10
Wiki
1
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleRec
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
27
Issue
27
列表
看板
标记
里程碑
合并请求
10
合并请求
10
Pages
分析
分析
仓库分析
DevOps
Wiki
1
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
c68f2694
编写于
5月 20, 2020
作者:
T
tangwei
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix code style
上级
66e1859f
变更
49
展开全部
显示空白变更内容
内联
并排
Showing
49 changed files
with
548 additions
and
190 deletions
+548
-190
core/engine/cluster/cloud/__init__.py
core/engine/cluster/cloud/__init__.py
+13
-0
core/modules/coding/layers.py
core/modules/coding/layers.py
+13
-0
core/trainers/__init__.py
core/trainers/__init__.py
+0
-3
core/trainers/ctr_coding_trainer.py
core/trainers/ctr_coding_trainer.py
+14
-9
core/trainers/ctr_modul_trainer.py
core/trainers/ctr_modul_trainer.py
+145
-71
doc/benchmark.md
doc/benchmark.md
+1
-1
doc/contribute.md
doc/contribute.md
+1
-1
doc/design.md
doc/design.md
+1
-1
doc/distributed_train.md
doc/distributed_train.md
+0
-2
doc/faq.md
doc/faq.md
+1
-1
doc/local_train.md
doc/local_train.md
+1
-1
doc/model_list.md
doc/model_list.md
+0
-1
doc/optimization_model.md
doc/optimization_model.md
+1
-1
doc/predict.md
doc/predict.md
+1
-1
doc/ps_background.md
doc/ps_background.md
+0
-1
models/contentunderstanding/__init__.py
models/contentunderstanding/__init__.py
+13
-0
models/contentunderstanding/classification/config.yaml
models/contentunderstanding/classification/config.yaml
+0
-1
models/contentunderstanding/classification/model.py
models/contentunderstanding/classification/model.py
+5
-2
models/contentunderstanding/classification/reader.py
models/contentunderstanding/classification/reader.py
+2
-2
models/contentunderstanding/readme.md
models/contentunderstanding/readme.md
+0
-1
models/contentunderstanding/tagspace/config.yaml
models/contentunderstanding/tagspace/config.yaml
+0
-1
models/match/__init__.py
models/match/__init__.py
+13
-0
models/match/readme.md
models/match/readme.md
+0
-1
models/multitask/__init__.py
models/multitask/__init__.py
+13
-0
models/multitask/readme.md
models/multitask/readme.md
+0
-1
models/rank/__init__.py
models/rank/__init__.py
+13
-0
models/rank/dcn/data/download.py
models/rank/dcn/data/download.py
+14
-0
models/rank/dcn/data/get_slot_data.py
models/rank/dcn/data/get_slot_data.py
+4
-3
models/rank/dcn/data/preprocess.py
models/rank/dcn/data/preprocess.py
+14
-0
models/rank/deepfm/data/download_preprocess.py
models/rank/deepfm/data/download_preprocess.py
+14
-0
models/rank/deepfm/data/get_slot_data.py
models/rank/deepfm/data/get_slot_data.py
+7
-3
models/rank/deepfm/data/preprocess.py
models/rank/deepfm/data/preprocess.py
+14
-0
models/rank/din/data/build_dataset.py
models/rank/din/data/build_dataset.py
+14
-0
models/rank/din/data/convert_pd.py
models/rank/din/data/convert_pd.py
+14
-0
models/rank/din/data/remap_id.py
models/rank/din/data/remap_id.py
+14
-0
models/rank/dnn/data/get_slot_data.py
models/rank/dnn/data/get_slot_data.py
+4
-2
models/rank/wide_deep/data/data_preparation.py
models/rank/wide_deep/data/data_preparation.py
+90
-43
models/rank/wide_deep/data/get_slot_data.py
models/rank/wide_deep/data/get_slot_data.py
+4
-1
models/rank/xdeepfm/data/download.py
models/rank/xdeepfm/data/download.py
+14
-0
models/rank/xdeepfm/data/get_slot_data.py
models/rank/xdeepfm/data/get_slot_data.py
+4
-1
models/recall/gnn/data_process.sh
models/recall/gnn/data_process.sh
+0
-2
models/recall/gnn/raw_data/convert_data.py
models/recall/gnn/raw_data/convert_data.py
+16
-0
models/recall/gnn/raw_data/download.py
models/recall/gnn/raw_data/download.py
+14
-0
models/recall/readme.md
models/recall/readme.md
+0
-1
models/recall/word2vec/prepare_data.sh
models/recall/word2vec/prepare_data.sh
+0
-3
models/treebased/README.md
models/treebased/README.md
+1
-1
models/treebased/tdm/tree/layer_list.txt
models/treebased/tdm/tree/layer_list.txt
+1
-1
run.py
run.py
+22
-11
setup.py
setup.py
+18
-15
未找到文件。
core/engine/cluster/cloud/__init__.py
浏览文件 @
c68f2694
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
core/modules/coding/layers.py
浏览文件 @
c68f2694
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
core/trainers/__init__.py
浏览文件 @
c68f2694
...
...
@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
trainer implement.
...
...
@@ -22,5 +21,3 @@ Trainer
↘ (for online learning training) OnlineLearningTrainer
"""
core/trainers/ctr_coding_trainer.py
浏览文件 @
c68f2694
...
...
@@ -59,8 +59,10 @@ class CtrTrainer(Trainer):
reader_class
=
envs
.
get_global_env
(
"class"
,
None
,
namespace
)
abs_dir
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
reader
=
os
.
path
.
join
(
abs_dir
,
'../utils'
,
'dataset_instance.py'
)
pipe_cmd
=
"python {} {} {} {}"
.
format
(
reader
,
reader_class
,
"TRAIN"
,
self
.
_config_yaml
)
train_data_path
=
envs
.
get_global_env
(
"train_data_path"
,
None
,
namespace
)
pipe_cmd
=
"python {} {} {} {}"
.
format
(
reader
,
reader_class
,
"TRAIN"
,
self
.
_config_yaml
)
train_data_path
=
envs
.
get_global_env
(
"train_data_path"
,
None
,
namespace
)
dataset
=
fluid
.
DatasetFactory
().
create_dataset
()
dataset
.
set_use_var
(
inputs
)
...
...
@@ -87,7 +89,8 @@ class CtrTrainer(Trainer):
self
.
model
.
train_net
()
optimizer
=
self
.
model
.
optimizer
()
optimizer
=
fleet
.
distributed_optimizer
(
optimizer
,
strategy
=
{
"use_cvm"
:
False
})
optimizer
=
fleet
.
distributed_optimizer
(
optimizer
,
strategy
=
{
"use_cvm"
:
False
})
optimizer
.
minimize
(
self
.
model
.
get_avg_cost
())
if
fleet
.
is_server
():
...
...
@@ -118,12 +121,14 @@ class CtrTrainer(Trainer):
gs
=
shuf
*
0
fleet
.
_role_maker
.
_node_type_comm
.
Allreduce
(
shuf
,
gs
)
print
(
"trainer id: {}, trainers: {}, gs: {}"
.
format
(
fleet
.
worker_index
(),
fleet
.
worker_num
(),
gs
))
print
(
"trainer id: {}, trainers: {}, gs: {}"
.
format
(
fleet
.
worker_index
(
),
fleet
.
worker_num
(),
gs
))
epochs
=
envs
.
get_global_env
(
"train.epochs"
)
for
i
in
range
(
epochs
):
self
.
_exe
.
train_from_dataset
(
program
=
fluid
.
default_main_program
(),
self
.
_exe
.
train_from_dataset
(
program
=
fluid
.
default_main_program
(),
dataset
=
dataset
,
fetch_list
=
self
.
fetch_vars
,
fetch_info
=
self
.
fetch_alias
,
...
...
core/trainers/ctr_modul_trainer.py
浏览文件 @
c68f2694
此差异已折叠。
点击以展开。
doc/benchmark.md
浏览文件 @
c68f2694
doc/contribute.md
浏览文件 @
c68f2694
doc/design.md
浏览文件 @
c68f2694
doc/distributed_train.md
浏览文件 @
c68f2694
...
...
@@ -7,5 +7,3 @@
### K8S集群运行分布式
> 占位
doc/faq.md
浏览文件 @
c68f2694
doc/local_train.md
浏览文件 @
c68f2694
doc/model_list.md
浏览文件 @
c68f2694
...
...
@@ -12,4 +12,3 @@
| 多任务 |
[
ESMM
](
)
| ✓ | x | ✓ | x | ✓ | ✓ |
| 匹配 |
[
DSSM
](
)
| ✓ | x | ✓ | x | ✓ | ✓ |
| 匹配 |
[
Multiview-Simnet
](
)
| ✓ | x | ✓ | x | ✓ | ✓ |
doc/optimization_model.md
浏览文件 @
c68f2694
doc/predict.md
浏览文件 @
c68f2694
doc/ps_background.md
浏览文件 @
c68f2694
...
...
@@ -5,4 +5,3 @@
## [参数服务器训练](https://www.paddlepaddle.org.cn/tutorials/projectdetail/464839)
models/contentunderstanding/__init__.py
浏览文件 @
c68f2694
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
models/contentunderstanding/classification/config.yaml
浏览文件 @
c68f2694
...
...
@@ -37,4 +37,3 @@ train:
dirname
:
"
inference"
epoch_interval
:
100
save_last
:
True
models/contentunderstanding/classification/model.py
浏览文件 @
c68f2694
...
...
@@ -31,7 +31,8 @@ class Model(ModelBase):
def
train_net
(
self
):
""" network definition """
data
=
fluid
.
data
(
name
=
"input"
,
shape
=
[
None
,
self
.
max_len
],
dtype
=
'int64'
)
data
=
fluid
.
data
(
name
=
"input"
,
shape
=
[
None
,
self
.
max_len
],
dtype
=
'int64'
)
label
=
fluid
.
data
(
name
=
"label"
,
shape
=
[
None
,
1
],
dtype
=
'int64'
)
seq_len
=
fluid
.
data
(
name
=
"seq_len"
,
shape
=
[
None
],
dtype
=
'int64'
)
...
...
@@ -51,7 +52,9 @@ class Model(ModelBase):
# full connect layer
fc_1
=
fluid
.
layers
.
fc
(
input
=
[
conv
],
size
=
self
.
hid_dim
)
# softmax layer
prediction
=
fluid
.
layers
.
fc
(
input
=
[
fc_1
],
size
=
self
.
class_dim
,
act
=
"softmax"
)
prediction
=
fluid
.
layers
.
fc
(
input
=
[
fc_1
],
size
=
self
.
class_dim
,
act
=
"softmax"
)
cost
=
fluid
.
layers
.
cross_entropy
(
input
=
prediction
,
label
=
label
)
avg_cost
=
fluid
.
layers
.
mean
(
x
=
cost
)
acc
=
fluid
.
layers
.
accuracy
(
input
=
prediction
,
label
=
label
)
...
...
models/contentunderstanding/classification/reader.py
浏览文件 @
c68f2694
...
...
@@ -12,7 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
import
sys
from
paddlerec.core.reader
import
Reader
...
...
@@ -38,7 +37,8 @@ class TrainReader(Reader):
data
=
[
int
(
i
)
for
i
in
data
]
label
=
[
int
(
i
)
for
i
in
label
]
seq_len
=
[
int
(
i
)
for
i
in
seq_len
]
print
>>
sys
.
stderr
,
str
([(
'data'
,
data
),
(
'label'
,
label
),
(
'seq_len'
,
seq_len
)])
print
>>
sys
.
stderr
,
str
(
[(
'data'
,
data
),
(
'label'
,
label
),
(
'seq_len'
,
seq_len
)])
yield
[(
'data'
,
data
),
(
'label'
,
label
),
(
'seq_len'
,
seq_len
)]
return
data_iter
models/contentunderstanding/readme.md
浏览文件 @
c68f2694
...
...
@@ -87,4 +87,3 @@ python -m paddlerec.run -m paddlerec.models.contentunderstanding.classification
| :------------------: | :--------------------: | :---------: |:---------: | :---------: |:---------: |
| ag news dataset | TagSpace | -- | -- | -- | -- |
| -- | Classification | -- | -- | -- | -- |
models/contentunderstanding/tagspace/config.yaml
浏览文件 @
c68f2694
...
...
@@ -47,4 +47,3 @@ train:
dirname
:
"
inference"
epoch_interval
:
100
save_last
:
True
models/match/__init__.py
浏览文件 @
c68f2694
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
models/match/readme.md
浏览文件 @
c68f2694
...
...
@@ -37,4 +37,3 @@
python
-m
paddlerec.run
-m
paddlerec.models.match.dssm
# dssm
python
-m
paddlerec.run
-m
paddlerec.models.match.multiview-simnet
# multiview-simnet
```
models/multitask/__init__.py
浏览文件 @
c68f2694
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
models/multitask/readme.md
浏览文件 @
c68f2694
...
...
@@ -56,4 +56,3 @@ python -m paddlerec.run -m paddlerec.models.multitask.esmm # esmm
| Census-income Data | Share-Bottom | -- | 0.93120/0.99256 |
| Census-income Data | MMoE | -- | 0.94465/0.99324 |
| Ali-CCP | ESMM | -- | 0.97181/0.49967 |
models/rank/__init__.py
浏览文件 @
c68f2694
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
models/rank/dcn/data/download.py
浏览文件 @
c68f2694
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
sys
import
io
...
...
models/rank/dcn/data/get_slot_data.py
浏览文件 @
c68f2694
...
...
@@ -26,8 +26,8 @@ from collections import Counter
import
os
import
paddle.fluid.incubate.data_generator
as
dg
class
TrainReader
(
dg
.
MultiSlotDataGenerator
):
class
TrainReader
(
dg
.
MultiSlotDataGenerator
):
def
__init__
(
self
,
config
):
dg
.
MultiSlotDataGenerator
.
__init__
(
self
)
...
...
@@ -109,6 +109,7 @@ class TrainReader(dg.MultiSlotDataGenerator):
return
data_iter
reader
=
TrainReader
(
"../config.yaml"
)
reader
.
init
()
reader
.
run_from_stdin
()
models/rank/dcn/data/preprocess.py
浏览文件 @
c68f2694
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
,
absolute_import
,
division
import
os
...
...
models/rank/deepfm/data/download_preprocess.py
浏览文件 @
c68f2694
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
shutil
import
sys
...
...
models/rank/deepfm/data/get_slot_data.py
浏览文件 @
c68f2694
...
...
@@ -19,8 +19,9 @@ try:
import
cPickle
as
pickle
except
ImportError
:
import
pickle
class
TrainReader
(
dg
.
MultiSlotDataGenerator
):
class
TrainReader
(
dg
.
MultiSlotDataGenerator
):
def
__init__
(
self
,
config
):
dg
.
MultiSlotDataGenerator
.
__init__
(
self
)
...
...
@@ -77,15 +78,18 @@ class TrainReader(dg.MultiSlotDataGenerator):
def
data_iter
():
feat_idx
,
feat_value
,
label
=
self
.
_process_line
(
line
)
s
=
""
for
i
in
[(
'feat_idx'
,
feat_idx
),
(
'feat_value'
,
feat_value
),
(
'label'
,
label
)]:
for
i
in
[(
'feat_idx'
,
feat_idx
),
(
'feat_value'
,
feat_value
),
(
'label'
,
label
)]:
k
=
i
[
0
]
v
=
i
[
1
]
for
j
in
v
:
s
+=
" "
+
k
+
":"
+
str
(
j
)
print
s
.
strip
()
yield
None
return
data_iter
reader
=
TrainReader
(
"../config.yaml"
)
reader
.
init
()
reader
.
run_from_stdin
()
models/rank/deepfm/data/preprocess.py
浏览文件 @
c68f2694
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
numpy
from
collections
import
Counter
...
...
models/rank/din/data/build_dataset.py
浏览文件 @
c68f2694
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
random
import
pickle
...
...
models/rank/din/data/convert_pd.py
浏览文件 @
c68f2694
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
pickle
import
pandas
as
pd
...
...
models/rank/din/data/remap_id.py
浏览文件 @
c68f2694
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
from
__future__
import
print_function
import
random
import
pickle
...
...
models/rank/dnn/data/get_slot_data.py
浏览文件 @
c68f2694
...
...
@@ -32,6 +32,7 @@ class CriteoDataset(dg.MultiSlotDataGenerator):
"""
Read the data line by line and process it as a dictionary
"""
def
reader
():
"""
This function needs to be implemented by the user, based on data format
...
...
@@ -59,9 +60,10 @@ class CriteoDataset(dg.MultiSlotDataGenerator):
for
i
in
dense_feature
:
s
+=
" dense_feature:"
+
str
(
i
)
for
i
in
range
(
1
,
1
+
len
(
categorical_range_
)):
s
+=
" "
+
str
(
i
)
+
":"
+
str
(
sparse_feature
[
i
-
1
][
0
])
s
+=
" "
+
str
(
i
)
+
":"
+
str
(
sparse_feature
[
i
-
1
][
0
])
print
s
.
strip
()
yield
None
return
reader
...
...
models/rank/wide_deep/data/data_preparation.py
浏览文件 @
c68f2694
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
io
import
args
import
pandas
as
pd
from
sklearn
import
preprocessing
def
_clean_file
(
source_path
,
target_path
):
def
_clean_file
(
source_path
,
target_path
):
"""makes changes to match the CSV format."""
with
io
.
open
(
source_path
,
'r'
)
as
temp_eval_file
:
with
io
.
open
(
target_path
,
'w'
)
as
eval_file
:
...
...
@@ -18,6 +33,7 @@ def _clean_file(source_path,target_path):
line
+=
'
\n
'
eval_file
.
write
(
line
)
def
build_model_columns
(
train_data_path
,
test_data_path
):
# The column names are from
# https://www2.1010data.com/documentationcenter/prod/Tutorials/MachineLearningExamples/CensusIncomeDataSet.html
...
...
@@ -44,61 +60,92 @@ def build_model_columns(train_data_path, test_data_path):
# First group of tasks according to the paper
#label_columns = ['income_50k', 'marital_stat']
categorical_columns
=
[
'education'
,
'marital_status'
,
'relationship'
,
'workclass'
,
'occupation'
]
categorical_columns
=
[
'education'
,
'marital_status'
,
'relationship'
,
'workclass'
,
'occupation'
]
for
col
in
categorical_columns
:
label_train
=
preprocessing
.
LabelEncoder
()
train_df
[
col
]
=
label_train
.
fit_transform
(
train_df
[
col
])
train_df
[
col
]
=
label_train
.
fit_transform
(
train_df
[
col
])
label_test
=
preprocessing
.
LabelEncoder
()
test_df
[
col
]
=
label_test
.
fit_transform
(
test_df
[
col
])
test_df
[
col
]
=
label_test
.
fit_transform
(
test_df
[
col
])
bins
=
[
18
,
25
,
30
,
35
,
40
,
45
,
50
,
55
,
60
,
65
]
train_df
[
'age_buckets'
]
=
pd
.
cut
(
train_df
[
'age'
].
values
.
tolist
(),
bins
,
labels
=
False
)
test_df
[
'age_buckets'
]
=
pd
.
cut
(
test_df
[
'age'
].
values
.
tolist
(),
bins
,
labels
=
False
)
base_columns
=
[
'education'
,
'marital_status'
,
'relationship'
,
'workclass'
,
'occupation'
,
'age_buckets'
]
train_df
[
'age_buckets'
]
=
pd
.
cut
(
train_df
[
'age'
].
values
.
tolist
(),
bins
,
labels
=
False
)
test_df
[
'age_buckets'
]
=
pd
.
cut
(
test_df
[
'age'
].
values
.
tolist
(),
bins
,
labels
=
False
)
base_columns
=
[
'education'
,
'marital_status'
,
'relationship'
,
'workclass'
,
'occupation'
,
'age_buckets'
]
train_df
[
'education_occupation'
]
=
train_df
[
'education'
].
astype
(
str
)
+
'_'
+
train_df
[
'occupation'
].
astype
(
str
)
test_df
[
'education_occupation'
]
=
test_df
[
'education'
].
astype
(
str
)
+
'_'
+
test_df
[
'occupation'
].
astype
(
str
)
train_df
[
'age_buckets_education_occupation'
]
=
train_df
[
'age_buckets'
].
astype
(
str
)
+
'_'
+
train_df
[
'education'
].
astype
(
str
)
+
'_'
+
train_df
[
'occupation'
].
astype
(
str
)
test_df
[
'age_buckets_education_occupation'
]
=
test_df
[
'age_buckets'
].
astype
(
str
)
+
'_'
+
test_df
[
'education'
].
astype
(
str
)
+
'_'
+
test_df
[
'occupation'
].
astype
(
str
)
crossed_columns
=
[
'education_occupation'
,
'age_buckets_education_occupation'
]
train_df
[
'education_occupation'
]
=
train_df
[
'education'
].
astype
(
str
)
+
'_'
+
train_df
[
'occupation'
].
astype
(
str
)
test_df
[
'education_occupation'
]
=
test_df
[
'education'
].
astype
(
str
)
+
'_'
+
test_df
[
'occupation'
].
astype
(
str
)
train_df
[
'age_buckets_education_occupation'
]
=
train_df
[
'age_buckets'
].
astype
(
str
)
+
'_'
+
train_df
[
'education'
].
astype
(
str
)
+
'_'
+
train_df
[
'occupation'
].
astype
(
str
)
test_df
[
'age_buckets_education_occupation'
]
=
test_df
[
'age_buckets'
].
astype
(
str
)
+
'_'
+
test_df
[
'education'
].
astype
(
str
)
+
'_'
+
test_df
[
'occupation'
].
astype
(
str
)
crossed_columns
=
[
'education_occupation'
,
'age_buckets_education_occupation'
]
for
col
in
crossed_columns
:
label_train
=
preprocessing
.
LabelEncoder
()
train_df
[
col
]
=
label_train
.
fit_transform
(
train_df
[
col
])
train_df
[
col
]
=
label_train
.
fit_transform
(
train_df
[
col
])
label_test
=
preprocessing
.
LabelEncoder
()
test_df
[
col
]
=
label_test
.
fit_transform
(
test_df
[
col
])
test_df
[
col
]
=
label_test
.
fit_transform
(
test_df
[
col
])
wide_columns
=
base_columns
+
crossed_columns
train_df_temp
=
pd
.
get_dummies
(
train_df
[
categorical_columns
],
columns
=
categorical_columns
)
test_df_temp
=
pd
.
get_dummies
(
test_df
[
categorical_columns
],
columns
=
categorical_columns
)
train_df_temp
=
pd
.
get_dummies
(
train_df
[
categorical_columns
],
columns
=
categorical_columns
)
test_df_temp
=
pd
.
get_dummies
(
test_df
[
categorical_columns
],
columns
=
categorical_columns
)
train_df
=
train_df
.
join
(
train_df_temp
)
test_df
=
test_df
.
join
(
test_df_temp
)
deep_columns
=
list
(
train_df_temp
.
columns
)
+
[
'age'
,
'education_num'
,
'capital_gain'
,
'capital_loss'
,
'hours_per_week'
]
deep_columns
=
list
(
train_df_temp
.
columns
)
+
[
'age'
,
'education_num'
,
'capital_gain'
,
'capital_loss'
,
'hours_per_week'
]
train_df
[
'label'
]
=
train_df
[
'income_bracket'
].
apply
(
lambda
x
:
1
if
x
==
'>50K'
else
0
)
test_df
[
'label'
]
=
test_df
[
'income_bracket'
].
apply
(
lambda
x
:
1
if
x
==
'>50K'
else
0
)
train_df
[
'label'
]
=
train_df
[
'income_bracket'
].
apply
(
lambda
x
:
1
if
x
==
'>50K'
else
0
)
test_df
[
'label'
]
=
test_df
[
'income_bracket'
].
apply
(
lambda
x
:
1
if
x
==
'>50K'
else
0
)
with
io
.
open
(
'train_data/columns.txt'
,
'w'
)
as
f
:
write_str
=
str
(
len
(
wide_columns
))
+
'
\n
'
+
str
(
len
(
deep_columns
))
+
'
\n
'
with
io
.
open
(
'train_data/columns.txt'
,
'w'
)
as
f
:
write_str
=
str
(
len
(
wide_columns
))
+
'
\n
'
+
str
(
len
(
deep_columns
))
+
'
\n
'
f
.
write
(
write_str
)
f
.
close
()
with
io
.
open
(
'test_data/columns.txt'
,
'w'
)
as
f
:
write_str
=
str
(
len
(
wide_columns
))
+
'
\n
'
+
str
(
len
(
deep_columns
))
+
'
\n
'
with
io
.
open
(
'test_data/columns.txt'
,
'w'
)
as
f
:
write_str
=
str
(
len
(
wide_columns
))
+
'
\n
'
+
str
(
len
(
deep_columns
))
+
'
\n
'
f
.
write
(
write_str
)
f
.
close
()
train_df
[
wide_columns
+
deep_columns
+
[
'label'
]].
fillna
(
0
).
to_csv
(
train_data_path
,
index
=
False
)
test_df
[
wide_columns
+
deep_columns
+
[
'label'
]].
fillna
(
0
).
to_csv
(
test_data_path
,
index
=
False
)
train_df
[
wide_columns
+
deep_columns
+
[
'label'
]].
fillna
(
0
).
to_csv
(
train_data_path
,
index
=
False
)
test_df
[
wide_columns
+
deep_columns
+
[
'label'
]].
fillna
(
0
).
to_csv
(
test_data_path
,
index
=
False
)
def
clean_file
(
train_path
,
test_path
,
train_data_path
,
test_data_path
):
_clean_file
(
train_path
,
train_data_path
)
_clean_file
(
test_path
,
test_data_path
)
if
__name__
==
'__main__'
:
args
=
args
.
parse_args
()
clean_file
(
args
.
train_path
,
args
.
test_path
,
args
.
train_data_path
,
args
.
test_data_path
)
clean_file
(
args
.
train_path
,
args
.
test_path
,
args
.
train_data_path
,
args
.
test_data_path
)
build_model_columns
(
args
.
train_data_path
,
args
.
test_data_path
)
models/rank/wide_deep/data/get_slot_data.py
浏览文件 @
c68f2694
...
...
@@ -20,6 +20,7 @@ except ImportError:
import
pickle
import
paddle.fluid.incubate.data_generator
as
dg
class
TrainReader
(
dg
.
MultiSlotDataGenerator
):
def
__init__
(
self
,
config
):
dg
.
MultiSlotDataGenerator
.
__init__
(
self
)
...
...
@@ -50,7 +51,8 @@ class TrainReader(dg.MultiSlotDataGenerator):
wide_feat
,
deep_deat
,
label
=
self
.
_process_line
(
line
)
s
=
""
for
i
in
[(
'wide_input'
,
wide_feat
),
(
'deep_input'
,
deep_deat
),
(
'label'
,
label
)]:
for
i
in
[(
'wide_input'
,
wide_feat
),
(
'deep_input'
,
deep_deat
),
(
'label'
,
label
)]:
k
=
i
[
0
]
v
=
i
[
1
]
for
j
in
v
:
...
...
@@ -60,6 +62,7 @@ class TrainReader(dg.MultiSlotDataGenerator):
return
data_iter
reader
=
TrainReader
(
"../config.yaml"
)
reader
.
init
()
reader
.
run_from_stdin
()
models/rank/xdeepfm/data/download.py
浏览文件 @
c68f2694
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
os
import
shutil
import
sys
...
...
models/rank/xdeepfm/data/get_slot_data.py
浏览文件 @
c68f2694
...
...
@@ -21,6 +21,7 @@ except ImportError:
import
pickle
import
paddle.fluid.incubate.data_generator
as
dg
class
TrainReader
(
dg
.
MultiSlotDataGenerator
):
def
__init__
(
self
,
config
):
dg
.
MultiSlotDataGenerator
.
__init__
(
self
)
...
...
@@ -48,7 +49,8 @@ class TrainReader(dg.MultiSlotDataGenerator):
feat_idx
,
feat_value
,
label
=
self
.
_process_line
(
line
)
s
=
""
for
i
in
[(
'feat_idx'
,
feat_idx
),
(
'feat_value'
,
feat_value
),
(
'label'
,
label
)]:
for
i
in
[(
'feat_idx'
,
feat_idx
),
(
'feat_value'
,
feat_value
),
(
'label'
,
label
)]:
k
=
i
[
0
]
v
=
i
[
1
]
for
j
in
v
:
...
...
@@ -58,6 +60,7 @@ class TrainReader(dg.MultiSlotDataGenerator):
return
data_iter
reader
=
TrainReader
(
"../config.yaml"
)
reader
.
init
()
reader
.
run_from_stdin
()
models/recall/gnn/data_process.sh
浏览文件 @
c68f2694
...
...
@@ -31,5 +31,3 @@ mv diginetica/train.txt train_data
mkdir
test_data
mv
diginetica/test.txt test_data
models/recall/gnn/raw_data/convert_data.py
浏览文件 @
c68f2694
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
argparse
import
time
import
pickle
...
...
@@ -10,6 +24,7 @@ parser.add_argument(
help
=
'dataset dir: diginetica/yoochoose1_4/yoochoose1_64/sample'
)
opt
=
parser
.
parse_args
()
def
process_data
(
file_type
):
path
=
os
.
path
.
join
(
opt
.
data_dir
,
file_type
)
output_path
=
os
.
path
.
splitext
(
path
)[
0
]
+
".txt"
...
...
@@ -23,6 +38,7 @@ def process_data(file_type):
fout
.
write
(
str
(
data
[
i
][
1
]))
fout
.
write
(
"
\n
"
)
process_data
(
"train"
)
process_data
(
"test"
)
...
...
models/recall/gnn/raw_data/download.py
浏览文件 @
c68f2694
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
requests
import
sys
import
time
...
...
models/recall/readme.md
浏览文件 @
c68f2694
...
...
@@ -78,4 +78,3 @@ python -m paddlerec.run -m paddlerec.models.recall.youtube_dnn # youtube_dnn
| MOVIELENS | NCF | 0.688 | -- |
| -- | Youtube | -- | -- |
| 1 Billion Word Language Model Benchmark | Word2Vec | -- | 0.54 |
models/recall/word2vec/prepare_data.sh
浏览文件 @
c68f2694
...
...
@@ -35,6 +35,3 @@ wget --no-check-certificate https://paddlerec.bj.bcebos.com/word2vec/test_dir.ta
tar
xzvf test_dir.tar
-C
raw_data
mv
raw_data/data/test_dir test_data/
rm
-rf
raw_data
models/treebased/README.md
浏览文件 @
c68f2694
models/treebased/tdm/tree/layer_list.txt
浏览文件 @
c68f2694
run.py
浏览文件 @
c68f2694
...
...
@@ -26,8 +26,10 @@ from paddlerec.core.utils import util
engines
=
{}
device
=
[
"CPU"
,
"GPU"
]
clusters
=
[
"SINGLE"
,
"LOCAL_CLUSTER"
,
"CLUSTER"
]
engine_choices
=
[
"SINGLE"
,
"LOCAL_CLUSTER"
,
"CLUSTER"
,
"TDM_SINGLE"
,
"TDM_LOCAL_CLUSTER"
,
"TDM_CLUSTER"
]
engine_choices
=
[
"SINGLE"
,
"LOCAL_CLUSTER"
,
"CLUSTER"
,
"TDM_SINGLE"
,
"TDM_LOCAL_CLUSTER"
,
"TDM_CLUSTER"
]
custom_model
=
[
'TDM'
]
model_name
=
""
...
...
@@ -66,7 +68,8 @@ def get_engine(args):
engine
=
engine
.
upper
()
if
engine
not
in
engine_choices
:
raise
ValueError
(
"train.engin can not be chosen in {}"
.
format
(
engine_choices
))
raise
ValueError
(
"train.engin can not be chosen in {}"
.
format
(
engine_choices
))
print
(
"engines:
\n
{}"
.
format
(
engines
))
...
...
@@ -77,8 +80,10 @@ def get_engine(args):
def
get_transpiler
():
FNULL
=
open
(
os
.
devnull
,
'w'
)
cmd
=
[
"python"
,
"-c"
,
"import paddle.fluid as fluid; fleet_ptr = fluid.core.Fleet(); [fleet_ptr.copy_table_by_feasign(10, 10, [2020, 1010])];"
]
cmd
=
[
"python"
,
"-c"
,
"import paddle.fluid as fluid; fleet_ptr = fluid.core.Fleet(); [fleet_ptr.copy_table_by_feasign(10, 10, [2020, 1010])];"
]
proc
=
subprocess
.
Popen
(
cmd
,
stdout
=
FNULL
,
stderr
=
FNULL
,
cwd
=
os
.
getcwd
())
ret
=
proc
.
wait
()
if
ret
==
-
11
:
...
...
@@ -152,7 +157,8 @@ def cluster_engine(args):
update_workspace
(
flattens
)
envs
.
set_runtime_environs
(
flattens
)
print
(
envs
.
pretty_print_envs
(
flattens
,
(
"Submit Runtime Envs"
,
"Value"
)))
print
(
envs
.
pretty_print_envs
(
flattens
,
(
"Submit Runtime Envs"
,
"Value"
)))
launch
=
ClusterEngine
(
None
,
args
.
model
)
return
launch
...
...
@@ -163,7 +169,8 @@ def cluster_engine(args):
cluster_envs
=
{}
cluster_envs
[
"train.trainer.trainer"
]
=
trainer
cluster_envs
[
"train.trainer.engine"
]
=
"cluster"
cluster_envs
[
"train.trainer.threads"
]
=
envs
.
get_runtime_environ
(
"CPU_NUM"
)
cluster_envs
[
"train.trainer.threads"
]
=
envs
.
get_runtime_environ
(
"CPU_NUM"
)
cluster_envs
[
"train.trainer.platform"
]
=
envs
.
get_platform
()
print
(
"launch {} engine with cluster to with model: {}"
.
format
(
trainer
,
args
.
model
))
...
...
@@ -181,7 +188,8 @@ def cluster_engine(args):
def
cluster_mpi_engine
(
args
):
print
(
"launch cluster engine with cluster to run model: {}"
.
format
(
args
.
model
))
print
(
"launch cluster engine with cluster to run model: {}"
.
format
(
args
.
model
))
cluster_envs
=
{}
cluster_envs
[
"train.trainer.trainer"
]
=
"CtrCodingTrainer"
...
...
@@ -209,7 +217,8 @@ def local_cluster_engine(args):
cluster_envs
[
"train.trainer.platform"
]
=
envs
.
get_platform
()
cluster_envs
[
"CPU_NUM"
]
=
"2"
print
(
"launch {} engine with cluster to run model: {}"
.
format
(
trainer
,
args
.
model
))
print
(
"launch {} engine with cluster to run model: {}"
.
format
(
trainer
,
args
.
model
))
set_runtime_envs
(
cluster_envs
,
args
.
model
)
launch
=
LocalClusterEngine
(
cluster_envs
,
args
.
model
)
...
...
@@ -217,10 +226,12 @@ def local_cluster_engine(args):
def
local_mpi_engine
(
args
):
print
(
"launch cluster engine with cluster to run model: {}"
.
format
(
args
.
model
))
print
(
"launch cluster engine with cluster to run model: {}"
.
format
(
args
.
model
))
from
paddlerec.core.engine.local_mpi
import
LocalMPIEngine
print
(
"use 1X1 MPI ClusterTraining at localhost to run model: {}"
.
format
(
args
.
model
))
print
(
"use 1X1 MPI ClusterTraining at localhost to run model: {}"
.
format
(
args
.
model
))
mpi
=
util
.
run_which
(
"mpirun"
)
if
not
mpi
:
...
...
setup.py
浏览文件 @
c68f2694
...
...
@@ -11,7 +11,6 @@
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
setup for paddle-rec.
"""
...
...
@@ -22,11 +21,7 @@ from setuptools import setup, find_packages
import
shutil
import
tempfile
requires
=
[
"paddlepaddle == 1.7.2"
,
"pyyaml >= 5.1.1"
]
requires
=
[
"paddlepaddle == 1.7.2"
,
"pyyaml >= 5.1.1"
]
about
=
{}
about
[
"__title__"
]
=
"paddle-rec"
...
...
@@ -48,18 +43,27 @@ def build(dirname):
package_dir
=
os
.
path
.
dirname
(
os
.
path
.
abspath
(
__file__
))
run_cmd
(
"cp -r {}/* {}"
.
format
(
package_dir
,
dirname
))
run_cmd
(
"mkdir {}"
.
format
(
os
.
path
.
join
(
dirname
,
"paddlerec"
)))
run_cmd
(
"mv {} {}"
.
format
(
os
.
path
.
join
(
dirname
,
"core"
),
os
.
path
.
join
(
dirname
,
"paddlerec"
)))
run_cmd
(
"mv {} {}"
.
format
(
os
.
path
.
join
(
dirname
,
"doc"
),
os
.
path
.
join
(
dirname
,
"paddlerec"
)))
run_cmd
(
"mv {} {}"
.
format
(
os
.
path
.
join
(
dirname
,
"models"
),
os
.
path
.
join
(
dirname
,
"paddlerec"
)))
run_cmd
(
"mv {} {}"
.
format
(
os
.
path
.
join
(
dirname
,
"tests"
),
os
.
path
.
join
(
dirname
,
"paddlerec"
)))
run_cmd
(
"mv {} {}"
.
format
(
os
.
path
.
join
(
dirname
,
"tools"
),
os
.
path
.
join
(
dirname
,
"paddlerec"
)))
run_cmd
(
"mv {} {}"
.
format
(
os
.
path
.
join
(
dirname
,
"*.py"
),
os
.
path
.
join
(
dirname
,
"paddlerec"
)))
run_cmd
(
"mv {} {}"
.
format
(
os
.
path
.
join
(
dirname
,
"core"
),
os
.
path
.
join
(
dirname
,
"paddlerec"
)))
run_cmd
(
"mv {} {}"
.
format
(
os
.
path
.
join
(
dirname
,
"doc"
),
os
.
path
.
join
(
dirname
,
"paddlerec"
)))
run_cmd
(
"mv {} {}"
.
format
(
os
.
path
.
join
(
dirname
,
"models"
),
os
.
path
.
join
(
dirname
,
"paddlerec"
)))
run_cmd
(
"mv {} {}"
.
format
(
os
.
path
.
join
(
dirname
,
"tests"
),
os
.
path
.
join
(
dirname
,
"paddlerec"
)))
run_cmd
(
"mv {} {}"
.
format
(
os
.
path
.
join
(
dirname
,
"tools"
),
os
.
path
.
join
(
dirname
,
"paddlerec"
)))
run_cmd
(
"mv {} {}"
.
format
(
os
.
path
.
join
(
dirname
,
"*.py"
),
os
.
path
.
join
(
dirname
,
"paddlerec"
)))
packages
=
find_packages
(
dirname
,
include
=
(
'paddlerec.*'
))
package_dir
=
{
''
:
dirname
}
package_data
=
{}
models_copy
=
[
'data/*.txt'
,
'data/*/*.txt'
,
'*.yaml'
,
'*.sh'
,
'tree/*.npy'
,
'tree/*.txt'
]
models_copy
=
[
'data/*.txt'
,
'data/*/*.txt'
,
'*.yaml'
,
'*.sh'
,
'tree/*.npy'
,
'tree/*.txt'
]
engine_copy
=
[
'*/*.sh'
]
for
package
in
packages
:
if
package
.
startswith
(
"paddlerec.models."
):
...
...
@@ -80,8 +84,7 @@ def build(dirname):
package_data
=
package_data
,
python_requires
=
">=2.7"
,
install_requires
=
requires
,
zip_safe
=
False
)
zip_safe
=
False
)
dirname
=
tempfile
.
mkdtemp
()
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录