Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleRec
提交
7a3ec4e6
P
PaddleRec
项目概览
PaddlePaddle
/
PaddleRec
通知
68
Star
12
Fork
5
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
27
列表
看板
标记
里程碑
合并请求
10
Wiki
1
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleRec
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
27
Issue
27
列表
看板
标记
里程碑
合并请求
10
合并请求
10
Pages
分析
分析
仓库分析
DevOps
Wiki
1
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
7a3ec4e6
编写于
5月 19, 2020
作者:
T
tangwei
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
for mat
上级
801dfd34
变更
55
隐藏空白更改
内联
并排
Showing
55 changed file
with
515 addition
and
491 deletion
+515
-491
core/engine/engine.py
core/engine/engine.py
+0
-1
core/engine/local_cluster.py
core/engine/local_cluster.py
+0
-1
core/metric.py
core/metric.py
+1
-1
core/metrics/auc_metrics.py
core/metrics/auc_metrics.py
+1
-1
core/trainers/__init__.py
core/trainers/__init__.py
+26
-0
core/trainers/ctr_coding_trainer.py
core/trainers/ctr_coding_trainer.py
+1
-1
core/trainers/ctr_modul_trainer.py
core/trainers/ctr_modul_trainer.py
+1
-1
core/trainers/online_learning_trainer.py
core/trainers/online_learning_trainer.py
+1
-1
core/trainers/single_trainer.py
core/trainers/single_trainer.py
+4
-3
core/trainers/tdm_cluster_trainer.py
core/trainers/tdm_cluster_trainer.py
+0
-1
core/trainers/tdm_single_trainer.py
core/trainers/tdm_single_trainer.py
+0
-1
core/trainers/transpiler_trainer.py
core/trainers/transpiler_trainer.py
+3
-3
core/utils/dataset_holder.py
core/utils/dataset_holder.py
+2
-12
core/utils/fs.py
core/utils/fs.py
+13
-11
core/utils/table.py
core/utils/table.py
+1
-1
models/contentunderstanding/classification/model.py
models/contentunderstanding/classification/model.py
+2
-2
models/contentunderstanding/classification/reader.py
models/contentunderstanding/classification/reader.py
+4
-3
models/contentunderstanding/tagspace/model.py
models/contentunderstanding/tagspace/model.py
+9
-11
models/contentunderstanding/tagspace/reader.py
models/contentunderstanding/tagspace/reader.py
+4
-5
models/match/dssm/model.py
models/match/dssm/model.py
+29
-28
models/match/dssm/synthetic_reader.py
models/match/dssm/synthetic_reader.py
+1
-1
models/match/multiview-simnet/evaluate_reader.py
models/match/multiview-simnet/evaluate_reader.py
+4
-3
models/match/multiview-simnet/generate_synthetic_data.py
models/match/multiview-simnet/generate_synthetic_data.py
+7
-3
models/match/multiview-simnet/model.py
models/match/multiview-simnet/model.py
+34
-31
models/match/multiview-simnet/reader.py
models/match/multiview-simnet/reader.py
+4
-3
models/multitask/esmm/esmm_infer_reader.py
models/multitask/esmm/esmm_infer_reader.py
+12
-11
models/multitask/esmm/esmm_reader.py
models/multitask/esmm/esmm_reader.py
+15
-13
models/multitask/esmm/model.py
models/multitask/esmm/model.py
+34
-36
models/multitask/mmoe/census_reader.py
models/multitask/mmoe/census_reader.py
+2
-2
models/multitask/mmoe/model.py
models/multitask/mmoe/model.py
+31
-31
models/multitask/share-bottom/census_reader.py
models/multitask/share-bottom/census_reader.py
+2
-2
models/multitask/share-bottom/model.py
models/multitask/share-bottom/model.py
+26
-26
models/rank/dcn/criteo_reader.py
models/rank/dcn/criteo_reader.py
+7
-7
models/rank/dcn/model.py
models/rank/dcn/model.py
+10
-11
models/rank/deepfm/criteo_reader.py
models/rank/deepfm/criteo_reader.py
+4
-3
models/rank/deepfm/model.py
models/rank/deepfm/model.py
+27
-27
models/rank/din/reader.py
models/rank/din/reader.py
+10
-15
models/rank/wide_deep/model.py
models/rank/wide_deep/model.py
+38
-29
models/rank/wide_deep/reader.py
models/rank/wide_deep/reader.py
+4
-3
models/rank/xdeepfm/criteo_reader.py
models/rank/xdeepfm/criteo_reader.py
+4
-4
models/rank/xdeepfm/model.py
models/rank/xdeepfm/model.py
+14
-14
models/recall/gnn/evaluate_reader.py
models/recall/gnn/evaluate_reader.py
+9
-7
models/recall/gnn/model.py
models/recall/gnn/model.py
+64
-64
models/recall/gnn/reader.py
models/recall/gnn/reader.py
+9
-7
models/recall/gru4rec/model.py
models/recall/gru4rec/model.py
+0
-2
models/recall/ssr/model.py
models/recall/ssr/model.py
+4
-6
models/recall/ssr/ssr_infer_reader.py
models/recall/ssr/ssr_infer_reader.py
+1
-3
models/recall/ssr/ssr_reader.py
models/recall/ssr/ssr_reader.py
+0
-2
models/recall/word2vec/preprocess.py
models/recall/word2vec/preprocess.py
+14
-16
models/recall/word2vec/w2v_evaluate_reader.py
models/recall/word2vec/w2v_evaluate_reader.py
+7
-8
models/recall/word2vec/w2v_reader.py
models/recall/word2vec/w2v_reader.py
+4
-4
models/treebased/tdm/model.py
models/treebased/tdm/model.py
+8
-8
models/treebased/tdm/tdm_evaluate_reader.py
models/treebased/tdm/tdm_evaluate_reader.py
+1
-0
models/treebased/tdm/tdm_reader.py
models/treebased/tdm/tdm_reader.py
+1
-0
setup.py
setup.py
+1
-1
未找到文件。
core/engine/engine.py
浏览文件 @
7a3ec4e6
...
@@ -29,4 +29,3 @@ class Engine:
...
@@ -29,4 +29,3 @@ class Engine:
@
abc
.
abstractmethod
@
abc
.
abstractmethod
def
run
(
self
):
def
run
(
self
):
pass
pass
core/engine/local_cluster.py
浏览文件 @
7a3ec4e6
...
@@ -20,7 +20,6 @@ import os
...
@@ -20,7 +20,6 @@ import os
import
sys
import
sys
import
subprocess
import
subprocess
from
paddlerec.core.engine.engine
import
Engine
from
paddlerec.core.engine.engine
import
Engine
from
paddlerec.core.utils
import
envs
from
paddlerec.core.utils
import
envs
...
...
core/metric.py
浏览文件 @
7a3ec4e6
...
@@ -53,7 +53,7 @@ class Metric(object):
...
@@ -53,7 +53,7 @@ class Metric(object):
pass
pass
@
abc
.
abstractmethod
@
abc
.
abstractmethod
def
get_result_to_string
(
self
):
def
__str__
(
self
):
"""
"""
Return:
Return:
result(string) : calculate result with string format, for output
result(string) : calculate result with string format, for output
...
...
core/metrics/auc_metrics.py
浏览文件 @
7a3ec4e6
...
@@ -200,7 +200,7 @@ class AUCMetric(Metric):
...
@@ -200,7 +200,7 @@ class AUCMetric(Metric):
""" """
""" """
return
self
.
_result
return
self
.
_result
def
get_result_to_string
(
self
):
def
__str__
(
self
):
""" """
""" """
result
=
self
.
get_result
()
result
=
self
.
get_result
()
result_str
=
"%s AUC=%.6f BUCKET_ERROR=%.6f MAE=%.6f RMSE=%.6f "
\
result_str
=
"%s AUC=%.6f BUCKET_ERROR=%.6f MAE=%.6f RMSE=%.6f "
\
...
...
core/trainers/__init__.py
浏览文件 @
7a3ec4e6
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
trainer implement.
↗ (single/cluster) CtrTrainer
Trainer
↗ (for single training) SingleTrainer/TDMSingleTrainer
↘ TranspilerTrainer → (for cluster training) ClusterTrainer/TDMClusterTrainer
↘ (for online learning training) OnlineLearningTrainer
"""
core/trainers/ctr_coding_trainer.py
浏览文件 @
7a3ec4e6
...
@@ -23,7 +23,7 @@ from paddlerec.core.utils import envs
...
@@ -23,7 +23,7 @@ from paddlerec.core.utils import envs
from
paddlerec.core.trainer
import
Trainer
from
paddlerec.core.trainer
import
Trainer
class
Ctr
Paddle
Trainer
(
Trainer
):
class
CtrTrainer
(
Trainer
):
"""R
"""R
"""
"""
...
...
core/trainers/ctr_modul_trainer.py
浏览文件 @
7a3ec4e6
...
@@ -72,7 +72,7 @@ def worker_numric_max(value, env="mpi"):
...
@@ -72,7 +72,7 @@ def worker_numric_max(value, env="mpi"):
return
wroker_numric_opt
(
value
,
env
,
"max"
)
return
wroker_numric_opt
(
value
,
env
,
"max"
)
class
Ctr
Paddle
Trainer
(
Trainer
):
class
CtrTrainer
(
Trainer
):
"""R
"""R
"""
"""
...
...
core/trainers/online_learning_trainer.py
浏览文件 @
7a3ec4e6
...
@@ -31,7 +31,7 @@ from paddlerec.core.utils import envs
...
@@ -31,7 +31,7 @@ from paddlerec.core.utils import envs
from
paddlerec.core.trainers.transpiler_trainer
import
TranspileTrainer
from
paddlerec.core.trainers.transpiler_trainer
import
TranspileTrainer
class
Cluster
Trainer
(
TranspileTrainer
):
class
OnlineLearning
Trainer
(
TranspileTrainer
):
def
processor_register
(
self
):
def
processor_register
(
self
):
role
=
PaddleCloudRoleMaker
()
role
=
PaddleCloudRoleMaker
()
fleet
.
init
(
role
)
fleet
.
init
(
role
)
...
...
core/trainers/single_trainer.py
浏览文件 @
7a3ec4e6
...
@@ -36,7 +36,8 @@ class SingleTrainer(TranspileTrainer):
...
@@ -36,7 +36,8 @@ class SingleTrainer(TranspileTrainer):
self
.
regist_context_processor
(
'uninit'
,
self
.
instance
)
self
.
regist_context_processor
(
'uninit'
,
self
.
instance
)
self
.
regist_context_processor
(
'init_pass'
,
self
.
init
)
self
.
regist_context_processor
(
'init_pass'
,
self
.
init
)
self
.
regist_context_processor
(
'startup_pass'
,
self
.
startup
)
self
.
regist_context_processor
(
'startup_pass'
,
self
.
startup
)
if
envs
.
get_platform
()
==
"LINUX"
and
envs
.
get_global_env
(
"dataset_class"
,
None
,
"train.reader"
)
!=
"DataLoader"
:
if
envs
.
get_platform
()
==
"LINUX"
and
envs
.
get_global_env
(
"dataset_class"
,
None
,
"train.reader"
)
!=
"DataLoader"
:
self
.
regist_context_processor
(
'train_pass'
,
self
.
dataset_train
)
self
.
regist_context_processor
(
'train_pass'
,
self
.
dataset_train
)
else
:
else
:
self
.
regist_context_processor
(
'train_pass'
,
self
.
dataloader_train
)
self
.
regist_context_processor
(
'train_pass'
,
self
.
dataloader_train
)
...
@@ -122,8 +123,8 @@ class SingleTrainer(TranspileTrainer):
...
@@ -122,8 +123,8 @@ class SingleTrainer(TranspileTrainer):
fetch_info
=
self
.
fetch_alias
,
fetch_info
=
self
.
fetch_alias
,
print_period
=
self
.
fetch_period
)
print_period
=
self
.
fetch_period
)
end_time
=
time
.
time
()
end_time
=
time
.
time
()
times
=
end_time
-
begin_time
times
=
end_time
-
begin_time
print
(
"epoch {} using time {}, speed {:.2f} lines/s"
.
format
(
i
,
times
,
ins
/
times
))
print
(
"epoch {} using time {}, speed {:.2f} lines/s"
.
format
(
i
,
times
,
ins
/
times
))
self
.
save
(
i
,
"train"
,
is_fleet
=
False
)
self
.
save
(
i
,
"train"
,
is_fleet
=
False
)
context
[
'status'
]
=
'infer_pass'
context
[
'status'
]
=
'infer_pass'
...
...
core/trainers/tdm_cluster_trainer.py
浏览文件 @
7a3ec4e6
...
@@ -27,7 +27,6 @@ from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import f
...
@@ -27,7 +27,6 @@ from paddle.fluid.incubate.fleet.parameter_server.distribute_transpiler import f
from
paddlerec.core.utils
import
envs
from
paddlerec.core.utils
import
envs
from
paddlerec.core.trainers.cluster_trainer
import
ClusterTrainer
from
paddlerec.core.trainers.cluster_trainer
import
ClusterTrainer
logging
.
basicConfig
(
format
=
"%(asctime)s - %(levelname)s - %(message)s"
)
logging
.
basicConfig
(
format
=
"%(asctime)s - %(levelname)s - %(message)s"
)
logger
=
logging
.
getLogger
(
"fluid"
)
logger
=
logging
.
getLogger
(
"fluid"
)
logger
.
setLevel
(
logging
.
INFO
)
logger
.
setLevel
(
logging
.
INFO
)
...
...
core/trainers/tdm_single_trainer.py
浏览文件 @
7a3ec4e6
...
@@ -24,7 +24,6 @@ import paddle.fluid as fluid
...
@@ -24,7 +24,6 @@ import paddle.fluid as fluid
from
paddlerec.core.trainers.single_trainer
import
SingleTrainer
from
paddlerec.core.trainers.single_trainer
import
SingleTrainer
from
paddlerec.core.utils
import
envs
from
paddlerec.core.utils
import
envs
logging
.
basicConfig
(
format
=
"%(asctime)s - %(levelname)s - %(message)s"
)
logging
.
basicConfig
(
format
=
"%(asctime)s - %(levelname)s - %(message)s"
)
logger
=
logging
.
getLogger
(
"fluid"
)
logger
=
logging
.
getLogger
(
"fluid"
)
logger
.
setLevel
(
logging
.
INFO
)
logger
.
setLevel
(
logging
.
INFO
)
...
...
core/trainers/transpiler_trainer.py
浏览文件 @
7a3ec4e6
...
@@ -147,8 +147,8 @@ class TranspileTrainer(Trainer):
...
@@ -147,8 +147,8 @@ class TranspileTrainer(Trainer):
if
not
need_save
(
epoch_id
,
save_interval
,
False
):
if
not
need_save
(
epoch_id
,
save_interval
,
False
):
return
return
# print("save inference model is not supported now.")
# print("save inference model is not supported now.")
# return
# return
feed_varnames
=
envs
.
get_global_env
(
feed_varnames
=
envs
.
get_global_env
(
"save.inference.feed_varnames"
,
None
,
namespace
)
"save.inference.feed_varnames"
,
None
,
namespace
)
...
@@ -248,7 +248,7 @@ class TranspileTrainer(Trainer):
...
@@ -248,7 +248,7 @@ class TranspileTrainer(Trainer):
'evaluate_model_path'
,
""
,
namespace
=
'evaluate'
))]
'evaluate_model_path'
,
""
,
namespace
=
'evaluate'
))]
is_return_numpy
=
envs
.
get_global_env
(
is_return_numpy
=
envs
.
get_global_env
(
'is_return_numpy'
,
True
,
namespace
=
'evaluate'
)
'is_return_numpy'
,
True
,
namespace
=
'evaluate'
)
for
(
epoch
,
model_dir
)
in
model_list
:
for
(
epoch
,
model_dir
)
in
model_list
:
print
(
"Begin to infer No.{} model, model_dir: {}"
.
format
(
print
(
"Begin to infer No.{} model, model_dir: {}"
.
format
(
...
...
core/utils/dataset.py
→
core/utils/dataset
_holder
.py
浏览文件 @
7a3ec4e6
...
@@ -22,7 +22,7 @@ from paddlerec.core.utils import fs as fs
...
@@ -22,7 +22,7 @@ from paddlerec.core.utils import fs as fs
from
paddlerec.core.utils
import
util
as
util
from
paddlerec.core.utils
import
util
as
util
class
Dataset
(
object
):
class
Dataset
Holder
(
object
):
"""
"""
Dataset Base
Dataset Base
"""
"""
...
@@ -62,7 +62,7 @@ class Dataset(object):
...
@@ -62,7 +62,7 @@ class Dataset(object):
pass
pass
class
TimeSplitDataset
(
Dataset
):
class
TimeSplitDataset
Holder
(
DatasetHolder
):
"""
"""
Dataset with time split dir. root_path/$DAY/$HOUR
Dataset with time split dir. root_path/$DAY/$HOUR
"""
"""
...
@@ -142,16 +142,6 @@ class TimeSplitDataset(Dataset):
...
@@ -142,16 +142,6 @@ class TimeSplitDataset(Dataset):
data_time
=
data_time
+
datetime
.
timedelta
(
minutes
=
self
.
_split_interval
)
data_time
=
data_time
+
datetime
.
timedelta
(
minutes
=
self
.
_split_interval
)
return
data_file_list
return
data_file_list
class
FluidTimeSplitDataset
(
TimeSplitDataset
):
"""
A Dataset with time split for PaddleFluid
"""
def
__init__
(
self
,
config
):
""" """
TimeSplitDataset
.
__init__
(
self
,
config
)
def
_alloc_dataset
(
self
,
file_list
):
def
_alloc_dataset
(
self
,
file_list
):
""" """
""" """
dataset
=
fluid
.
DatasetFactory
().
create_dataset
(
self
.
_config
[
'dataset_type'
])
dataset
=
fluid
.
DatasetFactory
().
create_dataset
(
self
.
_config
[
'dataset_type'
])
...
...
core/utils/fs.py
浏览文件 @
7a3ec4e6
...
@@ -29,12 +29,12 @@ class LocalFSClient(object):
...
@@ -29,12 +29,12 @@ class LocalFSClient(object):
"""
"""
Util for local disk file_system io
Util for local disk file_system io
"""
"""
def
__init__
(
self
):
def
__init__
(
self
):
"""R
"""R
"""
"""
pass
pass
def
write
(
self
,
content
,
path
,
mode
):
def
write
(
self
,
content
,
path
,
mode
):
"""
"""
write to file
write to file
...
@@ -44,7 +44,7 @@ class LocalFSClient(object):
...
@@ -44,7 +44,7 @@ class LocalFSClient(object):
mode(string): w/a w:clear_write a:append_write
mode(string): w/a w:clear_write a:append_write
"""
"""
temp_dir
=
os
.
path
.
dirname
(
path
)
temp_dir
=
os
.
path
.
dirname
(
path
)
if
not
os
.
path
.
exists
(
temp_dir
):
if
not
os
.
path
.
exists
(
temp_dir
):
os
.
makedirs
(
temp_dir
)
os
.
makedirs
(
temp_dir
)
f
=
open
(
path
,
mode
)
f
=
open
(
path
,
mode
)
f
.
write
(
content
)
f
.
write
(
content
)
...
@@ -76,7 +76,7 @@ class LocalFSClient(object):
...
@@ -76,7 +76,7 @@ class LocalFSClient(object):
"""R
"""R
"""
"""
os
.
system
(
"rm -rf "
+
path
)
os
.
system
(
"rm -rf "
+
path
)
def
is_exist
(
self
,
path
):
def
is_exist
(
self
,
path
):
"""R
"""R
"""
"""
...
@@ -95,13 +95,14 @@ class FileHandler(object):
...
@@ -95,13 +95,14 @@ class FileHandler(object):
"""
"""
A Smart file handler. auto judge local/afs by path
A Smart file handler. auto judge local/afs by path
"""
"""
def
__init__
(
self
,
config
):
def
__init__
(
self
,
config
):
"""R
"""R
"""
"""
if
'fs_name'
in
config
:
if
'fs_name'
in
config
:
hadoop_home
=
"$HADOOP_HOME"
hadoop_home
=
"$HADOOP_HOME"
hdfs_configs
=
{
hdfs_configs
=
{
"hadoop.job.ugi"
:
config
[
'fs_ugi'
],
"hadoop.job.ugi"
:
config
[
'fs_ugi'
],
"fs.default.name"
:
config
[
'fs_name'
]
"fs.default.name"
:
config
[
'fs_name'
]
}
}
self
.
_hdfs_client
=
HDFSClient
(
hadoop_home
,
hdfs_configs
)
self
.
_hdfs_client
=
HDFSClient
(
hadoop_home
,
hdfs_configs
)
...
@@ -132,7 +133,8 @@ class FileHandler(object):
...
@@ -132,7 +133,8 @@ class FileHandler(object):
if
mode
.
find
(
'a'
)
>=
0
:
if
mode
.
find
(
'a'
)
>=
0
:
org_content
=
self
.
_hdfs_client
.
cat
(
dest_path
)
org_content
=
self
.
_hdfs_client
.
cat
(
dest_path
)
content
=
content
+
org_content
content
=
content
+
org_content
self
.
_local_fs_client
.
write
(
content
,
temp_local_file
,
mode
)
#fleet hdfs_client only support upload, so write tmp file
self
.
_local_fs_client
.
write
(
content
,
temp_local_file
,
mode
)
# fleet hdfs_client only support upload, so write tmp file
self
.
_hdfs_client
.
delete
(
dest_path
+
".tmp"
)
self
.
_hdfs_client
.
delete
(
dest_path
+
".tmp"
)
self
.
_hdfs_client
.
upload
(
dest_path
+
".tmp"
,
temp_local_file
)
self
.
_hdfs_client
.
upload
(
dest_path
+
".tmp"
,
temp_local_file
)
self
.
_hdfs_client
.
delete
(
dest_path
+
".bak"
)
self
.
_hdfs_client
.
delete
(
dest_path
+
".bak"
)
...
@@ -140,7 +142,7 @@ class FileHandler(object):
...
@@ -140,7 +142,7 @@ class FileHandler(object):
self
.
_hdfs_client
.
rename
(
dest_path
+
".tmp"
,
dest_path
)
self
.
_hdfs_client
.
rename
(
dest_path
+
".tmp"
,
dest_path
)
else
:
else
:
self
.
_local_fs_client
.
write
(
content
,
dest_path
,
mode
)
self
.
_local_fs_client
.
write
(
content
,
dest_path
,
mode
)
def
cat
(
self
,
path
):
def
cat
(
self
,
path
):
"""R
"""R
"""
"""
...
@@ -149,7 +151,7 @@ class FileHandler(object):
...
@@ -149,7 +151,7 @@ class FileHandler(object):
return
hdfs_cat
return
hdfs_cat
else
:
else
:
return
self
.
_local_fs_client
.
cat
(
path
)
return
self
.
_local_fs_client
.
cat
(
path
)
def
ls
(
self
,
path
):
def
ls
(
self
,
path
):
"""R
"""R
"""
"""
...
@@ -161,7 +163,7 @@ class FileHandler(object):
...
@@ -161,7 +163,7 @@ class FileHandler(object):
files
=
self
.
_local_fs_client
.
ls
(
path
)
files
=
self
.
_local_fs_client
.
ls
(
path
)
files
=
[
path
+
'/'
+
fi
for
fi
in
files
]
# absulte path
files
=
[
path
+
'/'
+
fi
for
fi
in
files
]
# absulte path
return
files
return
files
def
cp
(
self
,
org_path
,
dest_path
):
def
cp
(
self
,
org_path
,
dest_path
):
"""R
"""R
"""
"""
...
@@ -171,6 +173,6 @@ class FileHandler(object):
...
@@ -171,6 +173,6 @@ class FileHandler(object):
return
self
.
_local_fs_client
.
cp
(
org_path
,
dest_path
)
return
self
.
_local_fs_client
.
cp
(
org_path
,
dest_path
)
if
not
org_is_afs
and
dest_is_afs
:
if
not
org_is_afs
and
dest_is_afs
:
return
self
.
_hdfs_client
.
upload
(
dest_path
,
org_path
)
return
self
.
_hdfs_client
.
upload
(
dest_path
,
org_path
)
if
org_is_afs
and
not
dest_is_afs
:
if
org_is_afs
and
not
dest_is_afs
:
return
self
.
_hdfs_client
.
download
(
org_path
,
dest_path
)
return
self
.
_hdfs_client
.
download
(
org_path
,
dest_path
)
print
(
"Not Suppor hdfs cp currently"
)
print
(
"Not Suppor hdfs cp currently"
)
core/utils/table.py
浏览文件 @
7a3ec4e6
...
@@ -18,7 +18,7 @@ class TableMeta(object):
...
@@ -18,7 +18,7 @@ class TableMeta(object):
Simple ParamTable Meta, Contain table_id
Simple ParamTable Meta, Contain table_id
"""
"""
TableId
=
1
TableId
=
1
@
staticmethod
@
staticmethod
def
alloc_new_table
(
table_id
):
def
alloc_new_table
(
table_id
):
"""
"""
...
...
models/contentunderstanding/classification/model.py
浏览文件 @
7a3ec4e6
...
@@ -30,7 +30,7 @@ class Model(ModelBase):
...
@@ -30,7 +30,7 @@ class Model(ModelBase):
def
train_net
(
self
):
def
train_net
(
self
):
""" network definition """
""" network definition """
data
=
fluid
.
data
(
name
=
"input"
,
shape
=
[
None
,
self
.
max_len
],
dtype
=
'int64'
)
data
=
fluid
.
data
(
name
=
"input"
,
shape
=
[
None
,
self
.
max_len
],
dtype
=
'int64'
)
label
=
fluid
.
data
(
name
=
"label"
,
shape
=
[
None
,
1
],
dtype
=
'int64'
)
label
=
fluid
.
data
(
name
=
"label"
,
shape
=
[
None
,
1
],
dtype
=
'int64'
)
seq_len
=
fluid
.
data
(
name
=
"seq_len"
,
shape
=
[
None
],
dtype
=
'int64'
)
seq_len
=
fluid
.
data
(
name
=
"seq_len"
,
shape
=
[
None
],
dtype
=
'int64'
)
...
@@ -54,7 +54,7 @@ class Model(ModelBase):
...
@@ -54,7 +54,7 @@ class Model(ModelBase):
prediction
=
fluid
.
layers
.
fc
(
input
=
[
fc_1
],
size
=
self
.
class_dim
,
act
=
"softmax"
)
prediction
=
fluid
.
layers
.
fc
(
input
=
[
fc_1
],
size
=
self
.
class_dim
,
act
=
"softmax"
)
cost
=
fluid
.
layers
.
cross_entropy
(
input
=
prediction
,
label
=
label
)
cost
=
fluid
.
layers
.
cross_entropy
(
input
=
prediction
,
label
=
label
)
avg_cost
=
fluid
.
layers
.
mean
(
x
=
cost
)
avg_cost
=
fluid
.
layers
.
mean
(
x
=
cost
)
acc
=
fluid
.
layers
.
accuracy
(
input
=
prediction
,
label
=
label
)
acc
=
fluid
.
layers
.
accuracy
(
input
=
prediction
,
label
=
label
)
self
.
cost
=
avg_cost
self
.
cost
=
avg_cost
self
.
_metrics
[
"acc"
]
=
acc
self
.
_metrics
[
"acc"
]
=
acc
...
...
models/contentunderstanding/classification/reader.py
浏览文件 @
7a3ec4e6
...
@@ -22,12 +22,12 @@ class TrainReader(Reader):
...
@@ -22,12 +22,12 @@ class TrainReader(Reader):
def
init
(
self
):
def
init
(
self
):
pass
pass
def
_process_line
(
self
,
l
):
def
_process_line
(
self
,
l
):
l
=
l
.
strip
().
split
(
" "
)
l
=
l
.
strip
().
split
(
" "
)
data
=
l
[
0
:
10
]
data
=
l
[
0
:
10
]
seq_len
=
l
[
10
:
11
]
seq_len
=
l
[
10
:
11
]
label
=
l
[
11
:]
label
=
l
[
11
:]
return
data
,
label
,
seq_len
return
data
,
label
,
seq_len
def
generate_sample
(
self
,
line
):
def
generate_sample
(
self
,
line
):
def
data_iter
():
def
data_iter
():
...
@@ -38,6 +38,7 @@ class TrainReader(Reader):
...
@@ -38,6 +38,7 @@ class TrainReader(Reader):
data
=
[
int
(
i
)
for
i
in
data
]
data
=
[
int
(
i
)
for
i
in
data
]
label
=
[
int
(
i
)
for
i
in
label
]
label
=
[
int
(
i
)
for
i
in
label
]
seq_len
=
[
int
(
i
)
for
i
in
seq_len
]
seq_len
=
[
int
(
i
)
for
i
in
seq_len
]
print
>>
sys
.
stderr
,
str
([(
'data'
,
data
),
(
'label'
,
label
),
(
'seq_len'
,
seq_len
)])
print
>>
sys
.
stderr
,
str
([(
'data'
,
data
),
(
'label'
,
label
),
(
'seq_len'
,
seq_len
)])
yield
[(
'data'
,
data
),
(
'label'
,
label
),
(
'seq_len'
,
seq_len
)]
yield
[(
'data'
,
data
),
(
'label'
,
label
),
(
'seq_len'
,
seq_len
)]
return
data_iter
return
data_iter
models/contentunderstanding/tagspace/model.py
浏览文件 @
7a3ec4e6
...
@@ -18,6 +18,7 @@ import paddle.fluid.layers.tensor as tensor
...
@@ -18,6 +18,7 @@ import paddle.fluid.layers.tensor as tensor
import
paddle.fluid.layers.control_flow
as
cf
import
paddle.fluid.layers.control_flow
as
cf
from
paddlerec.core.model
import
Model
as
ModelBase
from
paddlerec.core.model
import
Model
as
ModelBase
from
paddlerec.core.utils
import
envs
class
Model
(
ModelBase
):
class
Model
(
ModelBase
):
...
@@ -25,14 +26,13 @@ class Model(ModelBase):
...
@@ -25,14 +26,13 @@ class Model(ModelBase):
ModelBase
.
__init__
(
self
,
config
)
ModelBase
.
__init__
(
self
,
config
)
self
.
cost
=
None
self
.
cost
=
None
self
.
metrics
=
{}
self
.
metrics
=
{}
self
.
vocab_text_size
=
11447
#envs.get_global_env("vocab_text_size", None, self._namespace)
self
.
vocab_text_size
=
envs
.
get_global_env
(
"vocab_text_size"
,
None
,
self
.
_namespace
)
self
.
vocab_tag_size
=
4
#envs.get_global_env("vocab_tag_size", None, self._namespace)
self
.
vocab_tag_size
=
envs
.
get_global_env
(
"vocab_tag_size"
,
None
,
self
.
_namespace
)
self
.
emb_dim
=
10
#envs.get_global_env("emb_dim", None, self._namespace)
self
.
emb_dim
=
envs
.
get_global_env
(
"emb_dim"
,
None
,
self
.
_namespace
)
self
.
hid_dim
=
1000
#envs.get_global_env("hid_dim", None, self._namespace)
self
.
hid_dim
=
envs
.
get_global_env
(
"hid_dim"
,
None
,
self
.
_namespace
)
self
.
win_size
=
5
#envs.get_global_env("win_size", None, self._namespace)
self
.
win_size
=
envs
.
get_global_env
(
"win_size"
,
None
,
self
.
_namespace
)
self
.
margin
=
0.1
#envs.get_global_env("margin", None, self._namespace)
self
.
margin
=
envs
.
get_global_env
(
"margin"
,
None
,
self
.
_namespace
)
self
.
neg_size
=
3
#envs.get_global_env("neg_size", None, self._namespace)
self
.
neg_size
=
envs
.
get_global_env
(
"neg_size"
,
None
,
self
.
_namespace
)
print
self
.
emb_dim
def
train_net
(
self
):
def
train_net
(
self
):
""" network definition """
""" network definition """
...
@@ -96,11 +96,9 @@ class Model(ModelBase):
...
@@ -96,11 +96,9 @@ class Model(ModelBase):
return
self
.
metrics
return
self
.
metrics
def
optimizer
(
self
):
def
optimizer
(
self
):
learning_rate
=
0.01
#
envs.get_global_env("hyper_parameters.base_lr", None, self._namespace)
learning_rate
=
envs
.
get_global_env
(
"hyper_parameters.base_lr"
,
None
,
self
.
_namespace
)
sgd_optimizer
=
fluid
.
optimizer
.
Adagrad
(
learning_rate
=
learning_rate
)
sgd_optimizer
=
fluid
.
optimizer
.
Adagrad
(
learning_rate
=
learning_rate
)
#sgd_optimizer.minimize(avg_cost)
return
sgd_optimizer
return
sgd_optimizer
def
infer_net
(
self
,
parameter_list
):
def
infer_net
(
self
,
parameter_list
):
self
.
train_net
()
self
.
train_net
()
models/contentunderstanding/tagspace/reader.py
浏览文件 @
7a3ec4e6
...
@@ -19,11 +19,12 @@ import numpy as np
...
@@ -19,11 +19,12 @@ import numpy as np
from
paddlerec.core.reader
import
Reader
from
paddlerec.core.reader
import
Reader
class
TrainReader
(
Reader
):
class
TrainReader
(
Reader
):
def
init
(
self
):
def
init
(
self
):
pass
pass
def
_process_line
(
self
,
l
):
def
_process_line
(
self
,
l
):
tag_size
=
4
tag_size
=
4
neg_size
=
3
neg_size
=
3
l
=
l
.
strip
().
split
(
","
)
l
=
l
.
strip
().
split
(
","
)
...
@@ -46,10 +47,7 @@ class TrainReader(Reader):
...
@@ -46,10 +47,7 @@ class TrainReader(Reader):
neg_index
=
rand_i
neg_index
=
rand_i
neg_tag
.
append
(
neg_index
)
neg_tag
.
append
(
neg_index
)
sum_n
+=
1
sum_n
+=
1
# if n > 0 and len(text) > n:
return
text
,
pos_tag
,
neg_tag
# #yield None
# return None, None, None
return
text
,
pos_tag
,
neg_tag
def
generate_sample
(
self
,
line
):
def
generate_sample
(
self
,
line
):
def
data_iter
():
def
data_iter
():
...
@@ -58,4 +56,5 @@ class TrainReader(Reader):
...
@@ -58,4 +56,5 @@ class TrainReader(Reader):
yield
None
yield
None
return
return
yield
[(
'text'
,
text
),
(
'pos_tag'
,
pos_tag
),
(
'neg_tag'
,
neg_tag
)]
yield
[(
'text'
,
text
),
(
'pos_tag'
,
pos_tag
),
(
'neg_tag'
,
neg_tag
)]
return
data_iter
return
data_iter
models/match/dssm/model.py
浏览文件 @
7a3ec4e6
...
@@ -24,11 +24,12 @@ class Model(ModelBase):
...
@@ -24,11 +24,12 @@ class Model(ModelBase):
def
input
(
self
):
def
input
(
self
):
TRIGRAM_D
=
envs
.
get_global_env
(
"hyper_parameters.TRIGRAM_D"
,
None
,
self
.
_namespace
)
TRIGRAM_D
=
envs
.
get_global_env
(
"hyper_parameters.TRIGRAM_D"
,
None
,
self
.
_namespace
)
Neg
=
envs
.
get_global_env
(
"hyper_parameters.NEG"
,
None
,
self
.
_namespace
)
Neg
=
envs
.
get_global_env
(
"hyper_parameters.NEG"
,
None
,
self
.
_namespace
)
self
.
query
=
fluid
.
data
(
name
=
"query"
,
shape
=
[
-
1
,
TRIGRAM_D
],
dtype
=
'float32'
,
lod_level
=
0
)
self
.
query
=
fluid
.
data
(
name
=
"query"
,
shape
=
[
-
1
,
TRIGRAM_D
],
dtype
=
'float32'
,
lod_level
=
0
)
self
.
doc_pos
=
fluid
.
data
(
name
=
"doc_pos"
,
shape
=
[
-
1
,
TRIGRAM_D
],
dtype
=
'float32'
,
lod_level
=
0
)
self
.
doc_pos
=
fluid
.
data
(
name
=
"doc_pos"
,
shape
=
[
-
1
,
TRIGRAM_D
],
dtype
=
'float32'
,
lod_level
=
0
)
self
.
doc_negs
=
[
fluid
.
data
(
name
=
"doc_neg_"
+
str
(
i
),
shape
=
[
-
1
,
TRIGRAM_D
],
dtype
=
"float32"
,
lod_level
=
0
)
for
i
in
range
(
Neg
)]
self
.
doc_negs
=
[
fluid
.
data
(
name
=
"doc_neg_"
+
str
(
i
),
shape
=
[
-
1
,
TRIGRAM_D
],
dtype
=
"float32"
,
lod_level
=
0
)
for
i
in
range
(
Neg
)]
self
.
_data_var
.
append
(
self
.
query
)
self
.
_data_var
.
append
(
self
.
query
)
self
.
_data_var
.
append
(
self
.
doc_pos
)
self
.
_data_var
.
append
(
self
.
doc_pos
)
for
input
in
self
.
doc_negs
:
for
input
in
self
.
doc_negs
:
...
@@ -37,40 +38,40 @@ class Model(ModelBase):
...
@@ -37,40 +38,40 @@ class Model(ModelBase):
if
self
.
_platform
!=
"LINUX"
:
if
self
.
_platform
!=
"LINUX"
:
self
.
_data_loader
=
fluid
.
io
.
DataLoader
.
from_generator
(
self
.
_data_loader
=
fluid
.
io
.
DataLoader
.
from_generator
(
feed_list
=
self
.
_data_var
,
capacity
=
64
,
use_double_buffer
=
False
,
iterable
=
False
)
feed_list
=
self
.
_data_var
,
capacity
=
64
,
use_double_buffer
=
False
,
iterable
=
False
)
def
net
(
self
,
is_infer
=
False
):
def
net
(
self
,
is_infer
=
False
):
hidden_layers
=
envs
.
get_global_env
(
"hyper_parameters.fc_sizes"
,
None
,
self
.
_namespace
)
hidden_layers
=
envs
.
get_global_env
(
"hyper_parameters.fc_sizes"
,
None
,
self
.
_namespace
)
hidden_acts
=
envs
.
get_global_env
(
"hyper_parameters.fc_acts"
,
None
,
self
.
_namespace
)
hidden_acts
=
envs
.
get_global_env
(
"hyper_parameters.fc_acts"
,
None
,
self
.
_namespace
)
def
fc
(
data
,
hidden_layers
,
hidden_acts
,
names
):
def
fc
(
data
,
hidden_layers
,
hidden_acts
,
names
):
fc_inputs
=
[
data
]
fc_inputs
=
[
data
]
for
i
in
range
(
len
(
hidden_layers
)):
for
i
in
range
(
len
(
hidden_layers
)):
xavier
=
fluid
.
initializer
.
Xavier
(
uniform
=
True
,
fan_in
=
fc_inputs
[
-
1
].
shape
[
1
],
fan_out
=
hidden_layers
[
i
])
xavier
=
fluid
.
initializer
.
Xavier
(
uniform
=
True
,
fan_in
=
fc_inputs
[
-
1
].
shape
[
1
],
fan_out
=
hidden_layers
[
i
])
out
=
fluid
.
layers
.
fc
(
input
=
fc_inputs
[
-
1
],
out
=
fluid
.
layers
.
fc
(
input
=
fc_inputs
[
-
1
],
size
=
hidden_layers
[
i
],
size
=
hidden_layers
[
i
],
act
=
hidden_acts
[
i
],
act
=
hidden_acts
[
i
],
param_attr
=
xavier
,
param_attr
=
xavier
,
bias_attr
=
xavier
,
bias_attr
=
xavier
,
name
=
names
[
i
])
name
=
names
[
i
])
fc_inputs
.
append
(
out
)
fc_inputs
.
append
(
out
)
return
fc_inputs
[
-
1
]
return
fc_inputs
[
-
1
]
query_fc
=
fc
(
self
.
query
,
hidden_layers
,
hidden_acts
,
[
'query_l1'
,
'query_l2'
,
'query_l3'
])
query_fc
=
fc
(
self
.
query
,
hidden_layers
,
hidden_acts
,
[
'query_l1'
,
'query_l2'
,
'query_l3'
])
doc_pos_fc
=
fc
(
self
.
doc_pos
,
hidden_layers
,
hidden_acts
,
[
'doc_pos_l1'
,
'doc_pos_l2'
,
'doc_pos_l3'
])
doc_pos_fc
=
fc
(
self
.
doc_pos
,
hidden_layers
,
hidden_acts
,
[
'doc_pos_l1'
,
'doc_pos_l2'
,
'doc_pos_l3'
])
self
.
R_Q_D_p
=
fluid
.
layers
.
cos_sim
(
query_fc
,
doc_pos_fc
)
self
.
R_Q_D_p
=
fluid
.
layers
.
cos_sim
(
query_fc
,
doc_pos_fc
)
if
is_infer
:
if
is_infer
:
return
return
R_Q_D_ns
=
[]
R_Q_D_ns
=
[]
for
i
,
doc_neg
in
enumerate
(
self
.
doc_negs
):
for
i
,
doc_neg
in
enumerate
(
self
.
doc_negs
):
doc_neg_fc_i
=
fc
(
doc_neg
,
hidden_layers
,
hidden_acts
,
[
'doc_neg_l1_'
+
str
(
i
),
'doc_neg_l2_'
+
str
(
i
),
'doc_neg_l3_'
+
str
(
i
)])
doc_neg_fc_i
=
fc
(
doc_neg
,
hidden_layers
,
hidden_acts
,
[
'doc_neg_l1_'
+
str
(
i
),
'doc_neg_l2_'
+
str
(
i
),
'doc_neg_l3_'
+
str
(
i
)])
R_Q_D_ns
.
append
(
fluid
.
layers
.
cos_sim
(
query_fc
,
doc_neg_fc_i
))
R_Q_D_ns
.
append
(
fluid
.
layers
.
cos_sim
(
query_fc
,
doc_neg_fc_i
))
concat_Rs
=
fluid
.
layers
.
concat
(
input
=
[
self
.
R_Q_D_p
]
+
R_Q_D_ns
,
axis
=-
1
)
concat_Rs
=
fluid
.
layers
.
concat
(
input
=
[
self
.
R_Q_D_p
]
+
R_Q_D_ns
,
axis
=-
1
)
prob
=
fluid
.
layers
.
softmax
(
concat_Rs
,
axis
=
1
)
prob
=
fluid
.
layers
.
softmax
(
concat_Rs
,
axis
=
1
)
hit_prob
=
fluid
.
layers
.
slice
(
prob
,
axes
=
[
0
,
1
],
starts
=
[
0
,
0
],
ends
=
[
4
,
1
])
hit_prob
=
fluid
.
layers
.
slice
(
prob
,
axes
=
[
0
,
1
],
starts
=
[
0
,
0
],
ends
=
[
4
,
1
])
loss
=
-
fluid
.
layers
.
reduce_sum
(
fluid
.
layers
.
log
(
hit_prob
))
loss
=
-
fluid
.
layers
.
reduce_sum
(
fluid
.
layers
.
log
(
hit_prob
))
self
.
avg_cost
=
fluid
.
layers
.
mean
(
x
=
loss
)
self
.
avg_cost
=
fluid
.
layers
.
mean
(
x
=
loss
)
...
@@ -100,10 +101,10 @@ class Model(ModelBase):
...
@@ -100,10 +101,10 @@ class Model(ModelBase):
self
.
doc_pos
=
fluid
.
data
(
name
=
"doc_pos"
,
shape
=
[
-
1
,
TRIGRAM_D
],
dtype
=
'float32'
,
lod_level
=
0
)
self
.
doc_pos
=
fluid
.
data
(
name
=
"doc_pos"
,
shape
=
[
-
1
,
TRIGRAM_D
],
dtype
=
'float32'
,
lod_level
=
0
)
self
.
_infer_data_var
=
[
self
.
query
,
self
.
doc_pos
]
self
.
_infer_data_var
=
[
self
.
query
,
self
.
doc_pos
]
self
.
_infer_data_loader
=
fluid
.
io
.
DataLoader
.
from_generator
(
self
.
_infer_data_loader
=
fluid
.
io
.
DataLoader
.
from_generator
(
feed_list
=
self
.
_infer_data_var
,
capacity
=
64
,
use_double_buffer
=
False
,
iterable
=
False
)
feed_list
=
self
.
_infer_data_var
,
capacity
=
64
,
use_double_buffer
=
False
,
iterable
=
False
)
def
infer_net
(
self
):
def
infer_net
(
self
):
self
.
infer_input
()
self
.
infer_input
()
self
.
net
(
is_infer
=
True
)
self
.
net
(
is_infer
=
True
)
self
.
infer_results
()
self
.
infer_results
()
models/match/dssm/synthetic_reader.py
浏览文件 @
7a3ec4e6
...
@@ -37,7 +37,7 @@ class TrainReader(Reader):
...
@@ -37,7 +37,7 @@ class TrainReader(Reader):
neg_docs
=
[]
neg_docs
=
[]
for
i
in
range
(
len
(
features
)
-
2
):
for
i
in
range
(
len
(
features
)
-
2
):
feature_names
.
append
(
'doc_neg_'
+
str
(
i
))
feature_names
.
append
(
'doc_neg_'
+
str
(
i
))
neg_docs
.
append
(
map
(
float
,
features
[
i
+
2
].
split
(
','
)))
neg_docs
.
append
(
map
(
float
,
features
[
i
+
2
].
split
(
','
)))
yield
zip
(
feature_names
,
[
query
]
+
[
pos_doc
]
+
neg_docs
)
yield
zip
(
feature_names
,
[
query
]
+
[
pos_doc
]
+
neg_docs
)
...
...
models/match/multiview-simnet/evaluate_reader.py
浏览文件 @
7a3ec4e6
...
@@ -18,8 +18,8 @@ from paddlerec.core.utils import envs
...
@@ -18,8 +18,8 @@ from paddlerec.core.utils import envs
class
EvaluateReader
(
Reader
):
class
EvaluateReader
(
Reader
):
def
init
(
self
):
def
init
(
self
):
self
.
query_slots
=
envs
.
get_global_env
(
"hyper_parameters.query_slots"
,
None
,
"train.model"
)
self
.
query_slots
=
envs
.
get_global_env
(
"hyper_parameters.query_slots"
,
None
,
"train.model"
)
self
.
title_slots
=
envs
.
get_global_env
(
"hyper_parameters.title_slots"
,
None
,
"train.model"
)
self
.
title_slots
=
envs
.
get_global_env
(
"hyper_parameters.title_slots"
,
None
,
"train.model"
)
self
.
all_slots
=
[]
self
.
all_slots
=
[]
for
i
in
range
(
self
.
query_slots
):
for
i
in
range
(
self
.
query_slots
):
...
@@ -49,6 +49,7 @@ class EvaluateReader(Reader):
...
@@ -49,6 +49,7 @@ class EvaluateReader(Reader):
if
visit
:
if
visit
:
self
.
_all_slots_dict
[
slot
][
0
]
=
False
self
.
_all_slots_dict
[
slot
][
0
]
=
False
else
:
else
:
output
[
index
][
1
].
append
(
padding
)
output
[
index
][
1
].
append
(
padding
)
yield
output
yield
output
return
data_iter
return
data_iter
models/match/multiview-simnet/generate_synthetic_data.py
浏览文件 @
7a3ec4e6
...
@@ -14,10 +14,12 @@
...
@@ -14,10 +14,12 @@
import
random
import
random
class
Dataset
:
class
Dataset
:
def
__init__
(
self
):
def
__init__
(
self
):
pass
pass
class
SyntheticDataset
(
Dataset
):
class
SyntheticDataset
(
Dataset
):
def
__init__
(
self
,
sparse_feature_dim
,
query_slot_num
,
title_slot_num
,
dataset_size
=
10000
):
def
__init__
(
self
,
sparse_feature_dim
,
query_slot_num
,
title_slot_num
,
dataset_size
=
10000
):
# ids are randomly generated
# ids are randomly generated
...
@@ -39,7 +41,7 @@ class SyntheticDataset(Dataset):
...
@@ -39,7 +41,7 @@ class SyntheticDataset(Dataset):
for
i
in
range
(
self
.
query_slot_num
):
for
i
in
range
(
self
.
query_slot_num
):
qslot
=
generate_ids
(
self
.
ids_per_slot
,
qslot
=
generate_ids
(
self
.
ids_per_slot
,
self
.
sparse_feature_dim
)
self
.
sparse_feature_dim
)
qslot
=
[
str
(
fea
)
+
':'
+
str
(
i
)
for
fea
in
qslot
]
qslot
=
[
str
(
fea
)
+
':'
+
str
(
i
)
for
fea
in
qslot
]
query_slots
+=
qslot
query_slots
+=
qslot
for
i
in
range
(
self
.
title_slot_num
):
for
i
in
range
(
self
.
title_slot_num
):
pt_slot
=
generate_ids
(
self
.
ids_per_slot
,
pt_slot
=
generate_ids
(
self
.
ids_per_slot
,
...
@@ -50,7 +52,8 @@ class SyntheticDataset(Dataset):
...
@@ -50,7 +52,8 @@ class SyntheticDataset(Dataset):
for
i
in
range
(
self
.
title_slot_num
):
for
i
in
range
(
self
.
title_slot_num
):
nt_slot
=
generate_ids
(
self
.
ids_per_slot
,
nt_slot
=
generate_ids
(
self
.
ids_per_slot
,
self
.
sparse_feature_dim
)
self
.
sparse_feature_dim
)
nt_slot
=
[
str
(
fea
)
+
':'
+
str
(
i
+
self
.
query_slot_num
+
self
.
title_slot_num
)
for
fea
in
nt_slot
]
nt_slot
=
[
str
(
fea
)
+
':'
+
str
(
i
+
self
.
query_slot_num
+
self
.
title_slot_num
)
for
fea
in
nt_slot
]
neg_title_slots
+=
nt_slot
neg_title_slots
+=
nt_slot
yield
query_slots
+
pos_title_slots
+
neg_title_slots
yield
query_slots
+
pos_title_slots
+
neg_title_slots
else
:
else
:
...
@@ -67,6 +70,7 @@ class SyntheticDataset(Dataset):
...
@@ -67,6 +70,7 @@ class SyntheticDataset(Dataset):
def
test
(
self
):
def
test
(
self
):
return
self
.
_reader_creator
(
False
)
return
self
.
_reader_creator
(
False
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
sparse_feature_dim
=
1000001
sparse_feature_dim
=
1000001
query_slots
=
1
query_slots
=
1
...
@@ -75,7 +79,7 @@ if __name__ == '__main__':
...
@@ -75,7 +79,7 @@ if __name__ == '__main__':
dataset
=
SyntheticDataset
(
sparse_feature_dim
,
query_slots
,
title_slots
,
dataset_size
)
dataset
=
SyntheticDataset
(
sparse_feature_dim
,
query_slots
,
title_slots
,
dataset_size
)
train_reader
=
dataset
.
train
()
train_reader
=
dataset
.
train
()
test_reader
=
dataset
.
test
()
test_reader
=
dataset
.
test
()
with
open
(
"data/train/train.txt"
,
'w'
)
as
fout
:
with
open
(
"data/train/train.txt"
,
'w'
)
as
fout
:
for
data
in
train_reader
():
for
data
in
train_reader
():
fout
.
write
(
' '
.
join
(
data
))
fout
.
write
(
' '
.
join
(
data
))
...
...
models/match/multiview-simnet/model.py
浏览文件 @
7a3ec4e6
...
@@ -19,6 +19,7 @@ import paddle.fluid.layers.control_flow as cf
...
@@ -19,6 +19,7 @@ import paddle.fluid.layers.control_flow as cf
from
paddlerec.core.utils
import
envs
from
paddlerec.core.utils
import
envs
from
paddlerec.core.model
import
Model
as
ModelBase
from
paddlerec.core.model
import
Model
as
ModelBase
class
BowEncoder
(
object
):
class
BowEncoder
(
object
):
""" bow-encoder """
""" bow-encoder """
...
@@ -94,13 +95,14 @@ class SimpleEncoderFactory(object):
...
@@ -94,13 +95,14 @@ class SimpleEncoderFactory(object):
rnn_encode
=
GrnnEncoder
(
hidden_size
=
enc_hid_size
)
rnn_encode
=
GrnnEncoder
(
hidden_size
=
enc_hid_size
)
return
rnn_encode
return
rnn_encode
class
Model
(
ModelBase
):
class
Model
(
ModelBase
):
def
__init__
(
self
,
config
):
def
__init__
(
self
,
config
):
ModelBase
.
__init__
(
self
,
config
)
ModelBase
.
__init__
(
self
,
config
)
self
.
init_config
()
self
.
init_config
()
def
init_config
(
self
):
def
init_config
(
self
):
self
.
_fetch_interval
=
1
self
.
_fetch_interval
=
1
query_encoder
=
envs
.
get_global_env
(
"hyper_parameters.query_encoder"
,
None
,
self
.
_namespace
)
query_encoder
=
envs
.
get_global_env
(
"hyper_parameters.query_encoder"
,
None
,
self
.
_namespace
)
title_encoder
=
envs
.
get_global_env
(
"hyper_parameters.title_encoder"
,
None
,
self
.
_namespace
)
title_encoder
=
envs
.
get_global_env
(
"hyper_parameters.title_encoder"
,
None
,
self
.
_namespace
)
query_encode_dim
=
envs
.
get_global_env
(
"hyper_parameters.query_encode_dim"
,
None
,
self
.
_namespace
)
query_encode_dim
=
envs
.
get_global_env
(
"hyper_parameters.query_encode_dim"
,
None
,
self
.
_namespace
)
...
@@ -112,19 +114,19 @@ class Model(ModelBase):
...
@@ -112,19 +114,19 @@ class Model(ModelBase):
factory
.
create
(
query_encoder
,
query_encode_dim
)
factory
.
create
(
query_encoder
,
query_encode_dim
)
for
i
in
range
(
query_slots
)
for
i
in
range
(
query_slots
)
]
]
self
.
title_encoders
=
[
self
.
title_encoders
=
[
factory
.
create
(
title_encoder
,
title_encode_dim
)
factory
.
create
(
title_encoder
,
title_encode_dim
)
for
i
in
range
(
title_slots
)
for
i
in
range
(
title_slots
)
]
]
self
.
emb_size
=
envs
.
get_global_env
(
"hyper_parameters.sparse_feature_dim"
,
None
,
self
.
_namespace
)
self
.
emb_size
=
envs
.
get_global_env
(
"hyper_parameters.sparse_feature_dim"
,
None
,
self
.
_namespace
)
self
.
emb_dim
=
envs
.
get_global_env
(
"hyper_parameters.embedding_dim"
,
None
,
self
.
_namespace
)
self
.
emb_dim
=
envs
.
get_global_env
(
"hyper_parameters.embedding_dim"
,
None
,
self
.
_namespace
)
self
.
emb_shape
=
[
self
.
emb_size
,
self
.
emb_dim
]
self
.
emb_shape
=
[
self
.
emb_size
,
self
.
emb_dim
]
self
.
hidden_size
=
envs
.
get_global_env
(
"hyper_parameters.hidden_size"
,
None
,
self
.
_namespace
)
self
.
hidden_size
=
envs
.
get_global_env
(
"hyper_parameters.hidden_size"
,
None
,
self
.
_namespace
)
self
.
margin
=
0.1
self
.
margin
=
0.1
def
input
(
self
,
is_train
=
True
):
def
input
(
self
,
is_train
=
True
):
self
.
q_slots
=
[
self
.
q_slots
=
[
fluid
.
data
(
fluid
.
data
(
name
=
"%d"
%
i
,
shape
=
[
None
,
1
],
lod_level
=
1
,
dtype
=
'int64'
)
name
=
"%d"
%
i
,
shape
=
[
None
,
1
],
lod_level
=
1
,
dtype
=
'int64'
)
for
i
in
range
(
len
(
self
.
query_encoders
))
for
i
in
range
(
len
(
self
.
query_encoders
))
...
@@ -135,22 +137,23 @@ class Model(ModelBase):
...
@@ -135,22 +137,23 @@ class Model(ModelBase):
for
i
in
range
(
len
(
self
.
title_encoders
))
for
i
in
range
(
len
(
self
.
title_encoders
))
]
]
if
is_train
==
False
:
if
is_train
==
False
:
return
self
.
q_slots
+
self
.
pt_slots
return
self
.
q_slots
+
self
.
pt_slots
self
.
nt_slots
=
[
self
.
nt_slots
=
[
fluid
.
data
(
fluid
.
data
(
name
=
"%d"
%
(
i
+
len
(
self
.
query_encoders
)
+
len
(
self
.
title_encoders
)),
shape
=
[
None
,
1
],
lod_level
=
1
,
dtype
=
'int64'
)
name
=
"%d"
%
(
i
+
len
(
self
.
query_encoders
)
+
len
(
self
.
title_encoders
)),
shape
=
[
None
,
1
],
lod_level
=
1
,
dtype
=
'int64'
)
for
i
in
range
(
len
(
self
.
title_encoders
))
for
i
in
range
(
len
(
self
.
title_encoders
))
]
]
return
self
.
q_slots
+
self
.
pt_slots
+
self
.
nt_slots
return
self
.
q_slots
+
self
.
pt_slots
+
self
.
nt_slots
def
train_input
(
self
):
def
train_input
(
self
):
res
=
self
.
input
()
res
=
self
.
input
()
self
.
_data_var
=
res
self
.
_data_var
=
res
use_dataloader
=
envs
.
get_global_env
(
"hyper_parameters.use_DataLoader"
,
False
,
self
.
_namespace
)
use_dataloader
=
envs
.
get_global_env
(
"hyper_parameters.use_DataLoader"
,
False
,
self
.
_namespace
)
if
self
.
_platform
!=
"LINUX"
or
use_dataloader
:
if
self
.
_platform
!=
"LINUX"
or
use_dataloader
:
self
.
_data_loader
=
fluid
.
io
.
DataLoader
.
from_generator
(
self
.
_data_loader
=
fluid
.
io
.
DataLoader
.
from_generator
(
...
@@ -158,15 +161,15 @@ class Model(ModelBase):
...
@@ -158,15 +161,15 @@ class Model(ModelBase):
def
get_acc
(
self
,
x
,
y
):
def
get_acc
(
self
,
x
,
y
):
less
=
tensor
.
cast
(
cf
.
less_than
(
x
,
y
),
dtype
=
'float32'
)
less
=
tensor
.
cast
(
cf
.
less_than
(
x
,
y
),
dtype
=
'float32'
)
label_ones
=
fluid
.
layers
.
fill_constant_batch_size_like
(
label_ones
=
fluid
.
layers
.
fill_constant_batch_size_like
(
input
=
x
,
dtype
=
'float32'
,
shape
=
[
-
1
,
1
],
value
=
1.0
)
input
=
x
,
dtype
=
'float32'
,
shape
=
[
-
1
,
1
],
value
=
1.0
)
correct
=
fluid
.
layers
.
reduce_sum
(
less
)
correct
=
fluid
.
layers
.
reduce_sum
(
less
)
total
=
fluid
.
layers
.
reduce_sum
(
label_ones
)
total
=
fluid
.
layers
.
reduce_sum
(
label_ones
)
acc
=
fluid
.
layers
.
elementwise_div
(
correct
,
total
)
acc
=
fluid
.
layers
.
elementwise_div
(
correct
,
total
)
return
acc
return
acc
def
net
(
self
):
def
net
(
self
):
q_embs
=
[
q_embs
=
[
fluid
.
embedding
(
fluid
.
embedding
(
input
=
query
,
size
=
self
.
emb_shape
,
param_attr
=
"emb"
)
input
=
query
,
size
=
self
.
emb_shape
,
param_attr
=
"emb"
)
for
query
in
self
.
q_slots
for
query
in
self
.
q_slots
...
@@ -181,8 +184,8 @@ class Model(ModelBase):
...
@@ -181,8 +184,8 @@ class Model(ModelBase):
input
=
title
,
size
=
self
.
emb_shape
,
param_attr
=
"emb"
)
input
=
title
,
size
=
self
.
emb_shape
,
param_attr
=
"emb"
)
for
title
in
self
.
nt_slots
for
title
in
self
.
nt_slots
]
]
# encode each embedding field with encoder
# encode each embedding field with encoder
q_encodes
=
[
q_encodes
=
[
self
.
query_encoders
[
i
].
forward
(
emb
)
for
i
,
emb
in
enumerate
(
q_embs
)
self
.
query_encoders
[
i
].
forward
(
emb
)
for
i
,
emb
in
enumerate
(
q_embs
)
]
]
...
@@ -198,7 +201,7 @@ class Model(ModelBase):
...
@@ -198,7 +201,7 @@ class Model(ModelBase):
pt_concat
=
fluid
.
layers
.
concat
(
pt_encodes
)
pt_concat
=
fluid
.
layers
.
concat
(
pt_encodes
)
nt_concat
=
fluid
.
layers
.
concat
(
nt_encodes
)
nt_concat
=
fluid
.
layers
.
concat
(
nt_encodes
)
# projection of hidden layer
# projection of hidden layer
q_hid
=
fluid
.
layers
.
fc
(
q_concat
,
q_hid
=
fluid
.
layers
.
fc
(
q_concat
,
size
=
self
.
hidden_size
,
size
=
self
.
hidden_size
,
param_attr
=
'q_fc.w'
,
param_attr
=
'q_fc.w'
,
...
@@ -216,7 +219,7 @@ class Model(ModelBase):
...
@@ -216,7 +219,7 @@ class Model(ModelBase):
cos_pos
=
fluid
.
layers
.
cos_sim
(
q_hid
,
pt_hid
)
cos_pos
=
fluid
.
layers
.
cos_sim
(
q_hid
,
pt_hid
)
cos_neg
=
fluid
.
layers
.
cos_sim
(
q_hid
,
nt_hid
)
cos_neg
=
fluid
.
layers
.
cos_sim
(
q_hid
,
nt_hid
)
# pairwise hinge_loss
# pairwise hinge_loss
loss_part1
=
fluid
.
layers
.
elementwise_sub
(
loss_part1
=
fluid
.
layers
.
elementwise_sub
(
tensor
.
fill_constant_batch_size_like
(
tensor
.
fill_constant_batch_size_like
(
input
=
cos_pos
,
input
=
cos_pos
,
...
@@ -233,7 +236,7 @@ class Model(ModelBase):
...
@@ -233,7 +236,7 @@ class Model(ModelBase):
loss_part2
)
loss_part2
)
self
.
avg_cost
=
fluid
.
layers
.
mean
(
loss_part3
)
self
.
avg_cost
=
fluid
.
layers
.
mean
(
loss_part3
)
self
.
acc
=
self
.
get_acc
(
cos_neg
,
cos_pos
)
self
.
acc
=
self
.
get_acc
(
cos_neg
,
cos_pos
)
def
avg_loss
(
self
):
def
avg_loss
(
self
):
self
.
_cost
=
self
.
avg_cost
self
.
_cost
=
self
.
avg_cost
...
@@ -250,19 +253,19 @@ class Model(ModelBase):
...
@@ -250,19 +253,19 @@ class Model(ModelBase):
def
optimizer
(
self
):
def
optimizer
(
self
):
learning_rate
=
envs
.
get_global_env
(
"hyper_parameters.learning_rate"
,
None
,
self
.
_namespace
)
learning_rate
=
envs
.
get_global_env
(
"hyper_parameters.learning_rate"
,
None
,
self
.
_namespace
)
optimizer
=
fluid
.
optimizer
.
Adam
(
learning_rate
=
learning_rate
)
optimizer
=
fluid
.
optimizer
.
Adam
(
learning_rate
=
learning_rate
)
return
optimizer
return
optimizer
def
infer_input
(
self
):
def
infer_input
(
self
):
res
=
self
.
input
(
is_train
=
False
)
res
=
self
.
input
(
is_train
=
False
)
self
.
_infer_data_var
=
res
self
.
_infer_data_var
=
res
self
.
_infer_data_loader
=
fluid
.
io
.
DataLoader
.
from_generator
(
self
.
_infer_data_loader
=
fluid
.
io
.
DataLoader
.
from_generator
(
feed_list
=
self
.
_infer_data_var
,
capacity
=
64
,
use_double_buffer
=
False
,
iterable
=
False
)
feed_list
=
self
.
_infer_data_var
,
capacity
=
64
,
use_double_buffer
=
False
,
iterable
=
False
)
def
infer_net
(
self
):
def
infer_net
(
self
):
self
.
infer_input
()
self
.
infer_input
()
# lookup embedding for each slot
# lookup embedding for each slot
q_embs
=
[
q_embs
=
[
fluid
.
embedding
(
fluid
.
embedding
(
input
=
query
,
size
=
self
.
emb_shape
,
param_attr
=
"emb"
)
input
=
query
,
size
=
self
.
emb_shape
,
param_attr
=
"emb"
)
...
@@ -273,14 +276,14 @@ class Model(ModelBase):
...
@@ -273,14 +276,14 @@ class Model(ModelBase):
input
=
title
,
size
=
self
.
emb_shape
,
param_attr
=
"emb"
)
input
=
title
,
size
=
self
.
emb_shape
,
param_attr
=
"emb"
)
for
title
in
self
.
pt_slots
for
title
in
self
.
pt_slots
]
]
# encode each embedding field with encoder
# encode each embedding field with encoder
q_encodes
=
[
q_encodes
=
[
self
.
query_encoders
[
i
].
forward
(
emb
)
for
i
,
emb
in
enumerate
(
q_embs
)
self
.
query_encoders
[
i
].
forward
(
emb
)
for
i
,
emb
in
enumerate
(
q_embs
)
]
]
pt_encodes
=
[
pt_encodes
=
[
self
.
title_encoders
[
i
].
forward
(
emb
)
for
i
,
emb
in
enumerate
(
pt_embs
)
self
.
title_encoders
[
i
].
forward
(
emb
)
for
i
,
emb
in
enumerate
(
pt_embs
)
]
]
# concat multi view for query, pos_title, neg_title
# concat multi view for query, pos_title, neg_title
q_concat
=
fluid
.
layers
.
concat
(
q_encodes
)
q_concat
=
fluid
.
layers
.
concat
(
q_encodes
)
pt_concat
=
fluid
.
layers
.
concat
(
pt_encodes
)
pt_concat
=
fluid
.
layers
.
concat
(
pt_encodes
)
# projection of hidden layer
# projection of hidden layer
...
...
models/match/multiview-simnet/reader.py
浏览文件 @
7a3ec4e6
...
@@ -18,8 +18,8 @@ from paddlerec.core.utils import envs
...
@@ -18,8 +18,8 @@ from paddlerec.core.utils import envs
class
TrainReader
(
Reader
):
class
TrainReader
(
Reader
):
def
init
(
self
):
def
init
(
self
):
self
.
query_slots
=
envs
.
get_global_env
(
"hyper_parameters.query_slots"
,
None
,
"train.model"
)
self
.
query_slots
=
envs
.
get_global_env
(
"hyper_parameters.query_slots"
,
None
,
"train.model"
)
self
.
title_slots
=
envs
.
get_global_env
(
"hyper_parameters.title_slots"
,
None
,
"train.model"
)
self
.
title_slots
=
envs
.
get_global_env
(
"hyper_parameters.title_slots"
,
None
,
"train.model"
)
self
.
all_slots
=
[]
self
.
all_slots
=
[]
for
i
in
range
(
self
.
query_slots
):
for
i
in
range
(
self
.
query_slots
):
...
@@ -52,6 +52,7 @@ class TrainReader(Reader):
...
@@ -52,6 +52,7 @@ class TrainReader(Reader):
if
visit
:
if
visit
:
self
.
_all_slots_dict
[
slot
][
0
]
=
False
self
.
_all_slots_dict
[
slot
][
0
]
=
False
else
:
else
:
output
[
index
][
1
].
append
(
padding
)
output
[
index
][
1
].
append
(
padding
)
yield
output
yield
output
return
data_iter
return
data_iter
models/multitask/esmm/esmm_infer_reader.py
浏览文件 @
7a3ec4e6
...
@@ -18,14 +18,14 @@ from collections import defaultdict
...
@@ -18,14 +18,14 @@ from collections import defaultdict
from
paddlerec.core.reader
import
Reader
from
paddlerec.core.reader
import
Reader
class
EvaluateReader
(
Reader
):
class
EvaluateReader
(
Reader
):
def
init
(
self
):
def
init
(
self
):
all_field_id
=
[
'101'
,
'109_14'
,
'110_14'
,
'127_14'
,
'150_14'
,
'121'
,
'122'
,
'124'
,
'125'
,
'126'
,
'127'
,
'128'
,
'129'
,
all_field_id
=
[
'101'
,
'109_14'
,
'110_14'
,
'127_14'
,
'150_14'
,
'121'
,
'122'
,
'124'
,
'125'
,
'126'
,
'127'
,
'128'
,
'129'
,
'205'
,
'206'
,
'207'
,
'210'
,
'216'
,
'508'
,
'509'
,
'702'
,
'853'
,
'301'
]
'205'
,
'206'
,
'207'
,
'210'
,
'216'
,
'508'
,
'509'
,
'702'
,
'853'
,
'301'
]
self
.
all_field_id_dict
=
defaultdict
(
int
)
self
.
all_field_id_dict
=
defaultdict
(
int
)
for
i
,
field_id
in
enumerate
(
all_field_id
):
for
i
,
field_id
in
enumerate
(
all_field_id
):
self
.
all_field_id_dict
[
field_id
]
=
[
False
,
i
]
self
.
all_field_id_dict
[
field_id
]
=
[
False
,
i
]
def
generate_sample
(
self
,
line
):
def
generate_sample
(
self
,
line
):
"""
"""
...
@@ -39,25 +39,26 @@ class EvaluateReader(Reader):
...
@@ -39,25 +39,26 @@ class EvaluateReader(Reader):
features
=
line
.
strip
().
split
(
','
)
features
=
line
.
strip
().
split
(
','
)
ctr
=
int
(
features
[
1
])
ctr
=
int
(
features
[
1
])
cvr
=
int
(
features
[
2
])
cvr
=
int
(
features
[
2
])
padding
=
0
padding
=
0
output
=
[(
field_id
,[])
for
field_id
in
self
.
all_field_id_dict
]
output
=
[(
field_id
,
[])
for
field_id
in
self
.
all_field_id_dict
]
for
elem
in
features
[
4
:]:
for
elem
in
features
[
4
:]:
field_id
,
feat_id
=
elem
.
strip
().
split
(
':'
)
field_id
,
feat_id
=
elem
.
strip
().
split
(
':'
)
if
field_id
not
in
self
.
all_field_id_dict
:
if
field_id
not
in
self
.
all_field_id_dict
:
continue
continue
self
.
all_field_id_dict
[
field_id
][
0
]
=
True
self
.
all_field_id_dict
[
field_id
][
0
]
=
True
index
=
self
.
all_field_id_dict
[
field_id
][
1
]
index
=
self
.
all_field_id_dict
[
field_id
][
1
]
output
[
index
][
1
].
append
(
int
(
feat_id
))
output
[
index
][
1
].
append
(
int
(
feat_id
))
for
field_id
in
self
.
all_field_id_dict
:
for
field_id
in
self
.
all_field_id_dict
:
visited
,
index
=
self
.
all_field_id_dict
[
field_id
]
visited
,
index
=
self
.
all_field_id_dict
[
field_id
]
if
visited
:
if
visited
:
self
.
all_field_id_dict
[
field_id
][
0
]
=
False
self
.
all_field_id_dict
[
field_id
][
0
]
=
False
else
:
else
:
output
[
index
][
1
].
append
(
padding
)
output
[
index
][
1
].
append
(
padding
)
output
.
append
((
'ctr'
,
[
ctr
]))
output
.
append
((
'ctr'
,
[
ctr
]))
output
.
append
((
'cvr'
,
[
cvr
]))
output
.
append
((
'cvr'
,
[
cvr
]))
yield
output
yield
output
return
reader
return
reader
models/multitask/esmm/esmm_reader.py
浏览文件 @
7a3ec4e6
...
@@ -21,11 +21,12 @@ from paddlerec.core.reader import Reader
...
@@ -21,11 +21,12 @@ from paddlerec.core.reader import Reader
class
TrainReader
(
Reader
):
class
TrainReader
(
Reader
):
def
init
(
self
):
def
init
(
self
):
all_field_id
=
[
'101'
,
'109_14'
,
'110_14'
,
'127_14'
,
'150_14'
,
'121'
,
'122'
,
'124'
,
'125'
,
'126'
,
'127'
,
'128'
,
'129'
,
all_field_id
=
[
'101'
,
'109_14'
,
'110_14'
,
'127_14'
,
'150_14'
,
'121'
,
'122'
,
'124'
,
'125'
,
'126'
,
'127'
,
'128'
,
'129'
,
'205'
,
'206'
,
'207'
,
'210'
,
'216'
,
'508'
,
'509'
,
'702'
,
'853'
,
'301'
]
'205'
,
'206'
,
'207'
,
'210'
,
'216'
,
'508'
,
'509'
,
'702'
,
'853'
,
'301'
]
self
.
all_field_id_dict
=
defaultdict
(
int
)
self
.
all_field_id_dict
=
defaultdict
(
int
)
for
i
,
field_id
in
enumerate
(
all_field_id
):
for
i
,
field_id
in
enumerate
(
all_field_id
):
self
.
all_field_id_dict
[
field_id
]
=
[
False
,
i
]
self
.
all_field_id_dict
[
field_id
]
=
[
False
,
i
]
def
generate_sample
(
self
,
line
):
def
generate_sample
(
self
,
line
):
"""
"""
...
@@ -37,30 +38,31 @@ class TrainReader(Reader):
...
@@ -37,30 +38,31 @@ class TrainReader(Reader):
This function needs to be implemented by the user, based on data format
This function needs to be implemented by the user, based on data format
"""
"""
features
=
line
.
strip
().
split
(
','
)
features
=
line
.
strip
().
split
(
','
)
#ctr = list(map(int, features[1]))
#
ctr = list(map(int, features[1]))
#cvr = list(map(int, features[2]))
#
cvr = list(map(int, features[2]))
ctr
=
int
(
features
[
1
])
ctr
=
int
(
features
[
1
])
cvr
=
int
(
features
[
2
])
cvr
=
int
(
features
[
2
])
padding
=
0
padding
=
0
output
=
[(
field_id
,[])
for
field_id
in
self
.
all_field_id_dict
]
output
=
[(
field_id
,
[])
for
field_id
in
self
.
all_field_id_dict
]
for
elem
in
features
[
4
:]:
for
elem
in
features
[
4
:]:
field_id
,
feat_id
=
elem
.
strip
().
split
(
':'
)
field_id
,
feat_id
=
elem
.
strip
().
split
(
':'
)
if
field_id
not
in
self
.
all_field_id_dict
:
if
field_id
not
in
self
.
all_field_id_dict
:
continue
continue
self
.
all_field_id_dict
[
field_id
][
0
]
=
True
self
.
all_field_id_dict
[
field_id
][
0
]
=
True
index
=
self
.
all_field_id_dict
[
field_id
][
1
]
index
=
self
.
all_field_id_dict
[
field_id
][
1
]
#
feat_id = list(map(int, feat_id))
#
feat_id = list(map(int, feat_id))
output
[
index
][
1
].
append
(
int
(
feat_id
))
output
[
index
][
1
].
append
(
int
(
feat_id
))
for
field_id
in
self
.
all_field_id_dict
:
for
field_id
in
self
.
all_field_id_dict
:
visited
,
index
=
self
.
all_field_id_dict
[
field_id
]
visited
,
index
=
self
.
all_field_id_dict
[
field_id
]
if
visited
:
if
visited
:
self
.
all_field_id_dict
[
field_id
][
0
]
=
False
self
.
all_field_id_dict
[
field_id
][
0
]
=
False
else
:
else
:
output
[
index
][
1
].
append
(
padding
)
output
[
index
][
1
].
append
(
padding
)
output
.
append
((
'ctr'
,
[
ctr
]))
output
.
append
((
'ctr'
,
[
ctr
]))
output
.
append
((
'cvr'
,
[
cvr
]))
output
.
append
((
'cvr'
,
[
cvr
]))
yield
output
yield
output
return
reader
return
reader
models/multitask/esmm/model.py
浏览文件 @
7a3ec4e6
...
@@ -23,71 +23,73 @@ class Model(ModelBase):
...
@@ -23,71 +23,73 @@ class Model(ModelBase):
def
__init__
(
self
,
config
):
def
__init__
(
self
,
config
):
ModelBase
.
__init__
(
self
,
config
)
ModelBase
.
__init__
(
self
,
config
)
def
fc
(
self
,
tag
,
data
,
out_dim
,
active
=
'prelu'
):
def
fc
(
self
,
tag
,
data
,
out_dim
,
active
=
'prelu'
):
init_stddev
=
1.0
init_stddev
=
1.0
scales
=
1.0
/
np
.
sqrt
(
data
.
shape
[
1
])
scales
=
1.0
/
np
.
sqrt
(
data
.
shape
[
1
])
p_attr
=
fluid
.
param_attr
.
ParamAttr
(
name
=
'%s_weight'
%
tag
,
p_attr
=
fluid
.
param_attr
.
ParamAttr
(
name
=
'%s_weight'
%
tag
,
initializer
=
fluid
.
initializer
.
NormalInitializer
(
loc
=
0.0
,
scale
=
init_stddev
*
scales
))
initializer
=
fluid
.
initializer
.
NormalInitializer
(
loc
=
0.0
,
scale
=
init_stddev
*
scales
))
b_attr
=
fluid
.
ParamAttr
(
name
=
'%s_bias'
%
tag
,
initializer
=
fluid
.
initializer
.
Constant
(
0.1
))
b_attr
=
fluid
.
ParamAttr
(
name
=
'%s_bias'
%
tag
,
initializer
=
fluid
.
initializer
.
Constant
(
0.1
))
out
=
fluid
.
layers
.
fc
(
input
=
data
,
out
=
fluid
.
layers
.
fc
(
input
=
data
,
size
=
out_dim
,
size
=
out_dim
,
act
=
active
,
act
=
active
,
param_attr
=
p_attr
,
param_attr
=
p_attr
,
bias_attr
=
b_attr
,
bias_attr
=
b_attr
,
name
=
tag
)
name
=
tag
)
return
out
return
out
def
input_data
(
self
):
def
input_data
(
self
):
sparse_input_ids
=
[
sparse_input_ids
=
[
fluid
.
data
(
name
=
"field_"
+
str
(
i
),
shape
=
[
-
1
,
1
],
dtype
=
"int64"
,
lod_level
=
1
)
for
i
in
range
(
0
,
23
)
fluid
.
data
(
name
=
"field_"
+
str
(
i
),
shape
=
[
-
1
,
1
],
dtype
=
"int64"
,
lod_level
=
1
)
for
i
in
range
(
0
,
23
)
]
]
label_ctr
=
fluid
.
data
(
name
=
"ctr"
,
shape
=
[
-
1
,
1
],
dtype
=
"int64"
)
label_ctr
=
fluid
.
data
(
name
=
"ctr"
,
shape
=
[
-
1
,
1
],
dtype
=
"int64"
)
label_cvr
=
fluid
.
data
(
name
=
"cvr"
,
shape
=
[
-
1
,
1
],
dtype
=
"int64"
)
label_cvr
=
fluid
.
data
(
name
=
"cvr"
,
shape
=
[
-
1
,
1
],
dtype
=
"int64"
)
inputs
=
sparse_input_ids
+
[
label_ctr
]
+
[
label_cvr
]
inputs
=
sparse_input_ids
+
[
label_ctr
]
+
[
label_cvr
]
self
.
_data_var
.
extend
(
inputs
)
self
.
_data_var
.
extend
(
inputs
)
return
inputs
return
inputs
def
net
(
self
,
inputs
,
is_infer
=
False
):
def
net
(
self
,
inputs
,
is_infer
=
False
):
vocab_size
=
envs
.
get_global_env
(
"hyper_parameters.vocab_size"
,
None
,
self
.
_namespace
)
vocab_size
=
envs
.
get_global_env
(
"hyper_parameters.vocab_size"
,
None
,
self
.
_namespace
)
embed_size
=
envs
.
get_global_env
(
"hyper_parameters.embed_size"
,
None
,
self
.
_namespace
)
embed_size
=
envs
.
get_global_env
(
"hyper_parameters.embed_size"
,
None
,
self
.
_namespace
)
emb
=
[]
emb
=
[]
for
data
in
inputs
[
0
:
-
2
]:
for
data
in
inputs
[
0
:
-
2
]:
feat_emb
=
fluid
.
embedding
(
input
=
data
,
feat_emb
=
fluid
.
embedding
(
input
=
data
,
size
=
[
vocab_size
,
embed_size
],
size
=
[
vocab_size
,
embed_size
],
param_attr
=
fluid
.
ParamAttr
(
name
=
'dis_emb'
,
param_attr
=
fluid
.
ParamAttr
(
name
=
'dis_emb'
,
learning_rate
=
5
,
learning_rate
=
5
,
initializer
=
fluid
.
initializer
.
Xavier
(
fan_in
=
embed_size
,
fan_out
=
embed_size
)
initializer
=
fluid
.
initializer
.
Xavier
(
),
fan_in
=
embed_size
,
fan_out
=
embed_size
)
is_sparse
=
True
)
),
field_emb
=
fluid
.
layers
.
sequence_pool
(
input
=
feat_emb
,
pool_type
=
'sum'
)
is_sparse
=
True
)
field_emb
=
fluid
.
layers
.
sequence_pool
(
input
=
feat_emb
,
pool_type
=
'sum'
)
emb
.
append
(
field_emb
)
emb
.
append
(
field_emb
)
concat_emb
=
fluid
.
layers
.
concat
(
emb
,
axis
=
1
)
concat_emb
=
fluid
.
layers
.
concat
(
emb
,
axis
=
1
)
# ctr
# ctr
active
=
'relu'
active
=
'relu'
ctr_fc1
=
self
.
fc
(
'ctr_fc1'
,
concat_emb
,
200
,
active
)
ctr_fc1
=
self
.
fc
(
'ctr_fc1'
,
concat_emb
,
200
,
active
)
ctr_fc2
=
self
.
fc
(
'ctr_fc2'
,
ctr_fc1
,
80
,
active
)
ctr_fc2
=
self
.
fc
(
'ctr_fc2'
,
ctr_fc1
,
80
,
active
)
ctr_out
=
self
.
fc
(
'ctr_out'
,
ctr_fc2
,
2
,
'softmax'
)
ctr_out
=
self
.
fc
(
'ctr_out'
,
ctr_fc2
,
2
,
'softmax'
)
# cvr
# cvr
cvr_fc1
=
self
.
fc
(
'cvr_fc1'
,
concat_emb
,
200
,
active
)
cvr_fc1
=
self
.
fc
(
'cvr_fc1'
,
concat_emb
,
200
,
active
)
cvr_fc2
=
self
.
fc
(
'cvr_fc2'
,
cvr_fc1
,
80
,
active
)
cvr_fc2
=
self
.
fc
(
'cvr_fc2'
,
cvr_fc1
,
80
,
active
)
cvr_out
=
self
.
fc
(
'cvr_out'
,
cvr_fc2
,
2
,
'softmax'
)
cvr_out
=
self
.
fc
(
'cvr_out'
,
cvr_fc2
,
2
,
'softmax'
)
ctr_clk
=
inputs
[
-
2
]
ctr_clk
=
inputs
[
-
2
]
ctcvr_buy
=
inputs
[
-
1
]
ctcvr_buy
=
inputs
[
-
1
]
ctr_prop_one
=
fluid
.
layers
.
slice
(
ctr_out
,
axes
=
[
1
],
starts
=
[
1
],
ends
=
[
2
])
ctr_prop_one
=
fluid
.
layers
.
slice
(
ctr_out
,
axes
=
[
1
],
starts
=
[
1
],
ends
=
[
2
])
cvr_prop_one
=
fluid
.
layers
.
slice
(
cvr_out
,
axes
=
[
1
],
starts
=
[
1
],
ends
=
[
2
])
cvr_prop_one
=
fluid
.
layers
.
slice
(
cvr_out
,
axes
=
[
1
],
starts
=
[
1
],
ends
=
[
2
])
ctcvr_prop_one
=
fluid
.
layers
.
elementwise_mul
(
ctr_prop_one
,
cvr_prop_one
)
ctcvr_prop_one
=
fluid
.
layers
.
elementwise_mul
(
ctr_prop_one
,
cvr_prop_one
)
ctcvr_prop
=
fluid
.
layers
.
concat
(
input
=
[
1
-
ctcvr_prop_one
,
ctcvr_prop_one
],
axis
=
1
)
ctcvr_prop
=
fluid
.
layers
.
concat
(
input
=
[
1
-
ctcvr_prop_one
,
ctcvr_prop_one
],
axis
=
1
)
auc_ctr
,
batch_auc_ctr
,
auc_states_ctr
=
fluid
.
layers
.
auc
(
input
=
ctr_out
,
label
=
ctr_clk
)
auc_ctr
,
batch_auc_ctr
,
auc_states_ctr
=
fluid
.
layers
.
auc
(
input
=
ctr_out
,
label
=
ctr_clk
)
auc_ctcvr
,
batch_auc_ctcvr
,
auc_states_ctcvr
=
fluid
.
layers
.
auc
(
input
=
ctcvr_prop
,
label
=
ctcvr_buy
)
auc_ctcvr
,
batch_auc_ctcvr
,
auc_states_ctcvr
=
fluid
.
layers
.
auc
(
input
=
ctcvr_prop
,
label
=
ctcvr_buy
)
...
@@ -97,27 +99,23 @@ class Model(ModelBase):
...
@@ -97,27 +99,23 @@ class Model(ModelBase):
self
.
_infer_results
[
"AUC_ctcvr"
]
=
auc_ctcvr
self
.
_infer_results
[
"AUC_ctcvr"
]
=
auc_ctcvr
return
return
loss_ctr
=
fluid
.
layers
.
cross_entropy
(
input
=
ctr_out
,
label
=
ctr_clk
)
loss_ctr
=
fluid
.
layers
.
cross_entropy
(
input
=
ctr_out
,
label
=
ctr_clk
)
loss_ctcvr
=
fluid
.
layers
.
cross_entropy
(
input
=
ctcvr_prop
,
label
=
ctcvr_buy
)
loss_ctcvr
=
fluid
.
layers
.
cross_entropy
(
input
=
ctcvr_prop
,
label
=
ctcvr_buy
)
cost
=
loss_ctr
+
loss_ctcvr
cost
=
loss_ctr
+
loss_ctcvr
avg_cost
=
fluid
.
layers
.
mean
(
cost
)
avg_cost
=
fluid
.
layers
.
mean
(
cost
)
self
.
_cost
=
avg_cost
self
.
_cost
=
avg_cost
self
.
_metrics
[
"AUC_ctr"
]
=
auc_ctr
self
.
_metrics
[
"AUC_ctr"
]
=
auc_ctr
self
.
_metrics
[
"BATCH_AUC_ctr"
]
=
batch_auc_ctr
self
.
_metrics
[
"BATCH_AUC_ctr"
]
=
batch_auc_ctr
self
.
_metrics
[
"AUC_ctcvr"
]
=
auc_ctcvr
self
.
_metrics
[
"AUC_ctcvr"
]
=
auc_ctcvr
self
.
_metrics
[
"BATCH_AUC_ctcvr"
]
=
batch_auc_ctcvr
self
.
_metrics
[
"BATCH_AUC_ctcvr"
]
=
batch_auc_ctcvr
def
train_net
(
self
):
def
train_net
(
self
):
input_data
=
self
.
input_data
()
input_data
=
self
.
input_data
()
self
.
net
(
input_data
)
self
.
net
(
input_data
)
def
infer_net
(
self
):
def
infer_net
(
self
):
self
.
_infer_data_var
=
self
.
input_data
()
self
.
_infer_data_var
=
self
.
input_data
()
self
.
_infer_data_loader
=
fluid
.
io
.
DataLoader
.
from_generator
(
self
.
_infer_data_loader
=
fluid
.
io
.
DataLoader
.
from_generator
(
feed_list
=
self
.
_infer_data_var
,
capacity
=
64
,
use_double_buffer
=
False
,
iterable
=
False
)
feed_list
=
self
.
_infer_data_var
,
capacity
=
64
,
use_double_buffer
=
False
,
iterable
=
False
)
self
.
net
(
self
.
_infer_data_var
,
is_infer
=
True
)
self
.
net
(
self
.
_infer_data_var
,
is_infer
=
True
)
models/multitask/mmoe/census_reader.py
浏览文件 @
7a3ec4e6
...
@@ -43,8 +43,8 @@ class TrainReader(Reader):
...
@@ -43,8 +43,8 @@ class TrainReader(Reader):
label_marital
=
[
1
,
0
]
label_marital
=
[
1
,
0
]
elif
int
(
l
[
0
])
==
1
:
elif
int
(
l
[
0
])
==
1
:
label_marital
=
[
0
,
1
]
label_marital
=
[
0
,
1
]
#label_income = np.array(label_income)
#
label_income = np.array(label_income)
#label_marital = np.array(label_marital)
#
label_marital = np.array(label_marital)
feature_name
=
[
"input"
,
"label_income"
,
"label_marital"
]
feature_name
=
[
"input"
,
"label_income"
,
"label_marital"
]
yield
zip
(
feature_name
,
[
data
]
+
[
label_income
]
+
[
label_marital
])
yield
zip
(
feature_name
,
[
data
]
+
[
label_income
]
+
[
label_marital
])
...
...
models/multitask/mmoe/model.py
浏览文件 @
7a3ec4e6
...
@@ -36,22 +36,21 @@ class Model(ModelBase):
...
@@ -36,22 +36,21 @@ class Model(ModelBase):
if
is_infer
:
if
is_infer
:
self
.
_infer_data_var
=
[
input_data
,
label_income
,
label_marital
]
self
.
_infer_data_var
=
[
input_data
,
label_income
,
label_marital
]
self
.
_infer_data_loader
=
fluid
.
io
.
DataLoader
.
from_generator
(
self
.
_infer_data_loader
=
fluid
.
io
.
DataLoader
.
from_generator
(
feed_list
=
self
.
_infer_data_var
,
capacity
=
64
,
use_double_buffer
=
False
,
iterable
=
False
)
feed_list
=
self
.
_infer_data_var
,
capacity
=
64
,
use_double_buffer
=
False
,
iterable
=
False
)
self
.
_data_var
.
extend
([
input_data
,
label_income
,
label_marital
])
self
.
_data_var
.
extend
([
input_data
,
label_income
,
label_marital
])
# f_{i}(x) = activation(W_{i} * x + b), where activation is ReLU according to the paper
# f_{i}(x) = activation(W_{i} * x + b), where activation is ReLU according to the paper
expert_outputs
=
[]
expert_outputs
=
[]
for
i
in
range
(
0
,
expert_num
):
for
i
in
range
(
0
,
expert_num
):
expert_output
=
fluid
.
layers
.
fc
(
input
=
input_data
,
expert_output
=
fluid
.
layers
.
fc
(
input
=
input_data
,
size
=
expert_size
,
size
=
expert_size
,
act
=
'relu'
,
act
=
'relu'
,
bias_attr
=
fluid
.
ParamAttr
(
learning_rate
=
1.0
),
bias_attr
=
fluid
.
ParamAttr
(
learning_rate
=
1.0
),
name
=
'expert_'
+
str
(
i
))
name
=
'expert_'
+
str
(
i
))
expert_outputs
.
append
(
expert_output
)
expert_outputs
.
append
(
expert_output
)
expert_concat
=
fluid
.
layers
.
concat
(
expert_outputs
,
axis
=
1
)
expert_concat
=
fluid
.
layers
.
concat
(
expert_outputs
,
axis
=
1
)
expert_concat
=
fluid
.
layers
.
reshape
(
expert_concat
,[
-
1
,
expert_num
,
expert_size
])
expert_concat
=
fluid
.
layers
.
reshape
(
expert_concat
,
[
-
1
,
expert_num
,
expert_size
])
# g^{k}(x) = activation(W_{gk} * x + b), where activation is softmax according to the paper
# g^{k}(x) = activation(W_{gk} * x + b), where activation is softmax according to the paper
output_layers
=
[]
output_layers
=
[]
for
i
in
range
(
0
,
gate_num
):
for
i
in
range
(
0
,
gate_num
):
...
@@ -61,52 +60,53 @@ class Model(ModelBase):
...
@@ -61,52 +60,53 @@ class Model(ModelBase):
bias_attr
=
fluid
.
ParamAttr
(
learning_rate
=
1.0
),
bias_attr
=
fluid
.
ParamAttr
(
learning_rate
=
1.0
),
name
=
'gate_'
+
str
(
i
))
name
=
'gate_'
+
str
(
i
))
# f^{k}(x) = sum_{i=1}^{n}(g^{k}(x)_{i} * f_{i}(x))
# f^{k}(x) = sum_{i=1}^{n}(g^{k}(x)_{i} * f_{i}(x))
cur_gate_expert
=
fluid
.
layers
.
elementwise_mul
(
expert_concat
,
cur_gate
,
axis
=
0
)
cur_gate_expert
=
fluid
.
layers
.
elementwise_mul
(
expert_concat
,
cur_gate
,
axis
=
0
)
cur_gate_expert
=
fluid
.
layers
.
reduce_sum
(
cur_gate_expert
,
dim
=
1
)
cur_gate_expert
=
fluid
.
layers
.
reduce_sum
(
cur_gate_expert
,
dim
=
1
)
# Build tower layer
# Build tower layer
cur_tower
=
fluid
.
layers
.
fc
(
input
=
cur_gate_expert
,
cur_tower
=
fluid
.
layers
.
fc
(
input
=
cur_gate_expert
,
size
=
tower_size
,
size
=
tower_size
,
act
=
'relu'
,
act
=
'relu'
,
name
=
'task_layer_'
+
str
(
i
))
name
=
'task_layer_'
+
str
(
i
))
out
=
fluid
.
layers
.
fc
(
input
=
cur_tower
,
out
=
fluid
.
layers
.
fc
(
input
=
cur_tower
,
size
=
2
,
size
=
2
,
act
=
'softmax'
,
act
=
'softmax'
,
name
=
'out_'
+
str
(
i
))
name
=
'out_'
+
str
(
i
))
output_layers
.
append
(
out
)
output_layers
.
append
(
out
)
pred_income
=
fluid
.
layers
.
clip
(
output_layers
[
0
],
min
=
1e-15
,
max
=
1.0
-
1e-15
)
pred_income
=
fluid
.
layers
.
clip
(
output_layers
[
0
],
min
=
1e-15
,
max
=
1.0
-
1e-15
)
pred_marital
=
fluid
.
layers
.
clip
(
output_layers
[
1
],
min
=
1e-15
,
max
=
1.0
-
1e-15
)
pred_marital
=
fluid
.
layers
.
clip
(
output_layers
[
1
],
min
=
1e-15
,
max
=
1.0
-
1e-15
)
label_income_1
=
fluid
.
layers
.
slice
(
label_income
,
axes
=
[
1
],
starts
=
[
1
],
ends
=
[
2
])
label_income_1
=
fluid
.
layers
.
slice
(
label_income
,
axes
=
[
1
],
starts
=
[
1
],
ends
=
[
2
])
label_marital_1
=
fluid
.
layers
.
slice
(
label_marital
,
axes
=
[
1
],
starts
=
[
1
],
ends
=
[
2
])
label_marital_1
=
fluid
.
layers
.
slice
(
label_marital
,
axes
=
[
1
],
starts
=
[
1
],
ends
=
[
2
])
auc_income
,
batch_auc_1
,
auc_states_1
=
fluid
.
layers
.
auc
(
input
=
pred_income
,
label
=
fluid
.
layers
.
cast
(
x
=
label_income_1
,
dtype
=
'int64'
))
auc_income
,
batch_auc_1
,
auc_states_1
=
fluid
.
layers
.
auc
(
input
=
pred_income
,
auc_marital
,
batch_auc_2
,
auc_states_2
=
fluid
.
layers
.
auc
(
input
=
pred_marital
,
label
=
fluid
.
layers
.
cast
(
x
=
label_marital_1
,
dtype
=
'int64'
))
label
=
fluid
.
layers
.
cast
(
x
=
label_income_1
,
dtype
=
'int64'
))
auc_marital
,
batch_auc_2
,
auc_states_2
=
fluid
.
layers
.
auc
(
input
=
pred_marital
,
label
=
fluid
.
layers
.
cast
(
x
=
label_marital_1
,
dtype
=
'int64'
))
if
is_infer
:
if
is_infer
:
self
.
_infer_results
[
"AUC_income"
]
=
auc_income
self
.
_infer_results
[
"AUC_income"
]
=
auc_income
self
.
_infer_results
[
"AUC_marital"
]
=
auc_marital
self
.
_infer_results
[
"AUC_marital"
]
=
auc_marital
return
return
cost_income
=
fluid
.
layers
.
cross_entropy
(
input
=
pred_income
,
label
=
label_income
,
soft_label
=
True
)
cost_income
=
fluid
.
layers
.
cross_entropy
(
input
=
pred_income
,
label
=
label_income
,
soft_label
=
True
)
cost_marital
=
fluid
.
layers
.
cross_entropy
(
input
=
pred_marital
,
label
=
label_marital
,
soft_label
=
True
)
cost_marital
=
fluid
.
layers
.
cross_entropy
(
input
=
pred_marital
,
label
=
label_marital
,
soft_label
=
True
)
avg_cost_income
=
fluid
.
layers
.
mean
(
x
=
cost_income
)
avg_cost_income
=
fluid
.
layers
.
mean
(
x
=
cost_income
)
avg_cost_marital
=
fluid
.
layers
.
mean
(
x
=
cost_marital
)
avg_cost_marital
=
fluid
.
layers
.
mean
(
x
=
cost_marital
)
cost
=
avg_cost_income
+
avg_cost_marital
cost
=
avg_cost_income
+
avg_cost_marital
self
.
_cost
=
cost
self
.
_cost
=
cost
self
.
_metrics
[
"AUC_income"
]
=
auc_income
self
.
_metrics
[
"AUC_income"
]
=
auc_income
self
.
_metrics
[
"BATCH_AUC_income"
]
=
batch_auc_1
self
.
_metrics
[
"BATCH_AUC_income"
]
=
batch_auc_1
self
.
_metrics
[
"AUC_marital"
]
=
auc_marital
self
.
_metrics
[
"AUC_marital"
]
=
auc_marital
self
.
_metrics
[
"BATCH_AUC_marital"
]
=
batch_auc_2
self
.
_metrics
[
"BATCH_AUC_marital"
]
=
batch_auc_2
def
train_net
(
self
):
def
train_net
(
self
):
self
.
MMOE
()
self
.
MMOE
()
def
infer_net
(
self
):
def
infer_net
(
self
):
self
.
MMOE
(
is_infer
=
True
)
self
.
MMOE
(
is_infer
=
True
)
models/multitask/share-bottom/census_reader.py
浏览文件 @
7a3ec4e6
...
@@ -43,8 +43,8 @@ class TrainReader(Reader):
...
@@ -43,8 +43,8 @@ class TrainReader(Reader):
label_marital
=
[
1
,
0
]
label_marital
=
[
1
,
0
]
elif
int
(
l
[
0
])
==
1
:
elif
int
(
l
[
0
])
==
1
:
label_marital
=
[
0
,
1
]
label_marital
=
[
0
,
1
]
#label_income = np.array(label_income)
#
label_income = np.array(label_income)
#label_marital = np.array(label_marital)
#
label_marital = np.array(label_marital)
feature_name
=
[
"input"
,
"label_income"
,
"label_marital"
]
feature_name
=
[
"input"
,
"label_income"
,
"label_marital"
]
yield
zip
(
feature_name
,
[
data
]
+
[
label_income
]
+
[
label_marital
])
yield
zip
(
feature_name
,
[
data
]
+
[
label_income
]
+
[
label_marital
])
...
...
models/multitask/share-bottom/model.py
浏览文件 @
7a3ec4e6
...
@@ -32,65 +32,65 @@ class Model(ModelBase):
...
@@ -32,65 +32,65 @@ class Model(ModelBase):
input_data
=
fluid
.
data
(
name
=
"input"
,
shape
=
[
-
1
,
feature_size
],
dtype
=
"float32"
)
input_data
=
fluid
.
data
(
name
=
"input"
,
shape
=
[
-
1
,
feature_size
],
dtype
=
"float32"
)
label_income
=
fluid
.
data
(
name
=
"label_income"
,
shape
=
[
-
1
,
2
],
dtype
=
"float32"
,
lod_level
=
0
)
label_income
=
fluid
.
data
(
name
=
"label_income"
,
shape
=
[
-
1
,
2
],
dtype
=
"float32"
,
lod_level
=
0
)
label_marital
=
fluid
.
data
(
name
=
"label_marital"
,
shape
=
[
-
1
,
2
],
dtype
=
"float32"
,
lod_level
=
0
)
label_marital
=
fluid
.
data
(
name
=
"label_marital"
,
shape
=
[
-
1
,
2
],
dtype
=
"float32"
,
lod_level
=
0
)
if
is_infer
:
if
is_infer
:
self
.
_infer_data_var
=
[
input_data
,
label_income
,
label_marital
]
self
.
_infer_data_var
=
[
input_data
,
label_income
,
label_marital
]
self
.
_infer_data_loader
=
fluid
.
io
.
DataLoader
.
from_generator
(
self
.
_infer_data_loader
=
fluid
.
io
.
DataLoader
.
from_generator
(
feed_list
=
self
.
_infer_data_var
,
capacity
=
64
,
use_double_buffer
=
False
,
iterable
=
False
)
feed_list
=
self
.
_infer_data_var
,
capacity
=
64
,
use_double_buffer
=
False
,
iterable
=
False
)
self
.
_data_var
.
extend
([
input_data
,
label_income
,
label_marital
])
self
.
_data_var
.
extend
([
input_data
,
label_income
,
label_marital
])
bottom_output
=
fluid
.
layers
.
fc
(
input
=
input_data
,
bottom_output
=
fluid
.
layers
.
fc
(
input
=
input_data
,
size
=
bottom_size
,
size
=
bottom_size
,
act
=
'relu'
,
act
=
'relu'
,
bias_attr
=
fluid
.
ParamAttr
(
learning_rate
=
1.0
),
bias_attr
=
fluid
.
ParamAttr
(
learning_rate
=
1.0
),
name
=
'bottom_output'
)
name
=
'bottom_output'
)
# Build tower layer from bottom layer
# Build tower layer from bottom layer
output_layers
=
[]
output_layers
=
[]
for
index
in
range
(
tower_nums
):
for
index
in
range
(
tower_nums
):
tower_layer
=
fluid
.
layers
.
fc
(
input
=
bottom_output
,
tower_layer
=
fluid
.
layers
.
fc
(
input
=
bottom_output
,
size
=
tower_size
,
size
=
tower_size
,
act
=
'relu'
,
act
=
'relu'
,
name
=
'task_layer_'
+
str
(
index
))
name
=
'task_layer_'
+
str
(
index
))
output_layer
=
fluid
.
layers
.
fc
(
input
=
tower_layer
,
output_layer
=
fluid
.
layers
.
fc
(
input
=
tower_layer
,
size
=
2
,
size
=
2
,
act
=
'softmax'
,
act
=
'softmax'
,
name
=
'output_layer_'
+
str
(
index
))
name
=
'output_layer_'
+
str
(
index
))
output_layers
.
append
(
output_layer
)
output_layers
.
append
(
output_layer
)
pred_income
=
fluid
.
layers
.
clip
(
output_layers
[
0
],
min
=
1e-15
,
max
=
1.0
-
1e-15
)
pred_income
=
fluid
.
layers
.
clip
(
output_layers
[
0
],
min
=
1e-15
,
max
=
1.0
-
1e-15
)
pred_marital
=
fluid
.
layers
.
clip
(
output_layers
[
1
],
min
=
1e-15
,
max
=
1.0
-
1e-15
)
pred_marital
=
fluid
.
layers
.
clip
(
output_layers
[
1
],
min
=
1e-15
,
max
=
1.0
-
1e-15
)
label_income_1
=
fluid
.
layers
.
slice
(
label_income
,
axes
=
[
1
],
starts
=
[
1
],
ends
=
[
2
])
label_income_1
=
fluid
.
layers
.
slice
(
label_income
,
axes
=
[
1
],
starts
=
[
1
],
ends
=
[
2
])
label_marital_1
=
fluid
.
layers
.
slice
(
label_marital
,
axes
=
[
1
],
starts
=
[
1
],
ends
=
[
2
])
label_marital_1
=
fluid
.
layers
.
slice
(
label_marital
,
axes
=
[
1
],
starts
=
[
1
],
ends
=
[
2
])
auc_income
,
batch_auc_1
,
auc_states_1
=
fluid
.
layers
.
auc
(
input
=
pred_income
,
label
=
fluid
.
layers
.
cast
(
x
=
label_income_1
,
dtype
=
'int64'
))
auc_income
,
batch_auc_1
,
auc_states_1
=
fluid
.
layers
.
auc
(
input
=
pred_income
,
auc_marital
,
batch_auc_2
,
auc_states_2
=
fluid
.
layers
.
auc
(
input
=
pred_marital
,
label
=
fluid
.
layers
.
cast
(
x
=
label_marital_1
,
dtype
=
'int64'
))
label
=
fluid
.
layers
.
cast
(
x
=
label_income_1
,
dtype
=
'int64'
))
auc_marital
,
batch_auc_2
,
auc_states_2
=
fluid
.
layers
.
auc
(
input
=
pred_marital
,
label
=
fluid
.
layers
.
cast
(
x
=
label_marital_1
,
dtype
=
'int64'
))
if
is_infer
:
if
is_infer
:
self
.
_infer_results
[
"AUC_income"
]
=
auc_income
self
.
_infer_results
[
"AUC_income"
]
=
auc_income
self
.
_infer_results
[
"AUC_marital"
]
=
auc_marital
self
.
_infer_results
[
"AUC_marital"
]
=
auc_marital
return
return
cost_income
=
fluid
.
layers
.
cross_entropy
(
input
=
pred_income
,
label
=
label_income
,
soft_label
=
True
)
cost_income
=
fluid
.
layers
.
cross_entropy
(
input
=
pred_income
,
label
=
label_income
,
soft_label
=
True
)
cost_marital
=
fluid
.
layers
.
cross_entropy
(
input
=
pred_marital
,
label
=
label_marital
,
soft_label
=
True
)
cost_marital
=
fluid
.
layers
.
cross_entropy
(
input
=
pred_marital
,
label
=
label_marital
,
soft_label
=
True
)
cost
=
fluid
.
layers
.
elementwise_add
(
cost_income
,
cost_marital
,
axis
=
1
)
cost
=
fluid
.
layers
.
elementwise_add
(
cost_income
,
cost_marital
,
axis
=
1
)
avg_cost
=
fluid
.
layers
.
mean
(
x
=
cost
)
avg_cost
=
fluid
.
layers
.
mean
(
x
=
cost
)
self
.
_cost
=
avg_cost
self
.
_cost
=
avg_cost
self
.
_metrics
[
"AUC_income"
]
=
auc_income
self
.
_metrics
[
"AUC_income"
]
=
auc_income
self
.
_metrics
[
"BATCH_AUC_income"
]
=
batch_auc_1
self
.
_metrics
[
"BATCH_AUC_income"
]
=
batch_auc_1
self
.
_metrics
[
"AUC_marital"
]
=
auc_marital
self
.
_metrics
[
"AUC_marital"
]
=
auc_marital
self
.
_metrics
[
"BATCH_AUC_marital"
]
=
batch_auc_2
self
.
_metrics
[
"BATCH_AUC_marital"
]
=
batch_auc_2
def
train_net
(
self
):
def
train_net
(
self
):
self
.
model
()
self
.
model
()
def
infer_net
(
self
):
def
infer_net
(
self
):
self
.
model
(
is_infer
=
True
)
self
.
model
(
is_infer
=
True
)
models/rank/dcn/criteo_reader.py
浏览文件 @
7a3ec4e6
...
@@ -21,7 +21,6 @@ try:
...
@@ -21,7 +21,6 @@ try:
except
ImportError
:
except
ImportError
:
import
pickle
import
pickle
from
paddlerec.core.reader
import
Reader
from
paddlerec.core.reader
import
Reader
from
paddlerec.core.utils
import
envs
from
paddlerec.core.utils
import
envs
...
@@ -47,7 +46,7 @@ class TrainReader(Reader):
...
@@ -47,7 +46,7 @@ class TrainReader(Reader):
self
.
label_feat_names
=
target
+
dense_feat_names
+
sparse_feat_names
self
.
label_feat_names
=
target
+
dense_feat_names
+
sparse_feat_names
self
.
cat_feat_idx_dict_list
=
[{}
for
_
in
range
(
26
)]
self
.
cat_feat_idx_dict_list
=
[{}
for
_
in
range
(
26
)]
# TODO: set vocabulary dictionary
# TODO: set vocabulary dictionary
vocab_dir
=
envs
.
get_global_env
(
"feat_dict_name"
,
None
,
"train.reader"
)
vocab_dir
=
envs
.
get_global_env
(
"feat_dict_name"
,
None
,
"train.reader"
)
for
i
in
range
(
26
):
for
i
in
range
(
26
):
...
@@ -55,7 +54,7 @@ class TrainReader(Reader):
...
@@ -55,7 +54,7 @@ class TrainReader(Reader):
for
line
in
open
(
for
line
in
open
(
os
.
path
.
join
(
vocab_dir
,
'C'
+
str
(
i
+
1
)
+
'.txt'
)):
os
.
path
.
join
(
vocab_dir
,
'C'
+
str
(
i
+
1
)
+
'.txt'
)):
self
.
cat_feat_idx_dict_list
[
i
][
line
.
strip
()]
=
lookup_idx
self
.
cat_feat_idx_dict_list
[
i
][
line
.
strip
()]
=
lookup_idx
lookup_idx
+=
1
lookup_idx
+=
1
def
_process_line
(
self
,
line
):
def
_process_line
(
self
,
line
):
features
=
line
.
rstrip
(
'
\n
'
).
split
(
'
\t
'
)
features
=
line
.
rstrip
(
'
\n
'
).
split
(
'
\t
'
)
...
@@ -73,20 +72,21 @@ class TrainReader(Reader):
...
@@ -73,20 +72,21 @@ class TrainReader(Reader):
if
idx
==
2
else
math
.
log
(
1
+
float
(
features
[
idx
])))
if
idx
==
2
else
math
.
log
(
1
+
float
(
features
[
idx
])))
for
idx
in
self
.
cat_idx_
:
for
idx
in
self
.
cat_idx_
:
if
features
[
idx
]
==
''
or
features
[
if
features
[
idx
]
==
''
or
features
[
idx
]
not
in
self
.
cat_feat_idx_dict_list
[
idx
-
14
]:
idx
]
not
in
self
.
cat_feat_idx_dict_list
[
idx
-
14
]:
label_feat_list
[
idx
].
append
(
0
)
label_feat_list
[
idx
].
append
(
0
)
else
:
else
:
label_feat_list
[
idx
].
append
(
self
.
cat_feat_idx_dict_list
[
label_feat_list
[
idx
].
append
(
self
.
cat_feat_idx_dict_list
[
idx
-
14
][
features
[
idx
]])
idx
-
14
][
features
[
idx
]])
label_feat_list
[
0
].
append
(
int
(
features
[
0
]))
label_feat_list
[
0
].
append
(
int
(
features
[
0
]))
return
label_feat_list
return
label_feat_list
def
generate_sample
(
self
,
line
):
def
generate_sample
(
self
,
line
):
"""
"""
Read the data line by line and process it as a dictionary
Read the data line by line and process it as a dictionary
"""
"""
def
data_iter
():
def
data_iter
():
label_feat_list
=
self
.
_process_line
(
line
)
label_feat_list
=
self
.
_process_line
(
line
)
yield
list
(
zip
(
self
.
label_feat_names
,
label_feat_list
))
yield
list
(
zip
(
self
.
label_feat_names
,
label_feat_list
))
return
data_iter
return
data_iter
\ No newline at end of file
models/rank/dcn/model.py
浏览文件 @
7a3ec4e6
...
@@ -23,7 +23,7 @@ from paddlerec.core.model import Model as ModelBase
...
@@ -23,7 +23,7 @@ from paddlerec.core.model import Model as ModelBase
class
Model
(
ModelBase
):
class
Model
(
ModelBase
):
def
__init__
(
self
,
config
):
def
__init__
(
self
,
config
):
ModelBase
.
__init__
(
self
,
config
)
ModelBase
.
__init__
(
self
,
config
)
def
init_network
(
self
):
def
init_network
(
self
):
self
.
cross_num
=
envs
.
get_global_env
(
"hyper_parameters.cross_num"
,
None
,
self
.
_namespace
)
self
.
cross_num
=
envs
.
get_global_env
(
"hyper_parameters.cross_num"
,
None
,
self
.
_namespace
)
self
.
dnn_hidden_units
=
envs
.
get_global_env
(
"hyper_parameters.dnn_hidden_units"
,
None
,
self
.
_namespace
)
self
.
dnn_hidden_units
=
envs
.
get_global_env
(
"hyper_parameters.dnn_hidden_units"
,
None
,
self
.
_namespace
)
...
@@ -50,7 +50,7 @@ class Model(ModelBase):
...
@@ -50,7 +50,7 @@ class Model(ModelBase):
self
.
net_input
=
None
self
.
net_input
=
None
self
.
loss
=
None
self
.
loss
=
None
def
_create_embedding_input
(
self
,
data_dict
):
def
_create_embedding_input
(
self
,
data_dict
):
# sparse embedding
# sparse embedding
sparse_emb_dict
=
OrderedDict
((
name
,
fluid
.
embedding
(
sparse_emb_dict
=
OrderedDict
((
name
,
fluid
.
embedding
(
...
@@ -78,7 +78,7 @@ class Model(ModelBase):
...
@@ -78,7 +78,7 @@ class Model(ModelBase):
net_input
=
fluid
.
layers
.
concat
([
dense_input
,
sparse_input
],
axis
=-
1
)
net_input
=
fluid
.
layers
.
concat
([
dense_input
,
sparse_input
],
axis
=-
1
)
return
net_input
return
net_input
def
_deep_net
(
self
,
input
,
hidden_units
,
use_bn
=
False
,
is_test
=
False
):
def
_deep_net
(
self
,
input
,
hidden_units
,
use_bn
=
False
,
is_test
=
False
):
for
units
in
hidden_units
:
for
units
in
hidden_units
:
input
=
fluid
.
layers
.
fc
(
input
=
input
,
size
=
units
)
input
=
fluid
.
layers
.
fc
(
input
=
input
,
size
=
units
)
...
@@ -95,7 +95,7 @@ class Model(ModelBase):
...
@@ -95,7 +95,7 @@ class Model(ModelBase):
[
input_dim
],
dtype
=
'float32'
,
name
=
prefix
+
"_b"
)
[
input_dim
],
dtype
=
'float32'
,
name
=
prefix
+
"_b"
)
xw
=
fluid
.
layers
.
reduce_sum
(
x
*
w
,
dim
=
1
,
keep_dim
=
True
)
# (N, 1)
xw
=
fluid
.
layers
.
reduce_sum
(
x
*
w
,
dim
=
1
,
keep_dim
=
True
)
# (N, 1)
return
x0
*
xw
+
b
+
x
,
w
return
x0
*
xw
+
b
+
x
,
w
def
_cross_net
(
self
,
input
,
num_corss_layers
):
def
_cross_net
(
self
,
input
,
num_corss_layers
):
x
=
x0
=
input
x
=
x0
=
input
l2_reg_cross_list
=
[]
l2_reg_cross_list
=
[]
...
@@ -106,10 +106,10 @@ class Model(ModelBase):
...
@@ -106,10 +106,10 @@ class Model(ModelBase):
fluid
.
layers
.
concat
(
fluid
.
layers
.
concat
(
l2_reg_cross_list
,
axis
=-
1
))
l2_reg_cross_list
,
axis
=-
1
))
return
x
,
l2_reg_cross_loss
return
x
,
l2_reg_cross_loss
def
_l2_loss
(
self
,
w
):
def
_l2_loss
(
self
,
w
):
return
fluid
.
layers
.
reduce_sum
(
fluid
.
layers
.
square
(
w
))
return
fluid
.
layers
.
reduce_sum
(
fluid
.
layers
.
square
(
w
))
def
train_net
(
self
):
def
train_net
(
self
):
self
.
init_network
()
self
.
init_network
()
self
.
target_input
=
fluid
.
data
(
self
.
target_input
=
fluid
.
data
(
...
@@ -118,14 +118,14 @@ class Model(ModelBase):
...
@@ -118,14 +118,14 @@ class Model(ModelBase):
for
feat_name
in
self
.
feat_dims_dict
:
for
feat_name
in
self
.
feat_dims_dict
:
data_dict
[
feat_name
]
=
fluid
.
data
(
data_dict
[
feat_name
]
=
fluid
.
data
(
name
=
feat_name
,
shape
=
[
None
,
1
],
dtype
=
'float32'
)
name
=
feat_name
,
shape
=
[
None
,
1
],
dtype
=
'float32'
)
self
.
net_input
=
self
.
_create_embedding_input
(
data_dict
)
self
.
net_input
=
self
.
_create_embedding_input
(
data_dict
)
deep_out
=
self
.
_deep_net
(
self
.
net_input
,
self
.
dnn_hidden_units
,
self
.
dnn_use_bn
,
False
)
deep_out
=
self
.
_deep_net
(
self
.
net_input
,
self
.
dnn_hidden_units
,
self
.
dnn_use_bn
,
False
)
cross_out
,
l2_reg_cross_loss
=
self
.
_cross_net
(
self
.
net_input
,
cross_out
,
l2_reg_cross_loss
=
self
.
_cross_net
(
self
.
net_input
,
self
.
cross_num
)
self
.
cross_num
)
last_out
=
fluid
.
layers
.
concat
([
deep_out
,
cross_out
],
axis
=-
1
)
last_out
=
fluid
.
layers
.
concat
([
deep_out
,
cross_out
],
axis
=-
1
)
logit
=
fluid
.
layers
.
fc
(
last_out
,
1
)
logit
=
fluid
.
layers
.
fc
(
last_out
,
1
)
...
@@ -141,7 +141,6 @@ class Model(ModelBase):
...
@@ -141,7 +141,6 @@ class Model(ModelBase):
input
=
prob_2d
,
label
=
label_int
,
slide_steps
=
0
)
input
=
prob_2d
,
label
=
label_int
,
slide_steps
=
0
)
self
.
_metrics
[
"AUC"
]
=
auc_var
self
.
_metrics
[
"AUC"
]
=
auc_var
self
.
_metrics
[
"BATCH_AUC"
]
=
batch_auc_var
self
.
_metrics
[
"BATCH_AUC"
]
=
batch_auc_var
# logloss
# logloss
logloss
=
fluid
.
layers
.
log_loss
(
self
.
prob
,
self
.
target_input
)
logloss
=
fluid
.
layers
.
log_loss
(
self
.
prob
,
self
.
target_input
)
...
...
models/rank/deepfm/criteo_reader.py
浏览文件 @
7a3ec4e6
...
@@ -38,7 +38,7 @@ class TrainReader(Reader):
...
@@ -38,7 +38,7 @@ class TrainReader(Reader):
self
.
categorical_range_
=
range
(
14
,
40
)
self
.
categorical_range_
=
range
(
14
,
40
)
# load preprocessed feature dict
# load preprocessed feature dict
self
.
feat_dict_name
=
envs
.
get_global_env
(
"feat_dict_name"
,
None
,
"train.reader"
)
self
.
feat_dict_name
=
envs
.
get_global_env
(
"feat_dict_name"
,
None
,
"train.reader"
)
self
.
feat_dict_
=
pickle
.
load
(
open
(
self
.
feat_dict_name
,
'rb'
))
self
.
feat_dict_
=
pickle
.
load
(
open
(
self
.
feat_dict_name
,
'rb'
))
def
_process_line
(
self
,
line
):
def
_process_line
(
self
,
line
):
features
=
line
.
rstrip
(
'
\n
'
).
split
(
'
\t
'
)
features
=
line
.
rstrip
(
'
\n
'
).
split
(
'
\t
'
)
...
@@ -62,13 +62,14 @@ class TrainReader(Reader):
...
@@ -62,13 +62,14 @@ class TrainReader(Reader):
feat_value
.
append
(
1.0
)
feat_value
.
append
(
1.0
)
label
=
[
int
(
features
[
0
])]
label
=
[
int
(
features
[
0
])]
return
feat_idx
,
feat_value
,
label
return
feat_idx
,
feat_value
,
label
def
generate_sample
(
self
,
line
):
def
generate_sample
(
self
,
line
):
"""
"""
Read the data line by line and process it as a dictionary
Read the data line by line and process it as a dictionary
"""
"""
def
data_iter
():
def
data_iter
():
feat_idx
,
feat_value
,
label
=
self
.
_process_line
(
line
)
feat_idx
,
feat_value
,
label
=
self
.
_process_line
(
line
)
yield
[(
'feat_idx'
,
feat_idx
),
(
'feat_value'
,
feat_value
),
(
'label'
,
label
)]
yield
[(
'feat_idx'
,
feat_idx
),
(
'feat_value'
,
feat_value
),
(
'label'
,
label
)]
return
data_iter
return
data_iter
\ No newline at end of file
models/rank/deepfm/model.py
浏览文件 @
7a3ec4e6
...
@@ -29,26 +29,27 @@ class Model(ModelBase):
...
@@ -29,26 +29,27 @@ class Model(ModelBase):
is_distributed
=
True
if
envs
.
get_trainer
()
==
"CtrTrainer"
else
False
is_distributed
=
True
if
envs
.
get_trainer
()
==
"CtrTrainer"
else
False
sparse_feature_number
=
envs
.
get_global_env
(
"hyper_parameters.sparse_feature_number"
,
None
,
self
.
_namespace
)
sparse_feature_number
=
envs
.
get_global_env
(
"hyper_parameters.sparse_feature_number"
,
None
,
self
.
_namespace
)
sparse_feature_dim
=
envs
.
get_global_env
(
"hyper_parameters.sparse_feature_dim"
,
None
,
self
.
_namespace
)
sparse_feature_dim
=
envs
.
get_global_env
(
"hyper_parameters.sparse_feature_dim"
,
None
,
self
.
_namespace
)
# ------------------------- network input --------------------------
# ------------------------- network input --------------------------
num_field
=
envs
.
get_global_env
(
"hyper_parameters.num_field"
,
None
,
self
.
_namespace
)
num_field
=
envs
.
get_global_env
(
"hyper_parameters.num_field"
,
None
,
self
.
_namespace
)
raw_feat_idx
=
fluid
.
data
(
name
=
'feat_idx'
,
shape
=
[
None
,
num_field
],
dtype
=
'int64'
)
# None * num_field(defalut:39)
raw_feat_idx
=
fluid
.
data
(
name
=
'feat_idx'
,
shape
=
[
None
,
num_field
],
raw_feat_value
=
fluid
.
data
(
name
=
'feat_value'
,
shape
=
[
None
,
num_field
],
dtype
=
'float32'
)
# None * num_field
dtype
=
'int64'
)
# None * num_field(defalut:39)
raw_feat_value
=
fluid
.
data
(
name
=
'feat_value'
,
shape
=
[
None
,
num_field
],
dtype
=
'float32'
)
# None * num_field
self
.
label
=
fluid
.
data
(
name
=
'label'
,
shape
=
[
None
,
1
],
dtype
=
'float32'
)
# None * 1
self
.
label
=
fluid
.
data
(
name
=
'label'
,
shape
=
[
None
,
1
],
dtype
=
'float32'
)
# None * 1
feat_idx
=
fluid
.
layers
.
reshape
(
raw_feat_idx
,[
-
1
,
1
])
# (None * num_field) * 1
feat_idx
=
fluid
.
layers
.
reshape
(
raw_feat_idx
,
[
-
1
,
1
])
# (None * num_field) * 1
feat_value
=
fluid
.
layers
.
reshape
(
raw_feat_value
,
[
-
1
,
num_field
,
1
])
# None * num_field * 1
feat_value
=
fluid
.
layers
.
reshape
(
raw_feat_value
,
[
-
1
,
num_field
,
1
])
# None * num_field * 1
# ------------------------- set _data_var --------------------------
# ------------------------- set _data_var --------------------------
self
.
_data_var
.
append
(
raw_feat_idx
)
self
.
_data_var
.
append
(
raw_feat_idx
)
self
.
_data_var
.
append
(
raw_feat_value
)
self
.
_data_var
.
append
(
raw_feat_value
)
self
.
_data_var
.
append
(
self
.
label
)
self
.
_data_var
.
append
(
self
.
label
)
if
self
.
_platform
!=
"LINUX"
:
if
self
.
_platform
!=
"LINUX"
:
self
.
_data_loader
=
fluid
.
io
.
DataLoader
.
from_generator
(
self
.
_data_loader
=
fluid
.
io
.
DataLoader
.
from_generator
(
feed_list
=
self
.
_data_var
,
capacity
=
64
,
use_double_buffer
=
False
,
iterable
=
False
)
feed_list
=
self
.
_data_var
,
capacity
=
64
,
use_double_buffer
=
False
,
iterable
=
False
)
#------------------------- first order term --------------------------
#
------------------------- first order term --------------------------
reg
=
envs
.
get_global_env
(
"hyper_parameters.reg"
,
1e-4
,
self
.
_namespace
)
reg
=
envs
.
get_global_env
(
"hyper_parameters.reg"
,
1e-4
,
self
.
_namespace
)
first_weights_re
=
fluid
.
embedding
(
first_weights_re
=
fluid
.
embedding
(
...
@@ -66,7 +67,7 @@ class Model(ModelBase):
...
@@ -66,7 +67,7 @@ class Model(ModelBase):
first_weights_re
,
shape
=
[
-
1
,
num_field
,
1
])
# None * num_field * 1
first_weights_re
,
shape
=
[
-
1
,
num_field
,
1
])
# None * num_field * 1
y_first_order
=
fluid
.
layers
.
reduce_sum
((
first_weights
*
feat_value
),
1
)
y_first_order
=
fluid
.
layers
.
reduce_sum
((
first_weights
*
feat_value
),
1
)
#------------------------- second order term --------------------------
#
------------------------- second order term --------------------------
feat_embeddings_re
=
fluid
.
embedding
(
feat_embeddings_re
=
fluid
.
embedding
(
input
=
feat_idx
,
input
=
feat_idx
,
...
@@ -81,12 +82,12 @@ class Model(ModelBase):
...
@@ -81,12 +82,12 @@ class Model(ModelBase):
feat_embeddings
=
fluid
.
layers
.
reshape
(
feat_embeddings
=
fluid
.
layers
.
reshape
(
feat_embeddings_re
,
feat_embeddings_re
,
shape
=
[
-
1
,
num_field
,
shape
=
[
-
1
,
num_field
,
sparse_feature_dim
])
# None * num_field * embedding_size
sparse_feature_dim
])
# None * num_field * embedding_size
feat_embeddings
=
feat_embeddings
*
feat_value
# None * num_field * embedding_size
feat_embeddings
=
feat_embeddings
*
feat_value
# None * num_field * embedding_size
# sum_square part
# sum_square part
summed_features_emb
=
fluid
.
layers
.
reduce_sum
(
feat_embeddings
,
summed_features_emb
=
fluid
.
layers
.
reduce_sum
(
feat_embeddings
,
1
)
# None * embedding_size
1
)
# None * embedding_size
summed_features_emb_square
=
fluid
.
layers
.
square
(
summed_features_emb_square
=
fluid
.
layers
.
square
(
summed_features_emb
)
# None * embedding_size
summed_features_emb
)
# None * embedding_size
...
@@ -100,13 +101,12 @@ class Model(ModelBase):
...
@@ -100,13 +101,12 @@ class Model(ModelBase):
summed_features_emb_square
-
squared_sum_features_emb
,
1
,
summed_features_emb_square
-
squared_sum_features_emb
,
1
,
keep_dim
=
True
)
# None * 1
keep_dim
=
True
)
# None * 1
# ------------------------- DNN --------------------------
#------------------------- DNN --------------------------
layer_sizes
=
envs
.
get_global_env
(
"hyper_parameters.fc_sizes"
,
None
,
self
.
_namespace
)
layer_sizes
=
envs
.
get_global_env
(
"hyper_parameters.fc_sizes"
,
None
,
self
.
_namespace
)
act
=
envs
.
get_global_env
(
"hyper_parameters.act"
,
None
,
self
.
_namespace
)
act
=
envs
.
get_global_env
(
"hyper_parameters.act"
,
None
,
self
.
_namespace
)
y_dnn
=
fluid
.
layers
.
reshape
(
feat_embeddings
,
y_dnn
=
fluid
.
layers
.
reshape
(
feat_embeddings
,
[
-
1
,
num_field
*
sparse_feature_dim
])
[
-
1
,
num_field
*
sparse_feature_dim
])
for
s
in
layer_sizes
:
for
s
in
layer_sizes
:
y_dnn
=
fluid
.
layers
.
fc
(
y_dnn
=
fluid
.
layers
.
fc
(
input
=
y_dnn
,
input
=
y_dnn
,
...
@@ -128,28 +128,28 @@ class Model(ModelBase):
...
@@ -128,28 +128,28 @@ class Model(ModelBase):
bias_attr
=
fluid
.
ParamAttr
(
bias_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
TruncatedNormalInitializer
(
initializer
=
fluid
.
initializer
.
TruncatedNormalInitializer
(
loc
=
0.0
,
scale
=
init_value_
)))
loc
=
0.0
,
scale
=
init_value_
)))
#------------------------- DeepFM --------------------------
#
------------------------- DeepFM --------------------------
self
.
predict
=
fluid
.
layers
.
sigmoid
(
y_first_order
+
y_second_order
+
y_dnn
)
self
.
predict
=
fluid
.
layers
.
sigmoid
(
y_first_order
+
y_second_order
+
y_dnn
)
def
train_net
(
self
):
def
train_net
(
self
):
self
.
deepfm_net
()
self
.
deepfm_net
()
#------------------------- Cost(logloss) --------------------------
#
------------------------- Cost(logloss) --------------------------
cost
=
fluid
.
layers
.
log_loss
(
input
=
self
.
predict
,
label
=
self
.
label
)
cost
=
fluid
.
layers
.
log_loss
(
input
=
self
.
predict
,
label
=
self
.
label
)
avg_cost
=
fluid
.
layers
.
reduce_sum
(
cost
)
avg_cost
=
fluid
.
layers
.
reduce_sum
(
cost
)
self
.
_cost
=
avg_cost
self
.
_cost
=
avg_cost
#------------------------- Metric(Auc) --------------------------
#
------------------------- Metric(Auc) --------------------------
predict_2d
=
fluid
.
layers
.
concat
([
1
-
self
.
predict
,
self
.
predict
],
1
)
predict_2d
=
fluid
.
layers
.
concat
([
1
-
self
.
predict
,
self
.
predict
],
1
)
label_int
=
fluid
.
layers
.
cast
(
self
.
label
,
'int64'
)
label_int
=
fluid
.
layers
.
cast
(
self
.
label
,
'int64'
)
auc_var
,
batch_auc_var
,
_
=
fluid
.
layers
.
auc
(
input
=
predict_2d
,
auc_var
,
batch_auc_var
,
_
=
fluid
.
layers
.
auc
(
input
=
predict_2d
,
label
=
label_int
,
label
=
label_int
,
slide_steps
=
0
)
slide_steps
=
0
)
self
.
_metrics
[
"AUC"
]
=
auc_var
self
.
_metrics
[
"AUC"
]
=
auc_var
self
.
_metrics
[
"BATCH_AUC"
]
=
batch_auc_var
self
.
_metrics
[
"BATCH_AUC"
]
=
batch_auc_var
...
@@ -159,4 +159,4 @@ class Model(ModelBase):
...
@@ -159,4 +159,4 @@ class Model(ModelBase):
return
optimizer
return
optimizer
def
infer_net
(
self
,
parameter_list
):
def
infer_net
(
self
,
parameter_list
):
self
.
deepfm_net
()
self
.
deepfm_net
()
\ No newline at end of file
models/rank/din/reader.py
浏览文件 @
7a3ec4e6
...
@@ -32,9 +32,9 @@ class TrainReader(Reader):
...
@@ -32,9 +32,9 @@ class TrainReader(Reader):
self
.
train_data_path
=
envs
.
get_global_env
(
"train_data_path"
,
None
,
"train.reader"
)
self
.
train_data_path
=
envs
.
get_global_env
(
"train_data_path"
,
None
,
"train.reader"
)
self
.
res
=
[]
self
.
res
=
[]
self
.
max_len
=
0
self
.
max_len
=
0
data_file_list
=
os
.
listdir
(
self
.
train_data_path
)
data_file_list
=
os
.
listdir
(
self
.
train_data_path
)
for
i
in
range
(
0
,
len
(
data_file_list
)):
for
i
in
range
(
0
,
len
(
data_file_list
)):
train_data_file
=
os
.
path
.
join
(
self
.
train_data_path
,
data_file_list
[
i
])
train_data_file
=
os
.
path
.
join
(
self
.
train_data_path
,
data_file_list
[
i
])
with
open
(
train_data_file
,
"r"
)
as
fin
:
with
open
(
train_data_file
,
"r"
)
as
fin
:
for
line
in
fin
:
for
line
in
fin
:
...
@@ -47,9 +47,6 @@ class TrainReader(Reader):
...
@@ -47,9 +47,6 @@ class TrainReader(Reader):
self
.
batch_size
=
envs
.
get_global_env
(
"batch_size"
,
32
,
"train.reader"
)
self
.
batch_size
=
envs
.
get_global_env
(
"batch_size"
,
32
,
"train.reader"
)
self
.
group_size
=
self
.
batch_size
*
20
self
.
group_size
=
self
.
batch_size
*
20
def
_process_line
(
self
,
line
):
def
_process_line
(
self
,
line
):
line
=
line
.
strip
().
split
(
';'
)
line
=
line
.
strip
().
split
(
';'
)
hist
=
line
[
0
].
split
()
hist
=
line
[
0
].
split
()
...
@@ -58,22 +55,22 @@ class TrainReader(Reader):
...
@@ -58,22 +55,22 @@ class TrainReader(Reader):
cate
=
[
int
(
i
)
for
i
in
cate
]
cate
=
[
int
(
i
)
for
i
in
cate
]
return
[
hist
,
cate
,
[
int
(
line
[
2
])],
[
int
(
line
[
3
])],
[
float
(
line
[
4
])]]
return
[
hist
,
cate
,
[
int
(
line
[
2
])],
[
int
(
line
[
3
])],
[
float
(
line
[
4
])]]
def
generate_sample
(
self
,
line
):
def
generate_sample
(
self
,
line
):
"""
"""
Read the data line by line and process it as a dictionary
Read the data line by line and process it as a dictionary
"""
"""
def
data_iter
():
def
data_iter
():
#feat_idx, feat_value, label = self._process_line(line)
#
feat_idx, feat_value, label = self._process_line(line)
yield
self
.
_process_line
(
line
)
yield
self
.
_process_line
(
line
)
return
data_iter
return
data_iter
def
pad_batch_data
(
self
,
input
,
max_len
):
def
pad_batch_data
(
self
,
input
,
max_len
):
res
=
np
.
array
([
x
+
[
0
]
*
(
max_len
-
len
(
x
))
for
x
in
input
])
res
=
np
.
array
([
x
+
[
0
]
*
(
max_len
-
len
(
x
))
for
x
in
input
])
res
=
res
.
astype
(
"int64"
).
reshape
([
-
1
,
max_len
])
res
=
res
.
astype
(
"int64"
).
reshape
([
-
1
,
max_len
])
return
res
return
res
def
make_data
(
self
,
b
):
def
make_data
(
self
,
b
):
max_len
=
max
(
len
(
x
[
0
])
for
x
in
b
)
max_len
=
max
(
len
(
x
[
0
])
for
x
in
b
)
item
=
self
.
pad_batch_data
([
x
[
0
]
for
x
in
b
],
max_len
)
item
=
self
.
pad_batch_data
([
x
[
0
]
for
x
in
b
],
max_len
)
...
@@ -81,7 +78,7 @@ class TrainReader(Reader):
...
@@ -81,7 +78,7 @@ class TrainReader(Reader):
len_array
=
[
len
(
x
[
0
])
for
x
in
b
]
len_array
=
[
len
(
x
[
0
])
for
x
in
b
]
mask
=
np
.
array
(
mask
=
np
.
array
(
[[
0
]
*
x
+
[
-
1e9
]
*
(
max_len
-
x
)
for
x
in
len_array
]).
reshape
(
[[
0
]
*
x
+
[
-
1e9
]
*
(
max_len
-
x
)
for
x
in
len_array
]).
reshape
(
[
-
1
,
max_len
,
1
])
[
-
1
,
max_len
,
1
])
target_item_seq
=
np
.
array
(
target_item_seq
=
np
.
array
(
[[
x
[
2
]]
*
max_len
for
x
in
b
]).
astype
(
"int64"
).
reshape
([
-
1
,
max_len
])
[[
x
[
2
]]
*
max_len
for
x
in
b
]).
astype
(
"int64"
).
reshape
([
-
1
,
max_len
])
target_cat_seq
=
np
.
array
(
target_cat_seq
=
np
.
array
(
...
@@ -93,7 +90,7 @@ class TrainReader(Reader):
...
@@ -93,7 +90,7 @@ class TrainReader(Reader):
target_item_seq
[
i
],
target_cat_seq
[
i
]
target_item_seq
[
i
],
target_cat_seq
[
i
]
])
])
return
res
return
res
def
batch_reader
(
self
,
reader
,
batch_size
,
group_size
):
def
batch_reader
(
self
,
reader
,
batch_size
,
group_size
):
def
batch_reader
():
def
batch_reader
():
bg
=
[]
bg
=
[]
...
@@ -115,7 +112,7 @@ class TrainReader(Reader):
...
@@ -115,7 +112,7 @@ class TrainReader(Reader):
yield
self
.
make_data
(
b
)
yield
self
.
make_data
(
b
)
return
batch_reader
return
batch_reader
def
base_read
(
self
,
file_dir
):
def
base_read
(
self
,
file_dir
):
res
=
[]
res
=
[]
for
train_file
in
file_dir
:
for
train_file
in
file_dir
:
...
@@ -126,10 +123,8 @@ class TrainReader(Reader):
...
@@ -126,10 +123,8 @@ class TrainReader(Reader):
cate
=
line
[
1
].
split
()
cate
=
line
[
1
].
split
()
res
.
append
([
hist
,
cate
,
line
[
2
],
line
[
3
],
float
(
line
[
4
])])
res
.
append
([
hist
,
cate
,
line
[
2
],
line
[
3
],
float
(
line
[
4
])])
return
res
return
res
def
generate_batch_from_trainfiles
(
self
,
files
):
def
generate_batch_from_trainfiles
(
self
,
files
):
data_set
=
self
.
base_read
(
files
)
data_set
=
self
.
base_read
(
files
)
random
.
shuffle
(
data_set
)
random
.
shuffle
(
data_set
)
return
self
.
batch_reader
(
data_set
,
self
.
batch_size
,
self
.
batch_size
*
20
)
return
self
.
batch_reader
(
data_set
,
self
.
batch_size
,
self
.
batch_size
*
20
)
\ No newline at end of file
models/rank/wide_deep/model.py
浏览文件 @
7a3ec4e6
...
@@ -23,32 +23,39 @@ from paddlerec.core.model import Model as ModelBase
...
@@ -23,32 +23,39 @@ from paddlerec.core.model import Model as ModelBase
class
Model
(
ModelBase
):
class
Model
(
ModelBase
):
def
__init__
(
self
,
config
):
def
__init__
(
self
,
config
):
ModelBase
.
__init__
(
self
,
config
)
ModelBase
.
__init__
(
self
,
config
)
def
wide_part
(
self
,
data
):
def
wide_part
(
self
,
data
):
out
=
fluid
.
layers
.
fc
(
input
=
data
,
out
=
fluid
.
layers
.
fc
(
input
=
data
,
size
=
1
,
size
=
1
,
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
TruncatedNormal
(
loc
=
0.0
,
scale
=
1.0
/
math
.
sqrt
(
data
.
shape
[
1
])),
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
TruncatedNormal
(
loc
=
0.0
,
regularizer
=
fluid
.
regularizer
.
L2DecayRegularizer
(
regularization_coeff
=
1e-4
)),
scale
=
1.0
/
math
.
sqrt
(
act
=
None
,
data
.
shape
[
name
=
'wide'
)
1
])),
regularizer
=
fluid
.
regularizer
.
L2DecayRegularizer
(
regularization_coeff
=
1e-4
)),
act
=
None
,
name
=
'wide'
)
return
out
return
out
def
fc
(
self
,
data
,
hidden_units
,
active
,
tag
):
def
fc
(
self
,
data
,
hidden_units
,
active
,
tag
):
output
=
fluid
.
layers
.
fc
(
input
=
data
,
output
=
fluid
.
layers
.
fc
(
input
=
data
,
size
=
hidden_units
,
size
=
hidden_units
,
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
TruncatedNormal
(
loc
=
0.0
,
scale
=
1.0
/
math
.
sqrt
(
data
.
shape
[
1
]))),
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
TruncatedNormal
(
loc
=
0.0
,
act
=
active
,
scale
=
1.0
/
math
.
sqrt
(
name
=
tag
)
data
.
shape
[
1
]))),
act
=
active
,
name
=
tag
)
return
output
return
output
def
deep_part
(
self
,
data
,
hidden1_units
,
hidden2_units
,
hidden3_units
):
def
deep_part
(
self
,
data
,
hidden1_units
,
hidden2_units
,
hidden3_units
):
l1
=
self
.
fc
(
data
,
hidden1_units
,
'relu'
,
'l1'
)
l1
=
self
.
fc
(
data
,
hidden1_units
,
'relu'
,
'l1'
)
l2
=
self
.
fc
(
l1
,
hidden2_units
,
'relu'
,
'l2'
)
l2
=
self
.
fc
(
l1
,
hidden2_units
,
'relu'
,
'l2'
)
l3
=
self
.
fc
(
l2
,
hidden3_units
,
'relu'
,
'l3'
)
l3
=
self
.
fc
(
l2
,
hidden3_units
,
'relu'
,
'l3'
)
return
l3
return
l3
def
train_net
(
self
):
def
train_net
(
self
):
wide_input
=
fluid
.
data
(
name
=
'wide_input'
,
shape
=
[
None
,
8
],
dtype
=
'float32'
)
wide_input
=
fluid
.
data
(
name
=
'wide_input'
,
shape
=
[
None
,
8
],
dtype
=
'float32'
)
deep_input
=
fluid
.
data
(
name
=
'deep_input'
,
shape
=
[
None
,
58
],
dtype
=
'float32'
)
deep_input
=
fluid
.
data
(
name
=
'deep_input'
,
shape
=
[
None
,
58
],
dtype
=
'float32'
)
...
@@ -62,31 +69,33 @@ class Model(ModelBase):
...
@@ -62,31 +69,33 @@ class Model(ModelBase):
hidden3_units
=
envs
.
get_global_env
(
"hyper_parameters.hidden3_units"
,
25
,
self
.
_namespace
)
hidden3_units
=
envs
.
get_global_env
(
"hyper_parameters.hidden3_units"
,
25
,
self
.
_namespace
)
wide_output
=
self
.
wide_part
(
wide_input
)
wide_output
=
self
.
wide_part
(
wide_input
)
deep_output
=
self
.
deep_part
(
deep_input
,
hidden1_units
,
hidden2_units
,
hidden3_units
)
deep_output
=
self
.
deep_part
(
deep_input
,
hidden1_units
,
hidden2_units
,
hidden3_units
)
wide_model
=
fluid
.
layers
.
fc
(
input
=
wide_output
,
wide_model
=
fluid
.
layers
.
fc
(
input
=
wide_output
,
size
=
1
,
size
=
1
,
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
TruncatedNormal
(
loc
=
0.0
,
scale
=
1.0
)),
param_attr
=
fluid
.
ParamAttr
(
act
=
None
,
initializer
=
fluid
.
initializer
.
TruncatedNormal
(
loc
=
0.0
,
scale
=
1.0
)),
name
=
'w_wide'
)
act
=
None
,
name
=
'w_wide'
)
deep_model
=
fluid
.
layers
.
fc
(
input
=
deep_output
,
deep_model
=
fluid
.
layers
.
fc
(
input
=
deep_output
,
size
=
1
,
size
=
1
,
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
TruncatedNormal
(
loc
=
0.0
,
scale
=
1.0
)),
param_attr
=
fluid
.
ParamAttr
(
act
=
None
,
initializer
=
fluid
.
initializer
.
TruncatedNormal
(
loc
=
0.0
,
scale
=
1.0
)),
name
=
'w_deep'
)
act
=
None
,
name
=
'w_deep'
)
prediction
=
fluid
.
layers
.
elementwise_add
(
wide_model
,
deep_model
)
prediction
=
fluid
.
layers
.
elementwise_add
(
wide_model
,
deep_model
)
pred
=
fluid
.
layers
.
sigmoid
(
fluid
.
layers
.
clip
(
prediction
,
min
=-
15.0
,
max
=
15.0
),
name
=
"prediction"
)
pred
=
fluid
.
layers
.
sigmoid
(
fluid
.
layers
.
clip
(
prediction
,
min
=-
15.0
,
max
=
15.0
),
name
=
"prediction"
)
num_seqs
=
fluid
.
layers
.
create_tensor
(
dtype
=
'int64'
)
num_seqs
=
fluid
.
layers
.
create_tensor
(
dtype
=
'int64'
)
acc
=
fluid
.
layers
.
accuracy
(
input
=
pred
,
label
=
fluid
.
layers
.
cast
(
x
=
label
,
dtype
=
'int64'
),
total
=
num_seqs
)
acc
=
fluid
.
layers
.
accuracy
(
input
=
pred
,
label
=
fluid
.
layers
.
cast
(
x
=
label
,
dtype
=
'int64'
),
total
=
num_seqs
)
auc_var
,
batch_auc
,
auc_states
=
fluid
.
layers
.
auc
(
input
=
pred
,
label
=
fluid
.
layers
.
cast
(
x
=
label
,
dtype
=
'int64'
))
auc_var
,
batch_auc
,
auc_states
=
fluid
.
layers
.
auc
(
input
=
pred
,
label
=
fluid
.
layers
.
cast
(
x
=
label
,
dtype
=
'int64'
))
self
.
_metrics
[
"AUC"
]
=
auc_var
self
.
_metrics
[
"AUC"
]
=
auc_var
self
.
_metrics
[
"BATCH_AUC"
]
=
batch_auc
self
.
_metrics
[
"BATCH_AUC"
]
=
batch_auc
self
.
_metrics
[
"ACC"
]
=
acc
self
.
_metrics
[
"ACC"
]
=
acc
cost
=
fluid
.
layers
.
sigmoid_cross_entropy_with_logits
(
x
=
prediction
,
label
=
label
)
cost
=
fluid
.
layers
.
sigmoid_cross_entropy_with_logits
(
x
=
prediction
,
label
=
label
)
avg_cost
=
fluid
.
layers
.
mean
(
cost
)
avg_cost
=
fluid
.
layers
.
mean
(
cost
)
self
.
_cost
=
avg_cost
self
.
_cost
=
avg_cost
...
@@ -96,4 +105,4 @@ class Model(ModelBase):
...
@@ -96,4 +105,4 @@ class Model(ModelBase):
return
optimizer
return
optimizer
def
infer_net
(
self
,
parameter_list
):
def
infer_net
(
self
,
parameter_list
):
self
.
deepfm_net
()
self
.
deepfm_net
()
\ No newline at end of file
models/rank/wide_deep/reader.py
浏览文件 @
7a3ec4e6
...
@@ -30,16 +30,17 @@ class TrainReader(Reader):
...
@@ -30,16 +30,17 @@ class TrainReader(Reader):
line
=
line
.
strip
().
split
(
','
)
line
=
line
.
strip
().
split
(
','
)
features
=
list
(
map
(
float
,
line
))
features
=
list
(
map
(
float
,
line
))
wide_feat
=
features
[
0
:
8
]
wide_feat
=
features
[
0
:
8
]
deep_feat
=
features
[
8
:
58
+
8
]
deep_feat
=
features
[
8
:
58
+
8
]
label
=
features
[
-
1
]
label
=
features
[
-
1
]
return
wide_feat
,
deep_feat
,
[
label
]
return
wide_feat
,
deep_feat
,
[
label
]
def
generate_sample
(
self
,
line
):
def
generate_sample
(
self
,
line
):
"""
"""
Read the data line by line and process it as a dictionary
Read the data line by line and process it as a dictionary
"""
"""
def
data_iter
():
def
data_iter
():
wide_feat
,
deep_deat
,
label
=
self
.
_process_line
(
line
)
wide_feat
,
deep_deat
,
label
=
self
.
_process_line
(
line
)
yield
[(
'wide_input'
,
wide_feat
),
(
'deep_input'
,
deep_deat
),
(
'label'
,
label
)]
yield
[(
'wide_input'
,
wide_feat
),
(
'deep_input'
,
deep_deat
),
(
'label'
,
label
)]
return
data_iter
return
data_iter
\ No newline at end of file
models/rank/xdeepfm/criteo_reader.py
浏览文件 @
7a3ec4e6
...
@@ -22,10 +22,10 @@ except ImportError:
...
@@ -22,10 +22,10 @@ except ImportError:
from
paddlerec.core.reader
import
Reader
from
paddlerec.core.reader
import
Reader
class
TrainReader
(
Reader
):
class
TrainReader
(
Reader
):
def
init
(
self
):
def
init
(
self
):
pass
pass
def
_process_line
(
self
,
line
):
def
_process_line
(
self
,
line
):
features
=
line
.
strip
(
'
\n
'
).
split
(
'
\t
'
)
features
=
line
.
strip
(
'
\n
'
).
split
(
'
\t
'
)
feat_idx
=
[]
feat_idx
=
[]
...
@@ -35,11 +35,11 @@ class TrainReader(Reader):
...
@@ -35,11 +35,11 @@ class TrainReader(Reader):
feat_value
.
append
(
1.0
)
feat_value
.
append
(
1.0
)
label
=
[
int
(
features
[
0
])]
label
=
[
int
(
features
[
0
])]
return
feat_idx
,
feat_value
,
label
return
feat_idx
,
feat_value
,
label
def
generate_sample
(
self
,
line
):
def
generate_sample
(
self
,
line
):
def
data_iter
():
def
data_iter
():
feat_idx
,
feat_value
,
label
=
self
.
_process_line
(
line
)
feat_idx
,
feat_value
,
label
=
self
.
_process_line
(
line
)
yield
[(
'feat_idx'
,
feat_idx
),
(
'feat_value'
,
feat_value
),
(
'label'
,
yield
[(
'feat_idx'
,
feat_idx
),
(
'feat_value'
,
feat_value
),
(
'label'
,
label
)]
label
)]
return
data_iter
return
data_iter
\ No newline at end of file
models/rank/xdeepfm/model.py
浏览文件 @
7a3ec4e6
...
@@ -26,13 +26,13 @@ class Model(ModelBase):
...
@@ -26,13 +26,13 @@ class Model(ModelBase):
init_value_
=
0.1
init_value_
=
0.1
initer
=
fluid
.
initializer
.
TruncatedNormalInitializer
(
initer
=
fluid
.
initializer
.
TruncatedNormalInitializer
(
loc
=
0.0
,
scale
=
init_value_
)
loc
=
0.0
,
scale
=
init_value_
)
is_distributed
=
True
if
envs
.
get_trainer
()
==
"CtrTrainer"
else
False
is_distributed
=
True
if
envs
.
get_trainer
()
==
"CtrTrainer"
else
False
sparse_feature_number
=
envs
.
get_global_env
(
"hyper_parameters.sparse_feature_number"
,
None
,
self
.
_namespace
)
sparse_feature_number
=
envs
.
get_global_env
(
"hyper_parameters.sparse_feature_number"
,
None
,
self
.
_namespace
)
sparse_feature_dim
=
envs
.
get_global_env
(
"hyper_parameters.sparse_feature_dim"
,
None
,
self
.
_namespace
)
sparse_feature_dim
=
envs
.
get_global_env
(
"hyper_parameters.sparse_feature_dim"
,
None
,
self
.
_namespace
)
# ------------------------- network input --------------------------
# ------------------------- network input --------------------------
num_field
=
envs
.
get_global_env
(
"hyper_parameters.num_field"
,
None
,
self
.
_namespace
)
num_field
=
envs
.
get_global_env
(
"hyper_parameters.num_field"
,
None
,
self
.
_namespace
)
raw_feat_idx
=
fluid
.
data
(
name
=
'feat_idx'
,
shape
=
[
None
,
num_field
],
dtype
=
'int64'
)
raw_feat_idx
=
fluid
.
data
(
name
=
'feat_idx'
,
shape
=
[
None
,
num_field
],
dtype
=
'int64'
)
raw_feat_value
=
fluid
.
data
(
name
=
'feat_value'
,
shape
=
[
None
,
num_field
],
dtype
=
'float32'
)
raw_feat_value
=
fluid
.
data
(
name
=
'feat_value'
,
shape
=
[
None
,
num_field
],
dtype
=
'float32'
)
...
@@ -51,16 +51,16 @@ class Model(ModelBase):
...
@@ -51,16 +51,16 @@ class Model(ModelBase):
feat_embeddings
,
feat_embeddings
,
[
-
1
,
num_field
,
sparse_feature_dim
])
# None * num_field * embedding_size
[
-
1
,
num_field
,
sparse_feature_dim
])
# None * num_field * embedding_size
feat_embeddings
=
feat_embeddings
*
feat_value
# None * num_field * embedding_size
feat_embeddings
=
feat_embeddings
*
feat_value
# None * num_field * embedding_size
# ------------------------- set _data_var --------------------------
# ------------------------- set _data_var --------------------------
self
.
_data_var
.
append
(
raw_feat_idx
)
self
.
_data_var
.
append
(
raw_feat_idx
)
self
.
_data_var
.
append
(
raw_feat_value
)
self
.
_data_var
.
append
(
raw_feat_value
)
self
.
_data_var
.
append
(
self
.
label
)
self
.
_data_var
.
append
(
self
.
label
)
if
self
.
_platform
!=
"LINUX"
:
if
self
.
_platform
!=
"LINUX"
:
self
.
_data_loader
=
fluid
.
io
.
DataLoader
.
from_generator
(
self
.
_data_loader
=
fluid
.
io
.
DataLoader
.
from_generator
(
feed_list
=
self
.
_data_var
,
capacity
=
64
,
use_double_buffer
=
False
,
iterable
=
False
)
feed_list
=
self
.
_data_var
,
capacity
=
64
,
use_double_buffer
=
False
,
iterable
=
False
)
# -------------------- linear --------------------
# -------------------- linear --------------------
weights_linear
=
fluid
.
embedding
(
weights_linear
=
fluid
.
embedding
(
...
@@ -78,7 +78,7 @@ class Model(ModelBase):
...
@@ -78,7 +78,7 @@ class Model(ModelBase):
default_initializer
=
fluid
.
initializer
.
ConstantInitializer
(
value
=
0
))
default_initializer
=
fluid
.
initializer
.
ConstantInitializer
(
value
=
0
))
y_linear
=
fluid
.
layers
.
reduce_sum
(
y_linear
=
fluid
.
layers
.
reduce_sum
(
(
weights_linear
*
feat_value
),
1
)
+
b_linear
(
weights_linear
*
feat_value
),
1
)
+
b_linear
# -------------------- CIN --------------------
# -------------------- CIN --------------------
layer_sizes_cin
=
envs
.
get_global_env
(
"hyper_parameters.layer_sizes_cin"
,
None
,
self
.
_namespace
)
layer_sizes_cin
=
envs
.
get_global_env
(
"hyper_parameters.layer_sizes_cin"
,
None
,
self
.
_namespace
)
...
@@ -89,7 +89,7 @@ class Model(ModelBase):
...
@@ -89,7 +89,7 @@ class Model(ModelBase):
X_0
=
fluid
.
layers
.
reshape
(
X_0
=
fluid
.
layers
.
reshape
(
fluid
.
layers
.
transpose
(
Xs
[
0
],
[
0
,
2
,
1
]),
fluid
.
layers
.
transpose
(
Xs
[
0
],
[
0
,
2
,
1
]),
[
-
1
,
sparse_feature_dim
,
num_field
,
[
-
1
,
sparse_feature_dim
,
num_field
,
1
])
# None, embedding_size, num_field, 1
1
])
# None, embedding_size, num_field, 1
X_k
=
fluid
.
layers
.
reshape
(
X_k
=
fluid
.
layers
.
reshape
(
fluid
.
layers
.
transpose
(
Xs
[
-
1
],
[
0
,
2
,
1
]),
fluid
.
layers
.
transpose
(
Xs
[
-
1
],
[
0
,
2
,
1
]),
[
-
1
,
sparse_feature_dim
,
1
,
last_s
])
# None, embedding_size, 1, last_s
[
-
1
,
sparse_feature_dim
,
1
,
last_s
])
# None, embedding_size, 1, last_s
...
@@ -135,7 +135,7 @@ class Model(ModelBase):
...
@@ -135,7 +135,7 @@ class Model(ModelBase):
layer_sizes_dnn
=
envs
.
get_global_env
(
"hyper_parameters.layer_sizes_dnn"
,
None
,
self
.
_namespace
)
layer_sizes_dnn
=
envs
.
get_global_env
(
"hyper_parameters.layer_sizes_dnn"
,
None
,
self
.
_namespace
)
act
=
envs
.
get_global_env
(
"hyper_parameters.act"
,
None
,
self
.
_namespace
)
act
=
envs
.
get_global_env
(
"hyper_parameters.act"
,
None
,
self
.
_namespace
)
y_dnn
=
fluid
.
layers
.
reshape
(
feat_embeddings
,
y_dnn
=
fluid
.
layers
.
reshape
(
feat_embeddings
,
[
-
1
,
num_field
*
sparse_feature_dim
])
[
-
1
,
num_field
*
sparse_feature_dim
])
for
s
in
layer_sizes_dnn
:
for
s
in
layer_sizes_dnn
:
y_dnn
=
fluid
.
layers
.
fc
(
input
=
y_dnn
,
y_dnn
=
fluid
.
layers
.
fc
(
input
=
y_dnn
,
size
=
s
,
size
=
s
,
...
@@ -151,7 +151,7 @@ class Model(ModelBase):
...
@@ -151,7 +151,7 @@ class Model(ModelBase):
# ------------------- xDeepFM ------------------
# ------------------- xDeepFM ------------------
self
.
predict
=
fluid
.
layers
.
sigmoid
(
y_linear
+
y_cin
+
y_dnn
)
self
.
predict
=
fluid
.
layers
.
sigmoid
(
y_linear
+
y_cin
+
y_dnn
)
def
train_net
(
self
):
def
train_net
(
self
):
self
.
xdeepfm_net
()
self
.
xdeepfm_net
()
...
@@ -163,15 +163,15 @@ class Model(ModelBase):
...
@@ -163,15 +163,15 @@ class Model(ModelBase):
predict_2d
=
fluid
.
layers
.
concat
([
1
-
self
.
predict
,
self
.
predict
],
1
)
predict_2d
=
fluid
.
layers
.
concat
([
1
-
self
.
predict
,
self
.
predict
],
1
)
label_int
=
fluid
.
layers
.
cast
(
self
.
label
,
'int64'
)
label_int
=
fluid
.
layers
.
cast
(
self
.
label
,
'int64'
)
auc_var
,
batch_auc_var
,
_
=
fluid
.
layers
.
auc
(
input
=
predict_2d
,
auc_var
,
batch_auc_var
,
_
=
fluid
.
layers
.
auc
(
input
=
predict_2d
,
label
=
label_int
,
label
=
label_int
,
slide_steps
=
0
)
slide_steps
=
0
)
self
.
_metrics
[
"AUC"
]
=
auc_var
self
.
_metrics
[
"AUC"
]
=
auc_var
self
.
_metrics
[
"BATCH_AUC"
]
=
batch_auc_var
self
.
_metrics
[
"BATCH_AUC"
]
=
batch_auc_var
def
optimizer
(
self
):
def
optimizer
(
self
):
learning_rate
=
envs
.
get_global_env
(
"hyper_parameters.learning_rate"
,
None
,
self
.
_namespace
)
learning_rate
=
envs
.
get_global_env
(
"hyper_parameters.learning_rate"
,
None
,
self
.
_namespace
)
optimizer
=
fluid
.
optimizer
.
Adam
(
learning_rate
,
lazy_mode
=
True
)
optimizer
=
fluid
.
optimizer
.
Adam
(
learning_rate
,
lazy_mode
=
True
)
return
optimizer
return
optimizer
def
infer_net
(
self
,
parameter_list
):
def
infer_net
(
self
,
parameter_list
):
self
.
xdeepfm_net
()
self
.
xdeepfm_net
()
\ No newline at end of file
models/recall/gnn/evaluate_reader.py
浏览文件 @
7a3ec4e6
...
@@ -24,17 +24,17 @@ from paddlerec.core.utils import envs
...
@@ -24,17 +24,17 @@ from paddlerec.core.utils import envs
class
EvaluateReader
(
Reader
):
class
EvaluateReader
(
Reader
):
def
init
(
self
):
def
init
(
self
):
self
.
batch_size
=
envs
.
get_global_env
(
"batch_size"
,
None
,
"evaluate.reader"
)
self
.
batch_size
=
envs
.
get_global_env
(
"batch_size"
,
None
,
"evaluate.reader"
)
self
.
input
=
[]
self
.
input
=
[]
self
.
length
=
None
self
.
length
=
None
def
base_read
(
self
,
files
):
def
base_read
(
self
,
files
):
res
=
[]
res
=
[]
for
f
in
files
:
for
f
in
files
:
with
open
(
f
,
"r"
)
as
fin
:
with
open
(
f
,
"r"
)
as
fin
:
for
line
in
fin
:
for
line
in
fin
:
line
=
line
.
strip
().
split
(
'
\t
'
)
line
=
line
.
strip
().
split
(
'
\t
'
)
res
.
append
(
tuple
([
map
(
int
,
line
[
0
].
split
(
','
)),
int
(
line
[
1
])]))
res
.
append
(
tuple
([
map
(
int
,
line
[
0
].
split
(
','
)),
int
(
line
[
1
])]))
return
res
return
res
def
make_data
(
self
,
cur_batch
,
batch_size
):
def
make_data
(
self
,
cur_batch
,
batch_size
):
...
@@ -122,10 +122,11 @@ class EvaluateReader(Reader):
...
@@ -122,10 +122,11 @@ class EvaluateReader(Reader):
else
:
else
:
# Due to fixed batch_size, discard the remaining ins
# Due to fixed batch_size, discard the remaining ins
return
return
#cur_batch = remain_data[i:]
# cur_batch = remain_data[i:]
#yield self.make_data(cur_batch, group_remain % batch_size)
# yield self.make_data(cur_batch, group_remain % batch_size)
return
_reader
return
_reader
def
generate_batch_from_trainfiles
(
self
,
files
):
def
generate_batch_from_trainfiles
(
self
,
files
):
self
.
input
=
self
.
base_read
(
files
)
self
.
input
=
self
.
base_read
(
files
)
self
.
length
=
len
(
self
.
input
)
self
.
length
=
len
(
self
.
input
)
...
@@ -134,4 +135,5 @@ class EvaluateReader(Reader):
...
@@ -134,4 +135,5 @@ class EvaluateReader(Reader):
def
generate_sample
(
self
,
line
):
def
generate_sample
(
self
,
line
):
def
data_iter
():
def
data_iter
():
yield
[]
yield
[]
return
data_iter
return
data_iter
models/recall/gnn/model.py
浏览文件 @
7a3ec4e6
...
@@ -26,19 +26,19 @@ class Model(ModelBase):
...
@@ -26,19 +26,19 @@ class Model(ModelBase):
def
__init__
(
self
,
config
):
def
__init__
(
self
,
config
):
ModelBase
.
__init__
(
self
,
config
)
ModelBase
.
__init__
(
self
,
config
)
self
.
init_config
()
self
.
init_config
()
def
init_config
(
self
):
def
init_config
(
self
):
self
.
_fetch_interval
=
1
self
.
_fetch_interval
=
1
self
.
items_num
,
self
.
ins_num
=
self
.
config_read
(
envs
.
get_global_env
(
"hyper_parameters.config_path"
,
None
,
self
.
_namespace
))
self
.
items_num
,
self
.
ins_num
=
self
.
config_read
(
envs
.
get_global_env
(
"hyper_parameters.config_path"
,
None
,
self
.
_namespace
))
self
.
train_batch_size
=
envs
.
get_global_env
(
"batch_size"
,
None
,
"train.reader"
)
self
.
train_batch_size
=
envs
.
get_global_env
(
"batch_size"
,
None
,
"train.reader"
)
self
.
evaluate_batch_size
=
envs
.
get_global_env
(
"batch_size"
,
None
,
"evaluate.reader"
)
self
.
evaluate_batch_size
=
envs
.
get_global_env
(
"batch_size"
,
None
,
"evaluate.reader"
)
self
.
hidden_size
=
envs
.
get_global_env
(
"hyper_parameters.sparse_feature_dim"
,
None
,
self
.
_namespace
)
self
.
hidden_size
=
envs
.
get_global_env
(
"hyper_parameters.sparse_feature_dim"
,
None
,
self
.
_namespace
)
self
.
step
=
envs
.
get_global_env
(
"hyper_parameters.gnn_propogation_steps"
,
None
,
self
.
_namespace
)
self
.
step
=
envs
.
get_global_env
(
"hyper_parameters.gnn_propogation_steps"
,
None
,
self
.
_namespace
)
def
config_read
(
self
,
config_path
=
None
):
def
config_read
(
self
,
config_path
=
None
):
if
config_path
is
None
:
if
config_path
is
None
:
raise
ValueError
(
"please set train.model.hyper_parameters.config_path at first"
)
raise
ValueError
(
"please set train.model.hyper_parameters.config_path at first"
)
with
open
(
config_path
,
"r"
)
as
fin
:
with
open
(
config_path
,
"r"
)
as
fin
:
item_nums
=
int
(
fin
.
readline
().
strip
())
item_nums
=
int
(
fin
.
readline
().
strip
())
ins_nums
=
int
(
fin
.
readline
().
strip
())
ins_nums
=
int
(
fin
.
readline
().
strip
())
...
@@ -48,49 +48,49 @@ class Model(ModelBase):
...
@@ -48,49 +48,49 @@ class Model(ModelBase):
self
.
items
=
fluid
.
data
(
self
.
items
=
fluid
.
data
(
name
=
"items"
,
name
=
"items"
,
shape
=
[
bs
,
-
1
],
shape
=
[
bs
,
-
1
],
dtype
=
"int64"
)
#
[batch_size, uniq_max]
dtype
=
"int64"
)
#
[batch_size, uniq_max]
self
.
seq_index
=
fluid
.
data
(
self
.
seq_index
=
fluid
.
data
(
name
=
"seq_index"
,
name
=
"seq_index"
,
shape
=
[
bs
,
-
1
,
2
],
shape
=
[
bs
,
-
1
,
2
],
dtype
=
"int32"
)
#
[batch_size, seq_max, 2]
dtype
=
"int32"
)
#
[batch_size, seq_max, 2]
self
.
last_index
=
fluid
.
data
(
self
.
last_index
=
fluid
.
data
(
name
=
"last_index"
,
name
=
"last_index"
,
shape
=
[
bs
,
2
],
shape
=
[
bs
,
2
],
dtype
=
"int32"
)
#
[batch_size, 2]
dtype
=
"int32"
)
#
[batch_size, 2]
self
.
adj_in
=
fluid
.
data
(
self
.
adj_in
=
fluid
.
data
(
name
=
"adj_in"
,
name
=
"adj_in"
,
shape
=
[
bs
,
-
1
,
-
1
],
shape
=
[
bs
,
-
1
,
-
1
],
dtype
=
"float32"
)
#
[batch_size, seq_max, seq_max]
dtype
=
"float32"
)
#
[batch_size, seq_max, seq_max]
self
.
adj_out
=
fluid
.
data
(
self
.
adj_out
=
fluid
.
data
(
name
=
"adj_out"
,
name
=
"adj_out"
,
shape
=
[
bs
,
-
1
,
-
1
],
shape
=
[
bs
,
-
1
,
-
1
],
dtype
=
"float32"
)
#
[batch_size, seq_max, seq_max]
dtype
=
"float32"
)
#
[batch_size, seq_max, seq_max]
self
.
mask
=
fluid
.
data
(
self
.
mask
=
fluid
.
data
(
name
=
"mask"
,
name
=
"mask"
,
shape
=
[
bs
,
-
1
,
1
],
shape
=
[
bs
,
-
1
,
1
],
dtype
=
"float32"
)
#
[batch_size, seq_max, 1]
dtype
=
"float32"
)
#
[batch_size, seq_max, 1]
self
.
label
=
fluid
.
data
(
self
.
label
=
fluid
.
data
(
name
=
"label"
,
name
=
"label"
,
shape
=
[
bs
,
1
],
shape
=
[
bs
,
1
],
dtype
=
"int64"
)
#[batch_size, 1]
dtype
=
"int64"
)
# [batch_size, 1]
res
=
[
self
.
items
,
self
.
seq_index
,
self
.
last_index
,
self
.
adj_in
,
self
.
adj_out
,
self
.
mask
,
self
.
label
]
res
=
[
self
.
items
,
self
.
seq_index
,
self
.
last_index
,
self
.
adj_in
,
self
.
adj_out
,
self
.
mask
,
self
.
label
]
return
res
return
res
def
train_input
(
self
):
def
train_input
(
self
):
res
=
self
.
input
(
self
.
train_batch_size
)
res
=
self
.
input
(
self
.
train_batch_size
)
self
.
_data_var
=
res
self
.
_data_var
=
res
use_dataloader
=
envs
.
get_global_env
(
"hyper_parameters.use_DataLoader"
,
False
,
self
.
_namespace
)
use_dataloader
=
envs
.
get_global_env
(
"hyper_parameters.use_DataLoader"
,
False
,
self
.
_namespace
)
if
self
.
_platform
!=
"LINUX"
or
use_dataloader
:
if
self
.
_platform
!=
"LINUX"
or
use_dataloader
:
self
.
_data_loader
=
fluid
.
io
.
DataLoader
.
from_generator
(
self
.
_data_loader
=
fluid
.
io
.
DataLoader
.
from_generator
(
feed_list
=
self
.
_data_var
,
capacity
=
256
,
use_double_buffer
=
False
,
iterable
=
False
)
feed_list
=
self
.
_data_var
,
capacity
=
256
,
use_double_buffer
=
False
,
iterable
=
False
)
def
net
(
self
,
items_num
,
hidden_size
,
step
,
bs
):
def
net
(
self
,
items_num
,
hidden_size
,
step
,
bs
):
stdv
=
1.0
/
math
.
sqrt
(
hidden_size
)
stdv
=
1.0
/
math
.
sqrt
(
hidden_size
)
def
embedding_layer
(
input
,
table_name
,
emb_dim
,
initializer_instance
=
None
):
def
embedding_layer
(
input
,
table_name
,
emb_dim
,
initializer_instance
=
None
):
emb
=
fluid
.
embedding
(
emb
=
fluid
.
embedding
(
input
=
input
,
input
=
input
,
size
=
[
items_num
,
emb_dim
],
size
=
[
items_num
,
emb_dim
],
...
@@ -98,10 +98,10 @@ class Model(ModelBase):
...
@@ -98,10 +98,10 @@ class Model(ModelBase):
name
=
table_name
,
name
=
table_name
,
initializer
=
initializer_instance
),
initializer
=
initializer_instance
),
)
)
return
emb
return
emb
sparse_initializer
=
fluid
.
initializer
.
Uniform
(
low
=-
stdv
,
high
=
stdv
)
sparse_initializer
=
fluid
.
initializer
.
Uniform
(
low
=-
stdv
,
high
=
stdv
)
items_emb
=
embedding_layer
(
self
.
items
,
"emb"
,
hidden_size
,
sparse_initializer
)
items_emb
=
embedding_layer
(
self
.
items
,
"emb"
,
hidden_size
,
sparse_initializer
)
pre_state
=
items_emb
pre_state
=
items_emb
for
i
in
range
(
step
):
for
i
in
range
(
step
):
pre_state
=
layers
.
reshape
(
x
=
pre_state
,
shape
=
[
bs
,
-
1
,
hidden_size
])
pre_state
=
layers
.
reshape
(
x
=
pre_state
,
shape
=
[
bs
,
-
1
,
hidden_size
])
...
@@ -114,7 +114,7 @@ class Model(ModelBase):
...
@@ -114,7 +114,7 @@ class Model(ModelBase):
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Uniform
(
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Uniform
(
low
=-
stdv
,
high
=
stdv
)),
low
=-
stdv
,
high
=
stdv
)),
bias_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Uniform
(
bias_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Uniform
(
low
=-
stdv
,
high
=
stdv
)))
#[batch_size, uniq_max, h]
low
=-
stdv
,
high
=
stdv
)))
#
[batch_size, uniq_max, h]
state_out
=
layers
.
fc
(
state_out
=
layers
.
fc
(
input
=
pre_state
,
input
=
pre_state
,
name
=
"state_out"
,
name
=
"state_out"
,
...
@@ -124,13 +124,13 @@ class Model(ModelBase):
...
@@ -124,13 +124,13 @@ class Model(ModelBase):
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Uniform
(
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Uniform
(
low
=-
stdv
,
high
=
stdv
)),
low
=-
stdv
,
high
=
stdv
)),
bias_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Uniform
(
bias_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Uniform
(
low
=-
stdv
,
high
=
stdv
)))
#[batch_size, uniq_max, h]
low
=-
stdv
,
high
=
stdv
)))
#
[batch_size, uniq_max, h]
state_adj_in
=
layers
.
matmul
(
self
.
adj_in
,
state_in
)
#[batch_size, uniq_max, h]
state_adj_in
=
layers
.
matmul
(
self
.
adj_in
,
state_in
)
#
[batch_size, uniq_max, h]
state_adj_out
=
layers
.
matmul
(
self
.
adj_out
,
state_out
)
#
[batch_size, uniq_max, h]
state_adj_out
=
layers
.
matmul
(
self
.
adj_out
,
state_out
)
#
[batch_size, uniq_max, h]
gru_input
=
layers
.
concat
([
state_adj_in
,
state_adj_out
],
axis
=
2
)
gru_input
=
layers
.
concat
([
state_adj_in
,
state_adj_out
],
axis
=
2
)
gru_input
=
layers
.
reshape
(
x
=
gru_input
,
shape
=
[
-
1
,
hidden_size
*
2
])
gru_input
=
layers
.
reshape
(
x
=
gru_input
,
shape
=
[
-
1
,
hidden_size
*
2
])
gru_fc
=
layers
.
fc
(
gru_fc
=
layers
.
fc
(
input
=
gru_input
,
input
=
gru_input
,
...
@@ -141,11 +141,11 @@ class Model(ModelBase):
...
@@ -141,11 +141,11 @@ class Model(ModelBase):
input
=
gru_fc
,
input
=
gru_fc
,
hidden
=
layers
.
reshape
(
x
=
pre_state
,
shape
=
[
-
1
,
hidden_size
]),
hidden
=
layers
.
reshape
(
x
=
pre_state
,
shape
=
[
-
1
,
hidden_size
]),
size
=
3
*
hidden_size
)
size
=
3
*
hidden_size
)
final_state
=
layers
.
reshape
(
pre_state
,
shape
=
[
bs
,
-
1
,
hidden_size
])
final_state
=
layers
.
reshape
(
pre_state
,
shape
=
[
bs
,
-
1
,
hidden_size
])
seq
=
layers
.
gather_nd
(
final_state
,
self
.
seq_index
)
seq
=
layers
.
gather_nd
(
final_state
,
self
.
seq_index
)
last
=
layers
.
gather_nd
(
final_state
,
self
.
last_index
)
last
=
layers
.
gather_nd
(
final_state
,
self
.
last_index
)
seq_fc
=
layers
.
fc
(
seq_fc
=
layers
.
fc
(
input
=
seq
,
input
=
seq
,
name
=
"seq_fc"
,
name
=
"seq_fc"
,
...
@@ -155,7 +155,7 @@ class Model(ModelBase):
...
@@ -155,7 +155,7 @@ class Model(ModelBase):
num_flatten_dims
=
2
,
num_flatten_dims
=
2
,
param_attr
=
fluid
.
ParamAttr
(
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Uniform
(
initializer
=
fluid
.
initializer
.
Uniform
(
low
=-
stdv
,
high
=
stdv
)))
#
[batch_size, seq_max, h]
low
=-
stdv
,
high
=
stdv
)))
#
[batch_size, seq_max, h]
last_fc
=
layers
.
fc
(
last_fc
=
layers
.
fc
(
input
=
last
,
input
=
last
,
name
=
"last_fc"
,
name
=
"last_fc"
,
...
@@ -165,22 +165,22 @@ class Model(ModelBase):
...
@@ -165,22 +165,22 @@ class Model(ModelBase):
num_flatten_dims
=
1
,
num_flatten_dims
=
1
,
param_attr
=
fluid
.
ParamAttr
(
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Uniform
(
initializer
=
fluid
.
initializer
.
Uniform
(
low
=-
stdv
,
high
=
stdv
)))
#
[bathc_size, h]
low
=-
stdv
,
high
=
stdv
)))
#
[bathc_size, h]
seq_fc_t
=
layers
.
transpose
(
seq_fc_t
=
layers
.
transpose
(
seq_fc
,
perm
=
[
1
,
0
,
2
])
#[seq_max, batch_size, h]
seq_fc
,
perm
=
[
1
,
0
,
2
])
#
[seq_max, batch_size, h]
add
=
layers
.
elementwise_add
(
add
=
layers
.
elementwise_add
(
seq_fc_t
,
last_fc
)
#[seq_max, batch_size, h]
seq_fc_t
,
last_fc
)
#
[seq_max, batch_size, h]
b
=
layers
.
create_parameter
(
b
=
layers
.
create_parameter
(
shape
=
[
hidden_size
],
shape
=
[
hidden_size
],
dtype
=
'float32'
,
dtype
=
'float32'
,
default_initializer
=
fluid
.
initializer
.
Constant
(
value
=
0.0
))
#[h]
default_initializer
=
fluid
.
initializer
.
Constant
(
value
=
0.0
))
#
[h]
add
=
layers
.
elementwise_add
(
add
,
b
)
#[seq_max, batch_size, h]
add
=
layers
.
elementwise_add
(
add
,
b
)
#
[seq_max, batch_size, h]
add_sigmoid
=
layers
.
sigmoid
(
add
)
#[seq_max, batch_size, h]
add_sigmoid
=
layers
.
sigmoid
(
add
)
# [seq_max, batch_size, h]
add_sigmoid
=
layers
.
transpose
(
add_sigmoid
=
layers
.
transpose
(
add_sigmoid
,
perm
=
[
1
,
0
,
2
])
#[batch_size, seq_max, h]
add_sigmoid
,
perm
=
[
1
,
0
,
2
])
#
[batch_size, seq_max, h]
weight
=
layers
.
fc
(
weight
=
layers
.
fc
(
input
=
add_sigmoid
,
input
=
add_sigmoid
,
name
=
"weight_fc"
,
name
=
"weight_fc"
,
...
@@ -190,13 +190,13 @@ class Model(ModelBase):
...
@@ -190,13 +190,13 @@ class Model(ModelBase):
bias_attr
=
False
,
bias_attr
=
False
,
param_attr
=
fluid
.
ParamAttr
(
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Uniform
(
initializer
=
fluid
.
initializer
.
Uniform
(
low
=-
stdv
,
high
=
stdv
)))
#[batch_size, seq_max, 1]
low
=-
stdv
,
high
=
stdv
)))
#
[batch_size, seq_max, 1]
weight
*=
self
.
mask
weight
*=
self
.
mask
weight_mask
=
layers
.
elementwise_mul
(
seq
,
weight
,
axis
=
0
)
#
[batch_size, seq_max, h]
weight_mask
=
layers
.
elementwise_mul
(
seq
,
weight
,
axis
=
0
)
#
[batch_size, seq_max, h]
global_attention
=
layers
.
reduce_sum
(
weight_mask
,
dim
=
1
)
#
[batch_size, h]
global_attention
=
layers
.
reduce_sum
(
weight_mask
,
dim
=
1
)
#
[batch_size, h]
final_attention
=
layers
.
concat
(
final_attention
=
layers
.
concat
(
[
global_attention
,
last
],
axis
=
1
)
#[batch_size, 2*h]
[
global_attention
,
last
],
axis
=
1
)
#
[batch_size, 2*h]
final_attention_fc
=
layers
.
fc
(
final_attention_fc
=
layers
.
fc
(
input
=
final_attention
,
input
=
final_attention
,
name
=
"final_attention_fc"
,
name
=
"final_attention_fc"
,
...
@@ -204,14 +204,14 @@ class Model(ModelBase):
...
@@ -204,14 +204,14 @@ class Model(ModelBase):
bias_attr
=
False
,
bias_attr
=
False
,
act
=
None
,
act
=
None
,
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Uniform
(
param_attr
=
fluid
.
ParamAttr
(
initializer
=
fluid
.
initializer
.
Uniform
(
low
=-
stdv
,
high
=
stdv
)))
#[batch_size, h]
low
=-
stdv
,
high
=
stdv
)))
#
[batch_size, h]
# all_vocab = layers.create_global_var(
# all_vocab = layers.create_global_var(
# shape=[items_num - 1],
# shape=[items_num - 1],
# value=0,
# value=0,
# dtype="int64",
# dtype="int64",
# persistable=True,
# persistable=True,
# name="all_vocab")
# name="all_vocab")
all_vocab
=
np
.
arange
(
1
,
items_num
).
reshape
((
-
1
)).
astype
(
'int32'
)
all_vocab
=
np
.
arange
(
1
,
items_num
).
reshape
((
-
1
)).
astype
(
'int32'
)
all_vocab
=
fluid
.
layers
.
cast
(
x
=
fluid
.
layers
.
assign
(
all_vocab
),
dtype
=
'int64'
)
all_vocab
=
fluid
.
layers
.
cast
(
x
=
fluid
.
layers
.
assign
(
all_vocab
),
dtype
=
'int64'
)
...
@@ -221,13 +221,13 @@ class Model(ModelBase):
...
@@ -221,13 +221,13 @@ class Model(ModelBase):
name
=
"emb"
,
name
=
"emb"
,
initializer
=
fluid
.
initializer
.
Uniform
(
initializer
=
fluid
.
initializer
.
Uniform
(
low
=-
stdv
,
high
=
stdv
)),
low
=-
stdv
,
high
=
stdv
)),
size
=
[
items_num
,
hidden_size
])
#[all_vocab, h]
size
=
[
items_num
,
hidden_size
])
#
[all_vocab, h]
logits
=
layers
.
matmul
(
logits
=
layers
.
matmul
(
x
=
final_attention_fc
,
y
=
all_emb
,
x
=
final_attention_fc
,
y
=
all_emb
,
transpose_y
=
True
)
#[batch_size, all_vocab]
transpose_y
=
True
)
#
[batch_size, all_vocab]
softmax
=
layers
.
softmax_with_cross_entropy
(
softmax
=
layers
.
softmax_with_cross_entropy
(
logits
=
logits
,
label
=
self
.
label
)
#[batch_size, 1]
logits
=
logits
,
label
=
self
.
label
)
#
[batch_size, 1]
self
.
loss
=
layers
.
reduce_mean
(
softmax
)
# [1]
self
.
loss
=
layers
.
reduce_mean
(
softmax
)
# [1]
self
.
acc
=
layers
.
accuracy
(
input
=
logits
,
label
=
self
.
label
,
k
=
20
)
self
.
acc
=
layers
.
accuracy
(
input
=
logits
,
label
=
self
.
label
,
k
=
20
)
...
@@ -250,7 +250,7 @@ class Model(ModelBase):
...
@@ -250,7 +250,7 @@ class Model(ModelBase):
decay_steps
=
envs
.
get_global_env
(
"hyper_parameters.decay_steps"
,
None
,
self
.
_namespace
)
decay_steps
=
envs
.
get_global_env
(
"hyper_parameters.decay_steps"
,
None
,
self
.
_namespace
)
decay_rate
=
envs
.
get_global_env
(
"hyper_parameters.decay_rate"
,
None
,
self
.
_namespace
)
decay_rate
=
envs
.
get_global_env
(
"hyper_parameters.decay_rate"
,
None
,
self
.
_namespace
)
l2
=
envs
.
get_global_env
(
"hyper_parameters.l2"
,
None
,
self
.
_namespace
)
l2
=
envs
.
get_global_env
(
"hyper_parameters.l2"
,
None
,
self
.
_namespace
)
optimizer
=
fluid
.
optimizer
.
Adam
(
optimizer
=
fluid
.
optimizer
.
Adam
(
learning_rate
=
fluid
.
layers
.
exponential_decay
(
learning_rate
=
fluid
.
layers
.
exponential_decay
(
learning_rate
=
learning_rate
,
learning_rate
=
learning_rate
,
decay_steps
=
decay_steps
*
step_per_epoch
,
decay_steps
=
decay_steps
*
step_per_epoch
,
...
@@ -258,18 +258,18 @@ class Model(ModelBase):
...
@@ -258,18 +258,18 @@ class Model(ModelBase):
regularization
=
fluid
.
regularizer
.
L2DecayRegularizer
(
regularization
=
fluid
.
regularizer
.
L2DecayRegularizer
(
regularization_coeff
=
l2
))
regularization_coeff
=
l2
))
return
optimizer
return
optimizer
def
infer_input
(
self
):
def
infer_input
(
self
):
self
.
_reader_namespace
=
"evaluate.reader"
self
.
_reader_namespace
=
"evaluate.reader"
res
=
self
.
input
(
self
.
evaluate_batch_size
)
res
=
self
.
input
(
self
.
evaluate_batch_size
)
self
.
_infer_data_var
=
res
self
.
_infer_data_var
=
res
self
.
_infer_data_loader
=
fluid
.
io
.
DataLoader
.
from_generator
(
self
.
_infer_data_loader
=
fluid
.
io
.
DataLoader
.
from_generator
(
feed_list
=
self
.
_infer_data_var
,
capacity
=
64
,
use_double_buffer
=
False
,
iterable
=
False
)
feed_list
=
self
.
_infer_data_var
,
capacity
=
64
,
use_double_buffer
=
False
,
iterable
=
False
)
def
infer_net
(
self
):
def
infer_net
(
self
):
self
.
infer_input
()
self
.
infer_input
()
self
.
net
(
self
.
items_num
,
self
.
hidden_size
,
self
.
step
,
self
.
evaluate_batch_size
)
self
.
net
(
self
.
items_num
,
self
.
hidden_size
,
self
.
step
,
self
.
evaluate_batch_size
)
self
.
_infer_results
[
'acc'
]
=
self
.
acc
self
.
_infer_results
[
'acc'
]
=
self
.
acc
self
.
_infer_results
[
'loss'
]
=
self
.
loss
self
.
_infer_results
[
'loss'
]
=
self
.
loss
models/recall/gnn/reader.py
浏览文件 @
7a3ec4e6
...
@@ -24,17 +24,17 @@ from paddlerec.core.utils import envs
...
@@ -24,17 +24,17 @@ from paddlerec.core.utils import envs
class
TrainReader
(
Reader
):
class
TrainReader
(
Reader
):
def
init
(
self
):
def
init
(
self
):
self
.
batch_size
=
envs
.
get_global_env
(
"batch_size"
,
None
,
"train.reader"
)
self
.
batch_size
=
envs
.
get_global_env
(
"batch_size"
,
None
,
"train.reader"
)
self
.
input
=
[]
self
.
input
=
[]
self
.
length
=
None
self
.
length
=
None
def
base_read
(
self
,
files
):
def
base_read
(
self
,
files
):
res
=
[]
res
=
[]
for
f
in
files
:
for
f
in
files
:
with
open
(
f
,
"r"
)
as
fin
:
with
open
(
f
,
"r"
)
as
fin
:
for
line
in
fin
:
for
line
in
fin
:
line
=
line
.
strip
().
split
(
'
\t
'
)
line
=
line
.
strip
().
split
(
'
\t
'
)
res
.
append
(
tuple
([
map
(
int
,
line
[
0
].
split
(
','
)),
int
(
line
[
1
])]))
res
.
append
(
tuple
([
map
(
int
,
line
[
0
].
split
(
','
)),
int
(
line
[
1
])]))
return
res
return
res
def
make_data
(
self
,
cur_batch
,
batch_size
):
def
make_data
(
self
,
cur_batch
,
batch_size
):
...
@@ -122,10 +122,11 @@ class TrainReader(Reader):
...
@@ -122,10 +122,11 @@ class TrainReader(Reader):
else
:
else
:
# Due to fixed batch_size, discard the remaining ins
# Due to fixed batch_size, discard the remaining ins
return
return
#cur_batch = remain_data[i:]
# cur_batch = remain_data[i:]
#yield self.make_data(cur_batch, group_remain % batch_size)
# yield self.make_data(cur_batch, group_remain % batch_size)
return
_reader
return
_reader
def
generate_batch_from_trainfiles
(
self
,
files
):
def
generate_batch_from_trainfiles
(
self
,
files
):
self
.
input
=
self
.
base_read
(
files
)
self
.
input
=
self
.
base_read
(
files
)
self
.
length
=
len
(
self
.
input
)
self
.
length
=
len
(
self
.
input
)
...
@@ -134,4 +135,5 @@ class TrainReader(Reader):
...
@@ -134,4 +135,5 @@ class TrainReader(Reader):
def
generate_sample
(
self
,
line
):
def
generate_sample
(
self
,
line
):
def
data_iter
():
def
data_iter
():
yield
[]
yield
[]
return
data_iter
return
data_iter
models/recall/gru4rec/model.py
浏览文件 @
7a3ec4e6
...
@@ -86,10 +86,8 @@ class Model(ModelBase):
...
@@ -86,10 +86,8 @@ class Model(ModelBase):
self
.
_metrics
[
"cost"
]
=
avg_cost
self
.
_metrics
[
"cost"
]
=
avg_cost
self
.
_metrics
[
"acc"
]
=
acc
self
.
_metrics
[
"acc"
]
=
acc
def
train_net
(
self
):
def
train_net
(
self
):
self
.
all_vocab_network
()
self
.
all_vocab_network
()
def
infer_net
(
self
):
def
infer_net
(
self
):
self
.
all_vocab_network
(
is_infer
=
True
)
self
.
all_vocab_network
(
is_infer
=
True
)
models/recall/ssr/model.py
浏览文件 @
7a3ec4e6
...
@@ -51,6 +51,7 @@ class GrnnEncoder(object):
...
@@ -51,6 +51,7 @@ class GrnnEncoder(object):
bias_attr
=
self
.
param_name
+
".bias"
)
bias_attr
=
self
.
param_name
+
".bias"
)
return
fluid
.
layers
.
sequence_pool
(
input
=
gru_h
,
pool_type
=
'max'
)
return
fluid
.
layers
.
sequence_pool
(
input
=
gru_h
,
pool_type
=
'max'
)
class
PairwiseHingeLoss
(
object
):
class
PairwiseHingeLoss
(
object
):
def
__init__
(
self
,
margin
=
0.8
):
def
__init__
(
self
,
margin
=
0.8
):
self
.
margin
=
margin
self
.
margin
=
margin
...
@@ -67,6 +68,7 @@ class PairwiseHingeLoss(object):
...
@@ -67,6 +68,7 @@ class PairwiseHingeLoss(object):
loss_part2
)
loss_part2
)
return
loss_part3
return
loss_part3
class
Model
(
ModelBase
):
class
Model
(
ModelBase
):
def
__init__
(
self
,
config
):
def
__init__
(
self
,
config
):
ModelBase
.
__init__
(
self
,
config
)
ModelBase
.
__init__
(
self
,
config
)
...
@@ -77,7 +79,6 @@ class Model(ModelBase):
...
@@ -77,7 +79,6 @@ class Model(ModelBase):
return
correct
return
correct
def
train
(
self
):
def
train
(
self
):
vocab_size
=
envs
.
get_global_env
(
"hyper_parameters.vocab_size"
,
None
,
self
.
_namespace
)
vocab_size
=
envs
.
get_global_env
(
"hyper_parameters.vocab_size"
,
None
,
self
.
_namespace
)
emb_dim
=
envs
.
get_global_env
(
"hyper_parameters.emb_dim"
,
None
,
self
.
_namespace
)
emb_dim
=
envs
.
get_global_env
(
"hyper_parameters.emb_dim"
,
None
,
self
.
_namespace
)
hidden_size
=
envs
.
get_global_env
(
"hyper_parameters.hidden_size"
,
None
,
self
.
_namespace
)
hidden_size
=
envs
.
get_global_env
(
"hyper_parameters.hidden_size"
,
None
,
self
.
_namespace
)
...
@@ -121,16 +122,14 @@ class Model(ModelBase):
...
@@ -121,16 +122,14 @@ class Model(ModelBase):
hinge_loss
=
self
.
pairwise_hinge_loss
.
forward
(
cos_pos
,
cos_neg
)
hinge_loss
=
self
.
pairwise_hinge_loss
.
forward
(
cos_pos
,
cos_neg
)
avg_cost
=
fluid
.
layers
.
mean
(
hinge_loss
)
avg_cost
=
fluid
.
layers
.
mean
(
hinge_loss
)
correct
=
self
.
get_correct
(
cos_neg
,
cos_pos
)
correct
=
self
.
get_correct
(
cos_neg
,
cos_pos
)
self
.
_cost
=
avg_cost
self
.
_cost
=
avg_cost
self
.
_metrics
[
"correct"
]
=
correct
self
.
_metrics
[
"correct"
]
=
correct
self
.
_metrics
[
"hinge_loss"
]
=
hinge_loss
self
.
_metrics
[
"hinge_loss"
]
=
hinge_loss
def
train_net
(
self
):
def
train_net
(
self
):
self
.
train
()
self
.
train
()
def
infer
(
self
):
def
infer
(
self
):
vocab_size
=
envs
.
get_global_env
(
"hyper_parameters.vocab_size"
,
None
,
self
.
_namespace
)
vocab_size
=
envs
.
get_global_env
(
"hyper_parameters.vocab_size"
,
None
,
self
.
_namespace
)
emb_dim
=
envs
.
get_global_env
(
"hyper_parameters.emb_dim"
,
None
,
self
.
_namespace
)
emb_dim
=
envs
.
get_global_env
(
"hyper_parameters.emb_dim"
,
None
,
self
.
_namespace
)
...
@@ -143,7 +142,7 @@ class Model(ModelBase):
...
@@ -143,7 +142,7 @@ class Model(ModelBase):
pos_label
=
fluid
.
data
(
name
=
"pos_label"
,
shape
=
[
None
,
1
],
dtype
=
"int64"
)
pos_label
=
fluid
.
data
(
name
=
"pos_label"
,
shape
=
[
None
,
1
],
dtype
=
"int64"
)
self
.
_infer_data_var
=
[
user_data
,
all_item_data
,
pos_label
]
self
.
_infer_data_var
=
[
user_data
,
all_item_data
,
pos_label
]
self
.
_infer_data_loader
=
fluid
.
io
.
DataLoader
.
from_generator
(
self
.
_infer_data_loader
=
fluid
.
io
.
DataLoader
.
from_generator
(
feed_list
=
self
.
_infer_data_var
,
capacity
=
64
,
use_double_buffer
=
False
,
iterable
=
False
)
feed_list
=
self
.
_infer_data_var
,
capacity
=
64
,
use_double_buffer
=
False
,
iterable
=
False
)
user_emb
=
fluid
.
embedding
(
user_emb
=
fluid
.
embedding
(
input
=
user_data
,
size
=
[
vocab_size
,
emb_dim
],
param_attr
=
"emb.item"
)
input
=
user_data
,
size
=
[
vocab_size
,
emb_dim
],
param_attr
=
"emb.item"
)
...
@@ -170,6 +169,5 @@ class Model(ModelBase):
...
@@ -170,6 +169,5 @@ class Model(ModelBase):
self
.
_infer_results
[
'recall20'
]
=
acc
self
.
_infer_results
[
'recall20'
]
=
acc
def
infer_net
(
self
):
def
infer_net
(
self
):
self
.
infer
()
self
.
infer
()
models/recall/ssr/ssr_infer_reader.py
浏览文件 @
7a3ec4e6
...
@@ -20,12 +20,10 @@ from paddlerec.core.reader import Reader
...
@@ -20,12 +20,10 @@ from paddlerec.core.reader import Reader
from
paddlerec.core.utils
import
envs
from
paddlerec.core.utils
import
envs
class
EvaluateReader
(
Reader
):
class
EvaluateReader
(
Reader
):
def
init
(
self
):
def
init
(
self
):
self
.
vocab_size
=
envs
.
get_global_env
(
"vocab_size"
,
10
,
"train.model.hyper_parameters"
)
self
.
vocab_size
=
envs
.
get_global_env
(
"vocab_size"
,
10
,
"train.model.hyper_parameters"
)
def
generate_sample
(
self
,
line
):
def
generate_sample
(
self
,
line
):
"""
"""
Read the data line by line and process it as a dictionary
Read the data line by line and process it as a dictionary
...
@@ -41,6 +39,6 @@ class EvaluateReader(Reader):
...
@@ -41,6 +39,6 @@ class EvaluateReader(Reader):
src
=
conv_ids
[:
boundary
]
src
=
conv_ids
[:
boundary
]
pos_tgt
=
[
conv_ids
[
boundary
]]
pos_tgt
=
[
conv_ids
[
boundary
]]
feature_name
=
[
"user"
,
"all_item"
,
"p_item"
]
feature_name
=
[
"user"
,
"all_item"
,
"p_item"
]
yield
zip
(
feature_name
,
[
src
]
+
[
np
.
arange
(
self
.
vocab_size
).
astype
(
"int64"
).
tolist
()]
+
[
pos_tgt
])
yield
zip
(
feature_name
,
[
src
]
+
[
np
.
arange
(
self
.
vocab_size
).
astype
(
"int64"
).
tolist
()]
+
[
pos_tgt
])
return
reader
return
reader
models/recall/ssr/ssr_reader.py
浏览文件 @
7a3ec4e6
...
@@ -19,7 +19,6 @@ import random
...
@@ -19,7 +19,6 @@ import random
from
paddlerec.core.reader
import
Reader
from
paddlerec.core.reader
import
Reader
class
TrainReader
(
Reader
):
class
TrainReader
(
Reader
):
def
init
(
self
):
def
init
(
self
):
pass
pass
...
@@ -27,7 +26,6 @@ class TrainReader(Reader):
...
@@ -27,7 +26,6 @@ class TrainReader(Reader):
def
sample_neg_from_seq
(
self
,
seq
):
def
sample_neg_from_seq
(
self
,
seq
):
return
seq
[
random
.
randint
(
0
,
len
(
seq
)
-
1
)]
return
seq
[
random
.
randint
(
0
,
len
(
seq
)
-
1
)]
def
generate_sample
(
self
,
line
):
def
generate_sample
(
self
,
line
):
"""
"""
Read the data line by line and process it as a dictionary
Read the data line by line and process it as a dictionary
...
...
models/recall/word2vec/preprocess.py
浏览文件 @
7a3ec4e6
...
@@ -20,11 +20,8 @@ import random
...
@@ -20,11 +20,8 @@ import random
import
re
import
re
import
six
import
six
import
argparse
import
argparse
prog
=
re
.
compile
(
"[^a-z ]"
,
flags
=
0
)
prog
=
re
.
compile
(
"[^a-z ]"
,
flags
=
0
)
...
@@ -78,7 +75,7 @@ def parse_args():
...
@@ -78,7 +75,7 @@ def parse_args():
def
text_strip
(
text
):
def
text_strip
(
text
):
#English Preprocess Rule
#
English Preprocess Rule
return
prog
.
sub
(
""
,
text
.
lower
())
return
prog
.
sub
(
""
,
text
.
lower
())
...
@@ -120,7 +117,7 @@ def filter_corpus(args):
...
@@ -120,7 +117,7 @@ def filter_corpus(args):
word_all_count
=
0
word_all_count
=
0
id_counts
=
[]
id_counts
=
[]
word_id
=
0
word_id
=
0
#read dict
#
read dict
with
io
.
open
(
args
.
dict_path
,
'r'
,
encoding
=
'utf-8'
)
as
f
:
with
io
.
open
(
args
.
dict_path
,
'r'
,
encoding
=
'utf-8'
)
as
f
:
for
line
in
f
:
for
line
in
f
:
word
,
count
=
line
.
split
()[
0
],
int
(
line
.
split
()[
1
])
word
,
count
=
line
.
split
()[
0
],
int
(
line
.
split
()[
1
])
...
@@ -130,13 +127,13 @@ def filter_corpus(args):
...
@@ -130,13 +127,13 @@ def filter_corpus(args):
id_counts
.
append
(
count
)
id_counts
.
append
(
count
)
word_all_count
+=
count
word_all_count
+=
count
#write word2id file
#
write word2id file
print
(
"write word2id file to : "
+
args
.
dict_path
+
"_word_to_id_"
)
print
(
"write word2id file to : "
+
args
.
dict_path
+
"_word_to_id_"
)
with
io
.
open
(
with
io
.
open
(
args
.
dict_path
+
"_word_to_id_"
,
'w+'
,
encoding
=
'utf-8'
)
as
fid
:
args
.
dict_path
+
"_word_to_id_"
,
'w+'
,
encoding
=
'utf-8'
)
as
fid
:
for
k
,
v
in
word_to_id_
.
items
():
for
k
,
v
in
word_to_id_
.
items
():
fid
.
write
(
k
+
" "
+
str
(
v
)
+
'
\n
'
)
fid
.
write
(
k
+
" "
+
str
(
v
)
+
'
\n
'
)
#filter corpus and convert id
#
filter corpus and convert id
if
not
os
.
path
.
exists
(
args
.
output_corpus_dir
):
if
not
os
.
path
.
exists
(
args
.
output_corpus_dir
):
os
.
makedirs
(
args
.
output_corpus_dir
)
os
.
makedirs
(
args
.
output_corpus_dir
)
for
file
in
os
.
listdir
(
args
.
input_corpus_dir
):
for
file
in
os
.
listdir
(
args
.
input_corpus_dir
):
...
@@ -157,9 +154,9 @@ def filter_corpus(args):
...
@@ -157,9 +154,9 @@ def filter_corpus(args):
count_w
=
id_counts
[
idx
]
count_w
=
id_counts
[
idx
]
corpus_size
=
word_all_count
corpus_size
=
word_all_count
keep_prob
=
(
keep_prob
=
(
math
.
sqrt
(
count_w
/
math
.
sqrt
(
count_w
/
(
args
.
downsample
*
corpus_size
))
+
1
(
args
.
downsample
*
corpus_size
))
+
1
)
*
(
args
.
downsample
*
corpus_size
)
/
count_w
)
*
(
args
.
downsample
*
corpus_size
)
/
count_w
r_value
=
random
.
random
()
r_value
=
random
.
random
()
if
r_value
>
keep_prob
:
if
r_value
>
keep_prob
:
continue
continue
...
@@ -205,7 +202,7 @@ def build_dict(args):
...
@@ -205,7 +202,7 @@ def build_dict(args):
for
item
in
item_to_remove
:
for
item
in
item_to_remove
:
unk_sum
+=
word_count
[
item
]
unk_sum
+=
word_count
[
item
]
del
word_count
[
item
]
del
word_count
[
item
]
#sort by count
#
sort by count
word_count
[
native_to_unicode
(
'<UNK>'
)]
=
unk_sum
word_count
[
native_to_unicode
(
'<UNK>'
)]
=
unk_sum
word_count
=
sorted
(
word_count
=
sorted
(
word_count
.
items
(),
key
=
lambda
word_count
:
-
word_count
[
1
])
word_count
.
items
(),
key
=
lambda
word_count
:
-
word_count
[
1
])
...
@@ -227,17 +224,18 @@ def data_split(args):
...
@@ -227,17 +224,18 @@ def data_split(args):
for
file_
in
files
:
for
file_
in
files
:
with
open
(
os
.
path
.
join
(
raw_data_dir
,
file_
),
'r'
)
as
f
:
with
open
(
os
.
path
.
join
(
raw_data_dir
,
file_
),
'r'
)
as
f
:
contents
.
extend
(
f
.
readlines
())
contents
.
extend
(
f
.
readlines
())
num
=
int
(
args
.
file_nums
)
num
=
int
(
args
.
file_nums
)
lines_per_file
=
len
(
contents
)
/
num
lines_per_file
=
len
(
contents
)
/
num
print
(
"contents: "
,
str
(
len
(
contents
)))
print
(
"contents: "
,
str
(
len
(
contents
)))
print
(
"lines_per_file: "
,
str
(
lines_per_file
))
print
(
"lines_per_file: "
,
str
(
lines_per_file
))
for
i
in
range
(
1
,
num
+
1
):
for
i
in
range
(
1
,
num
+
1
):
with
open
(
os
.
path
.
join
(
new_data_dir
,
"part_"
+
str
(
i
)),
'w'
)
as
fout
:
with
open
(
os
.
path
.
join
(
new_data_dir
,
"part_"
+
str
(
i
)),
'w'
)
as
fout
:
data
=
contents
[(
i
-
1
)
*
lines_per_file
:
min
(
i
*
lines_per_file
,
len
(
contents
))]
data
=
contents
[(
i
-
1
)
*
lines_per_file
:
min
(
i
*
lines_per_file
,
len
(
contents
))]
for
line
in
data
:
for
line
in
data
:
fout
.
write
(
line
)
fout
.
write
(
line
)
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
args
=
parse_args
()
args
=
parse_args
()
...
...
models/recall/word2vec/w2v_evaluate_reader.py
浏览文件 @
7a3ec4e6
...
@@ -22,7 +22,7 @@ from paddlerec.core.utils import envs
...
@@ -22,7 +22,7 @@ from paddlerec.core.utils import envs
class
EvaluateReader
(
Reader
):
class
EvaluateReader
(
Reader
):
def
init
(
self
):
def
init
(
self
):
dict_path
=
envs
.
get_global_env
(
"word_id_dict_path"
,
None
,
"evaluate.reader"
)
dict_path
=
envs
.
get_global_env
(
"word_id_dict_path"
,
None
,
"evaluate.reader"
)
self
.
word_to_id
=
dict
()
self
.
word_to_id
=
dict
()
self
.
id_to_word
=
dict
()
self
.
id_to_word
=
dict
()
with
io
.
open
(
dict_path
,
'r'
,
encoding
=
'utf-8'
)
as
f
:
with
io
.
open
(
dict_path
,
'r'
,
encoding
=
'utf-8'
)
as
f
:
...
@@ -48,19 +48,16 @@ class EvaluateReader(Reader):
...
@@ -48,19 +48,16 @@ class EvaluateReader(Reader):
if
isinstance
(
s
,
str
):
if
isinstance
(
s
,
str
):
return
True
return
True
return
False
return
False
def
_to_unicode
(
self
,
s
,
ignore_errors
=
False
):
def
_to_unicode
(
self
,
s
,
ignore_errors
=
False
):
if
self
.
_is_unicode
(
s
):
if
self
.
_is_unicode
(
s
):
return
s
return
s
error_mode
=
"ignore"
if
ignore_errors
else
"strict"
error_mode
=
"ignore"
if
ignore_errors
else
"strict"
return
s
.
decode
(
"utf-8"
,
errors
=
error_mode
)
return
s
.
decode
(
"utf-8"
,
errors
=
error_mode
)
def
strip_lines
(
self
,
line
,
vocab
):
def
strip_lines
(
self
,
line
,
vocab
):
return
self
.
_replace_oov
(
vocab
,
self
.
native_to_unicode
(
line
))
return
self
.
_replace_oov
(
vocab
,
self
.
native_to_unicode
(
line
))
def
_replace_oov
(
self
,
original_vocab
,
line
):
def
_replace_oov
(
self
,
original_vocab
,
line
):
"""Replace out-of-vocab words with "<UNK>".
"""Replace out-of-vocab words with "<UNK>".
This maintains compatibility with published results.
This maintains compatibility with published results.
...
@@ -78,5 +75,7 @@ class EvaluateReader(Reader):
...
@@ -78,5 +75,7 @@ class EvaluateReader(Reader):
def
reader
():
def
reader
():
features
=
self
.
strip_lines
(
line
.
lower
(),
self
.
word_to_id
)
features
=
self
.
strip_lines
(
line
.
lower
(),
self
.
word_to_id
)
features
=
features
.
split
()
features
=
features
.
split
()
yield
[(
'analogy_a'
,
[
self
.
word_to_id
[
features
[
0
]]]),
(
'analogy_b'
,
[
self
.
word_to_id
[
features
[
1
]]]),
(
'analogy_c'
,
[
self
.
word_to_id
[
features
[
2
]]]),
(
'analogy_d'
,
[
self
.
word_to_id
[
features
[
3
]]])]
yield
[(
'analogy_a'
,
[
self
.
word_to_id
[
features
[
0
]]]),
(
'analogy_b'
,
[
self
.
word_to_id
[
features
[
1
]]]),
(
'analogy_c'
,
[
self
.
word_to_id
[
features
[
2
]]]),
(
'analogy_d'
,
[
self
.
word_to_id
[
features
[
3
]]])]
return
reader
return
reader
models/recall/word2vec/w2v_reader.py
浏览文件 @
7a3ec4e6
...
@@ -40,7 +40,7 @@ class NumpyRandomInt(object):
...
@@ -40,7 +40,7 @@ class NumpyRandomInt(object):
class
TrainReader
(
Reader
):
class
TrainReader
(
Reader
):
def
init
(
self
):
def
init
(
self
):
dict_path
=
envs
.
get_global_env
(
"word_count_dict_path"
,
None
,
"train.reader"
)
dict_path
=
envs
.
get_global_env
(
"word_count_dict_path"
,
None
,
"train.reader"
)
self
.
window_size
=
envs
.
get_global_env
(
"hyper_parameters.window_size"
,
None
,
"train.model"
)
self
.
window_size
=
envs
.
get_global_env
(
"hyper_parameters.window_size"
,
None
,
"train.model"
)
self
.
neg_num
=
envs
.
get_global_env
(
"hyper_parameters.neg_num"
,
None
,
"train.model"
)
self
.
neg_num
=
envs
.
get_global_env
(
"hyper_parameters.neg_num"
,
None
,
"train.model"
)
self
.
with_shuffle_batch
=
envs
.
get_global_env
(
"hyper_parameters.with_shuffle_batch"
,
None
,
"train.model"
)
self
.
with_shuffle_batch
=
envs
.
get_global_env
(
"hyper_parameters.with_shuffle_batch"
,
None
,
"train.model"
)
...
@@ -75,7 +75,7 @@ class TrainReader(Reader):
...
@@ -75,7 +75,7 @@ class TrainReader(Reader):
start_point
=
0
start_point
=
0
end_point
=
idx
+
target_window
end_point
=
idx
+
target_window
targets
=
words
[
start_point
:
idx
]
+
words
[
idx
+
1
:
end_point
+
1
]
targets
=
words
[
start_point
:
idx
]
+
words
[
idx
+
1
:
end_point
+
1
]
return
targets
return
targets
def
generate_sample
(
self
,
line
):
def
generate_sample
(
self
,
line
):
def
reader
():
def
reader
():
...
@@ -87,7 +87,7 @@ class TrainReader(Reader):
...
@@ -87,7 +87,7 @@ class TrainReader(Reader):
output
=
[(
'input_word'
,
[
int
(
target_id
)]),
(
'true_label'
,
[
int
(
context_id
)])]
output
=
[(
'input_word'
,
[
int
(
target_id
)]),
(
'true_label'
,
[
int
(
context_id
)])]
if
not
self
.
with_shuffle_batch
:
if
not
self
.
with_shuffle_batch
:
neg_array
=
self
.
cs
.
searchsorted
(
np
.
random
.
sample
(
self
.
neg_num
))
neg_array
=
self
.
cs
.
searchsorted
(
np
.
random
.
sample
(
self
.
neg_num
))
output
+=
[(
'neg_label'
,
[
int
(
str
(
i
))
for
i
in
neg_array
])]
output
+=
[(
'neg_label'
,
[
int
(
str
(
i
))
for
i
in
neg_array
])]
yield
output
yield
output
return
reader
return
reader
models/treebased/tdm/model.py
浏览文件 @
7a3ec4e6
...
@@ -134,7 +134,7 @@ class Model(ModelBase):
...
@@ -134,7 +134,7 @@ class Model(ModelBase):
sample_nodes_emb
=
[
sample_nodes_emb
=
[
fluid
.
layers
.
reshape
(
sample_nodes_emb
[
i
],
fluid
.
layers
.
reshape
(
sample_nodes_emb
[
i
],
[
-
1
,
self
.
neg_sampling_list
[
i
]
+
[
-
1
,
self
.
neg_sampling_list
[
i
]
+
self
.
output_positive
,
self
.
node_emb_size
]
self
.
output_positive
,
self
.
node_emb_size
]
)
for
i
in
range
(
self
.
max_layers
)
)
for
i
in
range
(
self
.
max_layers
)
]
]
...
@@ -229,7 +229,7 @@ class Model(ModelBase):
...
@@ -229,7 +229,7 @@ class Model(ModelBase):
act
=
self
.
act
,
act
=
self
.
act
,
param_attr
=
fluid
.
ParamAttr
(
param_attr
=
fluid
.
ParamAttr
(
name
=
"trans.layer_fc.weight."
+
str
(
i
)),
name
=
"trans.layer_fc.weight."
+
str
(
i
)),
bias_attr
=
fluid
.
ParamAttr
(
name
=
"trans.layer_fc.bias."
+
str
(
i
)),
bias_attr
=
fluid
.
ParamAttr
(
name
=
"trans.layer_fc.bias."
+
str
(
i
)),
)
for
i
in
range
(
self
.
max_layers
)
)
for
i
in
range
(
self
.
max_layers
)
]
]
...
@@ -268,8 +268,8 @@ class Model(ModelBase):
...
@@ -268,8 +268,8 @@ class Model(ModelBase):
num_flatten_dims
=
2
,
num_flatten_dims
=
2
,
act
=
self
.
act
,
act
=
self
.
act
,
param_attr
=
fluid
.
ParamAttr
(
param_attr
=
fluid
.
ParamAttr
(
name
=
"cls.concat_fc.weight."
+
str
(
i
)),
name
=
"cls.concat_fc.weight."
+
str
(
i
)),
bias_attr
=
fluid
.
ParamAttr
(
name
=
"cls.concat_fc.bias."
+
str
(
i
))
bias_attr
=
fluid
.
ParamAttr
(
name
=
"cls.concat_fc.bias."
+
str
(
i
))
)
for
i
in
range
(
self
.
max_layers
)
)
for
i
in
range
(
self
.
max_layers
)
]
]
...
@@ -348,7 +348,7 @@ class Model(ModelBase):
...
@@ -348,7 +348,7 @@ class Model(ModelBase):
current_layer_node_num
=
self
.
first_layer_node
.
shape
[
1
]
current_layer_node_num
=
self
.
first_layer_node
.
shape
[
1
]
else
:
else
:
current_layer_node_num
=
current_layer_node
.
shape
[
1
]
*
\
current_layer_node_num
=
current_layer_node
.
shape
[
1
]
*
\
current_layer_node
.
shape
[
2
]
current_layer_node
.
shape
[
2
]
current_layer_node
=
fluid
.
layers
.
reshape
(
current_layer_node
=
fluid
.
layers
.
reshape
(
current_layer_node
,
[
-
1
,
current_layer_node_num
])
current_layer_node
,
[
-
1
,
current_layer_node_num
])
...
@@ -458,7 +458,7 @@ class Model(ModelBase):
...
@@ -458,7 +458,7 @@ class Model(ModelBase):
param_attr
=
fluid
.
ParamAttr
(
param_attr
=
fluid
.
ParamAttr
(
name
=
"trans.layer_fc.weight."
+
str
(
layer_idx
)),
name
=
"trans.layer_fc.weight."
+
str
(
layer_idx
)),
bias_attr
=
fluid
.
ParamAttr
(
bias_attr
=
fluid
.
ParamAttr
(
name
=
"trans.layer_fc.bias."
+
str
(
layer_idx
)),
name
=
"trans.layer_fc.bias."
+
str
(
layer_idx
)),
)
)
return
input_layer_fc_out
return
input_layer_fc_out
...
@@ -479,6 +479,6 @@ class Model(ModelBase):
...
@@ -479,6 +479,6 @@ class Model(ModelBase):
num_flatten_dims
=
2
,
num_flatten_dims
=
2
,
act
=
self
.
act
,
act
=
self
.
act
,
param_attr
=
fluid
.
ParamAttr
(
param_attr
=
fluid
.
ParamAttr
(
name
=
"cls.concat_fc.weight."
+
str
(
layer_idx
)),
name
=
"cls.concat_fc.weight."
+
str
(
layer_idx
)),
bias_attr
=
fluid
.
ParamAttr
(
name
=
"cls.concat_fc.bias."
+
str
(
layer_idx
)))
bias_attr
=
fluid
.
ParamAttr
(
name
=
"cls.concat_fc.bias."
+
str
(
layer_idx
)))
return
hidden_states_fc
return
hidden_states_fc
models/treebased/tdm/tdm_evaluate_reader.py
浏览文件 @
7a3ec4e6
...
@@ -28,6 +28,7 @@ class EvaluateReader(Reader):
...
@@ -28,6 +28,7 @@ class EvaluateReader(Reader):
"""
"""
Read the data line by line and process it as a dictionary
Read the data line by line and process it as a dictionary
"""
"""
def
reader
():
def
reader
():
"""
"""
This function needs to be implemented by the user, based on data format
This function needs to be implemented by the user, based on data format
...
...
models/treebased/tdm/tdm_reader.py
浏览文件 @
7a3ec4e6
...
@@ -28,6 +28,7 @@ class TrainReader(Reader):
...
@@ -28,6 +28,7 @@ class TrainReader(Reader):
"""
"""
Read the data line by line and process it as a dictionary
Read the data line by line and process it as a dictionary
"""
"""
def
reader
():
def
reader
():
"""
"""
This function needs to be implemented by the user, based on data format
This function needs to be implemented by the user, based on data format
...
...
setup.py
浏览文件 @
7a3ec4e6
...
@@ -36,7 +36,7 @@ about["__author__"] = "paddle-dev"
...
@@ -36,7 +36,7 @@ about["__author__"] = "paddle-dev"
about
[
"__author_email__"
]
=
"paddle-dev@baidu.com"
about
[
"__author_email__"
]
=
"paddle-dev@baidu.com"
about
[
"__url__"
]
=
"https://github.com/PaddlePaddle/PaddleRec"
about
[
"__url__"
]
=
"https://github.com/PaddlePaddle/PaddleRec"
readme
=
"
...
"
readme
=
""
def
run_cmd
(
command
):
def
run_cmd
(
command
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录