BaiXuePrincess / PaddleRec (forked from PaddlePaddle / PaddleRec)
Commit 1630b6cd, authored Jul 21, 2020 by malin10
update gnn
Parent: bf0adc5b
Showing 5 changed files with 51 additions and 45 deletions (+51 -45):
models/recall/gnn/config.yaml          +7  -7
models/recall/gnn/data/download.py     +7  -2
models/recall/gnn/data/preprocess.py   +11 -21
models/recall/gnn/data_prepare.sh      +20 -10
models/recall/gnn/model.py             +6  -5
models/recall/gnn/config.yaml
@@ -49,31 +49,31 @@ runner:
 - name: train_runner
   class: train
   # num of epochs
-  epochs: 2
+  epochs: 5
   # device to run training or infer
   device: cpu
   save_checkpoint_interval: 1 # save model interval of epochs
   save_inference_interval: 1 # save inference
-  save_checkpoint_path: "increment" # save checkpoint path
-  save_inference_path: "inference" # save inference path
+  save_checkpoint_path: "increment_gnn" # save checkpoint path
+  save_inference_path: "inference_gnn" # save inference path
   save_inference_feed_varnames: [] # feed vars of save inference
   save_inference_fetch_varnames: [] # fetch vars of save inference
   init_model_path: "" # load model path
-  print_interval: 1
+  print_interval: 10
 - name: infer_runner
   class: infer
   # device to run training or infer
   device: cpu
   print_interval: 1
-  init_model_path: "increment/0" # load model path
+  init_model_path: "increment_gnn" # load model path

 # runner will run all the phase in each epoch
 phase:
-- name: phase1
+- name: phase_train
   model: "{workspace}/model.py" # user-defined model
   dataset_name: dataset_train # select dataset by name
   thread_num: 1
-# - name: phase2
+# - name: phase_infer
 #   model: "{workspace}/model.py" # user-defined model
 #   dataset_name: dataset_infer # select dataset by name
 #   thread_num: 1
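The renamed checkpoint and inference directories (increment_gnn, inference_gnn) and the new phase name only matter insofar as the runner reads them back from this YAML. A minimal sketch of inspecting the changed fields with PyYAML follows; the loading below is illustrative only and is not PaddleRec's own config machinery.

# Illustrative only: PaddleRec parses config.yaml through its own envs/runner
# code; this sketch just reads back the fields touched by this commit.
import yaml

with open("models/recall/gnn/config.yaml") as f:
    conf = yaml.safe_load(f)

train_runner = next(r for r in conf["runner"] if r["name"] == "train_runner")
print(train_runner["epochs"])                # 5 after this commit (was 2)
print(train_runner["save_checkpoint_path"])  # "increment_gnn"
print(train_runner["save_inference_path"])   # "inference_gnn"

infer_runner = next(r for r in conf["runner"] if r["name"] == "infer_runner")
print(infer_runner["init_model_path"])       # "increment_gnn" (was "increment/0")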
models/recall/gnn/data/download.py
@@ -57,5 +57,10 @@ def _download_file(url, savepath, print_progress):
         progress("[%-50s] %.2f%%" % ('=' * 50, 100), end=True)


-_download_file("https://sr-gnn.bj.bcebos.com/train-item-views.csv",
-               "./train-item-views.csv", True)
+if sys.argv[1] == "diginetica":
+    _download_file("https://sr-gnn.bj.bcebos.com/train-item-views.csv",
+                   "./train-item-views.csv", True)
+elif sys.argv[1] == "yoochoose":
+    _download_file("https://paddlerec.bj.bcebos.com/gnn%2Fyoochoose-clicks.dat",
+                   "./yoochoose-clicks.dat", True)
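With this branch on sys.argv[1], download.py now expects a dataset name ("diginetica" or "yoochoose") as its first argument. Below is a hedged sketch of the same selection written as a lookup table with a guard for a missing or unknown argument; it illustrates the pattern and is not the repository's code.

# Sketch only: mirrors the argv-based dataset selection added in this commit,
# with an explicit error message for unsupported names.
import sys

DATASETS = {
    "diginetica": ("https://sr-gnn.bj.bcebos.com/train-item-views.csv",
                   "./train-item-views.csv"),
    "yoochoose": ("https://paddlerec.bj.bcebos.com/gnn%2Fyoochoose-clicks.dat",
                  "./yoochoose-clicks.dat"),
}

def main():
    if len(sys.argv) < 2 or sys.argv[1] not in DATASETS:
        sys.exit("usage: python download.py [diginetica|yoochoose]")
    url, savepath = DATASETS[sys.argv[1]]
    _download_file(url, savepath, True)  # _download_file as defined above in download.py

if __name__ == "__main__":
    main()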
models/recall/gnn/data/preprocess.py
@@ -41,39 +41,29 @@ with open(dataset, "r") as f:
     curdate = None
     for data in reader:
         sessid = data['session_id']
-        if curdate and not curid == sessid:
-            date = ''
-            if opt.dataset == 'yoochoose':
-                date = time.mktime(
-                    time.strptime(curdate[:19], '%Y-%m-%dT%H:%M:%S'))
-            else:
-                date = time.mktime(time.strptime(curdate, '%Y-%m-%d'))
-            sess_date[curid] = date
         curid = sessid
         if opt.dataset == 'yoochoose':
             item = data['item_id']
+            date = time.mktime(
+                time.strptime(data['timestamp'][:19], '%Y-%m-%dT%H:%M:%S'))
         else:
             item = data['item_id'], int(data['timeframe'])
-        curdate = ''
-        if opt.dataset == 'yoochoose':
-            curdate = data['timestamp']
-        else:
-            curdate = data['eventdate']
+            date = time.mktime(time.strptime(data['eventdate'], '%Y-%m-%d'))
+        if sessid not in sess_date:
+            sess_date[sessid] = date
+        elif date > sess_date[sessid]:
+            sess_date[sessid] = date

         if sessid in sess_clicks:
             sess_clicks[sessid] += [item]
         else:
             sess_clicks[sessid] = [item]
         ctr += 1
-    date = ''
-    if opt.dataset == 'yoochoose':
-        date = time.mktime(time.strptime(curdate[:19], '%Y-%m-%dT%H:%M:%S'))
-    else:
-        date = time.mktime(time.strptime(curdate, '%Y-%m-%d'))
+    if opt.dataset != 'yoochoose':
         for i in list(sess_clicks):
             sorted_clicks = sorted(sess_clicks[i], key=operator.itemgetter(1))
             sess_clicks[i] = [c[0] for c in sorted_clicks]
-    sess_date[curid] = date
 print("-- Reading data @ %ss" % datetime.datetime.now())
 # Filter out length 1 sessions
@@ -160,7 +150,7 @@ def obtian_tra():
         train_dates += [date]
         train_seqs += [outseq]
     print(item_ctr)     # 43098, 37484
-    with open("./diginetica/config.txt", "w") as fout:
+    with open("./config.txt", "w") as fout:
         fout.write(str(item_ctr) + "\n")
     return train_ids, train_dates, train_seqs
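The reworked loop drops the curdate bookkeeping and instead keeps, for every session, the latest click date seen so far: sess_date[sessid] is only overwritten when a newer date arrives. A small self-contained sketch of that per-session max-date update on toy rows follows; the row dicts below are invented for illustration, only the field names come from the code above.

# Toy illustration of the per-session "keep the latest date" update used above.
import time

rows = [  # hypothetical diginetica-style rows
    {"session_id": "s1", "item_id": "7", "timeframe": "2", "eventdate": "2016-05-01"},
    {"session_id": "s1", "item_id": "9", "timeframe": "1", "eventdate": "2016-05-03"},
    {"session_id": "s2", "item_id": "4", "timeframe": "1", "eventdate": "2016-05-02"},
]

sess_clicks, sess_date = {}, {}
for data in rows:
    sessid = data["session_id"]
    item = data["item_id"], int(data["timeframe"])
    date = time.mktime(time.strptime(data["eventdate"], "%Y-%m-%d"))
    if sessid not in sess_date or date > sess_date[sessid]:
        sess_date[sessid] = date          # remember the newest click date per session
    sess_clicks.setdefault(sessid, []).append(item)

# clicks are then re-ordered by timeframe, as in the non-yoochoose branch above
for sessid, clicks in sess_clicks.items():
    sess_clicks[sessid] = [c[0] for c in sorted(clicks, key=lambda c: c[1])]

print(sess_clicks)  # {'s1': ['9', '7'], 's2': ['4']}
print(sess_date)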
models/recall/gnn/data_prepare.sh
@@ -15,21 +15,31 @@
 # limitations under the License.

 set -e
-echo "begin to download data"
-cd data && python download.py
-mkdir diginetica
-python preprocess.py --dataset diginetica
+dataset=$1
+src=$1
+
+if [[ $src == "yoochoose1_4" || $src == "yoochoose1_64" ]]; then
+    src="yoochoose"
+elif [[ $src == "diginetica" ]]; then
+    src="diginetica"
+else
+    echo "Usage: sh data_prepare.sh [diginetica|yoochoose1_4|yoochoose1_64]"
+    exit 1
+fi
+
+echo "begin to download data"
+cd data && python download.py $src
+mkdir $dataset
+python preprocess.py --dataset $src

 echo "begin to convert data (binary -> txt)"
-python convert_data.py --data_dir diginetica
+python convert_data.py --data_dir $dataset

-cat diginetica/train.txt | wc -l >> diginetica/config.txt
+cat ${dataset}/train.txt | wc -l >> config.txt

 rm -rf train && mkdir train
-mv diginetica/train.txt train
+mv ${dataset}/train.txt train

 rm -rf test && mkdir test
-mv diginetica/test.txt test
-mv diginetica/config.txt ./config.txt
+mv ${dataset}/test.txt test
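After this change the script is no longer hard-wired to diginetica: it must be invoked with a dataset name, for example "sh data_prepare.sh diginetica" or "sh data_prepare.sh yoochoose1_64". The two yoochoose variants are mapped to the single "yoochoose" source used for download and preprocessing, while the original argument is kept as the working directory name for the converted data; any other argument prints the usage string and exits.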
models/recall/gnn/model.py
@@ -20,6 +20,7 @@ import paddle.fluid.layers as layers

 from paddlerec.core.utils import envs
 from paddlerec.core.model import ModelBase
+from paddlerec.core.metrics import Precision


 class Model(ModelBase):

@@ -235,16 +236,16 @@ class Model(ModelBase):
         softmax = layers.softmax_with_cross_entropy(
             logits=logits, label=inputs[6])  # [batch_size, 1]
         self.loss = layers.reduce_mean(softmax)  # [1]
-        self.acc = layers.accuracy(input=logits, label=inputs[6], k=20)
+        acc = Precision(input=logits, label=inputs[6], k=20)

         self._cost = self.loss
         if is_infer:
-            self._infer_results['acc'] = self.acc
-            self._infer_results['loss'] = self.loss
+            self._infer_results['P@20'] = acc
+            self._infer_results['LOSS'] = self.loss
             return

         self._metrics["LOSS"] = self.loss
-        self._metrics["train_acc"] = self.acc
+        self._metrics["Train_P@20"] = acc

     def optimizer(self):
         step_per_epoch = self.corpus_size // self.train_batch_size
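Replacing layers.accuracy with the Precision metric (k=20) reports a top-20 precision (P@20) for both training and inference. A minimal numpy sketch of what such a top-k hit metric computes follows; this is an illustration of the idea only, not the implementation of paddlerec.core.metrics.Precision.

# Sketch: fraction of examples whose true label appears among the k largest logits.
import numpy as np

def precision_at_k(logits, labels, k=20):
    # logits: [batch_size, num_items]; labels: [batch_size] or [batch_size, 1]
    labels = np.asarray(labels).reshape(-1)
    topk = np.argsort(-np.asarray(logits), axis=1)[:, :k]  # indices of the k largest logits
    hits = (topk == labels[:, None]).any(axis=1)           # label found in top-k?
    return hits.mean()

rng = np.random.default_rng(0)
logits = rng.normal(size=(4, 100))
labels = rng.integers(0, 100, size=(4, 1))
print(precision_at_k(logits, labels, k=20))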