Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleRec
提交
ddf6ec25
P
PaddleRec
项目概览
PaddlePaddle
/
PaddleRec
通知
68
Star
12
Fork
5
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
27
列表
看板
标记
里程碑
合并请求
10
Wiki
1
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleRec
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
27
Issue
27
列表
看板
标记
里程碑
合并请求
10
合并请求
10
Pages
分析
分析
仓库分析
DevOps
Wiki
1
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
ddf6ec25
编写于
9月 11, 2020
作者:
C
Chengmo
提交者:
GitHub
9月 11, 2020
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix demo (#213)
* fix demo * fix * fix * fix code style
上级
b1f708fc
变更
14
隐藏空白更改
内联
并排
Showing
14 changed file
with
342 addition
and
111 deletion
+342
-111
core/trainers/framework/runner.py
core/trainers/framework/runner.py
+32
-4
doc/yaml.md
doc/yaml.md
+1
-0
models/demo/movie_recommand/README.md
models/demo/movie_recommand/README.md
+29
-0
models/demo/movie_recommand/data_prepare.sh
models/demo/movie_recommand/data_prepare.sh
+10
-3
models/demo/movie_recommand/offline_test.sh
models/demo/movie_recommand/offline_test.sh
+8
-5
models/demo/movie_recommand/online_rank.sh
models/demo/movie_recommand/online_rank.sh
+6
-5
models/demo/movie_recommand/online_recall.sh
models/demo/movie_recommand/online_recall.sh
+5
-4
models/demo/movie_recommand/rank/config.yaml
models/demo/movie_recommand/rank/config.yaml
+5
-42
models/demo/movie_recommand/rank/config_test_offline.yaml
models/demo/movie_recommand/rank/config_test_offline.yaml
+60
-0
models/demo/movie_recommand/rank/config_test_online.yaml
models/demo/movie_recommand/rank/config_test_online.yaml
+57
-0
models/demo/movie_recommand/recall/config.yaml
models/demo/movie_recommand/recall/config.yaml
+5
-43
models/demo/movie_recommand/recall/config_test_offline.yaml
models/demo/movie_recommand/recall/config_test_offline.yaml
+57
-0
models/demo/movie_recommand/recall/config_test_online.yaml
models/demo/movie_recommand/recall/config_test_online.yaml
+59
-0
models/demo/movie_recommand/train.sh
models/demo/movie_recommand/train.sh
+8
-5
未找到文件。
core/trainers/framework/runner.py
浏览文件 @
ddf6ec25
...
@@ -19,6 +19,7 @@ import time
...
@@ -19,6 +19,7 @@ import time
import
warnings
import
warnings
import
numpy
as
np
import
numpy
as
np
import
random
import
random
import
json
import
logging
import
logging
import
paddle.fluid
as
fluid
import
paddle.fluid
as
fluid
...
@@ -147,17 +148,22 @@ class RunnerBase(object):
...
@@ -147,17 +148,22 @@ class RunnerBase(object):
metrics_format
=
[]
metrics_format
=
[]
if
context
[
"is_infer"
]:
if
context
[
"is_infer"
]:
metrics_format
.
append
(
"
\t
[Infer]
\t
{}: {{}}"
.
format
(
"batch"
))
metrics_format
.
append
(
"
\t
[Infer]
{}: {{}}"
.
format
(
"batch"
))
else
:
else
:
metrics_format
.
append
(
"
\t
[Train]
\t
{}: {{}}"
.
format
(
"batch"
))
metrics_format
.
append
(
"
\t
[Train]"
)
if
"current_epoch"
in
context
:
metrics_format
.
append
(
" epoch: {}"
.
format
(
context
[
"current_epoch"
]))
metrics_format
.
append
(
" {}: {{}}"
.
format
(
"batch"
))
metrics_format
.
append
(
"{}: {{:.2f}}s"
.
format
(
"time_each_interval"
))
metrics_format
.
append
(
"{}: {{:.2f}}s"
.
format
(
"time_each_interval"
))
metrics_names
=
[
"total_batch"
]
metrics_names
=
[
"total_batch"
]
metrics_indexes
=
dict
()
for
name
,
var
in
metrics
.
items
():
for
name
,
var
in
metrics
.
items
():
metrics_names
.
append
(
name
)
metrics_names
.
append
(
name
)
metrics_varnames
.
append
(
var
.
name
)
metrics_varnames
.
append
(
var
.
name
)
metrics_indexes
[
var
.
name
]
=
len
(
metrics_varnames
)
-
1
metrics_format
.
append
(
"{}: {{}}"
.
format
(
name
))
metrics_format
.
append
(
"{}: {{}}"
.
format
(
name
))
metrics_format
=
", "
.
join
(
metrics_format
)
metrics_format
=
", "
.
join
(
metrics_format
)
...
@@ -166,6 +172,7 @@ class RunnerBase(object):
...
@@ -166,6 +172,7 @@ class RunnerBase(object):
batch_id
=
0
batch_id
=
0
begin_time
=
time
.
time
()
begin_time
=
time
.
time
()
scope
=
context
[
"model"
][
model_name
][
"scope"
]
scope
=
context
[
"model"
][
model_name
][
"scope"
]
runner_results
=
[]
result
=
None
result
=
None
with
fluid
.
scope_guard
(
scope
):
with
fluid
.
scope_guard
(
scope
):
try
:
try
:
...
@@ -182,18 +189,35 @@ class RunnerBase(object):
...
@@ -182,18 +189,35 @@ class RunnerBase(object):
]
]
metrics
.
extend
(
metrics_rets
)
metrics
.
extend
(
metrics_rets
)
batch_runner_result
=
{}
for
k
,
v
in
metrics_indexes
.
items
():
batch_runner_result
[
k
]
=
np
.
array
(
metrics_rets
[
v
]).
tolist
()
runner_results
.
append
(
batch_runner_result
)
if
batch_id
%
fetch_period
==
0
and
batch_id
!=
0
:
if
batch_id
%
fetch_period
==
0
and
batch_id
!=
0
:
end_time
=
time
.
time
()
end_time
=
time
.
time
()
seconds
=
end_time
-
begin_time
seconds
=
end_time
-
begin_time
metrics_logging
=
metrics
[:]
metrics_logging
=
metrics
[:]
metrics_logging
=
metrics
.
insert
(
1
,
seconds
)
metrics_logging
=
metrics
.
insert
(
1
,
seconds
)
begin_time
=
end_time
begin_time
=
end_time
logging
.
info
(
metrics_format
.
format
(
*
metrics
))
logging
.
info
(
metrics_format
.
format
(
*
metrics
))
batch_id
+=
1
batch_id
+=
1
except
fluid
.
core
.
EOFException
:
except
fluid
.
core
.
EOFException
:
reader
.
reset
()
reader
.
reset
()
runner_result_save_path
=
envs
.
get_global_env
(
"runner."
+
context
[
"runner_name"
]
+
".runner_result_dump_path"
,
None
)
if
runner_result_save_path
:
if
"current_epoch"
in
context
:
runner_result_save_path
=
runner_result_save_path
+
"_epoch_{}"
.
format
(
context
[
"current_epoch"
])
logging
.
info
(
"Dump runner result in {}"
.
format
(
runner_result_save_path
))
with
open
(
runner_result_save_path
,
'w+'
)
as
fout
:
json
.
dump
(
runner_results
,
fout
)
if
batch_id
>
0
:
if
batch_id
>
0
:
result
=
dict
(
zip
(
metrics_names
,
metrics
))
result
=
dict
(
zip
(
metrics_names
,
metrics
))
return
result
return
result
...
@@ -402,6 +426,7 @@ class SingleRunner(RunnerBase):
...
@@ -402,6 +426,7 @@ class SingleRunner(RunnerBase):
filelist
=
context
[
"file_list"
]
filelist
=
context
[
"file_list"
]
context
[
"file_list"
]
=
shuffle_files
(
need_shuffle_files
,
context
[
"file_list"
]
=
shuffle_files
(
need_shuffle_files
,
filelist
)
filelist
)
context
[
"current_epoch"
]
=
epoch
begin_time
=
time
.
time
()
begin_time
=
time
.
time
()
result
=
self
.
_run
(
context
,
model_dict
)
result
=
self
.
_run
(
context
,
model_dict
)
end_time
=
time
.
time
()
end_time
=
time
.
time
()
...
@@ -450,6 +475,7 @@ class PSRunner(RunnerBase):
...
@@ -450,6 +475,7 @@ class PSRunner(RunnerBase):
filelist
=
context
[
"file_list"
]
filelist
=
context
[
"file_list"
]
context
[
"file_list"
]
=
shuffle_files
(
need_shuffle_files
,
context
[
"file_list"
]
=
shuffle_files
(
need_shuffle_files
,
filelist
)
filelist
)
context
[
"current_epoch"
]
=
epoch
begin_time
=
time
.
time
()
begin_time
=
time
.
time
()
result
=
self
.
_run
(
context
,
model_dict
)
result
=
self
.
_run
(
context
,
model_dict
)
end_time
=
time
.
time
()
end_time
=
time
.
time
()
...
@@ -500,6 +526,7 @@ class CollectiveRunner(RunnerBase):
...
@@ -500,6 +526,7 @@ class CollectiveRunner(RunnerBase):
filelist
=
context
[
"file_list"
]
filelist
=
context
[
"file_list"
]
context
[
"file_list"
]
=
shuffle_files
(
need_shuffle_files
,
context
[
"file_list"
]
=
shuffle_files
(
need_shuffle_files
,
filelist
)
filelist
)
context
[
"current_epoch"
]
=
epoch
begin_time
=
time
.
time
()
begin_time
=
time
.
time
()
self
.
_run
(
context
,
model_dict
)
self
.
_run
(
context
,
model_dict
)
end_time
=
time
.
time
()
end_time
=
time
.
time
()
...
@@ -533,6 +560,7 @@ class PslibRunner(RunnerBase):
...
@@ -533,6 +560,7 @@ class PslibRunner(RunnerBase):
filelist
=
context
[
"file_list"
]
filelist
=
context
[
"file_list"
]
context
[
"file_list"
]
=
shuffle_files
(
need_shuffle_files
,
context
[
"file_list"
]
=
shuffle_files
(
need_shuffle_files
,
filelist
)
filelist
)
context
[
"current_epoch"
]
=
epoch
begin_time
=
time
.
time
()
begin_time
=
time
.
time
()
self
.
_run
(
context
,
model_dict
)
self
.
_run
(
context
,
model_dict
)
end_time
=
time
.
time
()
end_time
=
time
.
time
()
...
...
doc/yaml.md
浏览文件 @
ddf6ec25
...
@@ -38,6 +38,7 @@
...
@@ -38,6 +38,7 @@
| runner_class_path | string | 路径 | 否 | 自定义runner流程实现的地址 |
| runner_class_path | string | 路径 | 否 | 自定义runner流程实现的地址 |
| terminal_class_path | string | 路径 | 否 | 自定义terminal流程实现的地址 |
| terminal_class_path | string | 路径 | 否 | 自定义terminal流程实现的地址 |
| init_pretraining_model_path | string | 路径 | 否 |自定义的startup流程中需要传入这个参数,finetune中需要加载的参数的地址 |
| init_pretraining_model_path | string | 路径 | 否 |自定义的startup流程中需要传入这个参数,finetune中需要加载的参数的地址 |
| runner_result_dump_path | string | 路径 | 否 | 运行中metrics的结果使用json.dump到文件的地址,若是在训练的runner中使用, 会自动加上epoch后缀 |
...
...
models/demo/movie_recommand/README.md
0 → 100644
浏览文件 @
ddf6ec25
# PaddleRec 基于 Movielens 数据集的全流程示例
## 模型的详细教程可以查阅: [十分钟!全流程!从零搭建推荐系统](https://aistudio.baidu.com/aistudio/projectdetail/559336)
## 本地运行流程
在本地需要安装 `PaddleRec` 及 `PaddlePaddle`,推荐在 `Linux` + `python2.7` 环境下执行此 demo。

本地运行流程与 AiStudio 流程基本一致,细节略有区别。
### 离线训练
```shell
sh train.sh
```
### 离线测试
```shell
sh offline_test.sh
```
### 模拟在线召回
```shell
sh online_recall.sh
```
### 模拟在线排序
```shell
sh online_rank.sh
```
models/demo/movie_recommand/data_prepare.sh
浏览文件 @
ddf6ec25
cd
data
cd
data
echo
"---> Download movielens 1M data ..."
wget http://files.grouplens.org/datasets/movielens/ml-1m.zip
wget http://files.grouplens.org/datasets/movielens/ml-1m.zip
echo
"---> Unzip ml-1m.zip ..."
unzip ml-1m.zip
unzip ml-1m.zip
rm
ml-1m.zip
echo
"---> Split movielens data ..."
python split.py
python split.py
mkdir
train/
mkdir
-p
train/
mkdir test
/
mkdir
-p
test
/
echo
"---> Process train & test data ..."
python process_ml_1m.py process_raw ./ml-1m/train.dat |
sort
-t
$'
\t
'
-k
9
-n
>
log.data.train
python process_ml_1m.py process_raw ./ml-1m/train.dat |
sort
-t
$'
\t
'
-k
9
-n
>
log.data.train
python process_ml_1m.py process_raw ./ml-1m/test.dat |
sort
-t
$'
\t
'
-k
9
-n
>
log.data.test
python process_ml_1m.py process_raw ./ml-1m/test.dat |
sort
-t
$'
\t
'
-k
9
-n
>
log.data.test
python process_ml_1m.py
hash
log.data.train
>
./train/data.txt
python process_ml_1m.py
hash
log.data.train
>
./train/data.txt
...
@@ -15,4 +20,6 @@ python process_ml_1m.py hash log.data.test > ./test/data.txt
...
@@ -15,4 +20,6 @@ python process_ml_1m.py hash log.data.test > ./test/data.txt
rm
log.data.train
rm
log.data.train
rm
log.data.test
rm
log.data.test
cd
../
cd
..
echo
"---> Finish data process"
models/demo/movie_recommand/offline_test.sh
浏览文件 @
ddf6ec25
## modify config.yaml to infer mode at first
## modify config.yaml to infer mode at first
cd
recall
echo
"Recall offline test ..."
python
-m
paddlerec.run
-m
./config.yaml
echo
"Model config at models/demo/movie_recommand/recall/config_offline_test.yaml"
cd
../rank
python
-m
paddlerec.run
-m
./recall/config_test_offline.yaml
python
-m
paddlerec.run
-m
./config.yaml
cd
..
echo
"Rank offline test ..."
echo
"Model config at models/demo/movie_recommand/rank/config_offline_test.yaml"
python
-m
paddlerec.run
-m
./rank/config_test_offline.yaml
echo
"recall offline test result:"
echo
"recall offline test result:"
python parse.py recall_offline recall/infer_result
python parse.py recall_offline recall/infer_result
echo
"rank offline test result:"
echo
"rank offline test result:"
python parse.py rank_offline rank/infer_result
python parse.py rank_offline rank/infer_result
models/demo/movie_recommand/online_rank.sh
浏览文件 @
ddf6ec25
cd
data
cd
data
echo
"Create online test data ..."
python process_ml_1m.py data_rank
>
online_user/test/data.txt
python process_ml_1m.py data_rank
>
online_user/test/data.txt
## modify recall/config.yaml to online_infer mode
cd
..
cd
../rank
echo
"Rank online test ..."
python
-m
paddlerec.run
-m
./config.yaml
echo
"Model config at models/demo/movie_recommand/rank/config_online_test.yaml"
cd
../
python
-m
paddlerec.run
-m
./rank/config_test_online.yaml
python parse.py rank_online rank/infer_result
python parse.py rank_online
./
rank/infer_result
models/demo/movie_recommand/online_recall.sh
浏览文件 @
ddf6ec25
cd
data
cd
data
echo
"Create online test data ..."
mkdir
online_user/test
mkdir
online_user/test
python process_ml_1m.py data_recall
>
online_user/test/data.txt
python process_ml_1m.py data_recall
>
online_user/test/data.txt
## modify recall/config.yaml to online_infer mode
cd
..
cd
../recall
echo
"Recall online test ..."
python
-m
paddlerec.run
-m
./config.yaml
echo
"Model config at models/demo/movie_recommand/recall/config_online_test.yaml"
cd
../
python
-m
paddlerec.run
-m
./recall/config_test_online.yaml
python parse.py recall_online recall/infer_result
python parse.py recall_online recall/infer_result
models/demo/movie_recommand/rank/config.yaml
浏览文件 @
ddf6ec25
...
@@ -12,28 +12,16 @@
...
@@ -12,28 +12,16 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
workspace
:
"
models/demo/movie_recommand
"
workspace
:
"
./
"
# list of dataset
# list of dataset
dataset
:
dataset
:
-
name
:
dataset_train
# name of dataset to distinguish different datasets
-
name
:
dataset_train
# name of dataset to distinguish different datasets
batch_size
:
128
batch_size
:
128
type
:
QueueDataset
type
:
DataLoader
data_path
:
"
{workspace}/data/train"
data_path
:
"
{workspace}/data/train"
sparse_slots
:
"
logid
time
userid
gender
age
occupation
movieid
title
genres
label"
sparse_slots
:
"
logid
time
userid
gender
age
occupation
movieid
title
genres
label"
dense_slots
:
"
"
dense_slots
:
"
"
-
name
:
dataset_infer
# name
batch_size
:
128
type
:
DataLoader
data_path
:
"
{workspace}/data/test"
sparse_slots
:
"
logid
time
userid
gender
age
occupation
movieid
title
genres
label"
dense_slots
:
"
"
-
name
:
dataset_online_infer
# name
batch_size
:
10
type
:
DataLoader
data_path
:
"
{workspace}/data/online_user/test"
sparse_slots
:
"
logid
time
userid
gender
age
occupation
movieid
title
genres
label"
dense_slots
:
"
"
# hyper parameters of user-defined network
# hyper parameters of user-defined network
hyper_parameters
:
hyper_parameters
:
...
@@ -51,42 +39,17 @@ hyper_parameters:
...
@@ -51,42 +39,17 @@ hyper_parameters:
# train
# train
mode
:
runner_train
mode
:
runner_train
## online or offline infer
#mode: runner_infer
runner
:
runner
:
-
name
:
runner_train
-
name
:
runner_train
class
:
train
class
:
train
save_checkpoint_interval
:
1
# save model interval of epochs
save_checkpoint_interval
:
1
# save model interval of epochs
save_inference_interval
:
1
# save inference
save_checkpoint_path
:
"
increment_rank"
# save checkpoint path
save_checkpoint_path
:
"
increment"
# save checkpoint path
save_inference_path
:
"
inference"
# save inference path
epochs
:
10
epochs
:
10
device
:
cpu
device
:
cpu
-
name
:
runner_infer
class
:
infer
print_interval
:
10000
init_model_path
:
"
increment/9"
# load model path
#train
#train
phase
:
phase
:
-
name
:
phase1
-
name
:
phase1
model
:
"
{workspace}/model.py"
# user-defined model
model
:
"
{workspace}/
rank/
model.py"
# user-defined model
dataset_name
:
dataset_train
# select dataset by name
dataset_name
:
dataset_train
# select dataset by name
thread_num
:
12
thread_num
:
4
##offline infer
#phase:
#- name: phase1
# model: "{workspace}/model.py" # user-defined model
# dataset_name: dataset_infer # select dataset by name
# save_path: "./infer_result"
# thread_num: 1
##offline infer
#phase:
#- name: phase1
# model: "{workspace}/model.py" # user-defined model
# dataset_name: dataset_online_infer # select dataset by name
# save_path: "./infer_result"
# thread_num: 1
models/demo/movie_recommand/rank/config_test_offline.yaml
0 → 100644
浏览文件 @
ddf6ec25
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#workspace: "paddlerec.models.demo.movie_recommand"
workspace
:
"
./"
# list of dataset
dataset
:
-
name
:
dataset_infer
# name
batch_size
:
128
type
:
DataLoader
data_path
:
"
{workspace}/data/test"
sparse_slots
:
"
logid
time
userid
gender
age
occupation
movieid
title
genres
label"
dense_slots
:
"
"
# hyper parameters of user-defined network
hyper_parameters
:
# optimizer config
optimizer
:
class
:
Adam
learning_rate
:
0.001
strategy
:
async
# user-defined <key, value> pairs
sparse_feature_number
:
60000000
sparse_feature_dim
:
9
dense_input_dim
:
13
fc_sizes
:
[
512
,
256
,
128
,
32
]
# train
mode
:
runner_infer
## online or offline infer
#mode: runner_infer
runner
:
-
name
:
runner_infer
epochs
:
1
device
:
cpu
class
:
infer
print_interval
:
10000
runner_result_dump_path
:
"
{workspace}/rank/infer_result"
init_model_path
:
"
increment_rank/9"
# load model path
#offline infer
phase
:
-
name
:
phase1
model
:
"
{workspace}/rank/model.py"
# user-defined model
dataset_name
:
dataset_infer
# select dataset by name
thread_num
:
1
models/demo/movie_recommand/rank/config_test_online.yaml
0 → 100644
浏览文件 @
ddf6ec25
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
workspace
:
"
./"
# list of dataset
dataset
:
-
name
:
dataset_online_infer
# name
batch_size
:
10
type
:
DataLoader
data_path
:
"
{workspace}/data/online_user/test"
sparse_slots
:
"
logid
time
userid
gender
age
occupation
movieid
title
genres
label"
dense_slots
:
"
"
# hyper parameters of user-defined network
hyper_parameters
:
# optimizer config
optimizer
:
class
:
Adam
learning_rate
:
0.001
strategy
:
async
# user-defined <key, value> pairs
sparse_feature_number
:
60000000
sparse_feature_dim
:
9
dense_input_dim
:
13
fc_sizes
:
[
512
,
256
,
128
,
32
]
# train
mode
:
runner_infer
runner
:
-
name
:
runner_infer
epochs
:
1
device
:
cpu
class
:
infer
print_interval
:
10000
runner_result_dump_path
:
"
{workspace}/rank/infer_result"
init_model_path
:
"
increment_rank/9"
# load model path
#offline infer
phase
:
-
name
:
phase1
model
:
"
{workspace}/rank/model.py"
# user-defined model
dataset_name
:
dataset_online_infer
# select dataset by name
thread_num
:
1
models/demo/movie_recommand/recall/config.yaml
浏览文件 @
ddf6ec25
...
@@ -12,28 +12,16 @@
...
@@ -12,28 +12,16 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
workspace
:
"
models/demo/movie_recommand
"
workspace
:
"
./
"
# list of dataset
# list of dataset
dataset
:
dataset
:
-
name
:
dataset_train
# name of dataset to distinguish different datasets
-
name
:
dataset_train
# name of dataset to distinguish different datasets
batch_size
:
128
batch_size
:
128
type
:
QueueDataset
type
:
DataLoader
data_path
:
"
{workspace}/data/train"
data_path
:
"
{workspace}/data/train"
sparse_slots
:
"
logid
time
userid
gender
age
occupation
movieid
title
genres
label"
sparse_slots
:
"
logid
time
userid
gender
age
occupation
movieid
title
genres
label"
dense_slots
:
"
"
dense_slots
:
"
"
-
name
:
dataset_infer
# name
batch_size
:
128
type
:
DataLoader
data_path
:
"
{workspace}/data/test"
sparse_slots
:
"
logid
time
userid
gender
age
occupation
movieid
title
genres
label"
dense_slots
:
"
"
-
name
:
dataset_online_infer
# name
batch_size
:
128
type
:
DataLoader
data_path
:
"
{workspace}/data/online_user/test"
sparse_slots
:
"
logid
time
userid
gender
age
occupation
movieid
title
genres
label"
dense_slots
:
"
"
# hyper parameters of user-defined network
# hyper parameters of user-defined network
hyper_parameters
:
hyper_parameters
:
...
@@ -50,43 +38,17 @@ hyper_parameters:
...
@@ -50,43 +38,17 @@ hyper_parameters:
# train
# train
mode
:
runner_train
mode
:
runner_train
## online or offline infer
#mode: runner_infer
runner
:
runner
:
-
name
:
runner_train
-
name
:
runner_train
class
:
train
class
:
train
save_checkpoint_interval
:
1
# save model interval of epochs
save_checkpoint_interval
:
1
# save model interval of epochs
save_inference_interval
:
1
# save inference
save_checkpoint_path
:
"
increment_recall"
# save checkpoint path
save_checkpoint_path
:
"
increment"
# save checkpoint path
save_inference_path
:
"
inference"
# save inference path
epochs
:
10
epochs
:
10
device
:
cpu
device
:
cpu
-
name
:
runner_infer
class
:
infer
print_interval
:
10000
init_model_path
:
"
increment/9"
# load model path
#train
#train
phase
:
phase
:
-
name
:
phase1
-
name
:
phase1
model
:
"
{workspace}/model.py"
# user-defined model
model
:
"
{workspace}/
recall/
model.py"
# user-defined model
dataset_name
:
dataset_train
# select dataset by name
dataset_name
:
dataset_train
# select dataset by name
thread_num
:
12
thread_num
:
4
##offline infer
#phase:
#- name: phase1
# model: "{workspace}/model.py" # user-defined model
# dataset_name: dataset_infer # select dataset by name
# save_path: "./infer_result"
# thread_num: 1
##offline infer
#phase:
#- name: phase1
# model: "{workspace}/model.py" # user-defined model
# dataset_name: dataset_online_infer # select dataset by name
# save_path: "./infer_result"
# thread_num: 1
models/demo/movie_recommand/recall/config_test_offline.yaml
0 → 100644
浏览文件 @
ddf6ec25
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#workspace: "paddlerec.models.demo.movie_recommand"
workspace
:
"
./"
# list of dataset
dataset
:
-
name
:
dataset_infer
# name
batch_size
:
128
type
:
DataLoader
data_path
:
"
{workspace}/data/test"
sparse_slots
:
"
logid
time
userid
gender
age
occupation
movieid
title
genres
label"
dense_slots
:
"
"
# hyper parameters of user-defined network
hyper_parameters
:
# optimizer config
optimizer
:
class
:
Adam
learning_rate
:
0.001
strategy
:
async
# user-defined <key, value> pairs
sparse_feature_number
:
60000000
sparse_feature_dim
:
9
dense_input_dim
:
13
fc_sizes
:
[
512
,
256
,
128
,
32
]
# train
mode
:
runner_infer
runner
:
-
name
:
runner_infer
epochs
:
1
device
:
cpu
class
:
infer
print_interval
:
100000
runner_result_dump_path
:
"
{workspace}/recall/infer_result"
init_model_path
:
"
increment_recall/9"
# load model path
#offline infer
phase
:
-
name
:
phase1
model
:
"
{workspace}/recall/model.py"
# user-defined model
dataset_name
:
dataset_infer
thread_num
:
1
models/demo/movie_recommand/recall/config_test_online.yaml
0 → 100644
浏览文件 @
ddf6ec25
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#workspace: "paddlerec.models.demo.movie_recommand"
workspace
:
./
# list of dataset
dataset
:
-
name
:
dataset_online_infer
# name
batch_size
:
128
type
:
DataLoader
data_path
:
"
{workspace}/data/online_user/test"
sparse_slots
:
"
logid
time
userid
gender
age
occupation
movieid
title
genres
label"
dense_slots
:
"
"
# hyper parameters of user-defined network
hyper_parameters
:
# optimizer config
optimizer
:
class
:
Adam
learning_rate
:
0.001
strategy
:
async
# user-defined <key, value> pairs
sparse_feature_number
:
60000000
sparse_feature_dim
:
9
dense_input_dim
:
13
fc_sizes
:
[
512
,
256
,
128
,
32
]
# train
mode
:
runner_infer
## online or offline infer
#mode: runner_infer
runner
:
-
name
:
runner_infer
epochs
:
1
device
:
cpu
class
:
infer
print_interval
:
10000
runner_result_dump_path
:
"
{workspace}/recall/infer_result"
init_model_path
:
"
increment_recall/9"
# load model path
#offline infer
phase
:
-
name
:
phase1
model
:
"
{workspace}/recall/model.py"
# user-defined model
dataset_name
:
dataset_online_infer
# select dataset by name
thread_num
:
1
models/demo/movie_recommand/train.sh
浏览文件 @
ddf6ec25
cd
recall
echo
"Recall offline training ..."
python
-m
paddlerec.run
-m
./config.yaml &> log &
echo
"Model config at models/demo/movie_recommand/recall/config.yaml"
cd
../rank
python
-m
paddlerec.run
-m
./recall/config.yaml
python
-m
paddlerec.run
-m
./config.yaml &> log &
cd
..
echo
"----------------------------------------"
echo
"Rank offline training ..."
echo
"Model config at models/demo/movie_recommand/rank/config.yaml"
python
-m
paddlerec.run
-m
./rank/config.yaml
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录