未验证 提交 ddf6ec25 编写于 作者: C Chengmo 提交者: GitHub

fix demo (#213)

* fix demo

* fix

* fix

* fix code style
上级 b1f708fc
...@@ -19,6 +19,7 @@ import time ...@@ -19,6 +19,7 @@ import time
import warnings import warnings
import numpy as np import numpy as np
import random import random
import json
import logging import logging
import paddle.fluid as fluid import paddle.fluid as fluid
...@@ -147,17 +148,22 @@ class RunnerBase(object): ...@@ -147,17 +148,22 @@ class RunnerBase(object):
metrics_format = [] metrics_format = []
if context["is_infer"]: if context["is_infer"]:
metrics_format.append("\t[Infer]\t{}: {{}}".format("batch")) metrics_format.append("\t[Infer] {}: {{}}".format("batch"))
else: else:
metrics_format.append("\t[Train]\t{}: {{}}".format("batch")) metrics_format.append("\t[Train]")
if "current_epoch" in context:
metrics_format.append(" epoch: {}".format(context[
"current_epoch"]))
metrics_format.append(" {}: {{}}".format("batch"))
metrics_format.append("{}: {{:.2f}}s".format("time_each_interval")) metrics_format.append("{}: {{:.2f}}s".format("time_each_interval"))
metrics_names = ["total_batch"] metrics_names = ["total_batch"]
metrics_indexes = dict()
for name, var in metrics.items(): for name, var in metrics.items():
metrics_names.append(name) metrics_names.append(name)
metrics_varnames.append(var.name) metrics_varnames.append(var.name)
metrics_indexes[var.name] = len(metrics_varnames) - 1
metrics_format.append("{}: {{}}".format(name)) metrics_format.append("{}: {{}}".format(name))
metrics_format = ", ".join(metrics_format) metrics_format = ", ".join(metrics_format)
...@@ -166,6 +172,7 @@ class RunnerBase(object): ...@@ -166,6 +172,7 @@ class RunnerBase(object):
batch_id = 0 batch_id = 0
begin_time = time.time() begin_time = time.time()
scope = context["model"][model_name]["scope"] scope = context["model"][model_name]["scope"]
runner_results = []
result = None result = None
with fluid.scope_guard(scope): with fluid.scope_guard(scope):
try: try:
...@@ -182,18 +189,35 @@ class RunnerBase(object): ...@@ -182,18 +189,35 @@ class RunnerBase(object):
] ]
metrics.extend(metrics_rets) metrics.extend(metrics_rets)
batch_runner_result = {}
for k, v in metrics_indexes.items():
batch_runner_result[k] = np.array(metrics_rets[
v]).tolist()
runner_results.append(batch_runner_result)
if batch_id % fetch_period == 0 and batch_id != 0: if batch_id % fetch_period == 0 and batch_id != 0:
end_time = time.time() end_time = time.time()
seconds = end_time - begin_time seconds = end_time - begin_time
metrics_logging = metrics[:] metrics_logging = metrics[:]
metrics_logging = metrics.insert(1, seconds) metrics_logging = metrics.insert(1, seconds)
begin_time = end_time begin_time = end_time
logging.info(metrics_format.format(*metrics)) logging.info(metrics_format.format(*metrics))
batch_id += 1 batch_id += 1
except fluid.core.EOFException: except fluid.core.EOFException:
reader.reset() reader.reset()
runner_result_save_path = envs.get_global_env(
"runner." + context["runner_name"] + ".runner_result_dump_path",
None)
if runner_result_save_path:
if "current_epoch" in context:
runner_result_save_path = runner_result_save_path + "_epoch_{}".format(
context["current_epoch"])
logging.info("Dump runner result in {}".format(
runner_result_save_path))
with open(runner_result_save_path, 'w+') as fout:
json.dump(runner_results, fout)
if batch_id > 0: if batch_id > 0:
result = dict(zip(metrics_names, metrics)) result = dict(zip(metrics_names, metrics))
return result return result
...@@ -402,6 +426,7 @@ class SingleRunner(RunnerBase): ...@@ -402,6 +426,7 @@ class SingleRunner(RunnerBase):
filelist = context["file_list"] filelist = context["file_list"]
context["file_list"] = shuffle_files(need_shuffle_files, context["file_list"] = shuffle_files(need_shuffle_files,
filelist) filelist)
context["current_epoch"] = epoch
begin_time = time.time() begin_time = time.time()
result = self._run(context, model_dict) result = self._run(context, model_dict)
end_time = time.time() end_time = time.time()
...@@ -450,6 +475,7 @@ class PSRunner(RunnerBase): ...@@ -450,6 +475,7 @@ class PSRunner(RunnerBase):
filelist = context["file_list"] filelist = context["file_list"]
context["file_list"] = shuffle_files(need_shuffle_files, context["file_list"] = shuffle_files(need_shuffle_files,
filelist) filelist)
context["current_epoch"] = epoch
begin_time = time.time() begin_time = time.time()
result = self._run(context, model_dict) result = self._run(context, model_dict)
end_time = time.time() end_time = time.time()
...@@ -500,6 +526,7 @@ class CollectiveRunner(RunnerBase): ...@@ -500,6 +526,7 @@ class CollectiveRunner(RunnerBase):
filelist = context["file_list"] filelist = context["file_list"]
context["file_list"] = shuffle_files(need_shuffle_files, context["file_list"] = shuffle_files(need_shuffle_files,
filelist) filelist)
context["current_epoch"] = epoch
begin_time = time.time() begin_time = time.time()
self._run(context, model_dict) self._run(context, model_dict)
end_time = time.time() end_time = time.time()
...@@ -533,6 +560,7 @@ class PslibRunner(RunnerBase): ...@@ -533,6 +560,7 @@ class PslibRunner(RunnerBase):
filelist = context["file_list"] filelist = context["file_list"]
context["file_list"] = shuffle_files(need_shuffle_files, context["file_list"] = shuffle_files(need_shuffle_files,
filelist) filelist)
context["current_epoch"] = epoch
begin_time = time.time() begin_time = time.time()
self._run(context, model_dict) self._run(context, model_dict)
end_time = time.time() end_time = time.time()
......
...@@ -38,6 +38,7 @@ ...@@ -38,6 +38,7 @@
| runner_class_path | string | 路径 | 否 | 自定义runner流程实现的地址 | | runner_class_path | string | 路径 | 否 | 自定义runner流程实现的地址 |
| terminal_class_path | string | 路径 | 否 | 自定义terminal流程实现的地址 | | terminal_class_path | string | 路径 | 否 | 自定义terminal流程实现的地址 |
| init_pretraining_model_path | string | 路径 | 否 |自定义的startup流程中需要传入这个参数,finetune中需要加载的参数的地址 | | init_pretraining_model_path | string | 路径 | 否 |自定义的startup流程中需要传入这个参数,finetune中需要加载的参数的地址 |
| runner_result_dump_path | string | 路径 | 否 | 将运行过程中每个batch的metrics结果通过json.dump写入文件的路径; 若在训练的runner中使用, 会自动在该路径末尾加上 `_epoch_{epoch}` 后缀 |
......
# PaddleRec 基于 Movielens 数据集的全流程示例
## 模型的详细教程可以查阅: [十分钟!全流程!从零搭建推荐系统](https://aistudio.baidu.com/aistudio/projectdetail/559336)
## 本地运行流程
在本地需要安装`PaddleRec`和`PaddlePaddle`,推荐在`Linux` + `python2.7` 环境下执行此demo
本地运行流程与AiStudio流程基本一致,细节略有区别
### 离线训练
```shell
sh train.sh
```
### 离线测试
```shell
sh offline_test.sh
```
### 模拟在线召回
```shell
sh online_recall.sh
```
### 模拟在线排序
```shell
sh online_rank.sh
```
cd data cd data
echo "---> Download movielens 1M data ..."
wget http://files.grouplens.org/datasets/movielens/ml-1m.zip wget http://files.grouplens.org/datasets/movielens/ml-1m.zip
echo "---> Unzip ml-1m.zip ..."
unzip ml-1m.zip unzip ml-1m.zip
rm ml-1m.zip
echo "---> Split movielens data ..."
python split.py python split.py
mkdir train/ mkdir -p train/
mkdir test/ mkdir -p test/
echo "---> Process train & test data ..."
python process_ml_1m.py process_raw ./ml-1m/train.dat | sort -t $'\t' -k 9 -n > log.data.train python process_ml_1m.py process_raw ./ml-1m/train.dat | sort -t $'\t' -k 9 -n > log.data.train
python process_ml_1m.py process_raw ./ml-1m/test.dat | sort -t $'\t' -k 9 -n > log.data.test python process_ml_1m.py process_raw ./ml-1m/test.dat | sort -t $'\t' -k 9 -n > log.data.test
python process_ml_1m.py hash log.data.train > ./train/data.txt python process_ml_1m.py hash log.data.train > ./train/data.txt
...@@ -15,4 +20,6 @@ python process_ml_1m.py hash log.data.test > ./test/data.txt ...@@ -15,4 +20,6 @@ python process_ml_1m.py hash log.data.test > ./test/data.txt
rm log.data.train rm log.data.train
rm log.data.test rm log.data.test
cd ../ cd ..
echo "---> Finish data process"
## modify config.yaml to infer mode at first ## modify config.yaml to infer mode at first
cd recall echo "Recall offline test ..."
python -m paddlerec.run -m ./config.yaml echo "Model config at models/demo/movie_recommand/recall/config_offline_test.yaml"
cd ../rank python -m paddlerec.run -m ./recall/config_test_offline.yaml
python -m paddlerec.run -m ./config.yaml
cd .. echo "Rank offline test ..."
echo "Model config at models/demo/movie_recommand/rank/config_offline_test.yaml"
python -m paddlerec.run -m ./rank/config_test_offline.yaml
echo "recall offline test result:" echo "recall offline test result:"
python parse.py recall_offline recall/infer_result python parse.py recall_offline recall/infer_result
echo "rank offline test result:" echo "rank offline test result:"
python parse.py rank_offline rank/infer_result python parse.py rank_offline rank/infer_result
cd data cd data
echo "Create online test data ..."
python process_ml_1m.py data_rank > online_user/test/data.txt python process_ml_1m.py data_rank > online_user/test/data.txt
## modify recall/config.yaml to online_infer mode cd ..
cd ../rank echo "Rank online test ..."
python -m paddlerec.run -m ./config.yaml echo "Model config at models/demo/movie_recommand/rank/config_online_test.yaml"
cd ../ python -m paddlerec.run -m ./rank/config_test_online.yaml
python parse.py rank_online rank/infer_result python parse.py rank_online ./rank/infer_result
cd data cd data
echo "Create online test data ..."
mkdir online_user/test mkdir online_user/test
python process_ml_1m.py data_recall > online_user/test/data.txt python process_ml_1m.py data_recall > online_user/test/data.txt
## modify recall/config.yaml to online_infer mode cd ..
cd ../recall echo "Recall online test ..."
python -m paddlerec.run -m ./config.yaml echo "Model config at models/demo/movie_recommand/recall/config_online_test.yaml"
cd ../ python -m paddlerec.run -m ./recall/config_test_online.yaml
python parse.py recall_online recall/infer_result python parse.py recall_online recall/infer_result
...@@ -12,28 +12,16 @@ ...@@ -12,28 +12,16 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
workspace: "models/demo/movie_recommand" workspace: "./"
# list of dataset # list of dataset
dataset: dataset:
- name: dataset_train # name of dataset to distinguish different datasets - name: dataset_train # name of dataset to distinguish different datasets
batch_size: 128 batch_size: 128
type: QueueDataset type: DataLoader
data_path: "{workspace}/data/train" data_path: "{workspace}/data/train"
sparse_slots: "logid time userid gender age occupation movieid title genres label" sparse_slots: "logid time userid gender age occupation movieid title genres label"
dense_slots: "" dense_slots: ""
- name: dataset_infer # name
batch_size: 128
type: DataLoader
data_path: "{workspace}/data/test"
sparse_slots: "logid time userid gender age occupation movieid title genres label"
dense_slots: ""
- name: dataset_online_infer # name
batch_size: 10
type: DataLoader
data_path: "{workspace}/data/online_user/test"
sparse_slots: "logid time userid gender age occupation movieid title genres label"
dense_slots: ""
# hyper parameters of user-defined network # hyper parameters of user-defined network
hyper_parameters: hyper_parameters:
...@@ -51,42 +39,17 @@ hyper_parameters: ...@@ -51,42 +39,17 @@ hyper_parameters:
# train # train
mode: runner_train mode: runner_train
## online or offline infer
#mode: runner_infer
runner: runner:
- name: runner_train - name: runner_train
class: train class: train
save_checkpoint_interval: 1 # save model interval of epochs save_checkpoint_interval: 1 # save model interval of epochs
save_inference_interval: 1 # save inference save_checkpoint_path: "increment_rank" # save checkpoint path
save_checkpoint_path: "increment" # save checkpoint path
save_inference_path: "inference" # save inference path
epochs: 10 epochs: 10
device: cpu device: cpu
- name: runner_infer
class: infer
print_interval: 10000
init_model_path: "increment/9" # load model path
#train #train
phase: phase:
- name: phase1 - name: phase1
model: "{workspace}/model.py" # user-defined model model: "{workspace}/rank/model.py" # user-defined model
dataset_name: dataset_train # select dataset by name dataset_name: dataset_train # select dataset by name
thread_num: 12 thread_num: 4
##offline infer
#phase:
#- name: phase1
# model: "{workspace}/model.py" # user-defined model
# dataset_name: dataset_infer # select dataset by name
# save_path: "./infer_result"
# thread_num: 1
##offline infer
#phase:
#- name: phase1
# model: "{workspace}/model.py" # user-defined model
# dataset_name: dataset_online_infer # select dataset by name
# save_path: "./infer_result"
# thread_num: 1
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#workspace: "paddlerec.models.demo.movie_recommand"
workspace: "./"
# list of dataset
dataset:
- name: dataset_infer # name
batch_size: 128
type: DataLoader
data_path: "{workspace}/data/test"
sparse_slots: "logid time userid gender age occupation movieid title genres label"
dense_slots: ""
# hyper parameters of user-defined network
hyper_parameters:
# optimizer config
optimizer:
class: Adam
learning_rate: 0.001
strategy: async
# user-defined <key, value> pairs
sparse_feature_number: 60000000
sparse_feature_dim: 9
dense_input_dim: 13
fc_sizes: [512, 256, 128, 32]
# infer
mode: runner_infer
## online or offline infer
#mode: runner_infer
runner:
- name: runner_infer
epochs: 1
device: cpu
class: infer
print_interval: 10000
runner_result_dump_path: "{workspace}/rank/infer_result"
init_model_path: "increment_rank/9" # load model path
#offline infer
phase:
- name: phase1
model: "{workspace}/rank/model.py" # user-defined model
dataset_name: dataset_infer # select dataset by name
thread_num: 1
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
workspace: "./"
# list of dataset
dataset:
- name: dataset_online_infer # name
batch_size: 10
type: DataLoader
data_path: "{workspace}/data/online_user/test"
sparse_slots: "logid time userid gender age occupation movieid title genres label"
dense_slots: ""
# hyper parameters of user-defined network
hyper_parameters:
# optimizer config
optimizer:
class: Adam
learning_rate: 0.001
strategy: async
# user-defined <key, value> pairs
sparse_feature_number: 60000000
sparse_feature_dim: 9
dense_input_dim: 13
fc_sizes: [512, 256, 128, 32]
# infer
mode: runner_infer
runner:
- name: runner_infer
epochs: 1
device: cpu
class: infer
print_interval: 10000
runner_result_dump_path: "{workspace}/rank/infer_result"
init_model_path: "increment_rank/9" # load model path
#online infer
phase:
- name: phase1
model: "{workspace}/rank/model.py" # user-defined model
dataset_name: dataset_online_infer # select dataset by name
thread_num: 1
...@@ -12,28 +12,16 @@ ...@@ -12,28 +12,16 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
workspace: "models/demo/movie_recommand" workspace: "./"
# list of dataset # list of dataset
dataset: dataset:
- name: dataset_train # name of dataset to distinguish different datasets - name: dataset_train # name of dataset to distinguish different datasets
batch_size: 128 batch_size: 128
type: QueueDataset type: DataLoader
data_path: "{workspace}/data/train" data_path: "{workspace}/data/train"
sparse_slots: "logid time userid gender age occupation movieid title genres label" sparse_slots: "logid time userid gender age occupation movieid title genres label"
dense_slots: "" dense_slots: ""
- name: dataset_infer # name
batch_size: 128
type: DataLoader
data_path: "{workspace}/data/test"
sparse_slots: "logid time userid gender age occupation movieid title genres label"
dense_slots: ""
- name: dataset_online_infer # name
batch_size: 128
type: DataLoader
data_path: "{workspace}/data/online_user/test"
sparse_slots: "logid time userid gender age occupation movieid title genres label"
dense_slots: ""
# hyper parameters of user-defined network # hyper parameters of user-defined network
hyper_parameters: hyper_parameters:
...@@ -50,43 +38,17 @@ hyper_parameters: ...@@ -50,43 +38,17 @@ hyper_parameters:
# train # train
mode: runner_train mode: runner_train
## online or offline infer
#mode: runner_infer
runner: runner:
- name: runner_train - name: runner_train
class: train class: train
save_checkpoint_interval: 1 # save model interval of epochs save_checkpoint_interval: 1 # save model interval of epochs
save_inference_interval: 1 # save inference save_checkpoint_path: "increment_recall" # save checkpoint path
save_checkpoint_path: "increment" # save checkpoint path
save_inference_path: "inference" # save inference path
epochs: 10 epochs: 10
device: cpu device: cpu
- name: runner_infer
class: infer
print_interval: 10000
init_model_path: "increment/9" # load model path
#train #train
phase: phase:
- name: phase1 - name: phase1
model: "{workspace}/model.py" # user-defined model model: "{workspace}/recall/model.py" # user-defined model
dataset_name: dataset_train # select dataset by name dataset_name: dataset_train # select dataset by name
thread_num: 12 thread_num: 4
##offline infer
#phase:
#- name: phase1
# model: "{workspace}/model.py" # user-defined model
# dataset_name: dataset_infer # select dataset by name
# save_path: "./infer_result"
# thread_num: 1
##offline infer
#phase:
#- name: phase1
# model: "{workspace}/model.py" # user-defined model
# dataset_name: dataset_online_infer # select dataset by name
# save_path: "./infer_result"
# thread_num: 1
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#workspace: "paddlerec.models.demo.movie_recommand"
workspace: "./"
# list of dataset
dataset:
- name: dataset_infer # name
batch_size: 128
type: DataLoader
data_path: "{workspace}/data/test"
sparse_slots: "logid time userid gender age occupation movieid title genres label"
dense_slots: ""
# hyper parameters of user-defined network
hyper_parameters:
# optimizer config
optimizer:
class: Adam
learning_rate: 0.001
strategy: async
# user-defined <key, value> pairs
sparse_feature_number: 60000000
sparse_feature_dim: 9
dense_input_dim: 13
fc_sizes: [512, 256, 128, 32]
# infer
mode: runner_infer
runner:
- name: runner_infer
epochs: 1
device: cpu
class: infer
print_interval: 100000
runner_result_dump_path: "{workspace}/recall/infer_result"
init_model_path: "increment_recall/9" # load model path
#offline infer
phase:
- name: phase1
model: "{workspace}/recall/model.py" # user-defined model
dataset_name: dataset_infer
thread_num: 1
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#workspace: "paddlerec.models.demo.movie_recommand"
workspace: ./
# list of dataset
dataset:
- name: dataset_online_infer # name
batch_size: 128
type: DataLoader
data_path: "{workspace}/data/online_user/test"
sparse_slots: "logid time userid gender age occupation movieid title genres label"
dense_slots: ""
# hyper parameters of user-defined network
hyper_parameters:
# optimizer config
optimizer:
class: Adam
learning_rate: 0.001
strategy: async
# user-defined <key, value> pairs
sparse_feature_number: 60000000
sparse_feature_dim: 9
dense_input_dim: 13
fc_sizes: [512, 256, 128, 32]
# infer
mode: runner_infer
## online or offline infer
#mode: runner_infer
runner:
- name: runner_infer
epochs: 1
device: cpu
class: infer
print_interval: 10000
runner_result_dump_path: "{workspace}/recall/infer_result"
init_model_path: "increment_recall/9" # load model path
#online infer
phase:
- name: phase1
model: "{workspace}/recall/model.py" # user-defined model
dataset_name: dataset_online_infer # select dataset by name
thread_num: 1
cd recall echo "Recall offline training ..."
python -m paddlerec.run -m ./config.yaml &> log & echo "Model config at models/demo/movie_recommand/recall/config.yaml"
cd ../rank python -m paddlerec.run -m ./recall/config.yaml
python -m paddlerec.run -m ./config.yaml &> log &
cd .. echo "----------------------------------------"
echo "Rank offline training ..."
echo "Model config at models/demo/movie_recommand/rank/config.yaml"
python -m paddlerec.run -m ./rank/config.yaml
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册