未验证 提交 ddf6ec25 编写于 作者: C Chengmo 提交者: GitHub

fix demo (#213)

* fix demo

* fix

* fix

* fix code style
上级 b1f708fc
......@@ -19,6 +19,7 @@ import time
import warnings
import numpy as np
import random
import json
import logging
import paddle.fluid as fluid
......@@ -147,17 +148,22 @@ class RunnerBase(object):
metrics_format = []
if context["is_infer"]:
metrics_format.append("\t[Infer]\t{}: {{}}".format("batch"))
metrics_format.append("\t[Infer] {}: {{}}".format("batch"))
else:
metrics_format.append("\t[Train]\t{}: {{}}".format("batch"))
metrics_format.append("\t[Train]")
if "current_epoch" in context:
metrics_format.append(" epoch: {}".format(context[
"current_epoch"]))
metrics_format.append(" {}: {{}}".format("batch"))
metrics_format.append("{}: {{:.2f}}s".format("time_each_interval"))
metrics_names = ["total_batch"]
metrics_indexes = dict()
for name, var in metrics.items():
metrics_names.append(name)
metrics_varnames.append(var.name)
metrics_indexes[var.name] = len(metrics_varnames) - 1
metrics_format.append("{}: {{}}".format(name))
metrics_format = ", ".join(metrics_format)
......@@ -166,6 +172,7 @@ class RunnerBase(object):
batch_id = 0
begin_time = time.time()
scope = context["model"][model_name]["scope"]
runner_results = []
result = None
with fluid.scope_guard(scope):
try:
......@@ -182,18 +189,35 @@ class RunnerBase(object):
]
metrics.extend(metrics_rets)
batch_runner_result = {}
for k, v in metrics_indexes.items():
batch_runner_result[k] = np.array(metrics_rets[
v]).tolist()
runner_results.append(batch_runner_result)
if batch_id % fetch_period == 0 and batch_id != 0:
end_time = time.time()
seconds = end_time - begin_time
metrics_logging = metrics[:]
metrics_logging = metrics.insert(1, seconds)
begin_time = end_time
logging.info(metrics_format.format(*metrics))
batch_id += 1
except fluid.core.EOFException:
reader.reset()
runner_result_save_path = envs.get_global_env(
"runner." + context["runner_name"] + ".runner_result_dump_path",
None)
if runner_result_save_path:
if "current_epoch" in context:
runner_result_save_path = runner_result_save_path + "_epoch_{}".format(
context["current_epoch"])
logging.info("Dump runner result in {}".format(
runner_result_save_path))
with open(runner_result_save_path, 'w+') as fout:
json.dump(runner_results, fout)
if batch_id > 0:
result = dict(zip(metrics_names, metrics))
return result
......@@ -402,6 +426,7 @@ class SingleRunner(RunnerBase):
filelist = context["file_list"]
context["file_list"] = shuffle_files(need_shuffle_files,
filelist)
context["current_epoch"] = epoch
begin_time = time.time()
result = self._run(context, model_dict)
end_time = time.time()
......@@ -450,6 +475,7 @@ class PSRunner(RunnerBase):
filelist = context["file_list"]
context["file_list"] = shuffle_files(need_shuffle_files,
filelist)
context["current_epoch"] = epoch
begin_time = time.time()
result = self._run(context, model_dict)
end_time = time.time()
......@@ -500,6 +526,7 @@ class CollectiveRunner(RunnerBase):
filelist = context["file_list"]
context["file_list"] = shuffle_files(need_shuffle_files,
filelist)
context["current_epoch"] = epoch
begin_time = time.time()
self._run(context, model_dict)
end_time = time.time()
......@@ -533,6 +560,7 @@ class PslibRunner(RunnerBase):
filelist = context["file_list"]
context["file_list"] = shuffle_files(need_shuffle_files,
filelist)
context["current_epoch"] = epoch
begin_time = time.time()
self._run(context, model_dict)
end_time = time.time()
......
......@@ -38,6 +38,7 @@
| runner_class_path | string | 路径 | 否 | 自定义runner流程实现的地址 |
| terminal_class_path | string | 路径 | 否 | 自定义terminal流程实现的地址 |
| init_pretraining_model_path | string | 路径 | 否 |自定义的startup流程中需要传入这个参数,finetune中需要加载的参数的地址 |
| runner_result_dump_path | string | 路径 | 否 | 将运行过程中每个batch的metrics结果通过json.dump写入文件的路径;若在训练的runner中使用,会自动在路径后加上`_epoch_{epoch}`后缀 |
......
# PaddleRec 基于 Movielens 数据集的全流程示例
## 模型的详细教程可以查阅: [十分钟!全流程!从零搭建推荐系统](https://aistudio.baidu.com/aistudio/projectdetail/559336)
## 本地运行流程
在本地需要安装`PaddleRec`及`PaddlePaddle`,推荐在`Linux` + `python2.7` 环境下执行此demo
本地运行流程与AiStudio流程基本一致,细节略有区别
### 离线训练
```shell
sh train.sh
```
### 离线测试
```shell
sh offline_test.sh
```
### 模拟在线召回
```shell
sh online_recall.sh
```
### 模拟在线排序
```shell
sh online_rank.sh
```
cd data
echo "---> Download movielens 1M data ..."
wget http://files.grouplens.org/datasets/movielens/ml-1m.zip
echo "---> Unzip ml-1m.zip ..."
unzip ml-1m.zip
rm ml-1m.zip
echo "---> Split movielens data ..."
python split.py
mkdir train/
mkdir test/
mkdir -p train/
mkdir -p test/
echo "---> Process train & test data ..."
python process_ml_1m.py process_raw ./ml-1m/train.dat | sort -t $'\t' -k 9 -n > log.data.train
python process_ml_1m.py process_raw ./ml-1m/test.dat | sort -t $'\t' -k 9 -n > log.data.test
python process_ml_1m.py hash log.data.train > ./train/data.txt
......@@ -15,4 +20,6 @@ python process_ml_1m.py hash log.data.test > ./test/data.txt
rm log.data.train
rm log.data.test
cd ../
cd ..
echo "---> Finish data process"
## modify config.yaml to infer mode at first
cd recall
python -m paddlerec.run -m ./config.yaml
cd ../rank
python -m paddlerec.run -m ./config.yaml
cd ..
echo "Recall offline test ..."
echo "Model config at models/demo/movie_recommand/recall/config_offline_test.yaml"
python -m paddlerec.run -m ./recall/config_test_offline.yaml
echo "Rank offline test ..."
echo "Model config at models/demo/movie_recommand/rank/config_offline_test.yaml"
python -m paddlerec.run -m ./rank/config_test_offline.yaml
echo "recall offline test result:"
python parse.py recall_offline recall/infer_result
echo "rank offline test result:"
python parse.py rank_offline rank/infer_result
cd data
echo "Create online test data ..."
python process_ml_1m.py data_rank > online_user/test/data.txt
## modify recall/config.yaml to online_infer mode
cd ../rank
python -m paddlerec.run -m ./config.yaml
cd ../
python parse.py rank_online rank/infer_result
cd ..
echo "Rank online test ..."
echo "Model config at models/demo/movie_recommand/rank/config_online_test.yaml"
python -m paddlerec.run -m ./rank/config_test_online.yaml
python parse.py rank_online ./rank/infer_result
cd data
echo "Create online test data ..."
mkdir online_user/test
python process_ml_1m.py data_recall > online_user/test/data.txt
## modify recall/config.yaml to online_infer mode
cd ../recall
python -m paddlerec.run -m ./config.yaml
cd ../
cd ..
echo "Recall online test ..."
echo "Model config at models/demo/movie_recommand/recall/config_online_test.yaml"
python -m paddlerec.run -m ./recall/config_test_online.yaml
python parse.py recall_online recall/infer_result
......@@ -12,26 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
workspace: "models/demo/movie_recommand"
workspace: "./"
# list of dataset
dataset:
- name: dataset_train # name of dataset to distinguish different datasets
batch_size: 128
type: QueueDataset
data_path: "{workspace}/data/train"
sparse_slots: "logid time userid gender age occupation movieid title genres label"
dense_slots: ""
- name: dataset_infer # name
batch_size: 128
type: DataLoader
data_path: "{workspace}/data/test"
sparse_slots: "logid time userid gender age occupation movieid title genres label"
dense_slots: ""
- name: dataset_online_infer # name
batch_size: 10
type: DataLoader
data_path: "{workspace}/data/online_user/test"
data_path: "{workspace}/data/train"
sparse_slots: "logid time userid gender age occupation movieid title genres label"
dense_slots: ""
......@@ -51,42 +39,17 @@ hyper_parameters:
# train
mode: runner_train
## online or offline infer
#mode: runner_infer
runner:
- name: runner_train
class: train
save_checkpoint_interval: 1 # save model interval of epochs
save_inference_interval: 1 # save inference
save_checkpoint_path: "increment" # save checkpoint path
save_inference_path: "inference" # save inference path
save_checkpoint_path: "increment_rank" # save checkpoint path
epochs: 10
device: cpu
- name: runner_infer
class: infer
print_interval: 10000
init_model_path: "increment/9" # load model path
#train
phase:
- name: phase1
model: "{workspace}/model.py" # user-defined model
model: "{workspace}/rank/model.py" # user-defined model
dataset_name: dataset_train # select dataset by name
thread_num: 12
##offline infer
#phase:
#- name: phase1
# model: "{workspace}/model.py" # user-defined model
# dataset_name: dataset_infer # select dataset by name
# save_path: "./infer_result"
# thread_num: 1
##offline infer
#phase:
#- name: phase1
# model: "{workspace}/model.py" # user-defined model
# dataset_name: dataset_online_infer # select dataset by name
# save_path: "./infer_result"
# thread_num: 1
thread_num: 4
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#workspace: "paddlerec.models.demo.movie_recommand"
workspace: "./"
# list of dataset
dataset:
- name: dataset_infer # name
batch_size: 128
type: DataLoader
data_path: "{workspace}/data/test"
sparse_slots: "logid time userid gender age occupation movieid title genres label"
dense_slots: ""
# hyper parameters of user-defined network
hyper_parameters:
# optimizer config
optimizer:
class: Adam
learning_rate: 0.001
strategy: async
# user-defined <key, value> pairs
sparse_feature_number: 60000000
sparse_feature_dim: 9
dense_input_dim: 13
fc_sizes: [512, 256, 128, 32]
# infer: select the runner_infer runner defined below
mode: runner_infer
## online or offline infer
#mode: runner_infer
runner:
- name: runner_infer
epochs: 1
device: cpu
class: infer
print_interval: 10000
runner_result_dump_path: "{workspace}/rank/infer_result"
init_model_path: "increment_rank/9" # load model path
#offline infer
phase:
- name: phase1
model: "{workspace}/rank/model.py" # user-defined model
dataset_name: dataset_infer # select dataset by name
thread_num: 1
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
workspace: "./"
# list of dataset
dataset:
- name: dataset_online_infer # name
batch_size: 10
type: DataLoader
data_path: "{workspace}/data/online_user/test"
sparse_slots: "logid time userid gender age occupation movieid title genres label"
dense_slots: ""
# hyper parameters of user-defined network
hyper_parameters:
# optimizer config
optimizer:
class: Adam
learning_rate: 0.001
strategy: async
# user-defined <key, value> pairs
sparse_feature_number: 60000000
sparse_feature_dim: 9
dense_input_dim: 13
fc_sizes: [512, 256, 128, 32]
# infer: select the runner_infer runner defined below
mode: runner_infer
runner:
- name: runner_infer
epochs: 1
device: cpu
class: infer
print_interval: 10000
runner_result_dump_path: "{workspace}/rank/infer_result"
init_model_path: "increment_rank/9" # load model path
# online infer
phase:
- name: phase1
model: "{workspace}/rank/model.py" # user-defined model
dataset_name: dataset_online_infer # select dataset by name
thread_num: 1
......@@ -12,26 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
workspace: "models/demo/movie_recommand"
workspace: "./"
# list of dataset
dataset:
- name: dataset_train # name of dataset to distinguish different datasets
batch_size: 128
type: QueueDataset
data_path: "{workspace}/data/train"
sparse_slots: "logid time userid gender age occupation movieid title genres label"
dense_slots: ""
- name: dataset_infer # name
batch_size: 128
type: DataLoader
data_path: "{workspace}/data/test"
sparse_slots: "logid time userid gender age occupation movieid title genres label"
dense_slots: ""
- name: dataset_online_infer # name
batch_size: 128
type: DataLoader
data_path: "{workspace}/data/online_user/test"
data_path: "{workspace}/data/train"
sparse_slots: "logid time userid gender age occupation movieid title genres label"
dense_slots: ""
......@@ -50,43 +38,17 @@ hyper_parameters:
# train
mode: runner_train
## online or offline infer
#mode: runner_infer
runner:
- name: runner_train
class: train
save_checkpoint_interval: 1 # save model interval of epochs
save_inference_interval: 1 # save inference
save_checkpoint_path: "increment" # save checkpoint path
save_inference_path: "inference" # save inference path
save_checkpoint_path: "increment_recall" # save checkpoint path
epochs: 10
device: cpu
- name: runner_infer
class: infer
print_interval: 10000
init_model_path: "increment/9" # load model path
#train
phase:
- name: phase1
model: "{workspace}/model.py" # user-defined model
model: "{workspace}/recall/model.py" # user-defined model
dataset_name: dataset_train # select dataset by name
thread_num: 12
##offline infer
#phase:
#- name: phase1
# model: "{workspace}/model.py" # user-defined model
# dataset_name: dataset_infer # select dataset by name
# save_path: "./infer_result"
# thread_num: 1
##offline infer
#phase:
#- name: phase1
# model: "{workspace}/model.py" # user-defined model
# dataset_name: dataset_online_infer # select dataset by name
# save_path: "./infer_result"
# thread_num: 1
thread_num: 4
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#workspace: "paddlerec.models.demo.movie_recommand"
workspace: "./"
# list of dataset
dataset:
- name: dataset_infer # name
batch_size: 128
type: DataLoader
data_path: "{workspace}/data/test"
sparse_slots: "logid time userid gender age occupation movieid title genres label"
dense_slots: ""
# hyper parameters of user-defined network
hyper_parameters:
# optimizer config
optimizer:
class: Adam
learning_rate: 0.001
strategy: async
# user-defined <key, value> pairs
sparse_feature_number: 60000000
sparse_feature_dim: 9
dense_input_dim: 13
fc_sizes: [512, 256, 128, 32]
# infer: select the runner_infer runner defined below
mode: runner_infer
runner:
- name: runner_infer
epochs: 1
device: cpu
class: infer
print_interval: 100000
runner_result_dump_path: "{workspace}/recall/infer_result"
init_model_path: "increment_recall/9" # load model path
#offline infer
phase:
- name: phase1
model: "{workspace}/recall/model.py" # user-defined model
dataset_name: dataset_infer
thread_num: 1
# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#workspace: "paddlerec.models.demo.movie_recommand"
workspace: ./
# list of dataset
dataset:
- name: dataset_online_infer # name
batch_size: 128
type: DataLoader
data_path: "{workspace}/data/online_user/test"
sparse_slots: "logid time userid gender age occupation movieid title genres label"
dense_slots: ""
# hyper parameters of user-defined network
hyper_parameters:
# optimizer config
optimizer:
class: Adam
learning_rate: 0.001
strategy: async
# user-defined <key, value> pairs
sparse_feature_number: 60000000
sparse_feature_dim: 9
dense_input_dim: 13
fc_sizes: [512, 256, 128, 32]
# infer: select the runner_infer runner defined below
mode: runner_infer
## online or offline infer
#mode: runner_infer
runner:
- name: runner_infer
epochs: 1
device: cpu
class: infer
print_interval: 10000
runner_result_dump_path: "{workspace}/recall/infer_result"
init_model_path: "increment_recall/9" # load model path
# online infer
phase:
- name: phase1
model: "{workspace}/recall/model.py" # user-defined model
dataset_name: dataset_online_infer # select dataset by name
thread_num: 1
cd recall
python -m paddlerec.run -m ./config.yaml &> log &
cd ../rank
python -m paddlerec.run -m ./config.yaml &> log &
cd ..
echo "Recall offline training ..."
echo "Model config at models/demo/movie_recommand/recall/config.yaml"
python -m paddlerec.run -m ./recall/config.yaml
echo "----------------------------------------"
echo "Rank offline training ..."
echo "Model config at models/demo/movie_recommand/rank/config.yaml"
python -m paddlerec.run -m ./rank/config.yaml
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册