From fe13861f81392ea9152c6b380966d6f5dce03249 Mon Sep 17 00:00:00 2001 From: malin10 Date: Wed, 23 Sep 2020 12:12:09 +0800 Subject: [PATCH] test=develop, update readme --- models/recall/gru4rec/README.md | 90 ++++++++++++++------- models/recall/gru4rec/config.yaml | 8 +- models/recall/gru4rec/data_prepare.sh | 2 +- models/recall/gru4rec/infer.py | 109 -------------------------- 4 files changed, 67 insertions(+), 142 deletions(-) delete mode 100644 models/recall/gru4rec/infer.py diff --git a/models/recall/gru4rec/README.md b/models/recall/gru4rec/README.md index 0a3d6f5f..c9696b00 100644 --- a/models/recall/gru4rec/README.md +++ b/models/recall/gru4rec/README.md @@ -65,7 +65,7 @@ python download.py 3. 训练集、测试集划分。原始数据集里最新日期七天内的作为训练集,更早之前的数据作为测试集。 ``` python preprocess.py -python convert_data.py +python convert_format.py ``` 这一步之后,会在data/目录下得到两个文件,rsc15_train_tr_paddle.txt为原始训练文件,rsc15_test_paddle.txt为原始测试文件。格式如下所示: ``` @@ -80,7 +80,7 @@ python convert_data.py 214821275 214821275 214821371 214821371 214821371 214717089 214563337 214706462 214717436 214743335 214826837 214819762 214717867 21471786 ``` -- Step3: 数据整理。将训练文件统一放在data/all_train目录下,测试文件统一放在data/all_test目录下。 +- Step3: 生成字典并整理数据路径。这一步会根据训练和测试文件生成字典和对应的paddle输入文件,并将训练文件统一放在data/all_train目录下,测试文件统一放在data/all_test目录下。 ``` mkdir raw_train_data && mkdir raw_test_data mv rsc15_train_tr_paddle.txt raw_train_data/ && mv rsc15_test_paddle.txt raw_test_data/ @@ -108,32 +108,38 @@ os : windows/linux/macos ### 单机训练 +在config.yaml文件中设置好设备,epochs等。 ``` -mode: [cpu_train_runner, cpu_infer_runner] - runner: - name: cpu_train_runner class: train - device: cpu + device: cpu # gpu epochs: 10 - save_checkpoint_interval: 2 - save_inference_interval: 4 + save_checkpoint_interval: 1 + save_inference_interval: 1 save_checkpoint_path: "increment_gru4rec" save_inference_path: "inference_gru4rec" + save_inference_feed_varnames: ["src_wordseq", "dst_wordseq"] # feed vars of save inference + save_inference_fetch_varnames: ["mean_0.tmp_0", "top_k_0.tmp_0"] print_interval: 10 - phase: train + phases: [train] + +``` + +### 单机预测 + +在config.yaml文件中设置好设备,epochs等。 +``` - name: cpu_infer_runner class: infer init_model_path: "increment_gru4rec" - device: cpu - phase: infer + device: cpu # gpu + phases: [infer] ``` -### 单机预测 - ### 运行 ``` -python -m paddlerec.run -m paddlerec.models.recall.w2v +python -m paddlerec.run -m paddlerec.models.recall.gru4rec ``` ### 结果展示 @@ -143,28 +149,54 @@ python -m paddlerec.run -m paddlerec.models.recall.w2v ``` Running SingleStartup. Running SingleRunner. -batch: 1, acc: [0.03125] -batch: 2, acc: [0.0625] -batch: 3, acc: [0.] +2020-09-22 03:31:18,167-INFO: [Train], epoch: 0, batch: 10, time_each_interval: 4.34s, RecallCnt: [1669.], cost: [8.366313], InsCnt: [16228.], Acc(Recall@20): [0.10284693] +2020-09-22 03:31:21,982-INFO: [Train], epoch: 0, batch: 20, time_each_interval: 3.82s, RecallCnt: [3168.], cost: [8.170701], InsCnt: [31943.], Acc(Recall@20): [0.09917666] +2020-09-22 03:31:25,797-INFO: [Train], epoch: 0, batch: 30, time_each_interval: 3.81s, RecallCnt: [4855.], cost: [8.017181], InsCnt: [47892.], Acc(Recall@20): [0.10137393] ... -epoch 0 done, use time: 0.0605320930481, global metrics: acc=[0.] +epoch 0 done, use time: 6003.78719687, global metrics: cost=[4.4394927], InsCnt=23622448.0 RecallCnt=14547467.0 Acc(Recall@20)=0.6158323218660487 +2020-09-22 05:11:17,761-INFO: save epoch_id:0 model into: "inference_gru4rec/0" ... -epoch 19 done, use time: 0.33447098732, global metrics: acc=[0.] +epoch 9 done, use time: 6009.97707605, global metrics: cost=[4.069373], InsCnt=236237470.0 RecallCnt=162838200.0 Acc(Recall@20)=0.6892988086157644 +2020-09-22 20:17:11,358-INFO: save epoch_id:9 model into: "inference_gru4rec/9" +PaddleRec Finish ``` 样例数据预测结果展示: ``` -user:0, top K videos:[40, 31, 4, 33, 93] -user:1, top K videos:[35, 57, 58, 40, 17] -user:2, top K videos:[35, 17, 88, 40, 9] -user:3, top K videos:[73, 35, 39, 58, 38] -user:4, top K videos:[40, 31, 57, 4, 73] -user:5, top K videos:[38, 9, 7, 88, 22] -user:6, top K videos:[35, 73, 14, 58, 28] -user:7, top K videos:[35, 73, 58, 38, 56] -user:8, top K videos:[38, 40, 9, 35, 99] -user:9, top K videos:[88, 73, 9, 35, 28] -user:10, top K videos:[35, 52, 28, 54, 73] +Running SingleInferStartup. +Running SingleInferRunner. +load persistables from increment_gru4rec/9 +2020-09-23 03:46:21,081-INFO: [Infer] batch: 20, time_each_interval: 3.68s, RecallCnt: [24875.], InsCnt: [35581.], Acc(Recall@20): [0.6991091] +Infer infer of epoch 9 done, use time: 5.25408315659, global metrics: InsCnt=52551.0 RecallCnt=36720.0 Acc(Recall@20)=0.698749785922247 +... +Infer infer of epoch 0 done, use time: 5.20699501038, global metrics: InsCnt=52551.0 RecallCnt=33664.0 Acc(Recall@20)=0.6405967536298073 +PaddleRec Finish +``` + +## 论文复现 + +用原论文的完整数据复现论文效果需要在config.yaml修改超参: +- batch_size: 修改config.yaml中dataset_train数据集的batch_size为500。 +- epochs: 修改config.yaml中runner的epochs为10。 + +使用gpu训练10轮 测试结果为 + +epoch | 测试recall@20 | 速度(s) +-- | -- | -- +1 | 0.6406 | 6003 +2 | 0.6727 | 6007 +3 | 0.6831 | 6108 +4 | 0.6885 | 6025 +5 | 0.6913 | 6019 +6 | 0.6931 | 6011 +7 | 0.6952 | 6015 +8 | 0.6968 | 6076 +9 | 0.6972 | 6076 +10 | 0.6987| 6009 + +修改后运行方案:修改config.yaml中的'workspace'为config.yaml的目录位置,执行 +``` +python -m paddlerec.run -m /home/your/dir/config.yaml #调试模式 直接指定本地config的绝对路径 ``` ## 进阶使用 diff --git a/models/recall/gru4rec/config.yaml b/models/recall/gru4rec/config.yaml index c07e9517..67ac5f76 100644 --- a/models/recall/gru4rec/config.yaml +++ b/models/recall/gru4rec/config.yaml @@ -41,7 +41,7 @@ hyper_parameters: strategy: async #use infer_runner mode and modify 'phase' below if infer -mode: [cpu_train_runner] +mode: [cpu_train_runner, cpu_infer_runner] #mode: infer_runner runner: @@ -53,13 +53,15 @@ runner: save_inference_interval: 1 save_checkpoint_path: "increment_gru4rec" save_inference_path: "inference_gru4rec" + save_inference_feed_varnames: ["src_wordseq", "dst_wordseq"] # feed vars of save inference + save_inference_fetch_varnames: ["mean_0.tmp_0", "top_k_0.tmp_0"] print_interval: 10 - phase: train + phases: [train] - name: cpu_infer_runner class: infer init_model_path: "increment_gru4rec" device: cpu - phase: infer + phases: [infer] phase: - name: train diff --git a/models/recall/gru4rec/data_prepare.sh b/models/recall/gru4rec/data_prepare.sh index 6dea52f3..f3dc2b3f 100644 --- a/models/recall/gru4rec/data_prepare.sh +++ b/models/recall/gru4rec/data_prepare.sh @@ -21,7 +21,7 @@ cd data && python download.py python preprocess.py echo "begin to convert data (binary -> txt)" -python convert_data.py +python convert_format.py mkdir raw_train_data && mkdir raw_test_data mv rsc15_train_tr_paddle.txt raw_train_data/ && mv rsc15_test_paddle.txt raw_test_data/ diff --git a/models/recall/gru4rec/infer.py b/models/recall/gru4rec/infer.py deleted file mode 100644 index 7a9bef18..00000000 --- a/models/recall/gru4rec/infer.py +++ /dev/null @@ -1,109 +0,0 @@ -# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. -# -# Licensed under the Apache License, Version 2.0 (the "License"); -# you may not use this file except in compliance with the License. -# You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. - -import argparse -import sys -import time -import math -import unittest -import contextlib -import numpy as np -import six -import paddle.fluid as fluid -import paddle - -import utils - - -def parse_args(): - parser = argparse.ArgumentParser("gru4rec benchmark.") - parser.add_argument( - '--test_dir', type=str, default='test_data', help='test file address') - parser.add_argument( - '--start_index', type=int, default='1', help='start index') - parser.add_argument( - '--last_index', type=int, default='10', help='end index') - parser.add_argument( - '--model_dir', type=str, default='model_recall20', help='model dir') - parser.add_argument( - '--use_cuda', type=int, default='0', help='whether use cuda') - parser.add_argument( - '--batch_size', type=int, default='5', help='batch_size') - parser.add_argument( - '--vocab_path', type=str, default='vocab.txt', help='vocab file') - args = parser.parse_args() - return args - - -def infer(test_reader, use_cuda, model_path): - """ inference function """ - place = fluid.CUDAPlace(0) if use_cuda else fluid.CPUPlace() - exe = fluid.Executor(place) - - with fluid.scope_guard(fluid.Scope()): - infer_program, feed_target_names, fetch_vars = fluid.io.load_inference_model( - model_path, exe) - accum_num_recall = 0.0 - accum_num_sum = 0.0 - t0 = time.time() - step_id = 0 - for data in test_reader(): - step_id += 1 - src_wordseq = utils.to_lodtensor([dat[0] for dat in data], place) - label_data = [dat[1] for dat in data] - dst_wordseq = utils.to_lodtensor(label_data, place) - para = exe.run( - infer_program, - feed={"src_wordseq": src_wordseq, - "dst_wordseq": dst_wordseq}, - fetch_list=fetch_vars, - return_numpy=False) - - acc_ = para[1]._get_float_element(0) - data_length = len( - np.concatenate( - label_data, axis=0).astype("int64")) - accum_num_sum += (data_length) - accum_num_recall += (data_length * acc_) - if step_id % 1 == 0: - print("step:%d recall@20:%.4f" % - (step_id, accum_num_recall / accum_num_sum)) - t1 = time.time() - print("model:%s recall@20:%.3f time_cost(s):%.2f" % - (model_path, accum_num_recall / accum_num_sum, t1 - t0)) - - -if __name__ == "__main__": - utils.check_version() - args = parse_args() - start_index = args.start_index - last_index = args.last_index - test_dir = args.test_dir - model_dir = args.model_dir - batch_size = args.batch_size - vocab_path = args.vocab_path - use_cuda = True if args.use_cuda else False - print("start index: ", start_index, " last_index:", last_index) - vocab_size, test_reader = utils.prepare_data( - test_dir, - vocab_path, - batch_size=batch_size, - buffer_size=1000, - word_freq_threshold=0, - is_train=False) - - for epoch in range(start_index, last_index + 1): - epoch_path = model_dir + "/epoch_" + str(epoch) - infer( - test_reader=test_reader, use_cuda=use_cuda, model_path=epoch_path) -- GitLab