diff --git a/PaddleRec/din/README.md b/PaddleRec/din/README.md index 75f21e5d44451ff04d90ed6ceb2f8b01513c5297..5402687f8927dc715a0a8224ff0f190abd7a5ff2 100644 --- a/PaddleRec/din/README.md +++ b/PaddleRec/din/README.md @@ -42,6 +42,9 @@ cd data && sh data_process.sh && cd .. pip install pandas ``` +**Windows系统下请用户自行下载数据进行解压,下载链接为:[reviews_Electronics](http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Electronics_5.json.gz)和[meta_Electronics](http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Electronics.json.gz)。** + + * Step 2: 产生训练集、测试集和config文件 ``` python build_dataset.py diff --git a/PaddleRec/din/train.py b/PaddleRec/din/train.py index 0c865f7d9ee7e4b0a0d505f26c1dc2d61181d6e1..a6ffebd5046f4e8ba7c80e6e69c9bfe25b28ea04 100644 --- a/PaddleRec/din/train.py +++ b/PaddleRec/din/train.py @@ -136,8 +136,8 @@ def train(): if (global_step > 400000 and global_step % PRINT_STEP == 0) or ( global_step <= 400000 and global_step % 50000 == 0): - save_dir = args.model_dir + "/global_step_" + str( - global_step) + save_dir = os.path.join(args.model_dir, "global_step_" + str( + global_step)) feed_var_name = [ "hist_item_seq", "hist_cat_seq", "target_item", "target_cat", "label", "mask", "target_item_seq", diff --git a/PaddleRec/gnn/README.md b/PaddleRec/gnn/README.md index e3a5099ea8cb778dc74793204e6e64e9565a3530..53bd6143655e390142c901ab3eca426840fc3a26 100644 --- a/PaddleRec/gnn/README.md +++ b/PaddleRec/gnn/README.md @@ -41,11 +41,12 @@ SR-GNN模型的介绍可以参阅论文[Session-based Recommendation with Graph * Step 1: 运行如下命令,下载DIGINETICA数据集并进行预处理 ``` -cd data && sh download.sh +cd data && python download.py ``` * Step 2: 产生训练集、测试集和config文件 ``` +mkdir diginetica python preprocess.py --dataset diginetica cd .. 
``` diff --git a/PaddleRec/gnn/data/download.py b/PaddleRec/gnn/data/download.py new file mode 100644 index 0000000000000000000000000000000000000000..69a1ee20b2d634e9eca47c621dce82ac2d98b5f2 --- /dev/null +++ b/PaddleRec/gnn/data/download.py @@ -0,0 +1,48 @@ +import requests +import sys +import time +import os +import shutil + +lasttime = time.time() +FLUSH_INTERVAL = 0.1 + + +def progress(str, end=False): + global lasttime + if end: + str += "\n" + lasttime = 0 + if time.time() - lasttime >= FLUSH_INTERVAL: + sys.stdout.write("\r%s" % str) + lasttime = time.time() + sys.stdout.flush() + + +def _download_file(url, savepath, print_progress): + r = requests.get(url, stream=True) + total_length = r.headers.get('content-length') + + if total_length is None: + with open(savepath, 'wb') as f: + shutil.copyfileobj(r.raw, f) + else: + with open(savepath, 'wb') as f: + dl = 0 + total_length = int(total_length) + starttime = time.time() + if print_progress: + print("Downloading %s" % os.path.basename(savepath)) + for data in r.iter_content(chunk_size=4096): + dl += len(data) + f.write(data) + if print_progress: + done = int(50 * dl / total_length) + progress("[%-50s] %.2f%%" % + ('=' * done, float(100 * dl) / total_length)) + if print_progress: + progress("[%-50s] %.2f%%" % ('=' * 50, 100), end=True) + + +_download_file("https://sr-gnn.bj.bcebos.com/train-item-views.csv", + "./train-item-views.csv", True) diff --git a/PaddleRec/gnn/data/download.sh b/PaddleRec/gnn/data/download.sh deleted file mode 100644 index a6d3ca7081e7e12cf49cb7008089f17b5ccdbae4..0000000000000000000000000000000000000000 --- a/PaddleRec/gnn/data/download.sh +++ /dev/null @@ -1,4 +0,0 @@ -#!/bin/bash - -wget --no-check-certificate https://sr-gnn.bj.bcebos.com/train-item-views.csv -mkdir diginetica diff --git a/PaddleRec/gnn/infer.py b/PaddleRec/gnn/infer.py index 20125f7253186381b0beb6d5b2bef4d43276a767..ded29c60318cabaa96758ea7fff5ee45363080ad 100644 --- a/PaddleRec/gnn/infer.py +++ b/PaddleRec/gnn/infer.py @@ -61,7 
+61,7 @@ def infer(args): infer_program = fluid.default_main_program().clone(for_test=True) for epoch_num in range(args.start_index, args.last_index + 1): - model_path = args.model_path + "epoch_" + str(epoch_num) + model_path = os.path.join(args.model_path, "epoch_" + str(epoch_num)) try: if not os.path.exists(model_path): raise ValueError() diff --git a/PaddleRec/gnn/train.py b/PaddleRec/gnn/train.py index a470e62895f590e912b3fc647ac7a5c6a26fd361..0e1ca1d1d3b837ba96440357b2d1125e29f22048 100644 --- a/PaddleRec/gnn/train.py +++ b/PaddleRec/gnn/train.py @@ -140,7 +140,7 @@ def train(): except fluid.core.EOFException: py_reader.reset() logger.info("epoch loss: %.4lf" % (np.mean(epoch_sum))) - save_dir = args.model_path + "/epoch_" + str(i) + save_dir = os.path.join(args.model_path, "epoch_" + str(i)) fetch_vars = [loss, acc] fluid.io.save_inference_model(save_dir, feed_list, fetch_vars, exe) logger.info("model saved in " + save_dir)