diff --git a/PaddleRec/din/README.md b/PaddleRec/din/README.md index 3538ba760ff9b80807a6a56aed4b75400c97ae03..8644a75c8053769049031aab415f86f71f171138 100644 --- a/PaddleRec/din/README.md +++ b/PaddleRec/din/README.md @@ -41,6 +41,9 @@ cd data && sh data_process.sh && cd .. pip install pandas ``` +**Windows系统下请用户自行下载数据进行解压,下载链接为:[reviews_Electronics](http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/reviews_Electronics_5.json.gz)和[meta_Electronics](http://snap.stanford.edu/data/amazon/productGraph/categoryFiles/meta_Electronics.json.gz)。** + + * Step 2: 产生训练集、测试集和config文件 ``` python build_dataset.py diff --git a/PaddleRec/din/train.py b/PaddleRec/din/train.py index 9697961510c786ddd7f9f4fd4560e3f8d62fe4b1..36e99580e8b66c8347b3e791c20c2de514c87dce 100644 --- a/PaddleRec/din/train.py +++ b/PaddleRec/din/train.py @@ -136,8 +136,8 @@ def train(): if (global_step > 400000 and global_step % PRINT_STEP == 0) or ( global_step <= 400000 and global_step % 50000 == 0): - save_dir = args.model_dir + "/global_step_" + str( - global_step) + save_dir = os.path.join(args.model_dir, "/global_step_" + str( + global_step)) feed_var_name = [ "hist_item_seq", "hist_cat_seq", "target_item", "target_cat", "label", "mask", "target_item_seq", diff --git a/PaddleRec/gnn/README.md b/PaddleRec/gnn/README.md index e9a32861e2ecfd4e27e0ce6f2d7e08697ff85b6d..e541c793fed6175613f8a1936980547e847187ca 100644 --- a/PaddleRec/gnn/README.md +++ b/PaddleRec/gnn/README.md @@ -40,11 +40,12 @@ SR-GNN模型的介绍可以参阅论文[Session-based Recommendation with Graph * Step 1: 运行如下命令,下载DIGINETICA数据集并进行预处理 ``` -cd data && sh download.sh +cd data && python download.py ``` * Step 2: 产生训练集、测试集和config文件 ``` +mkdir diginetica python preprocess.py --dataset diginetica cd .. 
"""Download the DIGINETICA session data (train-item-views.csv) for SR-GNN.

Run as a script (``python download.py``) from the ``data`` directory; the
file is written to the current working directory.
"""
import os
import sys
import time

# Timestamp of the most recent progress write; progress() throttles on it.
lasttime = time.time()
# Minimum number of seconds between two progress-line redraws.
FLUSH_INTERVAL = 0.1

DATA_URL = "https://sr-gnn.bj.bcebos.com/train-item-views.csv"
DATA_PATH = "./train-item-views.csv"


def progress(str, end=False):
    """Redraw a single-line progress message on stdout.

    Writes are throttled to at most one per FLUSH_INTERVAL seconds so a
    tight download loop does not flood the terminal.

    Args:
        str: Text to display. The parameter name shadows the builtin but is
            kept unchanged so existing callers keep working.
        end: When true, a newline is appended and the message is written
            unconditionally (the throttle is bypassed).
    """
    global lasttime
    text = str
    if end:
        text += "\n"
        # Resetting the timestamp guarantees the final message is flushed.
        lasttime = 0
    if time.time() - lasttime >= FLUSH_INTERVAL:
        # The carriage return moves the cursor back so the line is overwritten.
        sys.stdout.write("\r%s" % text)
        lasttime = time.time()
        sys.stdout.flush()


def _download_file(url, savepath, print_progress):
    """Stream ``url`` to ``savepath``, optionally drawing a progress bar.

    Args:
        url: Address of the file to fetch.
        savepath: Local path the response body is written to.
        print_progress: When true, print the file name and, if the server
            reports a Content-Length, a 50-character progress bar.

    Raises:
        requests.HTTPError: If the server answers with an error status, so
            that an error page is never saved as if it were the data file.
    """
    # Third-party dependency, imported where it is needed so the rest of the
    # module stays importable without it.
    import requests

    r = requests.get(url, stream=True)
    try:
        r.raise_for_status()

        total_length = r.headers.get('content-length')
        if total_length is not None:
            total_length = int(total_length)
        # No bar when the length is unknown (or zero): there is nothing to
        # compute a ratio against.
        show_bar = bool(print_progress and total_length)

        if print_progress:
            print("Downloading %s" % os.path.basename(savepath))

        dl = 0
        with open(savepath, 'wb') as f:
            # iter_content handles both the known- and unknown-length case,
            # so no separate raw-stream copy is required.
            for data in r.iter_content(chunk_size=4096):
                dl += len(data)
                f.write(data)
                if show_bar:
                    # Clamp: decoded bytes can exceed the advertised length
                    # when the transfer is content-encoded.
                    ratio = min(float(dl) / total_length, 1.0)
                    progress("[%-50s] %.2f%%" %
                             ('=' * int(50 * ratio), 100 * ratio))
        if show_bar:
            progress("[%-50s] %.2f%%" % ('=' * 50, 100), end=True)
    finally:
        # Always release the streamed connection back to the pool.
        r.close()


if __name__ == "__main__":
    _download_file(DATA_URL, DATA_PATH, True)
+60,7 @@ def infer(args): infer_program = fluid.default_main_program().clone(for_test=True) for epoch_num in range(args.start_index, args.last_index + 1): - model_path = args.model_path + "epoch_" + str(epoch_num) + model_path = os.path.join(args.model_path, "epoch_" + str(epoch_num)) try: if not os.path.exists(model_path): raise ValueError() diff --git a/PaddleRec/gnn/train.py b/PaddleRec/gnn/train.py index d07b11bf56d4ad2e1121dc0e15f39dd0b5c0e6c1..32277d33e03b98916d5d487d0ac55e459b238b7e 100644 --- a/PaddleRec/gnn/train.py +++ b/PaddleRec/gnn/train.py @@ -139,7 +139,7 @@ def train(): except fluid.core.EOFException: py_reader.reset() logger.info("epoch loss: %.4lf" % (np.mean(epoch_sum))) - save_dir = args.model_path + "/epoch_" + str(i) + save_dir = os.path.join(args.model_path, "epoch_" + str(i)) fetch_vars = [loss, acc] fluid.io.save_inference_model(save_dir, feed_list, fetch_vars, exe) logger.info("model saved in " + save_dir)