# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. import requests import sys import time import os lasttime = time.time() FLUSH_INTERVAL = 0.1 def progress(str, end=False): global lasttime if end: str += "\n" lasttime = 0 if time.time() - lasttime >= FLUSH_INTERVAL: sys.stdout.write("\r%s" % str) lasttime = time.time() sys.stdout.flush() def _download_file(url, savepath, print_progress): r = requests.get(url, stream=True) total_length = r.headers.get('content-length') if total_length is None: with open(savepath, 'wb') as f: shutil.copyfileobj(r.raw, f) else: with open(savepath, 'wb') as f: dl = 0 total_length = int(total_length) starttime = time.time() if print_progress: print("Downloading %s" % os.path.basename(savepath)) for data in r.iter_content(chunk_size=4096): dl += len(data) f.write(data) if print_progress: done = int(50 * dl / total_length) progress("[%-50s] %.2f%%" % ('=' * done, float(100 * dl) / total_length)) if print_progress: progress("[%-50s] %.2f%%" % ('=' * 50, 100), end=True) if sys.argv[1] == "diginetica": _download_file("https://sr-gnn.bj.bcebos.com/train-item-views.csv", "./train-item-views.csv", True) elif sys.argv[1] == "yoochoose": _download_file( "https://paddlerec.bj.bcebos.com/gnn%2Fyoochoose-clicks.dat", "./yoochoose-clicks.dat", True)