def download(url, module_name, md5sum):
    """Download ``url`` into the dataset cache and return the local path.

    The file is stored as ``DATA_HOME/<module_name>/<last segment of url>``.
    If a file already exists at that path and ``md5file`` of it equals
    ``md5sum``, nothing is fetched.  Otherwise the URL is streamed to disk;
    when the server reports a positive ``Content-Length`` a 50-column
    progress bar is redrawn on stdout as chunks arrive.

    :param url: address of the file to fetch.
    :param module_name: sub-directory of ``DATA_HOME`` that holds the file.
    :param md5sum: checksum compared against ``md5file(filename)`` to decide
        whether the cached copy can be reused.
    :return: path of the local file.
    """
    dirname = os.path.join(DATA_HOME, module_name)
    if not os.path.exists(dirname):
        os.makedirs(dirname)

    filename = os.path.join(dirname, url.split('/')[-1])
    if os.path.exists(filename) and md5file(filename) == md5sum:
        # Cached copy is intact: no request and no "downloading" message.
        return filename

    # Parenthesised single-argument form prints the same text on Python 2
    # (print statement) and Python 3 (print function).
    print("downloading %s" % url)
    r = requests.get(url, stream=True)
    try:
        total_length = r.headers.get('content-length')
        total_length = int(total_length) if total_length is not None else 0

        # Binary mode: the payload is raw bytes, and text mode would mangle
        # it on platforms that translate line endings.
        with open(filename, 'wb') as f:
            if total_length <= 0:
                # Size unknown (or reported as zero): copy without a progress
                # bar, which also avoids dividing by zero below.
                shutil.copyfileobj(r.raw, f)
            else:
                bar_width = 50
                dl = 0
                for data in r.iter_content(chunk_size=4096):
                    dl += len(data)
                    f.write(data)
                    # iter_content yields decoded bytes, so dl may exceed the
                    # on-the-wire Content-Length; clamp to keep the bar at a
                    # fixed width.
                    done = min(bar_width, bar_width * dl // total_length)
                    sys.stdout.write("\r[%s%s]" % ('=' * done,
                                                   ' ' * (bar_width - done)))
                    sys.stdout.flush()
                # Finish the bar's line so later output starts on a new one.
                sys.stdout.write("\n")
                sys.stdout.flush()
    finally:
        # Release the streamed connection even if writing fails part-way.
        r.close()

    # NOTE(review): the freshly downloaded file is not re-checked against
    # md5sum and the HTTP status is not inspected; confirm whether callers
    # rely on that before tightening it.
    return filename