common.py 1.2 KB
Newer Older
Y
Yu Yang 已提交
1
import hashlib
Y
Yu Yang 已提交
2
import os
Y
Yu Yang 已提交
3 4
import shutil
import urllib2
Y
Yu Yang 已提交
5

6
__all__ = ['DATA_HOME', 'download', 'md5file']
Y
Yu Yang 已提交
7

8
# Root directory of the local dataset cache; download() stores every file
# in a sub-directory of this path.
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')

# Create the cache directory at import time.  Attempt the creation first and
# tolerate "already exists" afterwards: checking os.path.exists() *before*
# os.makedirs() lets two processes that import this module concurrently both
# pass the check, and the slower one then crashes with OSError.
try:
    os.makedirs(DATA_HOME)
except OSError:
    # Only swallow the error when the path is really there now; anything
    # else (e.g. permission denied) is propagated unchanged.
    if not os.path.exists(DATA_HOME):
        raise
Y
Yu Yang 已提交
12 13


14
def download(url, package_name, md5):
    """Fetch ``url`` into the local dataset cache and return the file path.

    The file is stored as ``DATA_HOME/<md5>/<last path component of url>``.
    When a file is already present at that location and its MD5 digest equals
    ``md5``, it is returned without any network access.

    Args:
        url: Address of the file to download.
        package_name: Not used by this function; kept in the signature so
            existing callers keep working.
        md5: Expected hexadecimal MD5 digest of the file.  It is also used as
            the name of the cache sub-directory.

    Returns:
        The path of the local file whose digest matches ``md5``.

    Raises:
        RuntimeError: If the digest still does not match after
            ``retry_limit`` download attempts.
    """
    filename = os.path.split(url)[-1]
    assert DATA_HOME is not None
    target_dir = os.path.join(DATA_HOME, md5)
    # Try-then-tolerate instead of exists()-then-makedirs(): the latter
    # crashes when another process creates the directory between the two
    # calls.
    try:
        os.makedirs(target_dir)
    except OSError:
        if not os.path.exists(target_dir):
            raise
    full_path = os.path.join(target_dir, filename)

    def _file_ok():
        # A missing file and a file with the wrong digest (e.g. a truncated
        # earlier download) are both treated as "needs download".
        return os.path.exists(full_path) and md5file(full_path) == md5

    # Bound the number of attempts: if the server keeps returning content
    # that does not match ``md5``, an unbounded loop would re-download
    # forever.
    retry_limit = 3
    attempts = 0
    while not _file_ok():
        if attempts >= retry_limit:
            raise RuntimeError(
                "Cannot download {0} within retry limit {1}".format(
                    url, retry_limit))
        attempts += 1
        response = urllib2.urlopen(url)
        try:
            with open(full_path, mode='wb') as out_file:
                shutil.copyfileobj(fsrc=response, fdst=out_file)
        finally:
            # Release the connection even when the copy fails part-way.
            response.close()
    return full_path
37 38 39 40 41 42 43 44 45


def md5file(fname):
    """Return the hexadecimal MD5 digest of the file at ``fname``.

    The file is opened in binary mode and read in 4096-byte chunks, so the
    whole content is never held in memory at once.

    Args:
        fname: Path of the file to hash.

    Returns:
        The MD5 digest of the file content as a hexadecimal string.
    """
    hash_md5 = hashlib.md5()
    # ``with`` guarantees the handle is closed even if a read raises.
    with open(fname, "rb") as f:
        # iter() with a sentinel stops at the first empty read, i.e. EOF.
        for chunk in iter(lambda: f.read(4096), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()