common.py 859 字节
Newer Older
1
import requests
Y
Yu Yang 已提交
2
import hashlib
Y
Yu Yang 已提交
3
import os
Y
Yu Yang 已提交
4
import shutil
Y
Yu Yang 已提交
5

6
__all__ = ['DATA_HOME', 'download', 'md5file']
Y
Yu Yang 已提交
7

8
# Root directory of the local dataset cache, located under the current
# user's home directory. download() stores files in per-module
# subdirectories of this path.
DATA_HOME = os.path.expanduser('~/.cache/paddle/dataset')
Y
Yu Yang 已提交
9 10 11

# Import-time side effect: create the cache root (including any missing
# parent directories) the first time this module is loaded.
if not os.path.exists(DATA_HOME):
    os.makedirs(DATA_HOME)
Y
Yu Yang 已提交
12 13


14 15 16 17 18 19 20
def md5file(fname):
    """Return the hexadecimal MD5 digest of the file at ``fname``.

    The file is opened in binary mode and hashed in 4096-byte chunks, so
    the whole file is never held in memory at once.

    Args:
        fname: Path of the file to hash.

    Returns:
        The MD5 digest as a string of hexadecimal digits.

    Raises:
        IOError / OSError: If the file cannot be opened or read.
    """
    hash_md5 = hashlib.md5()
    # The context manager guarantees the handle is closed even when a read
    # fails part-way through the file.
    with open(fname, "rb") as f:
        # iter() with a sentinel keeps calling f.read until it returns b""
        # (end of file).
        for chunk in iter(lambda: f.read(4096), b""):
            hash_md5.update(chunk)
    return hash_md5.hexdigest()
21 22 23 24 25 26 27 28 29 30 31 32 33 34


def download(url, module_name, md5sum):
    """Fetch ``url`` into the dataset cache and return the local file path.

    The file is stored as ``DATA_HOME/<module_name>/<last URL segment>``.
    If a file already exists at that path and its MD5 digest equals
    ``md5sum``, it is reused and no network request is made; otherwise the
    file is (re)downloaded and overwritten.

    Note that the digest of a freshly downloaded file is not checked here;
    ``md5sum`` is only used to decide whether a cached copy can be reused.

    Args:
        url: Address of the file to download.
        module_name: Name of the subdirectory of ``DATA_HOME`` to store
            the file in.
        md5sum: Expected hexadecimal MD5 digest of the file.

    Returns:
        Path of the local file.
    """
    dirname = os.path.join(DATA_HOME, module_name)
    if not os.path.exists(dirname):
        os.makedirs(dirname)

    filename = os.path.join(dirname, url.split('/')[-1])
    if not (os.path.exists(filename) and md5file(filename) == md5sum):
        r = requests.get(url, stream=True)
        try:
            # r.raw yields bytes, so the destination must be opened in
            # binary mode: text mode fails on Python 3 and rewrites newline
            # bytes on Windows, corrupting the payload.
            with open(filename, 'wb') as f:
                shutil.copyfileobj(r.raw, f)
        finally:
            # Release the streamed connection even if the copy fails.
            r.close()

    return filename