diff --git a/ppdet/utils/download.py b/ppdet/utils/download.py index a7a999ebc71416f2a9357418ea6a8bb2ec58a96a..5697e14b6b68bbd295ff35569b60398b582f51ab 100644 --- a/ppdet/utils/download.py +++ b/ppdet/utils/download.py @@ -22,6 +22,8 @@ import shutil import requests import tqdm import hashlib +import binascii +import base64 import tarfile import zipfile @@ -291,9 +293,19 @@ def _download(url, path, md5sum=None): for chunk in req.iter_content(chunk_size=1024): if chunk: f.write(chunk) - shutil.move(tmp_fullname, fullname) - return fullname + # check md5 after download in Content-MD5 in req.headers + content_md5 = req.headers.get('content-md5') + if not content_md5 or _md5check( + tmp_fullname, + binascii.hexlify(base64.b64decode(content_md5.strip('"')))): + shutil.move(tmp_fullname, fullname) + return fullname + else: + logger.warn( + "Download from url imcomplete, try downloading again...") + os.remove(tmp_fullname) + continue def _md5check(fullname, md5sum=None):