# download.py (1.4 KB) -- committed by wangxiao1021
#  -*- coding: utf-8 -*-

import os
import requests
import tarfile
import shutil
from tqdm import tqdm


def download(src, url):
    """Download ``url`` into the local file ``src`` while showing a progress bar.

    Args:
        src: Path of the local file to write. Despite its name this is the
            destination of the download; an existing file is overwritten.
        url: HTTP(S) URL to fetch.

    Returns:
        int: The size in bytes announced by the server's ``Content-Length``
        header, or the number of bytes actually written when the server
        does not send that header.

    Raises:
        requests.HTTPError: If the server answers with an error status.
        requests.RequestException: On connection problems or timeouts.
    """
    header = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
        '70.0.3538.67 Safari/537.36'
    }
    # A timeout keeps a stalled connection from hanging the script forever.
    resp = requests.get(url, headers=header, stream=True, timeout=30)
    try:
        # Fail early instead of saving an HTML error page as the archive.
        resp.raise_for_status()

        # The size is read from the GET response itself, so no separate HEAD
        # request is needed; the header is optional, so tolerate its absence.
        content_length = resp.headers.get('Content-Length')
        file_size = int(content_length) if content_length is not None else None

        written = 0
        # 'wb' rather than 'ab': no Range header is sent, so the body is always
        # the complete file and appending it to a leftover would corrupt it.
        with open(src, 'wb') as f, \
                tqdm(total=file_size, unit='B', unit_scale=True) as pbar:
            for chunk in resp.iter_content(chunk_size=1024):
                if chunk:
                    f.write(chunk)
                    # Advance by the real chunk length; the last chunk is
                    # usually shorter than the requested chunk size.
                    written += len(chunk)
                    pbar.update(len(chunk))
    finally:
        # Release the streamed connection even if writing failed.
        resp.close()

    return file_size if file_size is not None else written


# Fetch the task data archive, unpack it next to this script, and move the
# cmrc2018 files into data/mrc. All paths are derived from the location of
# this file, not from the current working directory.
abs_path = os.path.abspath(__file__)
download_url = "https://ernie.bj.bcebos.com/task_data_zh.tgz"
downlaod_path = os.path.join(os.path.dirname(abs_path), "task_data_zh.tgz")
target_dir = os.path.dirname(abs_path)

try:
    download(downlaod_path, download_url)

    # Use a context manager so the archive handle is closed before the file
    # is deleted; removing a file that is still open can fail on Windows.
    with tarfile.open(downlaod_path) as tar:
        if hasattr(tarfile, 'data_filter'):
            # The archive comes from the network: where the interpreter
            # supports extraction filters, refuse members that would be
            # written outside target_dir.
            tar.extractall(target_dir, filter='data')
        else:
            tar.extractall(target_dir)
finally:
    # Delete the archive even when the download or extraction failed, so a
    # partial file is never left behind for the next run.
    if os.path.exists(downlaod_path):
        os.remove(downlaod_path)

dst_dir = os.path.join(os.path.dirname(abs_path), "data/mrc")
if not os.path.isdir(dst_dir):
    os.makedirs(dst_dir)

# Move every extracted cmrc2018 entry into the destination directory.
src_dir = os.path.join(target_dir, 'task_data', 'cmrc2018')
for entry in os.listdir(src_dir):
    shutil.move(os.path.join(src_dir, entry), dst_dir)

# Drop the rest of the extracted tree.
shutil.rmtree(os.path.join(target_dir, 'task_data'))