download.py 1.3 KB
Newer Older
W
wangxiao1021 已提交
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30
#  -*- coding: utf-8 -*-

import os
import requests
import tarfile
import shutil
from tqdm import tqdm


def download(src, url):
    file_size = int(requests.head(url).headers['Content-Length'])

    header = {
        'user-agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/'
        '70.0.3538.67 Safari/537.36'
    }
    pbar = tqdm(total=file_size)
    resp = requests.get(url, headers=header, stream=True)

    with open(src, 'ab') as f:
        for chunk in resp.iter_content(chunk_size=1024):
            if chunk:
                f.write(chunk)
                pbar.update(1024)

    pbar.close()
    return file_size


abs_path = os.path.abspath(__file__)
W
wangxiao1021 已提交
31 32
download_url = "https://baidu-nlp.bj.bcebos.com/dmtk_data_1.0.0.tar.gz"
downlaod_path = os.path.join(os.path.dirname(abs_path), "dmtk_data_1.0.0.tar.gz")
W
wangxiao1021 已提交
33 34 35 36 37 38 39
target_dir = os.path.dirname(abs_path)
download(downlaod_path, download_url)

tar = tarfile.open(downlaod_path)
tar.extractall(target_dir)
os.remove(downlaod_path)

W
wangxiao1021 已提交
40 41 42 43 44
shutil.rmtree(os.path.join(target_dir, 'data/dstc2/'))
shutil.rmtree(os.path.join(target_dir, 'data/mrda/'))
shutil.rmtree(os.path.join(target_dir, 'data/multi-woz/'))
shutil.rmtree(os.path.join(target_dir, 'data/swda/'))
shutil.rmtree(os.path.join(target_dir, 'data/udc/'))
W
wangxiao1021 已提交
45