download.py 1.2 KB
Newer Older
W
wangxiao1021 已提交
1
#  -*- coding: utf-8 -*-
W
wangxiao1021 已提交
2
from __future__ import print_function
W
wangxiao1021 已提交
3 4 5
import os
import tarfile
import shutil
W
wangxiao1021 已提交
6 7 8 9 10 11
import sys
import urllib

# URLLIB names whichever module provides ``urlretrieve`` on the running
# interpreter: the top-level ``urllib`` module on Python 2, and the
# ``urllib.request`` submodule on Python 3.
if sys.version_info < (3, 0):
    URLLIB = urllib
else:
    import urllib.request
    URLLIB = urllib.request
W
wangxiao1021 已提交
12 13

def download(src, url):
    """Fetch ``url`` and store it in the local file ``src``.

    Progress is reported on a single console line that is rewritten in
    place (the message starts with a carriage return and has no newline).

    Args:
        src: Local file path the downloaded data is written to. Despite
            its name this is the destination, not the source.
        url: Address of the resource to retrieve.
    """

    def _reporthook(count, chunk_size, total_size):
        """Progress callback handed to ``urlretrieve``.

        Args:
            count: Number of blocks transferred so far.
            chunk_size: Size of one block in bytes.
            total_size: Total size of the resource in bytes; not positive
                when the size is unknown or the resource is empty.
        """
        bytes_so_far = count * chunk_size
        if total_size > 0:
            # count * chunk_size can overshoot the real size on the last
            # (partial) block, so never report more than 100%.
            percent = min(float(bytes_so_far) / float(total_size), 1.0)
            message = '\r>> Downloading... {:.1%}'.format(percent)
        else:
            # urlretrieve passes -1 when no Content-Length is available and
            # 0 for an empty resource; a percentage is meaningless (or a
            # division by zero) in both cases, so show the byte count.
            message = '\r>> Downloading... {} bytes'.format(bytes_so_far)
        print(message, end="")
        # The message has no trailing newline, so flush explicitly or a
        # line-buffered terminal would not show the progress.
        sys.stdout.flush()

    URLLIB.urlretrieve(url, src, reporthook=_reporthook)
W
wangxiao1021 已提交
22 23

# Script body: fetch the data archive next to this file, unpack it there,
# delete the archive, then remove a fixed set of extracted sub-directories.
abs_path = os.path.abspath(__file__)
target_dir = os.path.dirname(abs_path)

download_url = "https://baidu-nlp.bj.bcebos.com/dmtk_data_1.0.0.tar.gz"
# NOTE: the name ``downlaod_path`` is misspelled ("download"); it is kept
# as-is because it is a module-level name that other code may reference.
downlaod_path = os.path.join(target_dir, "dmtk_data_1.0.0.tar.gz")
download(downlaod_path, download_url)

# NOTE(review): extractall() trusts the member paths stored in the archive;
# a malicious archive could write outside target_dir. Acceptable only as
# long as the download URL above is trusted.
# The ``with`` block guarantees the archive handle is closed, even when
# extraction fails.
with tarfile.open(downlaod_path) as tar:
    tar.extractall(target_dir)
os.remove(downlaod_path)

# Remove these extracted data sets again (presumably they are not needed
# here -- confirm). rmtree raises if a directory is missing from the archive.
for _subdir in ('dstc2', 'mrda', 'multi-woz', 'swda', 'udc'):
    shutil.rmtree(os.path.join(target_dir, 'data/' + _subdir + '/'))
print(" done!")