download.py 1.3 KB
Newer Older
W
wangxiao1021 已提交
1
#  -*- coding: utf-8 -*-
W
wangxiao1021 已提交
2
from __future__ import print_function
W
wangxiao1021 已提交
3 4 5
import os
import tarfile
import shutil
W
wangxiao1021 已提交
6 7 8 9 10 11
import sys
import urllib
URLLIB=urllib
if sys.version_info >= (3, 0):
    import urllib.request
    URLLIB=urllib.request
W
wangxiao1021 已提交
12 13

def download(src, url):
    """Fetch ``url`` and store it at the local path ``src``, printing progress.

    Despite its name, ``src`` is the *destination* file on disk: it is passed
    to ``urlretrieve`` as the filename to write.

    Args:
        src: Local file path the downloaded payload is written to.
        url: Address of the resource to retrieve.

    Raises:
        Whatever ``URLLIB.urlretrieve`` raises on network or file errors.
    """

    def _reporthook(count, chunk_size, total_size):
        # urlretrieve calls this once before the first read and again after
        # every chunk; total_size is -1 when the server reports no length.
        bytes_so_far = count * chunk_size
        if total_size > 0:
            # The last chunk is usually partial, so clamp to 100%.
            percent = min(float(bytes_so_far) / float(total_size), 1.0)
            message = '\r>> Downloading... {:.1%}'.format(percent)
        elif total_size == 0:
            # Empty resource: nothing to divide by, the transfer is complete.
            message = '\r>> Downloading... {:.1%}'.format(1.0)
        else:
            # Unknown size: a percentage is meaningless, show a byte count.
            message = '\r>> Downloading... {} bytes'.format(bytes_so_far)
        print(message, end="")
        # No newline is written, so flush explicitly or a line-buffered
        # terminal would never show the in-place progress updates.
        sys.stdout.flush()

    URLLIB.urlretrieve(url, src, reporthook=_reporthook)
W
wangxiao1021 已提交
22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41

# Download task_data_zh.tgz next to this script, unpack it, move the contents
# of task_data/cmrc2018 into ./data, and delete the leftovers.
abs_path = os.path.abspath(__file__)
download_url = "https://ernie.bj.bcebos.com/task_data_zh.tgz"
target_dir = os.path.dirname(abs_path)
downlaod_path = os.path.join(target_dir, "task_data_zh.tgz")
download(downlaod_path, download_url)

# Close the archive before deleting it: an open handle leaks a file
# descriptor and makes os.remove fail on Windows.
with tarfile.open(downlaod_path) as tar:
    if hasattr(tarfile, "data_filter"):
        # Interpreters with extraction filters: refuse members that would
        # escape target_dir (absolute paths, "..", outside-pointing links).
        tar.extractall(target_dir, filter="data")
    else:
        # NOTE(review): older interpreters extract without any path checks;
        # only run this against a trusted archive.
        tar.extractall(target_dir)
os.remove(downlaod_path)

dst_dir = os.path.join(target_dir, "data")
if not os.path.isdir(dst_dir):
    os.makedirs(dst_dir)

extracted_dir = os.path.join(target_dir, 'task_data', 'cmrc2018')
for entry_name in os.listdir(extracted_dir):
    dst_path = os.path.join(dst_dir, entry_name)
    # Replace leftovers from a previous run; shutil.move into a directory
    # raises when an entry with the same name already exists there.
    if os.path.isdir(dst_path) and not os.path.islink(dst_path):
        shutil.rmtree(dst_path)
    elif os.path.lexists(dst_path):
        os.remove(dst_path)
    shutil.move(os.path.join(extracted_dir, entry_name), dst_path)

shutil.rmtree(os.path.join(target_dir, 'task_data'))
print(" done!")
W
wangxiao1021 已提交
43