#  -*- coding: utf-8 -*-
from __future__ import print_function
import os
import sys
import urllib
URLLIB=urllib
if sys.version_info >= (3, 0):
    import urllib.request
    URLLIB=urllib.request


def download(src, url):
    """Download ``url`` to the local file ``src`` and print progress.

    Note that, despite its name, ``src`` is the *destination* path on disk;
    ``url`` is the remote (or ``file://``) resource that is fetched.

    Args:
        src: Path of the local file the downloaded content is written to.
        url: Address of the resource to retrieve.

    Any error raised by ``urlretrieve`` propagates to the caller unchanged.
    """

    def _reporthook(count, chunk_size, total_size):
        """Print a one-line, carriage-return-refreshed progress indicator."""
        bytes_so_far = count * chunk_size
        if total_size > 0:
            # The last chunk is usually partial, so count * chunk_size can
            # overshoot the real size; cap the ratio at 100%.
            percent = min(float(bytes_so_far) / float(total_size), 1.0)
            print('\r>> Downloading... {:.1%}'.format(percent), end="")
        else:
            # urlretrieve passes -1 when the size is unknown and 0 for an
            # empty resource; a percentage is meaningless (and dividing by
            # zero would crash), so report the byte count instead.
            print('\r>> Downloading... {} bytes'.format(bytes_so_far), end="")
        # The line has no trailing newline, so flush explicitly to make the
        # progress visible on buffered terminals.
        sys.stdout.flush()

    URLLIB.urlretrieve(url, src, reporthook=_reporthook)


# The dataset is stored in a "data" directory located next to this script;
# create that directory when it is not there yet.
abs_path = os.path.abspath(__file__)
data_dir = os.path.join(os.path.dirname(abs_path), "data")
if not os.path.isdir(data_dir):
    os.makedirs(data_dir)

# Fetch the Quora duplicate-questions TSV into the data directory.
# NOTE: the spelling of `downlaod_path` is kept as-is because it is a
# module-level name.
download_url = "http://qim.fs.quoracdn.net/quora_duplicate_questions.tsv"
downlaod_path = os.path.join(data_dir, "quora_duplicate_questions.tsv")
download(downlaod_path, download_url)
print(" done!")