# -*- coding: utf-8 -*-
#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the 'License');
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an 'AS IS' BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

import argparse
import os
import sys

from paddle.utils.download import get_path_from_url

# Download location of each supported dataset archive, keyed by task name.
URL = {
    'ptb': 'https://dataset.bj.bcebos.com/imikolov%2Fsimple-examples.tgz',
    'yahoo':
    'https://paddlenlp.bj.bcebos.com/datasets/yahoo-answer-100k.tar.gz',
}

# Names of the supported tasks. Derived from URL so that the task list and
# the download table cannot drift apart when a dataset is added or removed.
TASKS = list(URL)


def main(arguments):
    """Fetch the dataset(s) for the requested task(s) into the data directory.

    Parses ``--data_dir`` and ``--task`` from ``arguments``, looks up each
    requested task in the module-level ``URL`` table and hands the URL and
    target directory to ``get_path_from_url``.
    NOTE(review): whether ``get_path_from_url`` also unpacks the archive is
    not visible from this file -- confirm against the paddle documentation.

    Args:
        arguments: Command-line argument strings without the program name,
            e.g. ``sys.argv[1:]``.

    Returns:
        None, so ``sys.exit(main(...))`` exits with status 0 on success.

    Raises:
        SystemExit: Raised by argparse for malformed options, or when
            ``--task`` is empty or names a task that has no entry in ``URL``.
    """
    parser = argparse.ArgumentParser()
    parser.add_argument(
        '-d',
        '--data_dir',
        help='directory to save data to',
        type=str,
        default='data')
    parser.add_argument(
        '-t',
        '--task',
        help='tasks to download data for as a comma separated string',
        type=str,
        default='ptb')
    args = parser.parse_args(arguments)

    # The help text promises a comma separated list, so split the value
    # instead of using the raw string as a single dictionary key. Blank
    # entries (e.g. a trailing comma) are ignored, and dict.fromkeys drops
    # repeated names while keeping the order given on the command line.
    requested = list(
        dict.fromkeys(
            name.strip() for name in args.task.split(',') if name.strip()))
    if not requested:
        parser.error('no task given; choose from: ' + ', '.join(sorted(URL)))

    # Validate everything before starting any download so that a typo is
    # reported as a usage error up front rather than as a KeyError midway.
    unknown = [name for name in requested if name not in URL]
    if unknown:
        parser.error('unknown task(s): {}; choose from: {}'.format(
            ', '.join(unknown), ', '.join(sorted(URL))))

    for name in requested:
        get_path_from_url(URL[name], args.data_dir)


if __name__ == '__main__':
    # Run only when executed as a script: pass the command-line arguments
    # (without the program name) to main() and use its return value as the
    # process exit status.
    exit_status = main(sys.argv[1:])
    sys.exit(exit_status)