diff --git a/dataset/coco/download.sh b/dataset/coco/download.sh
deleted file mode 100644
index 6f262ccebb635e993b35349890a793430d9ad597..0000000000000000000000000000000000000000
--- a/dataset/coco/download.sh
+++ /dev/null
@@ -1,20 +0,0 @@
-DIR="$( cd "$(dirname "$0")" ; pwd -P )"
-cd "$DIR"
-
-# Download the data.
-echo "Downloading..."
-wget http://images.cocodataset.org/zips/train2014.zip
-wget http://images.cocodataset.org/zips/val2014.zip
-wget http://images.cocodataset.org/zips/train2017.zip
-wget http://images.cocodataset.org/zips/val2017.zip
-wget http://images.cocodataset.org/annotations/annotations_trainval2014.zip
-wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip
-# Extract the data.
-echo "Extracting..."
-unzip train2014.zip
-unzip val2014.zip
-unzip train2017.zip
-unzip val2017.zip
-unzip annotations_trainval2014.zip
-unzip annotations_trainval2017.zip
-
diff --git a/dataset/coco/download_coco.py b/dataset/coco/download_coco.py
new file mode 100644
index 0000000000000000000000000000000000000000..2b4f7e764e17296ccd8905478bf3ccb3818b909f
--- /dev/null
+++ b/dataset/coco/download_coco.py
@@ -0,0 +1,25 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import sys
+import os.path as osp
+import logging
+
+from ppdet.utils.download import download_dataset
+
+logging.basicConfig(level=logging.INFO)
+# Download next to the invoked script: the directory of sys.argv[0].
+script_dir = osp.dirname(osp.realpath(sys.argv[0]))
+download_dataset(script_dir, 'coco')
diff --git a/dataset/fruit/download.sh b/dataset/fruit/download.sh
deleted file mode 100644
index 2ea8d72c2a440213ab7e59441b795831c5d67991..0000000000000000000000000000000000000000
--- a/dataset/fruit/download.sh
+++ /dev/null
@@ -1,10 +0,0 @@
-DIR="$( cd "$(dirname "$0")" ; pwd -P )"
-cd "$DIR"
-
-# Download the data.
-echo "Downloading..."
-wget https://dataset.bj.bcebos.com/PaddleDetection_demo/fruit-detection.tar
-# Extract the data.
-echo "Extracting..."
-tar xvf fruit-detection.tar
-rm -rf fruit-detection.tar
diff --git a/dataset/fruit/download_fruit.py b/dataset/fruit/download_fruit.py
new file mode 100644
index 0000000000000000000000000000000000000000..5cce18895af3eeb81c4e49f4897cc591b2f40f9b
--- /dev/null
+++ b/dataset/fruit/download_fruit.py
@@ -0,0 +1,25 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import sys
+import os.path as osp
+import logging
+
+from ppdet.utils.download import download_dataset
+
+logging.basicConfig(level=logging.INFO)
+# Download next to the invoked script: the directory of sys.argv[0].
+script_dir = osp.dirname(osp.realpath(sys.argv[0]))
+download_dataset(script_dir, 'fruit')
diff --git a/dataset/voc/download.sh b/dataset/voc/download.sh
deleted file mode 100755
index 2c7341a4114013733cb5d002e87d0260c90711b7..0000000000000000000000000000000000000000
--- a/dataset/voc/download.sh
+++ /dev/null
@@ -1,16 +0,0 @@
-DIR="$( cd "$(dirname "$0")" ; pwd -P )"
-cd "$DIR"
-
-# Download the data.
-echo "Downloading..."
-wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar
-wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar
-wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar
-# Extract the data.
-echo "Extracting..."
-tar -xf VOCtrainval_11-May-2012.tar
-tar -xf VOCtrainval_06-Nov-2007.tar
-tar -xf VOCtest_06-Nov-2007.tar
-
-echo "Creating data lists..."
-python -c 'from ppdet.utils.voc_utils import merge_and_create_list; merge_and_create_list("VOCdevkit", ["2007", "2012"], "VOCdevkit/VOC_all")'
diff --git a/dataset/voc/download_voc.py b/dataset/voc/download_voc.py
new file mode 100644
index 0000000000000000000000000000000000000000..e7f32657f1697bd82f0f7dfbb52a3d1cb987c4bd
--- /dev/null
+++ b/dataset/voc/download_voc.py
@@ -0,0 +1,25 @@
+# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+import sys
+import os.path as osp
+import logging
+
+from ppdet.utils.download import download_dataset
+
+logging.basicConfig(level=logging.INFO)
+# Download next to the invoked script: the directory of sys.argv[0].
+script_dir = osp.dirname(osp.realpath(sys.argv[0]))
+download_dataset(script_dir, 'voc')
diff --git a/docs/INSTALL.md b/docs/INSTALL.md
index 19759bf3d977ad5481de256ec048d47b03e842c2..3f99b19b67616cc0acef02def3d6ad40e2bae71b 100644
--- a/docs/INSTALL.md
+++ b/docs/INSTALL.md
@@ -110,15 +110,15 @@ On the other hand, to download the datasets, run the following commands:
 - COCO
 
 ```
-cd dataset/coco
-./download.sh
+export PYTHONPATH=$PYTHONPATH:.
+python dataset/coco/download_coco.py
 ```
 
 - Pascal VOC
 
 ```
-cd dataset/voc
-./download.sh
+export PYTHONPATH=$PYTHONPATH:.
+python dataset/voc/download_voc.py
 ```
 
 **Download datasets automatically:**
diff --git a/docs/INSTALL_cn.md b/docs/INSTALL_cn.md
index 1003bb850c82d8e899d109eda337a6958aa74592..6fe1c484e01581d951fca29534c077a9d7c72cb3 100644
--- a/docs/INSTALL_cn.md
+++ b/docs/INSTALL_cn.md
@@ -109,15 +109,15 @@ ln -sf <path/to/voc> <path/to/paddle_detection>/dataset/voc
 - COCO
 
 ```
-cd dataset/coco
-./download.sh
+export PYTHONPATH=$PYTHONPATH:.
+python dataset/coco/download_coco.py
 ```
 
 - Pascal VOC
 
 ```
-cd dataset/voc
-./download.sh
+export PYTHONPATH=$PYTHONPATH:.
+python dataset/voc/download_voc.py
 ```
 
 **自动下载数据集：**
diff --git a/docs/QUICK_STARTED.md b/docs/QUICK_STARTED.md
index d93f76b2d00ba688a57227d33e0ccf854c0d56e1..5b687b35e72718be939b83299a5e9a799d6f5e79 100644
--- a/docs/QUICK_STARTED.md
+++ b/docs/QUICK_STARTED.md
@@ -6,11 +6,11 @@ This tutorial fine-tunes a tiny dataset by pretrained detection model for users
 
 ## Data Preparation
 
-Dataset refers to [Kaggle](https://www.kaggle.com/mbkinaci/fruit-images-for-object-detection), which contains 240 images in train dataset and 60 images in test dataset. Data categories are apple, orange and banana. Download [here](https://dataset.bj.bcebos.com/PaddleDetection_demo/fruit-detection.tar) and uncompress the dataset after download, script for data preparation is located at [download.sh](../dataset/fruit/download.sh). Command is as follows:
+The dataset comes from [Kaggle](https://www.kaggle.com/mbkinaci/fruit-images-for-object-detection) and contains 240 images in the train dataset and 60 images in the test dataset. The data categories are apple, orange and banana. Download it [here](https://dataset.bj.bcebos.com/PaddleDetection_demo/fruit-detection.tar) and uncompress it after the download; the data preparation script is located at [download_fruit.py](../dataset/fruit/download_fruit.py). The command is as follows:
 
 ```bash
-cd dataset/fruit
-sh download.sh
+export PYTHONPATH=$PYTHONPATH:.
+python dataset/fruit/download_fruit.py
 ```
 
 - **Note: before started, run the following command and specifiy the GPU**
diff --git a/docs/QUICK_STARTED_cn.md b/docs/QUICK_STARTED_cn.md
index fe15870b256eff07e7fe64e0b2f8ec3855382d50..78c019a6bfe7bcb82ca5a42ea9d65cf26f397aeb 100644
--- a/docs/QUICK_STARTED_cn.md
+++ b/docs/QUICK_STARTED_cn.md
@@ -6,11 +6,11 @@
 
 ## 数据准备
 
-数据集参考[Kaggle数据集](https://www.kaggle.com/mbkinaci/fruit-images-for-object-detection)，其中训练数据集240张图片，测试数据集60张图片，数据类别为3类：苹果，橘子，香蕉。[下载链接](https://dataset.bj.bcebos.com/PaddleDetection_demo/fruit-detection.tar)。数据下载后分别解压即可, 数据准备脚本位于[download.sh](../dataset/fruit/download.sh)。下载数据方式如下：
+数据集参考[Kaggle数据集](https://www.kaggle.com/mbkinaci/fruit-images-for-object-detection)，其中训练数据集240张图片，测试数据集60张图片，数据类别为3类：苹果，橘子，香蕉。[下载链接](https://dataset.bj.bcebos.com/PaddleDetection_demo/fruit-detection.tar)。数据下载后分别解压即可, 数据准备脚本位于[download_fruit.py](../dataset/fruit/download_fruit.py)。下载数据方式如下：
 
 ```bash
-cd dataset/fruit
-sh download.sh
+export PYTHONPATH=$PYTHONPATH:.
+python dataset/fruit/download_fruit.py
 ```
 
 - **注：在开始前，运行如下命令并指定GPU**
diff --git a/ppdet/utils/download.py b/ppdet/utils/download.py
index b40e1404d82e3f8013ac43c843daba98c2dd74f9..05f62749192cf0546aabb181c5397e2551806fb2 100644
--- a/ppdet/utils/download.py
+++ b/ppdet/utils/download.py
@@ -35,7 +35,7 @@ __all__ = ['get_weights_path', 'get_dataset_path']
 WEIGHTS_HOME = osp.expanduser("~/.cache/paddle/weights")
 DATASET_HOME = osp.expanduser("~/.cache/paddle/dataset")
 
-# dict of {dataset_name: (downalod_info, sub_dirs)}
+# dict of {dataset_name: (download_info, sub_dirs)}
 # download info: (url, md5sum)
 DATASETS = {
     'coco': ([
@@ -60,6 +60,11 @@ DATASETS = {
             'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar',
             'b6e924de25625d8de591ea690078ad9f', ),
     ], ["VOCdevkit/VOC_all"]),
+    'fruit': ([
+        (
+            'https://dataset.bj.bcebos.com/PaddleDetection_demo/fruit-detection.tar',
+            '374554a7633b1b68d6a5fbb7c061b8ba', ),
+    ], ["fruit-detection"]),
 }
 
 DOWNLOAD_RETRY_LIMIT = 3
@@ -103,25 +108,7 @@ def get_dataset_path(path, annotation, image_dir):
 
             # voc should merge dir and create list after download
             if name == 'voc':
-                logger.info("Download voc dataset successed, merge "
-                            "VOC2007 and VOC2012 to VOC_all...")
-                output_dir = osp.join(data_dir, dataset[1][0])
-                devkit_dir = "/".join(output_dir.split('/')[:-1])
-                years = ['2007', '2012']
-                # merge dir in output_tmp_dir at first, move to 
-                # output_dir after merge sucessed.
-                output_tmp_dir = osp.join(data_dir, 'tmp')
-                if osp.isdir(output_tmp_dir):
-                    shutil.rmtree(output_tmp_dir)
-                # NOTE(dengkaipeng): since using auto download VOC
-                # dataset, VOC default label list should be used, 
-                # do not generate label_list.txt here. For default
-                # label, see ../data/source/voc_loader.py
-                merge_and_create_list(devkit_dir, years, output_tmp_dir)
-                shutil.move(output_tmp_dir, output_dir)
-                # remove source directory VOC2007 and VOC2012
-                shutil.rmtree(osp.join(devkit_dir, "VOC2007"))
-                shutil.rmtree(osp.join(devkit_dir, "VOC2012"))
+                _merge_voc_dir(data_dir, dataset[1][0])
             return data_dir
 
     # not match any dataset in DATASETS
@@ -130,6 +117,28 @@ def get_dataset_path(path, annotation, image_dir):
                      "'voc' and 'coco' currently".format(path, osp.split(path)[-1]))
 
 
+def _merge_voc_dir(data_dir, output_subdir):
+    """Merge VOC2007/VOC2012 of the devkit into data_dir/output_subdir."""
+    logger.info("Download voc dataset successed, merge "
+                "VOC2007 and VOC2012 to VOC_all...")
+    output_dir = osp.join(data_dir, output_subdir)
+    # osp.dirname is separator-safe, unlike splitting on '/' by hand.
+    devkit_dir = osp.dirname(output_dir)
+    # Merge into a temporary directory first and move it to output_dir
+    # only after the merge succeeded.
+    output_tmp_dir = osp.join(data_dir, 'tmp')
+    if osp.isdir(output_tmp_dir):
+        shutil.rmtree(output_tmp_dir)  # start from a clean temporary dir
+    # NOTE: the auto-downloaded VOC dataset uses the default VOC label
+    # list, so label_list.txt is not generated here. For the default
+    # labels, see ../data/source/voc_loader.py
+    merge_and_create_list(devkit_dir, ['2007', '2012'], output_tmp_dir)
+    shutil.move(output_tmp_dir, output_dir)
+    # Remove the source directories VOC2007 and VOC2012.
+    shutil.rmtree(osp.join(devkit_dir, "VOC2007"))
+    shutil.rmtree(osp.join(devkit_dir, "VOC2012"))
+
+
 def map_path(url, root_dir):
     # parse path after download to decompress under root_dir
     fname = url.split('/')[-1]
@@ -173,6 +182,19 @@ def get_path(url, root_dir, md5sum=None):
     return fullpath
 
 
+def download_dataset(path, dataset=None):
+    """Fetch every (url, md5sum) entry of DATASETS[dataset] with get_path."""
+    if dataset not in DATASETS:
+        logger.error("Unknown dataset {}, it should be "
+                     "{}".format(dataset, DATASETS.keys()))
+        return  # unknown name: nothing is downloaded
+    for url, md5sum in DATASETS[dataset][0]:
+        get_path(url, path, md5sum)  # `path` is passed as get_path's root_dir
+    if dataset == 'voc':  # VOC2007/VOC2012 are merged after download
+        _merge_voc_dir(path, DATASETS[dataset][1][0])
+    logger.info("Download dataset {} finished.".format(dataset))
+
+
 def _dataset_exists(path, annotation, image_dir):
     """
     Check if user define dataset exists