diff --git a/PaddleCV/PaddleDetection/dataset/coco/download.sh b/PaddleCV/PaddleDetection/dataset/coco/download.sh deleted file mode 100644 index 6f262ccebb635e993b35349890a793430d9ad597..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/dataset/coco/download.sh +++ /dev/null @@ -1,20 +0,0 @@ -DIR="$( cd "$(dirname "$0")" ; pwd -P )" -cd "$DIR" - -# Download the data. -echo "Downloading..." -wget http://images.cocodataset.org/zips/train2014.zip -wget http://images.cocodataset.org/zips/val2014.zip -wget http://images.cocodataset.org/zips/train2017.zip -wget http://images.cocodataset.org/zips/val2017.zip -wget http://images.cocodataset.org/annotations/annotations_trainval2014.zip -wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip -# Extract the data. -echo "Extracting..." -unzip train2014.zip -unzip val2014.zip -unzip train2017.zip -unzip val2017.zip -unzip annotations_trainval2014.zip -unzip annotations_trainval2017.zip - diff --git a/PaddleCV/PaddleDetection/dataset/coco/download_coco.py b/PaddleCV/PaddleDetection/dataset/coco/download_coco.py new file mode 100644 index 0000000000000000000000000000000000000000..2b4f7e764e17296ccd8905478bf3ccb3818b909f --- /dev/null +++ b/PaddleCV/PaddleDetection/dataset/coco/download_coco.py @@ -0,0 +1,25 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +import sys +import os.path as osp +import logging + +from ppdet.utils.download import download_dataset + +logging.basicConfig(level=logging.INFO) + +download_path = osp.split(osp.realpath(sys.argv[0]))[0] +download_dataset(download_path, 'coco') diff --git a/PaddleCV/PaddleDetection/dataset/fruit/download.sh b/PaddleCV/PaddleDetection/dataset/fruit/download.sh deleted file mode 100644 index 2ea8d72c2a440213ab7e59441b795831c5d67991..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/dataset/fruit/download.sh +++ /dev/null @@ -1,10 +0,0 @@ -DIR="$( cd "$(dirname "$0")" ; pwd -P )" -cd "$DIR" - -# Download the data. -echo "Downloading..." -wget https://dataset.bj.bcebos.com/PaddleDetection_demo/fruit-detection.tar -# Extract the data. -echo "Extracting..." -tar xvf fruit-detection.tar -rm -rf fruit-detection.tar diff --git a/PaddleCV/PaddleDetection/dataset/fruit/download_fruit.py b/PaddleCV/PaddleDetection/dataset/fruit/download_fruit.py new file mode 100644 index 0000000000000000000000000000000000000000..5cce18895af3eeb81c4e49f4897cc591b2f40f9b --- /dev/null +++ b/PaddleCV/PaddleDetection/dataset/fruit/download_fruit.py @@ -0,0 +1,25 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ + +import sys +import os.path as osp +import logging + +from ppdet.utils.download import download_dataset + +logging.basicConfig(level=logging.INFO) + +download_path = osp.split(osp.realpath(sys.argv[0]))[0] +download_dataset(download_path, 'fruit') diff --git a/PaddleCV/PaddleDetection/dataset/voc/download.sh b/PaddleCV/PaddleDetection/dataset/voc/download.sh deleted file mode 100755 index 2c7341a4114013733cb5d002e87d0260c90711b7..0000000000000000000000000000000000000000 --- a/PaddleCV/PaddleDetection/dataset/voc/download.sh +++ /dev/null @@ -1,16 +0,0 @@ -DIR="$( cd "$(dirname "$0")" ; pwd -P )" -cd "$DIR" - -# Download the data. -echo "Downloading..." -wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar -wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar -wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar -# Extract the data. -echo "Extracting..." -tar -xf VOCtrainval_11-May-2012.tar -tar -xf VOCtrainval_06-Nov-2007.tar -tar -xf VOCtest_06-Nov-2007.tar - -echo "Creating data lists..." -python -c 'from ppdet.utils.voc_utils import merge_and_create_list; merge_and_create_list("VOCdevkit", ["2007", "2012"], "VOCdevkit/VOC_all")' diff --git a/PaddleCV/PaddleDetection/dataset/voc/download_voc.py b/PaddleCV/PaddleDetection/dataset/voc/download_voc.py new file mode 100644 index 0000000000000000000000000000000000000000..e7f32657f1697bd82f0f7dfbb52a3d1cb987c4bd --- /dev/null +++ b/PaddleCV/PaddleDetection/dataset/voc/download_voc.py @@ -0,0 +1,25 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +import sys +import os.path as osp +import logging + +from ppdet.utils.download import download_dataset + +logging.basicConfig(level=logging.INFO) + +download_path = osp.split(osp.realpath(sys.argv[0]))[0] +download_dataset(download_path, 'voc') diff --git a/PaddleCV/PaddleDetection/docs/INSTALL.md b/PaddleCV/PaddleDetection/docs/INSTALL.md index 185b1bf8bd444476a70c4a9930f8d3cd715644d5..3876812cfc389077c647aa42d2adc25d654cd748 100644 --- a/PaddleCV/PaddleDetection/docs/INSTALL.md +++ b/PaddleCV/PaddleDetection/docs/INSTALL.md @@ -110,15 +110,15 @@ On the other hand, to download the datasets, run the following commands: - COCO ``` -cd dataset/coco -./download.sh +export PYTHONPATH=$PYTHONPATH:. +python dataset/coco/download_coco.py ``` - Pascal VOC ``` -cd dataset/voc -./download.sh +export PYTHONPATH=$PYTHONPATH:. +python dataset/voc/download_voc.py ``` **Download datasets automatically:** diff --git a/PaddleCV/PaddleDetection/docs/INSTALL_cn.md b/PaddleCV/PaddleDetection/docs/INSTALL_cn.md index 1a54086d8c8305af7b445db43bdcf3e490ef008f..7226274aa214f2956b55c36ddedc61c0f4d15d4e 100644 --- a/PaddleCV/PaddleDetection/docs/INSTALL_cn.md +++ b/PaddleCV/PaddleDetection/docs/INSTALL_cn.md @@ -109,15 +109,15 @@ ln -sf /dataset/voc - COCO ``` -cd dataset/coco -./download.sh +export PYTHONPATH=$PYTHONPATH:. +python dataset/coco/download_coco.py ``` - Pascal VOC ``` -cd dataset/voc -./download.sh +export PYTHONPATH=$PYTHONPATH:. 
+python dataset/voc/download_voc.py ``` **自动下载数据集:** diff --git a/PaddleCV/PaddleDetection/docs/QUICK_STARTED.md b/PaddleCV/PaddleDetection/docs/QUICK_STARTED.md index d93f76b2d00ba688a57227d33e0ccf854c0d56e1..5b687b35e72718be939b83299a5e9a799d6f5e79 100644 --- a/PaddleCV/PaddleDetection/docs/QUICK_STARTED.md +++ b/PaddleCV/PaddleDetection/docs/QUICK_STARTED.md @@ -6,11 +6,11 @@ This tutorial fine-tunes a tiny dataset by pretrained detection model for users ## Data Preparation -Dataset refers to [Kaggle](https://www.kaggle.com/mbkinaci/fruit-images-for-object-detection), which contains 240 images in train dataset and 60 images in test dataset. Data categories are apple, orange and banana. Download [here](https://dataset.bj.bcebos.com/PaddleDetection_demo/fruit-detection.tar) and uncompress the dataset after download, script for data preparation is located at [download.sh](../dataset/fruit/download.sh). Command is as follows: +Dataset refers to [Kaggle](https://www.kaggle.com/mbkinaci/fruit-images-for-object-detection), which contains 240 images in train dataset and 60 images in test dataset. Data categories are apple, orange and banana. Download [here](https://dataset.bj.bcebos.com/PaddleDetection_demo/fruit-detection.tar) and uncompress the dataset after download, script for data preparation is located at [download_fruit.py](../dataset/fruit/download_fruit.py). Command is as follows: ```bash -cd dataset/fruit -sh download.sh +export PYTHONPATH=$PYTHONPATH:. 
+python dataset/fruit/download_fruit.py ``` - **Note: before starting, run the following command and specify the GPU** diff --git a/PaddleCV/PaddleDetection/docs/QUICK_STARTED_cn.md b/PaddleCV/PaddleDetection/docs/QUICK_STARTED_cn.md index fe15870b256eff07e7fe64e0b2f8ec3855382d50..78c019a6bfe7bcb82ca5a42ea9d65cf26f397aeb 100644 --- a/PaddleCV/PaddleDetection/docs/QUICK_STARTED_cn.md +++ b/PaddleCV/PaddleDetection/docs/QUICK_STARTED_cn.md @@ -6,11 +6,11 @@ ## 数据准备 -数据集参考[Kaggle数据集](https://www.kaggle.com/mbkinaci/fruit-images-for-object-detection),其中训练数据集240张图片,测试数据集60张图片,数据类别为3类:苹果,橘子,香蕉。[下载链接](https://dataset.bj.bcebos.com/PaddleDetection_demo/fruit-detection.tar)。数据下载后分别解压即可, 数据准备脚本位于[download.sh](../dataset/fruit/download.sh)。下载数据方式如下: +数据集参考[Kaggle数据集](https://www.kaggle.com/mbkinaci/fruit-images-for-object-detection),其中训练数据集240张图片,测试数据集60张图片,数据类别为3类:苹果,橘子,香蕉。[下载链接](https://dataset.bj.bcebos.com/PaddleDetection_demo/fruit-detection.tar)。数据下载后分别解压即可, 数据准备脚本位于[download_fruit.py](../dataset/fruit/download_fruit.py)。下载数据方式如下: ```bash -cd dataset/fruit -sh download.sh +export PYTHONPATH=$PYTHONPATH:. 
+python dataset/fruit/download_fruit.py ``` - **注:在开始前,运行如下命令并指定GPU** diff --git a/PaddleCV/PaddleDetection/ppdet/utils/download.py b/PaddleCV/PaddleDetection/ppdet/utils/download.py index b40e1404d82e3f8013ac43c843daba98c2dd74f9..05f62749192cf0546aabb181c5397e2551806fb2 100644 --- a/PaddleCV/PaddleDetection/ppdet/utils/download.py +++ b/PaddleCV/PaddleDetection/ppdet/utils/download.py @@ -35,7 +35,7 @@ __all__ = ['get_weights_path', 'get_dataset_path'] WEIGHTS_HOME = osp.expanduser("~/.cache/paddle/weights") DATASET_HOME = osp.expanduser("~/.cache/paddle/dataset") -# dict of {dataset_name: (downalod_info, sub_dirs)} +# dict of {dataset_name: (download_info, sub_dirs)} # download info: (url, md5sum) DATASETS = { 'coco': ([ @@ -60,6 +60,11 @@ DATASETS = { 'http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar', 'b6e924de25625d8de591ea690078ad9f', ), ], ["VOCdevkit/VOC_all"]), + 'fruit': ([ + ( + 'https://dataset.bj.bcebos.com/PaddleDetection_demo/fruit-detection.tar', + '374554a7633b1b68d6a5fbb7c061b8ba', ), + ], ["fruit-detection"]), } DOWNLOAD_RETRY_LIMIT = 3 @@ -103,25 +108,7 @@ def get_dataset_path(path, annotation, image_dir): # voc should merge dir and create list after download if name == 'voc': - logger.info("Download voc dataset successed, merge " - "VOC2007 and VOC2012 to VOC_all...") - output_dir = osp.join(data_dir, dataset[1][0]) - devkit_dir = "/".join(output_dir.split('/')[:-1]) - years = ['2007', '2012'] - # merge dir in output_tmp_dir at first, move to - # output_dir after merge sucessed. - output_tmp_dir = osp.join(data_dir, 'tmp') - if osp.isdir(output_tmp_dir): - shutil.rmtree(output_tmp_dir) - # NOTE(dengkaipeng): since using auto download VOC - # dataset, VOC default label list should be used, - # do not generate label_list.txt here. 
For default - # label, see ../data/source/voc_loader.py - merge_and_create_list(devkit_dir, years, output_tmp_dir) - shutil.move(output_tmp_dir, output_dir) - # remove source directory VOC2007 and VOC2012 - shutil.rmtree(osp.join(devkit_dir, "VOC2007")) - shutil.rmtree(osp.join(devkit_dir, "VOC2012")) + _merge_voc_dir(data_dir, dataset[1][0]) return data_dir # not match any dataset in DATASETS @@ -130,6 +117,28 @@ def get_dataset_path(path, annotation, image_dir): "'voc' and 'coco' currently".format(path, osp.split(path)[-1])) +def _merge_voc_dir(data_dir, output_subdir): + logger.info("Download voc dataset successed, merge " + "VOC2007 and VOC2012 to VOC_all...") + output_dir = osp.join(data_dir, output_subdir) + devkit_dir = "/".join(output_dir.split('/')[:-1]) + years = ['2007', '2012'] + # merge dir in output_tmp_dir at first, move to + # output_dir after merge sucessed. + output_tmp_dir = osp.join(data_dir, 'tmp') + if osp.isdir(output_tmp_dir): + shutil.rmtree(output_tmp_dir) + # NOTE: since using auto download VOC + # dataset, VOC default label list should be used, + # do not generate label_list.txt here. 
For default + # label, see ../data/source/voc_loader.py + merge_and_create_list(devkit_dir, years, output_tmp_dir) + shutil.move(output_tmp_dir, output_dir) + # remove source directory VOC2007 and VOC2012 + shutil.rmtree(osp.join(devkit_dir, "VOC2007")) + shutil.rmtree(osp.join(devkit_dir, "VOC2012")) + + def map_path(url, root_dir): # parse path after download to decompress under root_dir fname = url.split('/')[-1] @@ -173,6 +182,19 @@ def get_path(url, root_dir, md5sum=None): return fullpath +def download_dataset(path, dataset=None): + if dataset not in DATASETS.keys(): + logger.error("Unknown dataset {}, it should be " + "{}".format(dataset, DATASETS.keys())) + return + dataset_info = DATASETS[dataset][0] + for info in dataset_info: + get_path(info[0], path, info[1]) + if dataset == 'voc': + _merge_voc_dir(path, DATASETS[dataset][1][0]) + logger.info("Download dataset {} finished.".format(dataset)) + + def _dataset_exists(path, annotation, image_dir): """ Check if user define dataset exists diff --git a/PaddleCV/rcnn/README.md b/PaddleCV/rcnn/README.md index 8c8a86c956c2ca4a615ea53dfa672ca227052431..03bec8550592e409bd2fb8d4d38a14a001514f73 100644 --- a/PaddleCV/rcnn/README.md +++ b/PaddleCV/rcnn/README.md @@ -38,8 +38,9 @@ Mask RCNN is a two stage model as well. At the first stage, it generates proposa Train the model on [MS-COCO dataset](http://cocodataset.org/#download), download dataset as below: - cd dataset/coco - ./download.sh +```bash +python dataset/coco/download.py +``` The data catalog structure is as follows: @@ -67,6 +68,8 @@ The data catalog structure is as follows: sh ./pretrained/download.sh +**NOTE:** Windows users can download weights from links in `./pretrained/download.sh`. + Set `pretrained_model` to load pre-trained model. In addition, this parameter is used to load trained model when finetuning as well. Please make sure that pretrained_model is downloaded and loaded correctly, otherwise, the loss may be NAN during training. 
diff --git a/PaddleCV/rcnn/README_cn.md b/PaddleCV/rcnn/README_cn.md index 615d73ce181433b6a7b9e88a01e20aa40e2c2e90..7e6290b503474a618e1c4e43538a6740217b67dd 100644 --- a/PaddleCV/rcnn/README_cn.md +++ b/PaddleCV/rcnn/README_cn.md @@ -38,8 +38,9 @@ Mask RCNN同样为两阶段框架,第一阶段扫描图像生成候选框; 在[MS-COCO数据集](http://cocodataset.org/#download)上进行训练,通过如下方式下载数据集。 - cd dataset/coco - ./download.sh +```bash +python dataset/coco/download.py +``` 数据目录结构如下: @@ -68,6 +69,8 @@ data/coco/ sh ./pretrained/download.sh +**注意:** Windows用户可通过`./pretrained/download.sh`中的链接直接下载和解压。 + 通过初始化`pretrained_model` 加载预训练模型。同时在参数微调时也采用该设置加载已训练模型。 请在训练前确认预训练模型下载与加载正确,否则训练过程中损失可能会出现NAN。 diff --git a/PaddleCV/rcnn/dataset/coco/download.py b/PaddleCV/rcnn/dataset/coco/download.py new file mode 100644 index 0000000000000000000000000000000000000000..9df49bef6eab9d615e61e3cd429dcfdbeb5708ce --- /dev/null +++ b/PaddleCV/rcnn/dataset/coco/download.py @@ -0,0 +1,61 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
"""Download the MS-COCO 2014/2017 zip archives and unpack them beside this script."""

import os
import os.path as osp
import sys
import zipfile
import logging

from paddle.dataset.common import download

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


# Archives to fetch, keyed by dataset name; every entry is (url, md5sum).
DATASETS = {
    'coco': [
        # coco2017
        ('http://images.cocodataset.org/zips/train2017.zip',
         'cced6f7f71b7629ddf16f17bbcfab6b2'),
        ('http://images.cocodataset.org/zips/val2017.zip',
         '442b8da7639aecaf257c1dceb8ba8c80'),
        ('http://images.cocodataset.org/annotations/annotations_trainval2017.zip',
         'f4bbac642086de4f52a3fdda2de5fa2c'),
        # coco2014
        ('http://images.cocodataset.org/zips/train2014.zip',
         '0da8c0bd3d6becc4dcb32757491aca88'),
        ('http://images.cocodataset.org/zips/val2014.zip',
         'a3d79f5ed8d289b7a7554ce06a5782b3'),
        ('http://images.cocodataset.org/annotations/annotations_trainval2014.zip',
         '0a379cfc70b0e71301e0f377548639bd'),
    ],
}


def download_decompress_file(data_dir, url, md5):
    """Fetch one zip archive, unpack it into ``data_dir`` and delete the zip.

    Args:
        data_dir: Directory the archive is extracted into. It is also passed
            to ``download`` as its second argument (presumably the save
            location -- confirm against the paddle API).
        url: Address of the zip archive to fetch.
        md5: Checksum string forwarded to ``download`` (presumably used to
            verify the downloaded file -- confirm against the paddle API).
    """
    logger.info("Downloading from {}".format(url))
    archive_path = download(url, data_dir, md5)

    logger.info("Decompressing {}".format(archive_path))
    with zipfile.ZipFile(archive_path) as archive:
        archive.extractall(path=data_dir)

    # Reached only when extraction did not raise, so a failed run keeps the
    # archive on disk.
    os.remove(archive_path)


if __name__ == "__main__":
    # Work in the directory that holds this script, not the caller's cwd.
    data_dir = osp.dirname(osp.realpath(sys.argv[0]))
    for dataset_name, archives in DATASETS.items():
        for archive_url, archive_md5 in archives:
            download_decompress_file(data_dir, archive_url, archive_md5)
        logger.info("Download dataset {} finished.".format(dataset_name))
-wget http://images.cocodataset.org/zips/train2014.zip -wget http://images.cocodataset.org/zips/val2014.zip -wget http://images.cocodataset.org/zips/train2017.zip -wget http://images.cocodataset.org/zips/val2017.zip -wget http://images.cocodataset.org/annotations/annotations_trainval2014.zip -wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip -# Extract the data. -echo "Extracting..." -unzip train2014.zip -unzip val2014.zip -unzip train2017.zip -unzip val2017.zip -unzip annotations_trainval2014.zip -unzip annotations_trainval2017.zip - diff --git a/PaddleCV/ssd/README.md b/PaddleCV/ssd/README.md index 507aa2219fa683259ec5123d23a25222454e5c7b..6ee63a7867447b3f345e309a4e716dbf98467331 100644 --- a/PaddleCV/ssd/README.md +++ b/PaddleCV/ssd/README.md @@ -26,10 +26,10 @@ Please download [PASCAL VOC dataset](http://host.robots.ox.ac.uk/pascal/VOC/) at ``` cd data/pascalvoc -./download.sh +python download.py ``` -The command `download.sh` also will create training and testing file lists. +The script `download.py` will also create training and testing file lists. ### Train @@ -37,9 +37,11 @@ The command `download.sh` also will create training and testing file lists. We provide two pre-trained models. The one is MobileNet-v1 SSD trained on COCO dataset, but removed the convolutional predictors for COCO dataset. This model can be used to initialize the models when training other datasets, like PASCAL VOC. The other pre-trained model is MobileNet-v1 trained on ImageNet 2012 dataset but removed the last weights and bias in the Fully-Connected layer. Download MobileNet-v1 SSD: - ``` - ./pretrained/download_coco.sh - ``` +```bash +sh ./pretrained/download_coco.sh +``` + +**NOTE:** Windows users can download weights from link in `./pretrained/download_coco.sh`. 
Declaration: the MobileNet-v1 SSD model is converted by [TensorFlow model](https://github.com/tensorflow/models/blob/f87a58cd96d45de73c9a8330a06b2ab56749a7fa/research/object_detection/g3doc/detection_model_zoo.md). diff --git a/PaddleCV/ssd/README_cn.md b/PaddleCV/ssd/README_cn.md index 14aca40b69d92871692956af58a16057227ff63f..66928c5d401bf0b721744b1d2a14aad6e9a392d8 100644 --- a/PaddleCV/ssd/README_cn.md +++ b/PaddleCV/ssd/README_cn.md @@ -27,10 +27,10 @@ SSD 可以方便地插入到任何一种标准卷积网络中,比如 VGG、Res ``` cd data/pascalvoc -./download.sh +python download.py ``` -`download.sh` 命令会自动创建训练和测试用的列表文件。 +`download.py` 脚本会自动创建训练和测试用的列表文件。 ### 模型训练 @@ -39,9 +39,11 @@ cd data/pascalvoc 我们提供了两个预训练模型。第一个模型是在 COCO 数据集上预训练的 MobileNet-v1 SSD,我们将它的预测头移除了以便在 COCO 以外的数据集上进行训练。第二个模型是在 ImageNet 2012 数据集上预训练的 MobileNet-v1,我们也将最后的全连接层移除以便进行目标检测训练。下载 MobileNet-v1 SSD: - ``` - ./pretrained/download_coco.sh - ``` +```bash +sh ./pretrained/download_coco.sh +``` + +**注意:** Windows用户可通过`./pretrained/download_coco.sh`中的链接直接下载和解压。 声明:MobileNet-v1 SSD 模型转换自[TensorFlow model](https://github.com/tensorflow/models/blob/f87a58cd96d45de73c9a8330a06b2ab56749a7fa/research/object_detection/g3doc/detection_model_zoo.md)。MobileNet-v1 模型转换自[Caffe](https://github.com/shicai/MobileNet-Caffe)。 diff --git a/PaddleCV/ssd/data/coco/download.py b/PaddleCV/ssd/data/coco/download.py new file mode 100644 index 0000000000000000000000000000000000000000..9df49bef6eab9d615e61e3cd429dcfdbeb5708ce --- /dev/null +++ b/PaddleCV/ssd/data/coco/download.py @@ -0,0 +1,61 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
# You may obtain a copy of the License at
#
#    http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""Download the MS-COCO 2014/2017 zip archives and unpack them beside this script."""

import os
import os.path as osp
import sys
import zipfile
import logging

from paddle.dataset.common import download

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


# Archives to fetch, keyed by dataset name; every entry is (url, md5sum).
DATASETS = {
    'coco': [
        # coco2017
        ('http://images.cocodataset.org/zips/train2017.zip',
         'cced6f7f71b7629ddf16f17bbcfab6b2'),
        ('http://images.cocodataset.org/zips/val2017.zip',
         '442b8da7639aecaf257c1dceb8ba8c80'),
        ('http://images.cocodataset.org/annotations/annotations_trainval2017.zip',
         'f4bbac642086de4f52a3fdda2de5fa2c'),
        # coco2014
        ('http://images.cocodataset.org/zips/train2014.zip',
         '0da8c0bd3d6becc4dcb32757491aca88'),
        ('http://images.cocodataset.org/zips/val2014.zip',
         'a3d79f5ed8d289b7a7554ce06a5782b3'),
        ('http://images.cocodataset.org/annotations/annotations_trainval2014.zip',
         '0a379cfc70b0e71301e0f377548639bd'),
    ],
}


def download_decompress_file(data_dir, url, md5):
    """Fetch one zip archive, unpack it into ``data_dir`` and delete the zip.

    Args:
        data_dir: Directory the archive is extracted into. It is also passed
            to ``download`` as its second argument (presumably the save
            location -- confirm against the paddle API).
        url: Address of the zip archive to fetch.
        md5: Checksum string forwarded to ``download`` (presumably used to
            verify the downloaded file -- confirm against the paddle API).
    """
    logger.info("Downloading from {}".format(url))
    archive_path = download(url, data_dir, md5)

    logger.info("Decompressing {}".format(archive_path))
    with zipfile.ZipFile(archive_path) as archive:
        archive.extractall(path=data_dir)

    # Reached only when extraction did not raise, so a failed run keeps the
    # archive on disk.
    os.remove(archive_path)


if __name__ == "__main__":
    # Work in the directory that holds this script, not the caller's cwd.
    data_dir = osp.dirname(osp.realpath(sys.argv[0]))
    for dataset_name, archives in DATASETS.items():
        for archive_url, archive_md5 in archives:
            download_decompress_file(data_dir, archive_url, archive_md5)
        logger.info("Download dataset {} finished.".format(dataset_name))
6f262ccebb635e993b35349890a793430d9ad597..0000000000000000000000000000000000000000 --- a/PaddleCV/ssd/data/coco/download.sh +++ /dev/null @@ -1,20 +0,0 @@ -DIR="$( cd "$(dirname "$0")" ; pwd -P )" -cd "$DIR" - -# Download the data. -echo "Downloading..." -wget http://images.cocodataset.org/zips/train2014.zip -wget http://images.cocodataset.org/zips/val2014.zip -wget http://images.cocodataset.org/zips/train2017.zip -wget http://images.cocodataset.org/zips/val2017.zip -wget http://images.cocodataset.org/annotations/annotations_trainval2014.zip -wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip -# Extract the data. -echo "Extracting..." -unzip train2014.zip -unzip val2014.zip -unzip train2017.zip -unzip val2017.zip -unzip annotations_trainval2014.zip -unzip annotations_trainval2017.zip - diff --git a/PaddleCV/ssd/data/pascalvoc/create_list.py b/PaddleCV/ssd/data/pascalvoc/download.py similarity index 64% rename from PaddleCV/ssd/data/pascalvoc/create_list.py rename to PaddleCV/ssd/data/pascalvoc/download.py index 3f43c4c3158886bfff8fd22e50e69178f8134556..d4f8b7f33675013f8881ac8a69f28aa041c4023d 100644 --- a/PaddleCV/ssd/data/pascalvoc/create_list.py +++ b/PaddleCV/ssd/data/pascalvoc/download.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,10 +11,31 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. 
+ import os import os.path as osp +import sys import re import random +import tarfile +import logging + +from paddle.dataset.common import download + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +DATASETS = { + 'pascalvoc': [ + ('http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar', + '6cd6e144f989b92b3379bac3b3de84fd', ), + ('http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar', + 'c52e279531787c972589f7e41ab4ae64', ), + ('http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar', + 'b6e924de25625d8de591ea690078ad9f', ), + ], +} devkit_dir = './VOCdevkit' years = ['2007', '2012'] @@ -73,5 +94,22 @@ def prepare_filelist(devkit_dir, years, output_dir): ftest.write(item[0] + ' ' + item[1] + '\n') -if __name__ == '__main__': - prepare_filelist(devkit_dir, years, '.') + +def download_decompress_file(data_dir, url, md5): + logger.info("Downloading from {}".format(url)) + tar_file = download(url, data_dir, md5) + logger.info("Decompressing {}".format(tar_file)) + with tarfile.open(tar_file) as tf: + tf.extractall(path=data_dir) + os.remove(tar_file) + + +if __name__ == "__main__": + data_dir = osp.split(osp.realpath(sys.argv[0]))[0] + for name, infos in DATASETS.items(): + for info in infos: + download_decompress_file(data_dir, info[0], info[1]) + if name == 'pascalvoc': + logger.info("create list for pascalvoc dataset.") + prepare_filelist(devkit_dir, years, data_dir) + logger.info("Download dataset {} finished.".format(name)) diff --git a/PaddleCV/ssd/data/pascalvoc/download.sh b/PaddleCV/ssd/data/pascalvoc/download.sh deleted file mode 100755 index e16073915c98815c1a23e8aded67ab2db4cfba10..0000000000000000000000000000000000000000 --- a/PaddleCV/ssd/data/pascalvoc/download.sh +++ /dev/null @@ -1,16 +0,0 @@ -DIR="$( cd "$(dirname "$0")" ; pwd -P )" -cd "$DIR" - -# Download the data. -echo "Downloading..." 
-wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar -wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar -wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar -# Extract the data. -echo "Extracting..." -tar -xf VOCtrainval_11-May-2012.tar -tar -xf VOCtrainval_06-Nov-2007.tar -tar -xf VOCtest_06-Nov-2007.tar - -echo "Creating data lists..." -python create_list.py diff --git a/PaddleCV/yolov3/.gitignore b/PaddleCV/yolov3/.gitignore index c8fdc82b116a140c5d09cbf2d76468df195885ec..011d9771cdb71009709e4adedff2472e31f4a1b7 100644 --- a/PaddleCV/yolov3/.gitignore +++ b/PaddleCV/yolov3/.gitignore @@ -7,5 +7,6 @@ checkpoints/ weights/ !weights/*.sh dataset/coco/ +!dataset/coco/*.py log* output* diff --git a/PaddleCV/yolov3/.train.py.swp b/PaddleCV/yolov3/.train.py.swp new file mode 100644 index 0000000000000000000000000000000000000000..136bcb57b3edb47cecb1ebdf72a4b16a694104de Binary files /dev/null and b/PaddleCV/yolov3/.train.py.swp differ diff --git a/PaddleCV/yolov3/README.md b/PaddleCV/yolov3/README.md index 7f71ca9202da1565e90f1e60102b10c04e4aff04..c8b02b6996df344aa379a81c1d1cb7cee0a7f6c1 100644 --- a/PaddleCV/yolov3/README.md +++ b/PaddleCV/yolov3/README.md @@ -50,8 +50,9 @@ 在[MS-COCO数据集](http://cocodataset.org/#download)上进行训练,通过如下方式下载数据集。 - cd dataset/coco - ./download.sh +```bash +python dataset/coco/download.py +``` 数据目录结构如下: @@ -84,6 +85,8 @@ dataset/coco/ sh ./weights/download.sh +**注意:** Windows用户可通过`./weights/download.sh`中的链接直接下载和解压。 + 通过设置`--pretrain` 加载预训练模型。同时在fine-tune时也采用该设置加载已训练模型。 请在训练前确认预训练模型下载与加载正确,否则训练过程中损失可能会出现NAN。 diff --git a/PaddleCV/yolov3/README_en.md b/PaddleCV/yolov3/README_en.md index c468cd8e010a3ee22804557fc7cf61168e871ee5..eb3ac4fe0183d707a43c369768440f583a5164c1 100644 --- a/PaddleCV/yolov3/README_en.md +++ b/PaddleCV/yolov3/README_en.md @@ -50,8 +50,9 @@ To train the model, COCO-API is needed. 
Installation is as follows: Train the model on [MS-COCO dataset](http://cocodataset.org/#download), we also provide download script as follows: - cd dataset/coco - ./download.sh +```bash +python dataset/coco/download.py +``` The data catalog structure is as follows: @@ -84,6 +85,8 @@ You can defined datasets by yourself, we recommend using annotations in COCO for sh ./weights/download.sh +**NOTE:** Windows users can download weights from links in `./weights/download.sh`. + Set `--pretrain` to load pre-trained model. In addition, this parameter is used to load trained model when finetuning as well. Please make sure that pre-trained model is downloaded and loaded correctly, otherwise, the loss may be NAN during training. diff --git a/PaddleCV/yolov3/dataset/coco/download.py b/PaddleCV/yolov3/dataset/coco/download.py new file mode 100644 index 0000000000000000000000000000000000000000..9df49bef6eab9d615e61e3cd429dcfdbeb5708ce --- /dev/null +++ b/PaddleCV/yolov3/dataset/coco/download.py @@ -0,0 +1,61 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
"""Download the MS-COCO 2014/2017 zip archives and unpack them beside this script."""

import os
import os.path as osp
import sys
import zipfile
import logging

from paddle.dataset.common import download

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)


# Archives to fetch, keyed by dataset name; every entry is (url, md5sum).
DATASETS = {
    'coco': [
        # coco2017
        ('http://images.cocodataset.org/zips/train2017.zip',
         'cced6f7f71b7629ddf16f17bbcfab6b2'),
        ('http://images.cocodataset.org/zips/val2017.zip',
         '442b8da7639aecaf257c1dceb8ba8c80'),
        ('http://images.cocodataset.org/annotations/annotations_trainval2017.zip',
         'f4bbac642086de4f52a3fdda2de5fa2c'),
        # coco2014
        ('http://images.cocodataset.org/zips/train2014.zip',
         '0da8c0bd3d6becc4dcb32757491aca88'),
        ('http://images.cocodataset.org/zips/val2014.zip',
         'a3d79f5ed8d289b7a7554ce06a5782b3'),
        ('http://images.cocodataset.org/annotations/annotations_trainval2014.zip',
         '0a379cfc70b0e71301e0f377548639bd'),
    ],
}


def download_decompress_file(data_dir, url, md5):
    """Fetch one zip archive, unpack it into ``data_dir`` and delete the zip.

    Args:
        data_dir: Directory the archive is extracted into. It is also passed
            to ``download`` as its second argument (presumably the save
            location -- confirm against the paddle API).
        url: Address of the zip archive to fetch.
        md5: Checksum string forwarded to ``download`` (presumably used to
            verify the downloaded file -- confirm against the paddle API).
    """
    logger.info("Downloading from {}".format(url))
    archive_path = download(url, data_dir, md5)

    logger.info("Decompressing {}".format(archive_path))
    with zipfile.ZipFile(archive_path) as archive:
        archive.extractall(path=data_dir)

    # Reached only when extraction did not raise, so a failed run keeps the
    # archive on disk.
    os.remove(archive_path)


if __name__ == "__main__":
    # Work in the directory that holds this script, not the caller's cwd.
    data_dir = osp.dirname(osp.realpath(sys.argv[0]))
    for dataset_name, archives in DATASETS.items():
        for archive_url, archive_md5 in archives:
            download_decompress_file(data_dir, archive_url, archive_md5)
        logger.info("Download dataset {} finished.".format(dataset_name))
-wget http://images.cocodataset.org/zips/train2014.zip -wget http://images.cocodataset.org/zips/val2014.zip -wget http://images.cocodataset.org/zips/train2017.zip -wget http://images.cocodataset.org/zips/val2017.zip -wget http://images.cocodataset.org/annotations/annotations_trainval2014.zip -wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip -# Extract the data. -echo "Extracting..." -unzip train2014.zip -unzip val2014.zip -unzip train2017.zip -unzip val2017.zip -unzip annotations_trainval2014.zip -unzip annotations_trainval2017.zip -