From 74a3bd049e4965c2db743c614fb21887c1b92cdb Mon Sep 17 00:00:00 2001 From: Kaipeng Deng Date: Thu, 17 Oct 2019 17:18:07 +0800 Subject: [PATCH] refine yolov3/rcnn/ssd dataset download (#3638) * refine yolov3/rcnn/ssd dataset download * refine docs --- PaddleCV/rcnn/README.md | 7 +- PaddleCV/rcnn/README_cn.md | 7 +- PaddleCV/rcnn/dataset/coco/download.py | 61 ++++++++++++++++++ PaddleCV/rcnn/dataset/coco/download.sh | 20 ------ PaddleCV/ssd/README.md | 12 ++-- PaddleCV/ssd/README_cn.md | 12 ++-- PaddleCV/ssd/data/coco/download.py | 61 ++++++++++++++++++ PaddleCV/ssd/data/coco/download.sh | 20 ------ .../pascalvoc/{create_list.py => download.py} | 44 ++++++++++++- PaddleCV/ssd/data/pascalvoc/download.sh | 16 ----- PaddleCV/yolov3/.gitignore | 1 + PaddleCV/yolov3/.train.py.swp | Bin 0 -> 16384 bytes PaddleCV/yolov3/README.md | 7 +- PaddleCV/yolov3/README_en.md | 7 +- PaddleCV/yolov3/dataset/coco/download.py | 61 ++++++++++++++++++ PaddleCV/yolov3/dataset/coco/download.sh | 20 ------ 16 files changed, 259 insertions(+), 97 deletions(-) create mode 100644 PaddleCV/rcnn/dataset/coco/download.py delete mode 100644 PaddleCV/rcnn/dataset/coco/download.sh create mode 100644 PaddleCV/ssd/data/coco/download.py delete mode 100644 PaddleCV/ssd/data/coco/download.sh rename PaddleCV/ssd/data/pascalvoc/{create_list.py => download.py} (64%) delete mode 100755 PaddleCV/ssd/data/pascalvoc/download.sh create mode 100644 PaddleCV/yolov3/.train.py.swp create mode 100644 PaddleCV/yolov3/dataset/coco/download.py delete mode 100644 PaddleCV/yolov3/dataset/coco/download.sh diff --git a/PaddleCV/rcnn/README.md b/PaddleCV/rcnn/README.md index 8c8a86c9..03bec855 100644 --- a/PaddleCV/rcnn/README.md +++ b/PaddleCV/rcnn/README.md @@ -38,8 +38,9 @@ Mask RCNN is a two stage model as well. At the first stage, it generates proposa Train the model on [MS-COCO dataset](http://cocodataset.org/#download), download dataset as below: - cd dataset/coco - ./download.sh +```bash +python dataset/coco/download.py +``` The data catalog structure is as follows: @@ -67,6 +68,8 @@ The data catalog structure is as follows: sh ./pretrained/download.sh +**NOTE:** Windows users can download weights from links in `./pretrained/download.sh`. + Set `pretrained_model` to load pre-trained model. In addition, this parameter is used to load trained model when finetuning as well. Please make sure that pretrained_model is downloaded and loaded correctly, otherwise, the loss may be NAN during training. diff --git a/PaddleCV/rcnn/README_cn.md b/PaddleCV/rcnn/README_cn.md index 615d73ce..7e6290b5 100644 --- a/PaddleCV/rcnn/README_cn.md +++ b/PaddleCV/rcnn/README_cn.md @@ -38,8 +38,9 @@ Mask RCNN同样为两阶段框架,第一阶段扫描图像生成候选框; 在[MS-COCO数据集](http://cocodataset.org/#download)上进行训练,通过如下方式下载数据集。 - cd dataset/coco - ./download.sh +```bash +python dataset/coco/download.py +``` 数据目录结构如下: @@ -68,6 +69,8 @@ data/coco/ sh ./pretrained/download.sh +**注意:** Windows用户可通过`./pretrained/download.sh`中的链接直接下载和解压。 + 通过初始化`pretrained_model` 加载预训练模型。同时在参数微调时也采用该设置加载已训练模型。 请在训练前确认预训练模型下载与加载正确,否则训练过程中损失可能会出现NAN。 diff --git a/PaddleCV/rcnn/dataset/coco/download.py b/PaddleCV/rcnn/dataset/coco/download.py new file mode 100644 index 00000000..9df49bef --- /dev/null +++ b/PaddleCV/rcnn/dataset/coco/download.py @@ -0,0 +1,61 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import os.path as osp +import sys +import zipfile +import logging + +from paddle.dataset.common import download + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +DATASETS = { + 'coco': [ + # coco2017 + ('http://images.cocodataset.org/zips/train2017.zip', + 'cced6f7f71b7629ddf16f17bbcfab6b2', ), + ('http://images.cocodataset.org/zips/val2017.zip', + '442b8da7639aecaf257c1dceb8ba8c80', ), + ('http://images.cocodataset.org/annotations/annotations_trainval2017.zip', + 'f4bbac642086de4f52a3fdda2de5fa2c', ), + # coco2014 + ('http://images.cocodataset.org/zips/train2014.zip', + '0da8c0bd3d6becc4dcb32757491aca88', ), + ('http://images.cocodataset.org/zips/val2014.zip', + 'a3d79f5ed8d289b7a7554ce06a5782b3', ), + ('http://images.cocodataset.org/annotations/annotations_trainval2014.zip', + '0a379cfc70b0e71301e0f377548639bd', ), + ], +} + + +def download_decompress_file(data_dir, url, md5): + logger.info("Downloading from {}".format(url)) + zip_file = download(url, data_dir, md5) + logger.info("Decompressing {}".format(zip_file)) + with zipfile.ZipFile(zip_file) as zf: + zf.extractall(path=data_dir) + os.remove(zip_file) + + +if __name__ == "__main__": + data_dir = osp.split(osp.realpath(sys.argv[0]))[0] + for name, infos in DATASETS.items(): + for info in infos: + download_decompress_file(data_dir, info[0], info[1]) + logger.info("Download dataset {} finished.".format(name)) diff --git a/PaddleCV/rcnn/dataset/coco/download.sh b/PaddleCV/rcnn/dataset/coco/download.sh deleted file mode 100644 index 6f262cce..00000000 --- a/PaddleCV/rcnn/dataset/coco/download.sh +++ /dev/null @@ -1,20 +0,0 @@ -DIR="$( cd "$(dirname "$0")" ; pwd -P )" -cd "$DIR" - -# Download the data. -echo "Downloading..." -wget http://images.cocodataset.org/zips/train2014.zip -wget http://images.cocodataset.org/zips/val2014.zip -wget http://images.cocodataset.org/zips/train2017.zip -wget http://images.cocodataset.org/zips/val2017.zip -wget http://images.cocodataset.org/annotations/annotations_trainval2014.zip -wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip -# Extract the data. -echo "Extracting..." -unzip train2014.zip -unzip val2014.zip -unzip train2017.zip -unzip val2017.zip -unzip annotations_trainval2014.zip -unzip annotations_trainval2017.zip - diff --git a/PaddleCV/ssd/README.md b/PaddleCV/ssd/README.md index 507aa221..6ee63a78 100644 --- a/PaddleCV/ssd/README.md +++ b/PaddleCV/ssd/README.md @@ -26,10 +26,10 @@ Please download [PASCAL VOC dataset](http://host.robots.ox.ac.uk/pascal/VOC/) at ``` cd data/pascalvoc -./download.sh +python download.py ``` -The command `download.sh` also will create training and testing file lists. +The script `download.py` will also create training and testing file lists. ### Train @@ -37,9 +37,11 @@ The command `download.sh` also will create training and testing file lists. We provide two pre-trained models. The one is MobileNet-v1 SSD trained on COCO dataset, but removed the convolutional predictors for COCO dataset. This model can be used to initialize the models when training other datasets, like PASCAL VOC. The other pre-trained model is MobileNet-v1 trained on ImageNet 2012 dataset but removed the last weights and bias in the Fully-Connected layer. Download MobileNet-v1 SSD: - ``` - ./pretrained/download_coco.sh - ``` +```bash +sh ./pretrained/download_coco.sh +``` + +**NOTE:** Windows users can download weights from link in `./pretrained/download_coco.sh`. Declaration: the MobileNet-v1 SSD model is converted by [TensorFlow model](https://github.com/tensorflow/models/blob/f87a58cd96d45de73c9a8330a06b2ab56749a7fa/research/object_detection/g3doc/detection_model_zoo.md). diff --git a/PaddleCV/ssd/README_cn.md b/PaddleCV/ssd/README_cn.md index 14aca40b..66928c5d 100644 --- a/PaddleCV/ssd/README_cn.md +++ b/PaddleCV/ssd/README_cn.md @@ -27,10 +27,10 @@ SSD 可以方便地插入到任何一种标准卷积网络中,比如 VGG、Res ``` cd data/pascalvoc -./download.sh +python download.py ``` -`download.sh` 命令会自动创建训练和测试用的列表文件。 +`download.py` 脚本会自动创建训练和测试用的列表文件。 ### 模型训练 @@ -39,9 +39,11 @@ cd data/pascalvoc 我们提供了两个预训练模型。第一个模型是在 COCO 数据集上预训练的 MobileNet-v1 SSD,我们将它的预测头移除了以便在 COCO 以外的数据集上进行训练。第二个模型是在 ImageNet 2012 数据集上预训练的 MobileNet-v1,我们也将最后的全连接层移除以便进行目标检测训练。下载 MobileNet-v1 SSD: - ``` - ./pretrained/download_coco.sh - ``` +```bash +sh ./pretrained/download_coco.sh +``` + +**注意:** Windows用户可通过`./pretrained/download_coco.sh`中的链接直接下载和解压。 声明:MobileNet-v1 SSD 模型转换自[TensorFlow model](https://github.com/tensorflow/models/blob/f87a58cd96d45de73c9a8330a06b2ab56749a7fa/research/object_detection/g3doc/detection_model_zoo.md)。MobileNet-v1 模型转换自[Caffe](https://github.com/shicai/MobileNet-Caffe)。 diff --git a/PaddleCV/ssd/data/coco/download.py b/PaddleCV/ssd/data/coco/download.py new file mode 100644 index 00000000..9df49bef --- /dev/null +++ b/PaddleCV/ssd/data/coco/download.py @@ -0,0 +1,61 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import os.path as osp +import sys +import zipfile +import logging + +from paddle.dataset.common import download + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +DATASETS = { + 'coco': [ + # coco2017 + ('http://images.cocodataset.org/zips/train2017.zip', + 'cced6f7f71b7629ddf16f17bbcfab6b2', ), + ('http://images.cocodataset.org/zips/val2017.zip', + '442b8da7639aecaf257c1dceb8ba8c80', ), + ('http://images.cocodataset.org/annotations/annotations_trainval2017.zip', + 'f4bbac642086de4f52a3fdda2de5fa2c', ), + # coco2014 + ('http://images.cocodataset.org/zips/train2014.zip', + '0da8c0bd3d6becc4dcb32757491aca88', ), + ('http://images.cocodataset.org/zips/val2014.zip', + 'a3d79f5ed8d289b7a7554ce06a5782b3', ), + ('http://images.cocodataset.org/annotations/annotations_trainval2014.zip', + '0a379cfc70b0e71301e0f377548639bd', ), + ], +} + + +def download_decompress_file(data_dir, url, md5): + logger.info("Downloading from {}".format(url)) + zip_file = download(url, data_dir, md5) + logger.info("Decompressing {}".format(zip_file)) + with zipfile.ZipFile(zip_file) as zf: + zf.extractall(path=data_dir) + os.remove(zip_file) + + +if __name__ == "__main__": + data_dir = osp.split(osp.realpath(sys.argv[0]))[0] + for name, infos in DATASETS.items(): + for info in infos: + download_decompress_file(data_dir, info[0], info[1]) + logger.info("Download dataset {} finished.".format(name)) diff --git a/PaddleCV/ssd/data/coco/download.sh b/PaddleCV/ssd/data/coco/download.sh deleted file mode 100644 index 6f262cce..00000000 --- a/PaddleCV/ssd/data/coco/download.sh +++ /dev/null @@ -1,20 +0,0 @@ -DIR="$( cd "$(dirname "$0")" ; pwd -P )" -cd "$DIR" - -# Download the data. -echo "Downloading..." -wget http://images.cocodataset.org/zips/train2014.zip -wget http://images.cocodataset.org/zips/val2014.zip -wget http://images.cocodataset.org/zips/train2017.zip -wget http://images.cocodataset.org/zips/val2017.zip -wget http://images.cocodataset.org/annotations/annotations_trainval2014.zip -wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip -# Extract the data. -echo "Extracting..." -unzip train2014.zip -unzip val2014.zip -unzip train2017.zip -unzip val2017.zip -unzip annotations_trainval2014.zip -unzip annotations_trainval2017.zip - diff --git a/PaddleCV/ssd/data/pascalvoc/create_list.py b/PaddleCV/ssd/data/pascalvoc/download.py similarity index 64% rename from PaddleCV/ssd/data/pascalvoc/create_list.py rename to PaddleCV/ssd/data/pascalvoc/download.py index 3f43c4c3..d4f8b7f3 100644 --- a/PaddleCV/ssd/data/pascalvoc/create_list.py +++ b/PaddleCV/ssd/data/pascalvoc/download.py @@ -1,4 +1,4 @@ -# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -11,10 +11,31 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + import os import os.path as osp +import sys import re import random +import tarfile +import logging + +from paddle.dataset.common import download + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +DATASETS = { + 'pascalvoc': [ + ('http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar', + '6cd6e144f989b92b3379bac3b3de84fd', ), + ('http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar', + 'c52e279531787c972589f7e41ab4ae64', ), + ('http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar', + 'b6e924de25625d8de591ea690078ad9f', ), + ], +} devkit_dir = './VOCdevkit' years = ['2007', '2012'] @@ -73,5 +94,22 @@ def prepare_filelist(devkit_dir, years, output_dir): ftest.write(item[0] + ' ' + item[1] + '\n') -if __name__ == '__main__': - prepare_filelist(devkit_dir, years, '.') + +def download_decompress_file(data_dir, url, md5): + logger.info("Downloading from {}".format(url)) + tar_file = download(url, data_dir, md5) + logger.info("Decompressing {}".format(tar_file)) + with tarfile.open(tar_file) as tf: + tf.extractall(path=data_dir) + os.remove(tar_file) + + +if __name__ == "__main__": + data_dir = osp.split(osp.realpath(sys.argv[0]))[0] + for name, infos in DATASETS.items(): + for info in infos: + download_decompress_file(data_dir, info[0], info[1]) + if name == 'pascalvoc': + logger.info("create list for pascalvoc dataset.") + prepare_filelist(devkit_dir, years, data_dir) + logger.info("Download dataset {} finished.".format(name)) diff --git a/PaddleCV/ssd/data/pascalvoc/download.sh b/PaddleCV/ssd/data/pascalvoc/download.sh deleted file mode 100755 index e1607391..00000000 --- a/PaddleCV/ssd/data/pascalvoc/download.sh +++ /dev/null @@ -1,16 +0,0 @@ -DIR="$( cd "$(dirname "$0")" ; pwd -P )" -cd "$DIR" - -# Download the data. -echo "Downloading..." -wget http://host.robots.ox.ac.uk/pascal/VOC/voc2012/VOCtrainval_11-May-2012.tar -wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtrainval_06-Nov-2007.tar -wget http://host.robots.ox.ac.uk/pascal/VOC/voc2007/VOCtest_06-Nov-2007.tar -# Extract the data. -echo "Extracting..." -tar -xf VOCtrainval_11-May-2012.tar -tar -xf VOCtrainval_06-Nov-2007.tar -tar -xf VOCtest_06-Nov-2007.tar - -echo "Creating data lists..." -python create_list.py diff --git a/PaddleCV/yolov3/.gitignore b/PaddleCV/yolov3/.gitignore index c8fdc82b..011d9771 100644 --- a/PaddleCV/yolov3/.gitignore +++ b/PaddleCV/yolov3/.gitignore @@ -7,5 +7,6 @@ checkpoints/ weights/ !weights/*.sh dataset/coco/ +!dataset/coco/*.py log* output* diff --git a/PaddleCV/yolov3/.train.py.swp b/PaddleCV/yolov3/.train.py.swp new file mode 100644 index 0000000000000000000000000000000000000000..136bcb57b3edb47cecb1ebdf72a4b16a694104de GIT binary patch literal 16384 zcmeHN%Wot{884FX-j{+nfDjZOBKK_Up0PJkvW^Uz*xtmf*S7rFWJy-7p6;4)XZw|| zYTL8XY6U{#6bV5NaDW_wcpN}NLI@6!a3K-`iHAfa4v0$vE8r0RfP~*yRXvZjL&yms zbxXgQ?s|On)$jZ2tLp0NwlA+<;t#m@FS+5$sIQi87s0(QS#eegQweb zv;DlBoxlJ7*|~EM%+B98+dk)tJaD^y6qas~B_yMj`(YT1RzFg$Bnw3>Tbnv%>2hn3 z#o7M3mMZ)xb@Rd7HCgA^4Acy~V+KyLbMtd;s@9nLB>&LeyYE;+U7=>6W}s%EW}s%E zW}s%EW}s%EX5jyq0hQm$ZbJP#Y!{^cf7fmQZ`$i|n_g`UROQ~V`KN9EuWzewAM3xG zftrDuftrDuftrDuftrDuftrDuftrDuftrE;0Rw)>*fiQbO&b9C{$IWSzl0b0p9435 zhk(0*Gr(U?G4^NR>%e8;1n}}n#=Zl58@LI0zyJn$^=IpAL4EbuYl%?~j4I`CWIYryA$3`l`FpbdN! z_|5wn`w{R0Pyz~&Ko1Z=6F3E&1a7^Lu^#~YfCsDq%fN?$f4-Noe*mulF9Od4UjnWH zPXUhrr-4(z-`>O6uYex{Uj|lzM}dz6x8BXzOTagPF944NP2knL82csgJ>a`Q0(=U% z3wY&D#{LNW0(cI%2z(Z30e^-4UIV@byZ{^m(*W7^6!2eO9G4uVK{w@#Jm~dZ^S~&* zTSjr{NgUBoqCapGkz~cd%W@SZ(RG3H7yMWX&Kv!J`)SBc0X{5{_yZnfX)lffb&Nm= z=*49ex(`!3+qM~}$#hv#z+^|68)ts#?fZpv#G#ncdgg`DXGdl@?!qX5j*C&x6Niyh zV*-&`1}N%?sZ zrOKJwv|8~45%u?!CqrnL36xXzZ7D#RF%mh(NR5&vJ25KW50;5`zp%PMMKOm zQ55dl{IUrAfi{RSnHN$*b4IsMT+?;n7YQir`2kqcZBdFhOgg@Jq{L)lN|+S9@5iN( zi{@`Kt4m%3-H|=Ioh%XyzeuCB@6iZdoOl`)oJd|2hk2HQvdE9UgGlXpwrjfA4x`Qv zNgrB&nBrctd^^TIMsJjRVxkp(mOFhvN&K^CqbA(H;1R6P3qS1(M=FtfX_@eQ`CRj= zZgH5j$YWs!V_d*-MQ%;4g`pX;GII2zt~p|6l&kCM+)1Tu^>NNjFYMU8K!jtIa&mSI z`28pl(jAUjn?S1DAvBhP*5S|a9jZh{sxU_G>gJ-52upr9rZKlul)|WoJUr`@aR_lz zq7ugyrFn^QYho;Qd6b~@O$8I3^}Vg9?;-HY#@fdIxk>|)h+&ub*AONVLCr|PVLVUz z`@%z-ZS^gKkCr_Znt%J zsRNVsj)b(gs%qn8D7s~z!wvnSPXUMw;RV8WB-j(dH3~hN8XM3|Pa0fjQm~UkahtbD z79@u@#~aNr4-f_S$!VmT^QR#H* z3r8g2k8JMpz%N2q>wLQtL-~nTkB{P;o#o}V6>n#&v%0>rwe79%UP2U~qbMFFc~&SM z!u`EcMX_YPB1`nLAnJ3Q3zsy>lLOY~{>PAI>S+&^t~pa$Nj1Zu(x4E2D2nRI*v!?& z;J!5eHJNFwMs}FEwi6tZ-4o$uiriR=RL;{hnou7jHK=rOax7YeM}9{O6-+q0XF{VQh`DY~d-9$_!DuOQL76KICS| z@qWNCj66fW*NgqWbe^%pqat~a(X3!vNL=g4vm$G z)Hh~y(Bz}@v;-Qox>*-fae{cepuPTxQo4*Dxz-0W>$jfQE3udqp2uyDF>0?&HN(aU zj+P3e{YXa!?fpmd{jSX7QVGvU#c|CT%N?bcEs|qi5(S6X@ij~@sT3}2YzqwSQHmb< z^el4`M7=2BF=pEmR^$B)VTqOr9P>~TNzy~of}J%UTitndV|Rx?*4f(XtnaL@Z1asR zzO=Evyt=cxv5uz;yt96VUtC>ZM*P5f0eWCO7KkhqPoZrb3B-;D4e%oBq6^_sb;%^- z1+o;)^Qq1@U)`SK4|ld#w`W*mH;qYbA)YQHEa`a{j-5k(py$olKhP1pU(ocRGBV{s z5h)$nWY$wy$OzV84RWMCrSgSV>)_zP^>ai$;buj@1yMwvY^|*>t*mda%+9-Qs&OSN zIlZf7U4;pW`-n~$R2d@p7{9(^je#vli+YNmY7ZUjMVQsaVIXovI^pO$boSFga9UrD z%K%N*e&ZZebo%q4AczN4AczN4AczN4AczN4AczN4Acw|1N2&McSQC5ZJS{Z zZIPJ`9=gM}&uHfiTcX(T@jUt@hmDuTMLz9$dhghqu69@TUbfi`pEmo|y(snL>9J3N z`M@)qfG)N#@bSsCX5ZY$wq;i~F0AO+dF+#zO(fF2x@79-axYUJZOTE-(H`}Y611hS zORy^I_Pb$r`7nb-eM7w?u(*Ev~>Ym;d HzlZ%BC&Gqk literal 0 HcmV?d00001 diff --git a/PaddleCV/yolov3/README.md b/PaddleCV/yolov3/README.md index 7f71ca92..c8b02b69 100644 --- a/PaddleCV/yolov3/README.md +++ b/PaddleCV/yolov3/README.md @@ -50,8 +50,9 @@ 在[MS-COCO数据集](http://cocodataset.org/#download)上进行训练,通过如下方式下载数据集。 - cd dataset/coco - ./download.sh +```bash +python dataset/coco/download.py +``` 数据目录结构如下: @@ -84,6 +85,8 @@ dataset/coco/ sh ./weights/download.sh +**注意:** Windows用户可通过`./weights/download.sh`中的链接直接下载和解压。 + 通过设置`--pretrain` 加载预训练模型。同时在fine-tune时也采用该设置加载已训练模型。 请在训练前确认预训练模型下载与加载正确,否则训练过程中损失可能会出现NAN。 diff --git a/PaddleCV/yolov3/README_en.md b/PaddleCV/yolov3/README_en.md index c468cd8e..eb3ac4fe 100644 --- a/PaddleCV/yolov3/README_en.md +++ b/PaddleCV/yolov3/README_en.md @@ -50,8 +50,9 @@ To train the model, COCO-API is needed. Installation is as follows: Train the model on [MS-COCO dataset](http://cocodataset.org/#download), we also provide download script as follows: - cd dataset/coco - ./download.sh +```bash +python dataset/coco/download.py +``` The data catalog structure is as follows: @@ -84,6 +85,8 @@ You can defined datasets by yourself, we recommend using annotations in COCO for sh ./weights/download.sh +**NOTE:** Windows users can download weights from links in `./weights/download.sh`. + Set `--pretrain` to load pre-trained model. In addition, this parameter is used to load trained model when finetuning as well. Please make sure that pre-trained model is downloaded and loaded correctly, otherwise, the loss may be NAN during training. diff --git a/PaddleCV/yolov3/dataset/coco/download.py b/PaddleCV/yolov3/dataset/coco/download.py new file mode 100644 index 00000000..9df49bef --- /dev/null +++ b/PaddleCV/yolov3/dataset/coco/download.py @@ -0,0 +1,61 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +import os.path as osp +import sys +import zipfile +import logging + +from paddle.dataset.common import download + +logging.basicConfig(level=logging.INFO) +logger = logging.getLogger(__name__) + + +DATASETS = { + 'coco': [ + # coco2017 + ('http://images.cocodataset.org/zips/train2017.zip', + 'cced6f7f71b7629ddf16f17bbcfab6b2', ), + ('http://images.cocodataset.org/zips/val2017.zip', + '442b8da7639aecaf257c1dceb8ba8c80', ), + ('http://images.cocodataset.org/annotations/annotations_trainval2017.zip', + 'f4bbac642086de4f52a3fdda2de5fa2c', ), + # coco2014 + ('http://images.cocodataset.org/zips/train2014.zip', + '0da8c0bd3d6becc4dcb32757491aca88', ), + ('http://images.cocodataset.org/zips/val2014.zip', + 'a3d79f5ed8d289b7a7554ce06a5782b3', ), + ('http://images.cocodataset.org/annotations/annotations_trainval2014.zip', + '0a379cfc70b0e71301e0f377548639bd', ), + ], +} + + +def download_decompress_file(data_dir, url, md5): + logger.info("Downloading from {}".format(url)) + zip_file = download(url, data_dir, md5) + logger.info("Decompressing {}".format(zip_file)) + with zipfile.ZipFile(zip_file) as zf: + zf.extractall(path=data_dir) + os.remove(zip_file) + + +if __name__ == "__main__": + data_dir = osp.split(osp.realpath(sys.argv[0]))[0] + for name, infos in DATASETS.items(): + for info in infos: + download_decompress_file(data_dir, info[0], info[1]) + logger.info("Download dataset {} finished.".format(name)) diff --git a/PaddleCV/yolov3/dataset/coco/download.sh b/PaddleCV/yolov3/dataset/coco/download.sh deleted file mode 100644 index 6f262cce..00000000 --- a/PaddleCV/yolov3/dataset/coco/download.sh +++ /dev/null @@ -1,20 +0,0 @@ -DIR="$( cd "$(dirname "$0")" ; pwd -P )" -cd "$DIR" - -# Download the data. -echo "Downloading..." -wget http://images.cocodataset.org/zips/train2014.zip -wget http://images.cocodataset.org/zips/val2014.zip -wget http://images.cocodataset.org/zips/train2017.zip -wget http://images.cocodataset.org/zips/val2017.zip -wget http://images.cocodataset.org/annotations/annotations_trainval2014.zip -wget http://images.cocodataset.org/annotations/annotations_trainval2017.zip -# Extract the data. -echo "Extracting..." -unzip train2014.zip -unzip val2014.zip -unzip train2017.zip -unzip val2017.zip -unzip annotations_trainval2014.zip -unzip annotations_trainval2017.zip - -- GitLab