diff --git a/paddlehub/dataset/__init__.py b/paddlehub/dataset/__init__.py index 1cb0086c10d4e0f7ded663d90695cd347712bbce..2239623b526e9038b292ea881c891ae5d0f8d375 100644 --- a/paddlehub/dataset/__init__.py +++ b/paddlehub/dataset/__init__.py @@ -22,3 +22,6 @@ from .lcqmc import LCQMC # CV Dataset from .dogcat import DogCatDataset as DogCat from .flowers import FlowersDataset as Flowers +from .stanford_dogs import StanfordDogsDataset as StanfordDogs +from .food101 import Food101Dataset as Food101 +from .indoor67 import Indoor67Dataset as Indoor67 diff --git a/paddlehub/dataset/base_cv_dataset.py b/paddlehub/dataset/base_cv_dataset.py index 55964151ac66f0af7cd3d7694ff44fba97cc6824..9eb41c1044d83baa642e916579d6867b8bc7f316 100644 --- a/paddlehub/dataset/base_cv_dataset.py +++ b/paddlehub/dataset/base_cv_dataset.py @@ -54,15 +54,16 @@ class ImageClassificationDataset(object): break line = line.strip() items = line.split(" ") - if os.path.isabs(items[0]): - image_path = items[0] + if len(items) > 2: + image_path = " ".join(items[0:-1]) else: - if self.base_path is None: - image_path = items[0] - else: - image_path = os.path.join(self.base_path, items[0]) - label = items[1] - data.append((image_path, items[1])) + image_path = items[0] + if not os.path.isabs(image_path): + if self.base_path is not None: + image_path = os.path.join(self.base_path, + image_path) + label = items[-1] + data.append((image_path, items[-1])) if shuffle: np.random.shuffle(data) diff --git a/paddlehub/dataset/food101.py b/paddlehub/dataset/food101.py new file mode 100644 index 0000000000000000000000000000000000000000..de9c8679053493747d46309fe3a8c2d7c0abc2f6 --- /dev/null +++ b/paddlehub/dataset/food101.py @@ -0,0 +1,37 @@ +# Copyright (c) 2019 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License" +# you may not use this file except in compliance with the License. 
class Food101Dataset(ImageClassificationDataset):
    """Image classification dataset descriptor for Food-101 (101 labels).

    Construction hands a local target directory and an archive URL to the
    inherited ``_download_dataset`` helper and stores whatever that helper
    returns as ``base_path``. It then records the names of the train / test /
    validate / label list files and the number of labels.
    """

    def __init__(self):
        super(Food101Dataset, self).__init__()

        # Target directory below the PaddleHub data home.
        local_dir = os.path.join(
            hub.common.dir.DATA_HOME, "food-101", "images")
        archive_url = "https://paddlehub-dataset.bj.bcebos.com/Food101.tar.gz"

        # NOTE(review): _download_dataset is defined outside this view;
        # presumably it fetches/unpacks the archive -- confirm in base class.
        self.base_path = self._download_dataset(
            dataset_path=local_dir, url=archive_url)

        # Names of the split and label list files (assigned left to right).
        (self.train_list_file,
         self.test_list_file,
         self.validate_list_file,
         self.label_list_file) = ("train_list.txt",
                                  "test_list.txt",
                                  "validate_list.txt",
                                  "label_list.txt")
        self.num_labels = 101
class Indoor67Dataset(ImageClassificationDataset):
    """Image classification dataset descriptor for Indoor67 (67 labels).

    Construction hands a local target directory and an archive URL to the
    inherited ``_download_dataset`` helper and stores whatever that helper
    returns as ``base_path``. It then records the names of the train / test /
    validate / label list files and the number of labels.
    """

    def __init__(self):
        super(Indoor67Dataset, self).__init__()

        # Target directory below the PaddleHub data home.
        local_dir = os.path.join(hub.common.dir.DATA_HOME, "Indoor67")
        archive_url = "https://paddlehub-dataset.bj.bcebos.com/Indoor67.tar.gz"

        # NOTE(review): _download_dataset is defined outside this view;
        # presumably it fetches/unpacks the archive -- confirm in base class.
        self.base_path = self._download_dataset(
            dataset_path=local_dir, url=archive_url)

        # Names of the split and label list files (assigned left to right).
        (self.train_list_file,
         self.test_list_file,
         self.validate_list_file,
         self.label_list_file) = ("train_list.txt",
                                  "test_list.txt",
                                  "validate_list.txt",
                                  "label_list.txt")
        self.num_labels = 67
class StanfordDogsDataset(ImageClassificationDataset):
    """Image classification dataset descriptor for Stanford Dogs (120 labels).

    Construction hands a local target directory and an archive URL to the
    inherited ``_download_dataset`` helper and stores whatever that helper
    returns as ``base_path``. It then records the names of the train / test /
    validate / label list files and the number of labels.
    """

    def __init__(self):
        super(StanfordDogsDataset, self).__init__()

        # Target directory below the PaddleHub data home.
        local_dir = os.path.join(hub.common.dir.DATA_HOME, "StanfordDogs-120")
        archive_url = (
            "https://paddlehub-dataset.bj.bcebos.com/StanfordDogs-120.tar.gz")

        # NOTE(review): _download_dataset is defined outside this view;
        # presumably it fetches/unpacks the archive -- confirm in base class.
        self.base_path = self._download_dataset(
            dataset_path=local_dir, url=archive_url)

        # Names of the split and label list files (assigned left to right).
        (self.train_list_file,
         self.test_list_file,
         self.validate_list_file,
         self.label_list_file) = ("train_list.txt",
                                  "test_list.txt",
                                  "validate_list.txt",
                                  "label_list.txt")
        self.num_labels = 120