add cv dataset

bf0e1b7f · wuzewu · 5eade5ce · bf0e1b7f · bf0e1b7f · bf0e1b7f
5 changed files
--- a/paddlehub/dataset/__init__.py
+++ b/paddlehub/dataset/__init__.py
@@ -22,3 +22,6 @@ from .lcqmc import LCQMC
 # CV Dataset
 from .dogcat import DogCatDataset as DogCat
 from .flowers import FlowersDataset as Flowers
+from .stanford_dogs import StanfordDogsDataset as StanfordDogs
+from .food101 import Food101Dataset as Food101
+from .indoor67 import Indoor67Dataset as Indoor67
--- a/paddlehub/dataset/base_cv_dataset.py
+++ b/paddlehub/dataset/base_cv_dataset.py
@@ -54,15 +54,16 @@ class ImageClassificationDataset(object):
                        break
                    line = line.strip()
                    items = line.split(" ")
-                    if os.path.isabs(items[0]):
+                    if len(items) > 2:
-                        image_path = items[0]
+                        image_path = " ".join(items[0:-1])
                    else:
-                        if self.base_path is None:
+                        image_path = items[0]
-                            image_path = items[0]
+                    if not os.path.isabs(image_path):
-                        else:
+                        if self.base_path is not None:
-                            image_path = os.path.join(self.base_path, items[0])
+                            image_path = os.path.join(self.base_path,
-                    label = items[1]
+                                                      image_path)
-                    data.append((image_path, items[1]))
+                    label = items[-1]
+                    data.append((image_path, items[-1]))
            if shuffle:
                np.random.shuffle(data)

--- a/paddlehub/dataset/food101.py
+++ b/paddlehub/dataset/food101.py
+# Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import os
+import paddlehub as hub
+from paddlehub.dataset.base_cv_dataset import ImageClassificationDataset
+class Food101Dataset(ImageClassificationDataset):
+    """Image-classification dataset definition for Food-101 (101 labels).
+
+    The constructor only configures the instance: it resolves the local
+    dataset directory, calls the inherited ``_download_dataset`` helper, and
+    records the names of the list files and the number of labels.
+    """
+
+    def __init__(self):
+        super(Food101Dataset, self).__init__()
+        # Local dataset directory: <DATA_HOME>/food-101/images.
+        dataset_path = os.path.join(hub.common.dir.DATA_HOME, "food-101",
+                                    "images")
+        # `_download_dataset` is inherited and not visible here; presumably it
+        # fetches/unpacks the archive when it is missing and returns the
+        # directory to use as the dataset root -- TODO confirm.
+        self.base_path = self._download_dataset(
+            dataset_path=dataset_path,
+            url="https://paddlehub-dataset.bj.bcebos.com/Food101.tar.gz")
+        # Names of the split and label list files; presumably resolved
+        # relative to `base_path` by the base class -- verify.
+        self.train_list_file = "train_list.txt"
+        self.test_list_file = "test_list.txt"
+        self.validate_list_file = "validate_list.txt"
+        self.label_list_file = "label_list.txt"
+        # Number of classes in this dataset.
+        self.num_labels = 101
--- a/paddlehub/dataset/indoor67.py
+++ b/paddlehub/dataset/indoor67.py
+# Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import os
+import paddlehub as hub
+from paddlehub.dataset.base_cv_dataset import ImageClassificationDataset
+class Indoor67Dataset(ImageClassificationDataset):
+    """Image-classification dataset definition for Indoor67 (67 labels).
+
+    The constructor only configures the instance: it resolves the local
+    dataset directory, calls the inherited ``_download_dataset`` helper, and
+    records the names of the list files and the number of labels.
+    """
+
+    def __init__(self):
+        super(Indoor67Dataset, self).__init__()
+        # Local dataset directory: <DATA_HOME>/Indoor67.
+        dataset_path = os.path.join(hub.common.dir.DATA_HOME, "Indoor67")
+        # `_download_dataset` is inherited and not visible here; presumably it
+        # fetches/unpacks the archive when it is missing and returns the
+        # directory to use as the dataset root -- TODO confirm.
+        self.base_path = self._download_dataset(
+            dataset_path=dataset_path,
+            url="https://paddlehub-dataset.bj.bcebos.com/Indoor67.tar.gz")
+        # Names of the split and label list files; presumably resolved
+        # relative to `base_path` by the base class -- verify.
+        self.train_list_file = "train_list.txt"
+        self.test_list_file = "test_list.txt"
+        self.validate_list_file = "validate_list.txt"
+        self.label_list_file = "label_list.txt"
+        # Number of classes in this dataset.
+        self.num_labels = 67
--- a/paddlehub/dataset/stanford_dogs.py
+++ b/paddlehub/dataset/stanford_dogs.py
+# Copyright (c) 2019  PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from __future__ import absolute_import
+from __future__ import division
+from __future__ import print_function
+import os
+import paddlehub as hub
+from paddlehub.dataset.base_cv_dataset import ImageClassificationDataset
+class StanfordDogsDataset(ImageClassificationDataset):
+    """Image-classification dataset definition for StanfordDogs-120 (120 labels).
+
+    The constructor only configures the instance: it resolves the local
+    dataset directory, calls the inherited ``_download_dataset`` helper, and
+    records the names of the list files and the number of labels.
+    """
+
+    def __init__(self):
+        super(StanfordDogsDataset, self).__init__()
+        # Local dataset directory: <DATA_HOME>/StanfordDogs-120.
+        dataset_path = os.path.join(hub.common.dir.DATA_HOME,
+                                    "StanfordDogs-120")
+        # `_download_dataset` is inherited and not visible here; presumably it
+        # fetches/unpacks the archive when it is missing and returns the
+        # directory to use as the dataset root -- TODO confirm.
+        self.base_path = self._download_dataset(
+            dataset_path=dataset_path,
+            url="https://paddlehub-dataset.bj.bcebos.com/StanfordDogs-120.tar.gz"
+        )
+        # Names of the split and label list files; presumably resolved
+        # relative to `base_path` by the base class -- verify.
+        self.train_list_file = "train_list.txt"
+        self.test_list_file = "test_list.txt"
+        self.validate_list_file = "validate_list.txt"
+        self.label_list_file = "label_list.txt"
+        # Number of classes in this dataset.
+        self.num_labels = 120