# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. """Reader for Cityscape dataset. """ from __future__ import absolute_import from __future__ import division from __future__ import print_function import os import cv2 import numpy as np import paddle.dataset as dataset DATA_PATH = "./data/cityscape" TRAIN_LIST = DATA_PATH + "/train.list" TEST_LIST = DATA_PATH + "/val.list" IGNORE_LABEL = 255 NUM_CLASSES = 19 TRAIN_DATA_SHAPE = (3, 720, 720) TEST_DATA_SHAPE = (3, 1024, 2048) IMG_MEAN = np.array((103.939, 116.779, 123.68), dtype=np.float32) def train_data_shape(): return TRAIN_DATA_SHAPE def test_data_shape(): return TEST_DATA_SHAPE def num_classes(): return NUM_CLASSES class DataGenerater: def __init__(self, data_list, mode="train", flip=True, scaling=True): self.flip = flip self.scaling = scaling self.image_label = [] with open(data_list, 'r') as f: for line in f: image_file, label_file = line.strip().split(' ') self.image_label.append((image_file, label_file)) def create_train_reader(self, batch_size): """ Create a reader for train dataset. """ def reader(): np.random.shuffle(self.image_label) images = [] labels_sub1 = [] labels_sub2 = [] labels_sub4 = [] count = 0 for image, label in self.image_label: image, label_sub1, label_sub2, label_sub4 = self.process_train_data( image, label) count += 1 images.append(image) labels_sub1.append(label_sub1) labels_sub2.append(label_sub2) labels_sub4.append(label_sub4) if count == batch_size: yield self.mask( np.array(images), np.array(labels_sub1), np.array(labels_sub2), np.array(labels_sub4)) images = [] labels_sub1 = [] labels_sub2 = [] labels_sub4 = [] count = 0 if images: yield self.mask( np.array(images), np.array(labels_sub1), np.array(labels_sub2), np.array(labels_sub4)) return reader def create_test_reader(self): """ Create a reader for test dataset. """ def reader(): for image, label in self.image_label: image, label = self.load(image, label) image = dataset.image.to_chw(image)[np.newaxis, :] label = label[np.newaxis, :, :, np.newaxis].astype("float32") label_mask = np.where((label != IGNORE_LABEL).flatten())[ 0].astype("int32") yield image, label, label_mask return reader def process_train_data(self, image, label): """ Process training data. """ image, label = self.load(image, label) if self.flip: image, label = self.random_flip(image, label) if self.scaling: image, label = self.random_scaling(image, label) image, label = self.resize(image, label, out_size=TRAIN_DATA_SHAPE[1:]) label = label.astype("float32") label_sub1 = dataset.image.to_chw(self.scale_label(label, factor=4)) label_sub2 = dataset.image.to_chw(self.scale_label(label, factor=8)) label_sub4 = dataset.image.to_chw(self.scale_label(label, factor=16)) image = dataset.image.to_chw(image) return image, label_sub1, label_sub2, label_sub4 def load(self, image, label): """ Load image from file. """ image = dataset.image.load_image( DATA_PATH + "/" + image, is_color=True).astype("float32") image -= IMG_MEAN label = dataset.image.load_image( DATA_PATH + "/" + label, is_color=False).astype("float32") return image, label def random_flip(self, image, label): """ Flip image and label randomly. """ r = np.random.rand(1) if r > 0.5: image = dataset.image.left_right_flip(image, is_color=True) label = dataset.image.left_right_flip(label, is_color=False) return image, label def random_scaling(self, image, label): """ Scale image and label randomly. """ scale = np.random.uniform(0.5, 2.0, 1)[0] h_new = int(image.shape[0] * scale) w_new = int(image.shape[1] * scale) image = cv2.resize(image, (w_new, h_new)) label = cv2.resize( label, (w_new, h_new), interpolation=cv2.INTER_NEAREST) return image, label def padding_as(self, image, h, w, is_color): """ Padding image. """ pad_h = max(image.shape[0], h) - image.shape[0] pad_w = max(image.shape[1], w) - image.shape[1] if is_color: return np.pad(image, ((0, pad_h), (0, pad_w), (0, 0)), 'constant') else: return np.pad(image, ((0, pad_h), (0, pad_w)), 'constant') def random_crop(self, im, out_shape, is_color=True): h, w = im.shape[:2] h_start = np.random.randint(0, h - out_shape[0] + 1) w_start = np.random.randint(0, w - out_shape[1] + 1) h_end, w_end = h_start + out_shape[0], w_start + out_shape[1] if is_color: im = im[h_start:h_end, w_start:w_end, :] else: im = im[h_start:h_end, w_start:w_end] return im def resize(self, image, label, out_size): """ Resize image and label by padding or cropping. """ ignore_label = IGNORE_LABEL label = label - ignore_label if len(label.shape) == 2: label = label[:, :, np.newaxis] combined = np.concatenate((image, label), axis=2) combined = self.padding_as( combined, out_size[0], out_size[1], is_color=True) combined = self.random_crop(combined, out_size, is_color=True) image = combined[:, :, 0:3] label = combined[:, :, 3:4] + ignore_label return image, label def scale_label(self, label, factor): """ Scale label according to factor. """ h = label.shape[0] // factor w = label.shape[1] // factor return cv2.resize( label, (h, w), interpolation=cv2.INTER_NEAREST)[:, :, np.newaxis] def mask(self, image, label0, label1, label2): """ Get mask for valid pixels. """ mask_sub1 = np.where(((label0 < (NUM_CLASSES + 1)) & ( label0 != IGNORE_LABEL)).flatten())[0].astype("int32") mask_sub2 = np.where(((label1 < (NUM_CLASSES + 1)) & ( label1 != IGNORE_LABEL)).flatten())[0].astype("int32") mask_sub4 = np.where(((label2 < (NUM_CLASSES + 1)) & ( label2 != IGNORE_LABEL)).flatten())[0].astype("int32") return image.astype( "float32"), label0, mask_sub1, label1, mask_sub2, label2, mask_sub4 def train(batch_size=32, flip=True, scaling=True): """ Cityscape training set reader. It returns a reader, in which each result is a batch with batch_size samples. :param batch_size: The batch size of each result return by the reader. :type batch_size: int :param flip: Whether flip images randomly. :type batch_size: bool :param scaling: Whether scale images randomly. :type batch_size: bool :return: Training reader. :rtype: callable """ reader = DataGenerater( TRAIN_LIST, flip=flip, scaling=scaling).create_train_reader(batch_size) return reader def test(): """ Cityscape validation set reader. It returns a reader, in which each result is a sample. :return: Training reader. :rtype: callable """ reader = DataGenerater(TEST_LIST).create_test_reader() return reader def infer(image_list=TEST_LIST): """ Infer set reader. It returns a reader, in which each result is a sample. :param image_list: The image list file in which each line is a path of image to be infered. :type batch_size: str :return: Infer reader. :rtype: callable """ reader = DataGenerater(image_list).create_test_reader()