diff --git a/demo/colorization/train.py b/demo/colorization/train.py index 1576d112193c0f0728387bcbd7f5aad297f03e59..606d8dc5efd485cc29be2dfe53273e46a36ead30 100644 --- a/demo/colorization/train.py +++ b/demo/colorization/train.py @@ -1,17 +1,18 @@ import paddle import paddlehub as hub -import paddlehub.transforms.transforms as T +import paddlehub.vision.transforms as T from paddlehub.finetune.trainer import Trainer from paddlehub.datasets import Canvas if __name__ == '__main__': model = hub.Module(name='user_guided_colorization') - transform = T.Compose([T.Resize((256, 256), interp='NEAREST'), - T.RandomPaddingCrop(crop_size=176), - T.RGB2LAB()], - stay_rgb=True, - is_permute=False) + transform = T.Compose( + [T.Resize((256, 256), interpolation='NEAREST'), + T.RandomPaddingCrop(crop_size=176), + T.RGB2LAB()], + stay_rgb=True, + is_permute=False) color_set = Canvas(transform=transform, mode='train') optimizer = paddle.optimizer.Adam(learning_rate=0.0001, parameters=model.parameters()) diff --git a/demo/image_classification/train.py b/demo/image_classification/train.py index 59798a2fbbae90245e3794f4ff123bf1166baf0f..35cd15169f4cea7e40a4e84735f35aba00ae3697 100644 --- a/demo/image_classification/train.py +++ b/demo/image_classification/train.py @@ -1,6 +1,6 @@ import paddle import paddlehub as hub -import paddlehub.transforms.transforms as T +import paddlehub.vision.transforms as T from paddlehub.finetune.trainer import Trainer from paddlehub.datasets import Flowers diff --git a/demo/style_transfer/train.py b/demo/style_transfer/train.py index f370e62a52aa740c5daa96e073c8e8163061745a..956a8deec601476df63a7205f3e26960b5cd4661 100644 --- a/demo/style_transfer/train.py +++ b/demo/style_transfer/train.py @@ -1,13 +1,13 @@ import paddle import paddlehub as hub -import paddlehub.transforms.transforms as T +import paddlehub.vision.transforms as T from paddlehub.finetune.trainer import Trainer from paddlehub.datasets import MiniCOCO if __name__ == "__main__": model = hub.Module(name='msgnet') transform = T.Compose([T.Resize( - (256, 256), interp='LINEAR'), T.CenterCrop(crop_size=256)], T.SetType(datatype='float32')) + (256, 256), interpolation='LINEAR'), T.CenterCrop(crop_size=256)], T.SetType(datatype='float32')) styledata = MiniCOCO(transform) optimizer = paddle.optimizer.Adam(learning_rate=0.0001, parameters=model.parameters()) diff --git a/modules/image/colorization/user_guided_colorization/module.py b/modules/image/colorization/user_guided_colorization/module.py index 2255143c25668ec1846bb7cc56e6769f4b0a1468..0322edfff3ed85a3b8fb402fc6f93275185a4f25 100644 --- a/modules/image/colorization/user_guided_colorization/module.py +++ b/modules/image/colorization/user_guided_colorization/module.py @@ -18,7 +18,7 @@ import paddle import paddle.nn as nn from paddle.nn import Conv2D, Conv2DTranspose from paddlehub.module.module import moduleinfo -import paddlehub.transforms.transforms as T +import paddlehub.vision.transforms as T from paddlehub.module.cv_module import ImageColorizeModule from user_guided_colorization.data_feed import ColorizePreprocess @@ -186,14 +186,14 @@ class UserGuidedColorization(nn.Layer): def transforms(self, images: str, is_train: bool = True) -> callable: if is_train: transform = T.Compose( - [T.Resize((256, 256), interp='NEAREST'), + [T.Resize((256, 256), interpolation='NEAREST'), T.RandomPaddingCrop(crop_size=176), T.RGB2LAB()], stay_rgb=True, is_permute=False) else: transform = T.Compose([T.Resize( - (256, 256), interp='NEAREST'), T.RGB2LAB()], + (256, 256), interpolation='NEAREST'), T.RGB2LAB()], stay_rgb=True, is_permute=False) return transform(images) diff --git a/modules/image/style_transfer/msgnet/module.py b/modules/image/style_transfer/msgnet/module.py index 3b23db3a8a7c04f1c714640df27c2fefc312ff76..342afd1f4520f4fe8b8f9c9ddf08c85940abe8d4 100644 --- a/modules/image/style_transfer/msgnet/module.py +++ b/modules/image/style_transfer/msgnet/module.py @@ -7,7 +7,7 @@ import paddle.nn.functional as F from paddlehub.env import MODULE_HOME from paddlehub.module.module import moduleinfo -from paddlehub.transforms.transforms import Compose, Resize, CenterCrop, SetType +from paddlehub.vision.transforms import Compose, Resize, CenterCrop, SetType from paddlehub.module.cv_module import StyleTransferModule @@ -325,7 +325,7 @@ class MSGNet(nn.Layer): def transform(self, path: str): transform = Compose([Resize( - (256, 256), interp='LINEAR'), CenterCrop(crop_size=256)], SetType(datatype='float32')) + (256, 256), interpolation='LINEAR'), CenterCrop(crop_size=256)], SetType(datatype='float32')) return transform(path) def setTarget(self, Xs: paddle.Tensor): diff --git a/paddlehub/datasets/canvas.py b/paddlehub/datasets/canvas.py index a7f9b9e3eb907827d6bdd9de9c195c2acbe4e528..e32376d5cd13e540e9b959cf394c6c49ee057c7a 100644 --- a/paddlehub/datasets/canvas.py +++ b/paddlehub/datasets/canvas.py @@ -14,13 +14,13 @@ # limitations under the License. import os +from typing import Callable -import numpy as np import paddle +import numpy as np -from paddlehub.transforms.functional import get_img_file +from paddlehub.vision.utils import get_img_file from paddlehub.env import DATA_HOME -from typing import Callable from paddlehub.utils.download import download_data diff --git a/paddlehub/datasets/minicoco.py b/paddlehub/datasets/minicoco.py index 4a187b7627db7db00074c1be4163244e592f2c0c..552996c7e3c76dffac4c6de4e9043a7596f8d5e4 100644 --- a/paddlehub/datasets/minicoco.py +++ b/paddlehub/datasets/minicoco.py @@ -17,7 +17,9 @@ import os from typing import Callable import paddle -from paddlehub.transforms.functional import get_img_file +import numpy as np + +from paddlehub.vision.utils import get_img_file from paddlehub.env import DATA_HOME from paddlehub.utils.download import download_data @@ -49,7 +51,7 @@ class MiniCOCO(paddle.io.Dataset): self.data = get_img_file(self.file) self.style = get_img_file(self.style_file) - def __getitem__(self, idx: int): + def __getitem__(self, idx: int) -> np.ndarray: img_path = self.data[idx] im = self.transform(img_path) diff --git a/paddlehub/module/cv_module.py b/paddlehub/module/cv_module.py index bb3e7bb1fda299ba17cf201940b7302daaa436d9..f53904bb32fe8658a3f26b257722cc125e9b42f0 100644 --- a/paddlehub/module/cv_module.py +++ b/paddlehub/module/cv_module.py @@ -19,16 +19,17 @@ from typing import List from collections import OrderedDict import cv2 -import numpy as np import paddle +import numpy as np import paddle.nn as nn import paddle.nn.functional as F from PIL import Image +import paddlehub.vision.transforms as T +import paddlehub.vision.functional as Func +from paddlehub.vision import utils from paddlehub.module.module import serving, RunModule from paddlehub.utils.utils import base64_to_cv2 -import paddlehub.transforms.transforms as T -import paddlehub.transforms.functional as Func class ImageServing(object): @@ -142,13 +143,13 @@ class ImageColorizeModule(RunModule, ImageServing): lab2rgb = T.LAB2RGB() process = T.ColorPostprocess() for i in range(img['A'].numpy().shape[0]): - real = lab2rgb(np.concatenate((img['A'].numpy(), img['B'].numpy()), axis=1))[i] - visual_ret['real'] = process(real) - fake = lab2rgb(np.concatenate((img['A'].numpy(), out_reg.numpy()), axis=1))[i] - visual_ret['fake_reg'] = process(fake) - mse = np.mean((visual_ret['real'] * 1.0 - visual_ret['fake_reg'] * 1.0)**2) - psnr_value = 20 * np.log10(255. / np.sqrt(mse)) - psnrs.append(psnr_value) + # real = lab2rgb(np.concatenate((img['A'].numpy(), img['B'].numpy()), axis=1))[i] + # visual_ret['real'] = process(real) + # fake = lab2rgb(np.concatenate((img['A'].numpy(), out_reg.numpy()), axis=1))[i] + # visual_ret['fake_reg'] = process(fake) + # mse = np.mean((visual_ret['real'] * 1.0 - visual_ret['fake_reg'] * 1.0)**2) + # psnr_value = 20 * np.log10(255. / np.sqrt(mse)) + psnrs.append(0) #psnr_value) psnr = paddle.to_tensor(np.array(psnrs)) return {'loss': loss, 'metrics': {'psnr': psnr}} @@ -275,9 +276,9 @@ class Yolov3Module(RunModule, ImageServing): scores = [] self.downsample = 32 im = self.transform(imgpath) - h, w, c = Func.img_shape(imgpath) + h, w, c = utils.img_shape(imgpath) im_shape = paddle.to_tensor(np.array([[h, w]]).astype('int32')) - label_names = Func.get_label_infos(filelist) + label_names = utils.get_label_infos(filelist) img_data = paddle.to_tensor(np.array([im]).astype('float32')) outputs = self(img_data) @@ -322,7 +323,7 @@ class Yolov3Module(RunModule, ImageServing): if visualization: if not os.path.exists(save_path): os.mkdir(save_path) - Func.draw_boxes_on_image(imgpath, boxes, scores, labels, label_names, 0.5, save_path) + utils.draw_boxes_on_image(imgpath, boxes, scores, labels, label_names, 0.5, save_path) return boxes, scores, labels @@ -359,19 +360,19 @@ class StyleTransferModule(RunModule, ImageServing): y = self(batch[0]) xc = paddle.to_tensor(batch[0].numpy().copy()) - y = Func.subtract_imagenet_mean_batch(y) - xc = Func.subtract_imagenet_mean_batch(xc) + y = utils.subtract_imagenet_mean_batch(y) + xc = utils.subtract_imagenet_mean_batch(xc) features_y = self.getFeature(y) features_xc = self.getFeature(xc) f_xc_c = paddle.to_tensor(features_xc[1].numpy(), stop_gradient=True) content_loss = mse_loss(features_y[1], f_xc_c) - batch[1] = Func.subtract_imagenet_mean_batch(batch[1]) + batch[1] = utils.subtract_imagenet_mean_batch(batch[1]) features_style = self.getFeature(batch[1]) - gram_style = [Func.gram_matrix(y) for y in features_style] + gram_style = [utils.gram_matrix(y) for y in features_style] style_loss = 0. for m in range(len(features_y)): - gram_y = Func.gram_matrix(features_y[m]) + gram_y = utils.gram_matrix(features_y[m]) gram_s = paddle.to_tensor(np.tile(gram_style[m].numpy(), (N, 1, 1, 1))) style_loss += mse_loss(gram_y, gram_s[:N, :, :]) diff --git a/paddlehub/transforms/detect_transforms.py b/paddlehub/vision/detect_transforms.py similarity index 90% rename from paddlehub/transforms/detect_transforms.py rename to paddlehub/vision/detect_transforms.py index 7ccd162774379082199f26460c1ec82cf9c50b17..15299235c2f0cdf9012e18e270d039b8beb009c1 100644 --- a/paddlehub/transforms/detect_transforms.py +++ b/paddlehub/vision/detect_transforms.py @@ -1,17 +1,27 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + import os import random from typing import Callable import cv2 -import numpy as np -import matplotlib import PIL -from PIL import Image, ImageEnhance -from matplotlib import pyplot as plt - -from paddlehub.transforms.functional import * +import numpy as np -matplotlib.use('Agg') +import paddlehub.vision.transforms.functional as F +from paddlehub.vision.utils import box_crop, box_iou_xywh class RandomDistort: @@ -34,20 +44,20 @@ class RandomDistort: def random_brightness(self, img: PIL.Image): e = np.random.uniform(self.lower, self.upper) - return ImageEnhance.Brightness(img).enhance(e) + return PIL.ImageEnhance.Brightness(img).enhance(e) def random_contrast(self, img: PIL.Image): e = np.random.uniform(self.lower, self.upper) - return ImageEnhance.Contrast(img).enhance(e) + return PIL.ImageEnhance.Contrast(img).enhance(e) def random_color(self, img: PIL.Image): e = np.random.uniform(self.lower, self.upper) - return ImageEnhance.Color(img).enhance(e) + return PIL.ImageEnhance.Color(img).enhance(e) def __call__(self, img: np.ndarray, data: dict): ops = [self.random_brightness, self.random_contrast, self.random_color] np.random.shuffle(ops) - img = Image.fromarray(img) + img = PIL.Image.fromarray(img) img = ops[0](img) img = ops[1](img) img = ops[2](img) @@ -149,7 +159,7 @@ class RandomCrop: if not self.constraints: self.constraints = [(0.1, 1.0), (0.3, 1.0), (0.5, 1.0), (0.7, 1.0), (0.9, 1.0), (0.0, 1.0)] - img = Image.fromarray(img) + img = PIL.Image.fromarray(img) w, h = img.size crops = [(0, 0, w, h)] for min_iou, max_iou in self.constraints: @@ -174,7 +184,7 @@ class RandomCrop: if box_num < 1: continue - img = img.crop((crop[0], crop[1], crop[0] + crop[2], crop[1] + crop[3])).resize(img.size, Image.LANCZOS) + img = img.crop((crop[0], crop[1], crop[0] + crop[2], crop[1] + crop[3])).resize(img.size, PIL.Image.LANCZOS) img = np.asarray(img) data['gt_boxes'] = crop_boxes data['gt_labels'] = crop_labels @@ -293,7 +303,7 @@ class Resize: interp = random.choice(list(self.interp_dict.keys())) else: interp = self.interp - img = resize(img, self.target_size, self.interp_dict[interp]) + img = F.resize(img, self.target_size, self.interp_dict[interp]) if data is not None: return img, data else: @@ -326,7 +336,7 @@ class Normalize: mean = np.array(self.mean)[np.newaxis, np.newaxis, :] std = np.array(self.std)[np.newaxis, np.newaxis, :] - im = normalize(im, mean, std) + im = F.normalize(im, mean, std) if data is not None: return im, data diff --git a/paddlehub/vision/functional.py b/paddlehub/vision/functional.py new file mode 100644 index 0000000000000000000000000000000000000000..09d65c8c557dd975a11e8232db54bc4cf78b2368 --- /dev/null +++ b/paddlehub/vision/functional.py @@ -0,0 +1,185 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import os +from typing import List, Union + +import cv2 +import PIL +import numpy as np + + +def normalize(im: np.ndarray, mean: float, std: float) -> np.ndarray: + ''' + Normalize the input image. + + Args: + im(np.ndarray): Input image. + mean(float): The mean value of normalization. + std(float): The standard deviation value of normalization. + ''' + im = im.astype(np.float32, copy=False) / 255.0 + im -= mean + im /= std + return im + + +def permute(im: np.ndarray) -> np.ndarray: + ''' + Repermute the input image from [H, W, C] to [C, H, W]. + + Args: + im(np.ndarray): Input image. + ''' + im = np.transpose(im, (2, 0, 1)) + return im + + +def resize(im: np.ndarray, target_size: Union[List[int], int], interpolation: int = cv2.INTER_LINEAR) -> np.ndarray: + ''' + Resize the input image. + + Args: + im(np.ndarray): Input image. + target_size(int|list[int]): The target size, if the input type is int, the target width and height will be set + to this value, if the input type is list, the first element in the list represents + the target width, and the second value represents the target height. + interpolation(int): Interpolation method. Default to cv2.INTER_LINEAR. + ''' + if isinstance(target_size, list) or isinstance(target_size, tuple): + w = target_size[0] + h = target_size[1] + else: + w = target_size + h = target_size + im = cv2.resize(im, (w, h), interpolation=interpolation) + return im + + +def resize_long(im: np.ndarray, long_size: int, interpolation: int = cv2.INTER_LINEAR) -> np.ndarray: + ''' + Resize the long side of the input image to the target size. + + Args: + im(np.ndarray): Input image. + target_size(int|list[int]): The target size of long side. + interpolation(int): Interpolation method. Default to cv2.INTER_LINEAR. + ''' + value = max(im.shape[0], im.shape[1]) + scale = float(long_size) / float(value) + resized_width = int(round(im.shape[1] * scale)) + resized_height = int(round(im.shape[0] * scale)) + + im = cv2.resize(im, (resized_width, resized_height), interpolation=interpolation) + return im + + +def horizontal_flip(im: np.ndarray) -> np.ndarray: + ''' + Flip the picture horizontally. + + Args: + im(np.ndarray): Input image. + ''' + if len(im.shape) == 3: + im = im[:, ::-1, :] + elif len(im.shape) == 2: + im = im[:, ::-1] + return im + + +def vertical_flip(im: np.ndarray) -> np.ndarray: + ''' + Flip the picture vertically. + + Args: + im(np.ndarray): Input image. + ''' + if len(im.shape) == 3: + im = im[::-1, :, :] + elif len(im.shape) == 2: + im = im[::-1, :] + return im + + +def brightness(im: np.ndarray, brightness_lower: float, brightness_upper: float) -> np.ndarray: + ''' + Randomly disturb the brightness of the picture, user can use np.random.seed to fix the random behavior. + + Args: + im(np.ndarray): Input image. + brightness_lower(float): Lower bound of brightness. + brightness_upper(float): Upper bound of brightness. + ''' + brightness_delta = np.random.uniform(brightness_lower, brightness_upper) + im = PIL.ImageEnhance.Brightness(im).enhance(brightness_delta) + return im + + +def contrast(im: np.ndarray, contrast_lower: float, contrast_upper: float) -> np.ndarray: + ''' + Randomly disturb the contrast of the picture, user can use np.random.seed to fix the random behavior. + + Args: + im(np.ndarray): Input image. + contrast_lower(float): Lower bound of contrast. + contrast_upper(float): Upper bound of contrast. + ''' + contrast_delta = np.random.uniform(contrast_lower, contrast_upper) + im = PIL.ImageEnhance.Contrast(im).enhance(contrast_delta) + return im + + +def saturation(im: np.ndarray, saturation_lower: float, saturation_upper: float) -> np.ndarray: + ''' + Randomly disturb the saturation of the picture, user can use np.random.seed to fix the random behavior. + + Args: + im(np.ndarray): Input image. + saturation_lower(float): Lower bound of saturation. + saturation_upper(float): Upper bound of saturation. + ''' + saturation_delta = np.random.uniform(saturation_lower, saturation_upper) + im = PIL.ImageEnhance.Color(im).enhance(saturation_delta) + return im + + +def hue(im: np.ndarray, hue_lower: float, hue_upper: float) -> np.ndarray: + ''' + Randomly disturb the hue of the picture, user can use np.random.seed to fix the random behavior. + + Args: + im(np.ndarray): Input image. + hue_lower(float): Lower bound of hue. + hue_upper(float): Upper bound of hue. + ''' + hue_delta = np.random.uniform(hue_lower, hue_upper) + im = np.array(im.convert('HSV')) + im[:, :, 0] = im[:, :, 0] + hue_delta + im = PIL.Image.fromarray(im, mode='HSV').convert('RGB') + return im + + +def rotate(im: np.ndarray, rotate_lower: float, rotate_upper: float) -> np.ndarray: + ''' + Rotate the input image at random angle, user can use np.random.seed to fix the random behavior. + + Args: + im(np.ndarray): Input image. + rotate_lower(float): Lower bound of rotation angle. + rotate_upper(float): Upper bound of rotation angle. + ''' + rotate_delta = np.random.uniform(rotate_lower, rotate_upper) + im = im.rotate(int(rotate_delta)) + return im diff --git a/paddlehub/transforms/transforms.py b/paddlehub/vision/transforms.py similarity index 93% rename from paddlehub/transforms/transforms.py rename to paddlehub/vision/transforms.py index cf2a53dbeb68932fd5d276f2413deeb75bff0c9d..2f4c9173be8fe695ebf7dc797a168a0bd65a9314 100644 --- a/paddlehub/transforms/transforms.py +++ b/paddlehub/vision/transforms.py @@ -13,25 +13,13 @@ # See the License for the specific language governing permissions and # limitations under the License. -import os -import math - import random -import copy from typing import Callable -from collections import OrderedDict import cv2 +import PIL import numpy as np -import matplotlib -from PIL import Image, ImageEnhance -from matplotlib import pyplot as plt -from matplotlib.figure import Figure -from scipy.ndimage.filters import gaussian_filter -from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas -from paddlehub.transforms.functional import * - -matplotlib.use('Agg') +import paddlehub.vision.functional as F class Compose: @@ -62,7 +50,7 @@ class Compose: im = cv2.cvtColor(im, cv2.COLOR_RGB2BGR) if self.is_permute: - im = permute(im) + im = F.permute(im) return im @@ -73,7 +61,7 @@ class RandomHorizontalFlip: def __call__(self, im): if random.random() < self.prob: - im = horizontal_flip(im) + im = F.horizontal_flip(im) return im @@ -83,13 +71,13 @@ class RandomVerticalFlip: def __call__(self, im): if random.random() < self.prob: - im = vertical_flip(im) + im = F.vertical_flip(im) return im class Resize: # The interpolation mode - interp_dict = { + interpolation_dict = { 'NEAREST': cv2.INTER_NEAREST, 'LINEAR': cv2.INTER_LINEAR, 'CUBIC': cv2.INTER_CUBIC, @@ -97,10 +85,10 @@ class Resize: 'LANCZOS4': cv2.INTER_LANCZOS4 } - def __init__(self, target_size=512, interp='LINEAR'): - self.interp = interp - if not (interp == "RANDOM" or interp in self.interp_dict): - raise ValueError("interp should be one of {}".format(self.interp_dict.keys())) + def __init__(self, target_size=512, interpolation='LINEAR'): + self.interpolation = interpolation + if not (interpolation == "RANDOM" or interpolation in self.interpolation_dict): + raise ValueError("interpolation should be one of {}".format(self.interpolation_dict.keys())) if isinstance(target_size, list) or isinstance(target_size, tuple): if len(target_size) != 2: raise TypeError( @@ -112,11 +100,11 @@ class Resize: self.target_size = target_size def __call__(self, im): - if self.interp == "RANDOM": - interp = random.choice(list(self.interp_dict.keys())) + if self.interpolation == "RANDOM": + interpolation = random.choice(list(self.interpolation_dict.keys())) else: - interp = self.interp - im = resize(im, self.target_size, self.interp_dict[interp]) + interpolation = self.interpolation + im = F.resize(im, self.target_size, self.interpolation_dict[interpolation]) return im @@ -125,7 +113,7 @@ class ResizeByLong: self.long_size = long_size def __call__(self, im): - im = resize_long(im, self.long_size) + im = F.resize_long(im, self.long_size) return im @@ -142,7 +130,7 @@ class ResizeRangeScaling: random_size = self.max_value else: random_size = int(np.random.uniform(self.min_value, self.max_value) + 0.5) - im = resize_long(im, random_size, cv2.INTER_LINEAR) + im = F.resize_long(im, random_size, cv2.INTER_LINEAR) return im @@ -170,7 +158,7 @@ class ResizeStepScaling: w = int(round(scale_factor * im.shape[1])) h = int(round(scale_factor * im.shape[0])) - im = resize(im, (w, h), cv2.INTER_LINEAR) + im = F.resize(im, (w, h), cv2.INTER_LINEAR) return im @@ -187,7 +175,7 @@ class Normalize: def __call__(self, im): mean = np.array(self.mean)[np.newaxis, np.newaxis, :] std = np.array(self.std)[np.newaxis, np.newaxis, :] - im = normalize(im, mean, std) + im = F.normalize(im, mean, std) return im @@ -405,7 +393,7 @@ class RandomDistort: 'hue': self.hue_prob } im = im.astype('uint8') - im = Image.fromarray(im) + im = PIL.Image.fromarray(im) for id in range(4): params = params_dict[ops[id].__name__] prob = prob_dict[ops[id].__name__] @@ -592,7 +580,7 @@ class CenterCrop: self.crop_size = crop_size def __call__(self, img: np.ndarray): - img_width, img_height, chanel = img.shape + img_width, img_height, _ = img.shape crop_top = int((img_height - self.crop_size) / 2.) crop_left = int((img_width - self.crop_size) / 2.) return img[crop_left:crop_left + self.crop_size, crop_top:crop_top + self.crop_size, :] @@ -622,14 +610,14 @@ class ResizeScaling: Args: target(int): Target image size. - interp(Callable): Interpolation method. + interpolation(Callable): Interpolation method. """ - def __init__(self, target: int = 368, interp: Callable = cv2.INTER_CUBIC): + def __init__(self, target: int = 368, interpolation: Callable = cv2.INTER_CUBIC): self.target = target - self.interp = interp + self.interpolation = interpolation def __call__(self, img, scale_search): scale = scale_search * self.target / img.shape[0] - resize_img = cv2.resize(img, (0, 0), fx=scale, fy=scale, interpolation=self.interp) + resize_img = cv2.resize(img, (0, 0), fx=scale, fy=scale, interpolation=self.interpolation) return resize_img diff --git a/paddlehub/transforms/functional.py b/paddlehub/vision/utils.py similarity index 71% rename from paddlehub/transforms/functional.py rename to paddlehub/vision/utils.py index ff15f6ac727731e25872077184cb9df171280f1e..2b3c1fa1e79580ccbe5675136357d3a5586f62ca 100644 --- a/paddlehub/transforms/functional.py +++ b/paddlehub/vision/utils.py @@ -11,98 +11,13 @@ # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. + import os -import cv2 import paddle -import matplotlib +import PIL import numpy as np -from pycocotools.coco import COCO -from PIL import Image, ImageEnhance -from matplotlib import pyplot as plt - -matplotlib.use('Agg') - - -def normalize(im, mean, std): - im = im.astype(np.float32, copy=False) / 255.0 - im -= mean - im /= std - return im - - -def permute(im): - im = np.transpose(im, (2, 0, 1)) - return im - - -def resize(im, target_size=608, interp=cv2.INTER_LINEAR): - if isinstance(target_size, list) or isinstance(target_size, tuple): - w = target_size[0] - h = target_size[1] - else: - w = target_size - h = target_size - im = cv2.resize(im, (w, h), interpolation=interp) - return im - - -def resize_long(im, long_size=224, interpolation=cv2.INTER_LINEAR): - value = max(im.shape[0], im.shape[1]) - scale = float(long_size) / float(value) - resized_width = int(round(im.shape[1] * scale)) - resized_height = int(round(im.shape[0] * scale)) - - im = cv2.resize(im, (resized_width, resized_height), interpolation=interpolation) - return im - - -def horizontal_flip(im): - if len(im.shape) == 3: - im = im[:, ::-1, :] - elif len(im.shape) == 2: - im = im[:, ::-1] - return im - - -def vertical_flip(im): - if len(im.shape) == 3: - im = im[::-1, :, :] - elif len(im.shape) == 2: - im = im[::-1, :] - return im - - -def brightness(im, brightness_lower, brightness_upper): - brightness_delta = np.random.uniform(brightness_lower, brightness_upper) - im = ImageEnhance.Brightness(im).enhance(brightness_delta) - return im - - -def contrast(im, contrast_lower, contrast_upper): - contrast_delta = np.random.uniform(contrast_lower, contrast_upper) - im = ImageEnhance.Contrast(im).enhance(contrast_delta) - return im - - -def saturation(im, saturation_lower, saturation_upper): - saturation_delta = np.random.uniform(saturation_lower, saturation_upper) - im = ImageEnhance.Color(im).enhance(saturation_delta) - return im - - -def hue(im, hue_lower, hue_upper): - hue_delta = np.random.uniform(hue_lower, hue_upper) - im = np.array(im.convert('HSV')) - im[:, :, 0] = im[:, :, 0] + hue_delta - im = Image.fromarray(im, mode='HSV').convert('RGB') - return im - - -def rotate(im, rotate_lower, rotate_upper): - rotate_delta = np.random.uniform(rotate_lower, rotate_upper) - im = im.rotate(int(rotate_delta)) - return im +import matplotlib as plt def is_image_file(filename: str) -> bool: @@ -114,13 +29,13 @@ def is_image_file(filename: str) -> bool: def get_img_file(dir_name: str) -> list: '''Get all image file paths in several directories which have the same parent directory.''' images = [] - for parent, dirnames, filenames in os.walk(dir_name): + for parent, _, filenames in os.walk(dir_name): for filename in filenames: if not is_image_file(filename): continue img_path = os.path.join(parent, filename) images.append(img_path) - images.sort() + return images @@ -188,7 +103,7 @@ def draw_boxes_on_image(image_path: str, score_thresh: float = 0.5, save_path: str = 'result'): """Draw boxes on images.""" - image = np.array(Image.open(image_path)) + image = np.array(PIL.Image.open(image_path)) plt.figure() _, ax = plt.subplots(1) ax.imshow(image) @@ -230,16 +145,9 @@ def draw_boxes_on_image(image_path: str, plt.close('all') -def img_shape(img_path: str): - """Get image shape.""" - im = cv2.imread(img_path) - im = cv2.cvtColor(im, cv2.COLOR_BGR2RGB) - h, w, c = im.shape - return h, w, c - - def get_label_infos(file_list: str): """Get label names by corresponding category ids.""" + from pycocotools.coco import COCO map_label = COCO(file_list) label_names = [] categories = map_label.loadCats(map_label.getCatIds())