# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. from ...device import get_cudnn_version from ...fluid.framework import core, in_dygraph_mode, Variable from ...fluid.layer_helper import LayerHelper from ...fluid.data_feeder import check_variable_and_dtype from ...fluid import dygraph_utils import numpy as np # TODO: define specitial functions used in computer vision task from ...fluid.layers import affine_channel #DEFINE_ALIAS from ...fluid.layers import anchor_generator #DEFINE_ALIAS from ...fluid.layers import bipartite_match #DEFINE_ALIAS from ...fluid.layers import box_clip #DEFINE_ALIAS from ...fluid.layers import box_coder #DEFINE_ALIAS from ...fluid.layers import box_decoder_and_assign #DEFINE_ALIAS from ...fluid.layers import collect_fpn_proposals #DEFINE_ALIAS from ...fluid.layers import deformable_roi_pooling #DEFINE_ALIAS from ...fluid.layers import density_prior_box #DEFINE_ALIAS from ...fluid.layers import detection_output #DEFINE_ALIAS from ...fluid.layers import distribute_fpn_proposals #DEFINE_ALIAS from ...fluid.layers import generate_mask_labels #DEFINE_ALIAS from ...fluid.layers import generate_proposal_labels #DEFINE_ALIAS from ...fluid.layers import generate_proposals #DEFINE_ALIAS from ...fluid.layers import grid_sampler #DEFINE_ALIAS from ...fluid.layers import image_resize #DEFINE_ALIAS from ...fluid.layers import prior_box #DEFINE_ALIAS from ...fluid.layers import prroi_pool #DEFINE_ALIAS from ...fluid.layers import psroi_pool #DEFINE_ALIAS from ...fluid.layers import resize_bilinear #DEFINE_ALIAS from ...fluid.layers import resize_nearest #DEFINE_ALIAS from ...fluid.layers import resize_trilinear #DEFINE_ALIAS from ...fluid.layers import roi_align #DEFINE_ALIAS from ...fluid.layers import roi_pool #DEFINE_ALIAS from ...fluid.layers import space_to_depth #DEFINE_ALIAS from ...fluid.layers import yolo_box #DEFINE_ALIAS from ...fluid.layers import yolov3_loss #DEFINE_ALIAS from ...fluid.layers import fsp_matrix #DEFINE_ALIAS from ...fluid.layers import image_resize_short #DEFINE_ALIAS # from ...fluid.layers import pixel_shuffle #DEFINE_ALIAS from ...fluid.layers import retinanet_detection_output #DEFINE_ALIAS from ...fluid.layers import retinanet_target_assign #DEFINE_ALIAS from ...fluid.layers import roi_perspective_transform #DEFINE_ALIAS from ...fluid.layers import shuffle_channel #DEFINE_ALIAS __all__ = [ 'affine_channel', 'affine_grid', 'anchor_generator', 'bipartite_match', 'box_clip', 'box_coder', 'box_decoder_and_assign', 'collect_fpn_proposals', # 'deformable_conv', 'deformable_roi_pooling', 'density_prior_box', 'detection_output', 'distribute_fpn_proposals', 'fsp_matrix', 'generate_mask_labels', 'generate_proposal_labels', 'generate_proposals', 'grid_sampler', 'image_resize', 'image_resize_short', # 'multi_box_head', 'pixel_shuffle', 'prior_box', 'prroi_pool', 'psroi_pool', 'resize_bilinear', 'resize_nearest', 'resize_trilinear', 'retinanet_detection_output', 'retinanet_target_assign', 'roi_align', 'roi_perspective_transform', 'roi_pool', 'shuffle_channel', 'space_to_depth', 'yolo_box', 'yolov3_loss' ] def affine_grid(theta, out_shape, align_corners=True, name=None): """ It generates a grid of (x,y) coordinates using the parameters of the affine transformation that correspond to a set of points where the input feature map should be sampled to produce the transformed output feature map. Args: theta (Tensor) - A tensor with shape [N, 2, 3]. It contains a batch of affine transform parameters. The data type can be float32 or float64. out_shape (Tensor | list | tuple): The shape of target output with format [batch_size, channel, height, width]. ``out_shape`` can be a Tensor or a list or tuple. The data type must be int32. align_corners(bool): Whether to align corners of target feature map and source feature map. Default: True. name(str|None): The default value is None. Normally there is no need for user to set this property. For more information, please refer to :ref:`api_guide_Name`. Returns: Tensor, A Tensor with shape [batch_size, H, W, 2] while 'H' and 'W' are the height and width of feature map in affine transformation. The data type is the same as `theta`. Raises: ValueError: If the type of arguments is not supported. Examples: .. code-block:: python import paddle import paddle.nn.functional as F import numpy as np paddle.disable_static() # theta shape = [1, 2, 3] theta = np.array([[[-0.7, -0.4, 0.3], [ 0.6, 0.5, 1.5]]]).astype("float32") theta_t = paddle.to_tensor(theta) y_t = F.affine_grid( theta_t, [1, 2, 3, 3], align_corners=False) print(y_t.numpy()) #[[[[ 1.0333333 0.76666665] # [ 0.76666665 1.0999999 ] # [ 0.5 1.4333333 ]] # # [[ 0.5666667 1.1666666 ] # [ 0.3 1.5 ] # [ 0.03333333 1.8333334 ]] # # [[ 0.10000002 1.5666667 ] # [-0.16666666 1.9000001 ] # [-0.43333334 2.2333333 ]]]] """ helper = LayerHelper('affine_grid') if not isinstance(theta, Variable): raise ValueError("The theta should be a Tensor.") check_variable_and_dtype(theta, 'theta', ['float32', 'float64'], 'affine_grid') cudnn_version = get_cudnn_version() if cudnn_version is not None and cudnn_version >= 6000 and align_corners: use_cudnn = True else: use_cudnn = False if not (isinstance(out_shape, list) or isinstance(out_shape, tuple) or \ isinstance(out_shape, Variable)): raise ValueError("The out_shape should be a list, tuple or Tensor.") if in_dygraph_mode(): _out_shape = out_shape.numpy().tolist() if isinstance( out_shape, Variable) else out_shape return core.ops.affine_grid(theta, "output_shape", _out_shape, "align_corners", align_corners, "use_cudnn", use_cudnn) out = helper.create_variable_for_type_inference(theta.dtype) ipts = {'Theta': theta} attrs = {"align_corners": align_corners, "use_cudnn": use_cudnn} if isinstance(out_shape, Variable): ipts['OutputShape'] = out_shape check_variable_and_dtype(out_shape, 'out_shape', ['int32'], 'affine_grid') else: attrs['output_shape'] = out_shape helper.append_op( type='affine_grid', inputs=ipts, outputs={'Output': out}, attrs=None if len(attrs) == 0 else attrs) return out def grid_sample(x, grid, mode='bilinear', padding_mode='zeros', align_corners=True, name=None): """ This operation samples input X by using bilinear interpolation or nearest interpolation based on flow field grid, which is usually generated by :code:`affine_grid` . The grid of shape [N, H, W, 2] is the concatenation of (x, y) coordinates with shape [N, H, W] each, where x is indexing the 4th dimension (in width dimension) of input data x and y is indexing the 3rd dimension (in height dimension), finally results is the bilinear interpolation or nearest value of 4 nearest corner points. The output tensor shape will be [N, C, H, W]. .. code-block:: text Step 1: Get (x, y) grid coordinates and scale to [0, H-1/W-1]. .. code-block:: text grid_x = 0.5 * (grid[:, :, :, 0] + 1) * (W - 1) grid_y = 0.5 * (grid[:, :, :, 1] + 1) * (H - 1) Step 2: Indices input data X with grid (x, y) in each [H, W] area, and bilinear interpolate point value by 4 nearest points or nearest interpolate point value by nearest point. wn ------- y_n ------- en | | | | d_n | | | | x_w --d_w-- grid--d_e-- x_e | | | | d_s | | | | ws ------- y_s ------- wn For bilinear interpolation: x_w = floor(x) // west side x coord x_e = x_w + 1 // east side x coord y_n = floor(y) // north side y coord y_s = y_s + 1 // south side y coord d_w = grid_x - x_w // distance to west side d_e = x_e - grid_x // distance to east side d_n = grid_y - y_n // distance to north side d_s = y_s - grid_y // distance to south side wn = X[:, :, y_n, x_w] // north-west point value en = X[:, :, y_n, x_e] // north-east point value ws = X[:, :, y_s, x_w] // south-east point value es = X[:, :, y_s, x_w] // north-east point value output = wn * d_e * d_s + en * d_w * d_s + ws * d_e * d_n + es * d_w * d_n Args: x(Tensor): The input tensor, which is a 4-d tensor with shape [N, C, H, W], N is the batch size, C is the channel number, H and W is the feature height and width. The data type is float32 or float64. grid(Tensor): Input grid tensor of shape [N, grid_H, grid_W, 2]. The data type is float32 or float64. mode(str, optional): The interpolation method which can be 'bilinear' or 'nearest'. Default: 'bilinear'. padding_mode(str, optional) The padding method used when source index is out of input images. It can be 'zeros', 'reflection' and 'border'. Default: zeros. align_corners(bool, optional): If `align_corners` is true, it will projects -1 and 1 to the centers of the corner pixels. Otherwise, it will projects -1 and 1 to the image edges. name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. Returns: Tensor, The shape of output is [N, C, grid_H, grid_W] in which `grid_H` is the height of grid and `grid_W` is the width of grid. The data type is same as input tensor. Examples: .. code-block:: python import paddle import paddle.nn.functional as F import numpy as np # shape=[1, 1, 3, 3] x = np.array([[[[-0.6, 0.8, -0.5], [-0.5, 0.2, 1.2], [ 1.4, 0.3, -0.2]]]]).astype("float64") # grid shape = [1, 3, 4, 2] grid = np.array( [[[[ 0.2, 0.3], [-0.4, -0.3], [-0.9, 0.3], [-0.9, -0.6]], [[ 0.4, 0.1], [ 0.9, -0.8], [ 0.4, 0.5], [ 0.5, -0.2]], [[ 0.1, -0.8], [-0.3, -1. ], [ 0.7, 0.4], [ 0.2, 0.8]]]]).astype("float64") paddle.disable_static() x = paddle.to_tensor(x) grid = paddle.to_tensor(grid) y_t = F.grid_sample( x, grid, mode='bilinear', padding_mode='border', align_corners=True) print(y_t.numpy()) # output shape = [1, 1, 3, 4] # [[[[ 0.34 0.016 0.086 -0.448] # [ 0.55 -0.076 0.35 0.59 ] # [ 0.596 0.38 0.52 0.24 ]]]] """ helper = LayerHelper("grid_sample", **locals()) check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'grid_sampler') check_variable_and_dtype(grid, 'grid', ['float32', 'float64'], 'grid_sampler') if not isinstance(x, Variable): raise ValueError("The x should be a Variable") if not isinstance(grid, Variable): raise ValueError("The grid should be a Variable") _modes = ['bilinear', 'nearest'] _padding_modes = ['zeros', 'reflection', 'border'] if mode not in _modes: raise ValueError( "The mode of grid sample function should be in {}, but got: {}". format(_modes, mode)) if padding_mode not in _padding_modes: raise ValueError( "The padding mode of grid sample function should be in {}, but got: {}". format(_padding_modes, padding_mode)) if not isinstance(align_corners, bool): raise ValueError("The align corners should be bool, but got: {}".format( align_corners)) cudnn_version = get_cudnn_version() use_cudnn = False if (cudnn_version is not None ) and align_corners and mode == 'bilinear' and padding_mode == 'zeros': use_cudnn = True ipts = {'X': x, 'Grid': grid} attrs = { 'mode': mode, 'padding_mode': padding_mode, 'align_corners': align_corners, 'use_cudnn': use_cudnn } if in_dygraph_mode(): attrs = ('mode', mode, 'padding_mode', padding_mode, 'align_corners', align_corners, 'use_cudnn', use_cudnn) out = getattr(core.ops, 'grid_sampler')(x, grid, *attrs) else: out = helper.create_variable_for_type_inference(x.dtype) helper.append_op( type='grid_sampler', inputs=ipts, attrs=attrs, outputs={'Output': out}) return out def pixel_shuffle(x, upscale_factor, data_format="NCHW", name=None): """ This API implements pixel shuffle operation. See more details in :ref:`api_nn_vision_PixelShuffle` . Parameters: x(Tensor): 4-D tensor, the data type should be float32 or float64. upscale_factor(int): factor to increase spatial resolution. data_format (str): The data format of the input and output data. An optional string from: "NCHW", "NHWC". The default is "NCHW". When it is "NCHW", the data is stored in the order of: [batch_size, input_channels, input_height, input_width]. name (str, optional): The default value is None. Normally there is no need for user to set this property. Returns: Out(tensor): Reshaped tensor according to the new dimension. Raises: ValueError: If the square of upscale_factor cannot divide the channels of input. Examples: .. code-block:: python import paddle import paddle.nn.functional as F import numpy as np x = np.random.randn(2, 9, 4, 4).astype(np.float32) paddle.disable_static() x_var = paddle.to_tensor(x) out_var = F.pixel_shuffle(x_var, 3) out = out_var.numpy() print(out.shape) # (2, 1, 12, 12) """ if not in_dygraph_mode(): check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'pixel_shuffle') if not isinstance(upscale_factor, int): raise TypeError("upscale factor must be int type") if data_format not in ["NCHW", "NHWC"]: raise ValueError("Attr(data_format) should be 'NCHW' or 'NHWC'." "But recevie Attr(data_format): {} ".format( data_format)) if in_dygraph_mode(): return core.ops.pixel_shuffle(x, "upscale_factor", upscale_factor, "data_format", data_format) helper = LayerHelper("pixel_shuffle", **locals()) out = helper.create_variable_for_type_inference(dtype=x.dtype) helper.append_op( type="pixel_shuffle", inputs={"X": x}, outputs={"Out": out}, attrs={"upscale_factor": upscale_factor, "data_format": data_format}) return out