# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at # # http://www.apache.org/licenses/LICENSE-2.0 # # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. # See the License for the specific language governing permissions and # limitations under the License. # TODO: define specitial functions used in computer vision task from ...fluid.layers import affine_channel #DEFINE_ALIAS from ...fluid.layers import affine_grid #DEFINE_ALIAS from ...fluid.layers import anchor_generator #DEFINE_ALIAS from ...fluid.layers import bipartite_match #DEFINE_ALIAS from ...fluid.layers import box_clip #DEFINE_ALIAS from ...fluid.layers import box_coder #DEFINE_ALIAS from ...fluid.layers import box_decoder_and_assign #DEFINE_ALIAS from ...fluid.layers import collect_fpn_proposals #DEFINE_ALIAS from ...fluid.layers import deformable_roi_pooling #DEFINE_ALIAS from ...fluid.layers import density_prior_box #DEFINE_ALIAS from ...fluid.layers import detection_output #DEFINE_ALIAS from ...fluid.layers import distribute_fpn_proposals #DEFINE_ALIAS from ...fluid.layers import generate_mask_labels #DEFINE_ALIAS from ...fluid.layers import generate_proposal_labels #DEFINE_ALIAS from ...fluid.layers import generate_proposals #DEFINE_ALIAS from ...fluid.layers import image_resize #DEFINE_ALIAS from ...fluid.layers import prior_box #DEFINE_ALIAS from ...fluid.layers import prroi_pool #DEFINE_ALIAS from ...fluid.layers import psroi_pool #DEFINE_ALIAS from ...fluid.layers import resize_bilinear #DEFINE_ALIAS from ...fluid.layers import resize_nearest #DEFINE_ALIAS from ...fluid.layers import resize_trilinear #DEFINE_ALIAS from ...fluid.layers import roi_align #DEFINE_ALIAS from ...fluid.layers import roi_pool #DEFINE_ALIAS from ...fluid.layers import space_to_depth #DEFINE_ALIAS from ...fluid.layers import yolo_box #DEFINE_ALIAS from ...fluid.layers import yolov3_loss #DEFINE_ALIAS from ...fluid.layers import fsp_matrix #DEFINE_ALIAS from ...fluid.layers import image_resize_short #DEFINE_ALIAS from ...fluid.layers import pixel_shuffle #DEFINE_ALIAS from ...fluid.layers import retinanet_detection_output #DEFINE_ALIAS from ...fluid.layers import retinanet_target_assign #DEFINE_ALIAS from ...fluid.layers import roi_perspective_transform #DEFINE_ALIAS from ...fluid.layers import shuffle_channel #DEFINE_ALIAS __all__ = [ 'affine_channel', 'affine_grid', 'anchor_generator', 'bipartite_match', 'box_clip', 'box_coder', 'box_decoder_and_assign', 'collect_fpn_proposals', # 'deformable_conv', 'deformable_roi_pooling', 'density_prior_box', 'detection_output', 'distribute_fpn_proposals', 'fsp_matrix', 'generate_mask_labels', 'generate_proposal_labels', 'generate_proposals', 'grid_sample', 'image_resize', 'image_resize_short', # 'multi_box_head', 'pixel_shuffle', 'prior_box', 'prroi_pool', 'psroi_pool', 'resize_bilinear', 'resize_nearest', 'resize_trilinear', 'retinanet_detection_output', 'retinanet_target_assign', 'roi_align', 'roi_perspective_transform', 'roi_pool', 'shuffle_channel', 'space_to_depth', 'yolo_box', 'yolov3_loss' ] from ...fluid.layer_helper import LayerHelper from ...fluid.data_feeder import check_variable_and_dtype from ...fluid import core, dygraph_utils from ...fluid.framework import Variable, in_dygraph_mode from ...device import get_cudnn_version import numpy as np def grid_sample(x, grid, mode='bilinear', padding_mode='zeros', align_corners=True, name=None): """ This operation samples input X by using bilinear interpolation or nearest interpolation based on flow field grid, which is usually generated by :code:`affine_grid` . The grid of shape [N, H, W, 2] is the concatenation of (x, y) coordinates with shape [N, H, W] each, where x is indexing the 4th dimension (in width dimension) of input data x and y is indexing the 3rd dimension (in height dimension), finally results is the bilinear interpolation or nearest value of 4 nearest corner points. The output tensor shape will be [N, C, H, W]. .. code-block:: text Step 1: Get (x, y) grid coordinates and scale to [0, H-1/W-1]. .. code-block:: text grid_x = 0.5 * (grid[:, :, :, 0] + 1) * (W - 1) grid_y = 0.5 * (grid[:, :, :, 1] + 1) * (H - 1) Step 2: Indices input data X with grid (x, y) in each [H, W] area, and bilinear interpolate point value by 4 nearest points or nearest interpolate point value by nearest point. wn ------- y_n ------- en | | | | d_n | | | | x_w --d_w-- grid--d_e-- x_e | | | | d_s | | | | ws ------- y_s ------- wn For bilinear interpolation: x_w = floor(x) // west side x coord x_e = x_w + 1 // east side x coord y_n = floor(y) // north side y coord y_s = y_s + 1 // south side y coord d_w = grid_x - x_w // distance to west side d_e = x_e - grid_x // distance to east side d_n = grid_y - y_n // distance to north side d_s = y_s - grid_y // distance to south side wn = X[:, :, y_n, x_w] // north-west point value en = X[:, :, y_n, x_e] // north-east point value ws = X[:, :, y_s, x_w] // south-east point value es = X[:, :, y_s, x_w] // north-east point value output = wn * d_e * d_s + en * d_w * d_s + ws * d_e * d_n + es * d_w * d_n Args: x(Tensor): The input tensor, which is a 4-d tensor with shape [N, C, H, W], N is the batch size, C is the channel number, H and W is the feature height and width. The data type is float32 or float64. grid(Tensor): Input grid tensor of shape [N, grid_H, grid_W, 2]. The data type is float32 or float64. mode(str, optional): The interpolation method which can be 'bilinear' or 'nearest'. Default: 'bilinear'. padding_mode(str, optional) The padding method used when source index is out of input images. It can be 'zeros', 'reflect' and 'border'. Default: zeros. align_corners(bool, optional): If `align_corners` is true, it will projects -1 and 1 to the centers of the corner pixels. Otherwise, it will projects -1 and 1 to the image edges. name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. Returns: Tensor, The shape of output is [N, C, grid_H, grid_W] in which `grid_H` is the height of grid and `grid_W` is the width of grid. The data type is same as input tensor. Examples: .. code-block:: python import paddle import paddle.nn.functional as F import numpy as np # shape=[1, 1, 3, 3] x = np.array([[[[-0.6, 0.8, -0.5], [-0.5, 0.2, 1.2], [ 1.4, 0.3, -0.2]]]]).astype("float64") # grid shape = [1, 3, 4, 2] grid = np.array( [[[[ 0.2, 0.3], [-0.4, -0.3], [-0.9, 0.3], [-0.9, -0.6]], [[ 0.4, 0.1], [ 0.9, -0.8], [ 0.4, 0.5], [ 0.5, -0.2]], [[ 0.1, -0.8], [-0.3, -1. ], [ 0.7, 0.4], [ 0.2, 0.8]]]]).astype("float64") paddle.disable_static() x = paddle.to_tensor(x) grid = paddle.to_tensor(grid) y_t = F.grid_sample( x, grid, mode='bilinear', padding_mode='border', align_corners=True) print(y_t.numpy()) # output shape = [1, 1, 3, 4] # [[[[ 0.34 0.016 0.086 -0.448] # [ 0.55 -0.076 0.35 0.59 ] # [ 0.596 0.38 0.52 0.24 ]]]] """ helper = LayerHelper("grid_sample", **locals()) check_variable_and_dtype(x, 'x', ['float32', 'float64'], 'grid_sampler') check_variable_and_dtype(grid, 'grid', ['float32', 'float64'], 'grid_sampler') if not isinstance(x, Variable): raise ValueError("The x should be a Variable") if not isinstance(grid, Variable): raise ValueError("The grid should be a Variable") _modes = ['bilinear', 'nearest'] _padding_modes = ['zeros', 'reflect', 'border'] if mode not in _modes: raise ValueError( "The mode of grid sample function should be in {}, but got: {}". format(_modes, mode)) if padding_mode not in _padding_modes: raise ValueError( "The padding mode of grid sample function should be in {}, but got: {}". format(_padding_modes, padding_mode)) if not isinstance(align_corners, bool): raise ValueError("The align corners should be bool, but got: {}".format( align_corners)) cudnn_version = get_cudnn_version() use_cudnn = False if (cudnn_version is not None ) and align_corners and mode == 'bilinear' and padding_mode == 'zeros': use_cudnn = True ipts = {'X': x, 'Grid': grid} attrs = { 'mode': mode, 'padding_mode': padding_mode, 'align_corners': align_corners, 'use_cudnn': use_cudnn } if in_dygraph_mode(): attrs = ('mode', mode, 'padding_mode', padding_mode, 'align_corners', align_corners, 'use_cudnn', use_cudnn) out = getattr(core.ops, 'grid_sampler')(x, grid, *attrs) else: out = helper.create_variable_for_type_inference(x.dtype) helper.append_op( type='grid_sampler', inputs=ipts, attrs=attrs, outputs={'Output': out}) return out