diff --git a/paddle/fluid/operators/pixel_shuffle_op.cc b/paddle/fluid/operators/pixel_shuffle_op.cc index 1ed7988dcfcc0831156c09a72e958852f3d45fb5..70d232ad6a51e21b863974e70920eb2d9da895e6 100644 --- a/paddle/fluid/operators/pixel_shuffle_op.cc +++ b/paddle/fluid/operators/pixel_shuffle_op.cc @@ -28,25 +28,44 @@ class PixelShuffleOp : public framework::OperatorWithKernel { "Output(Out) of PixelShuffleOp should not be null.")); auto input_dims = ctx->GetInputDim("X"); - PADDLE_ENFORCE_EQ( - input_dims.size(), 4, - platform::errors::InvalidArgument( - "Input should be a 4-D tensor of format [N, C, H, W], but got %u.", - input_dims.size())); + PADDLE_ENFORCE_EQ(input_dims.size(), 4, + platform::errors::InvalidArgument( + "Input should be a 4-D tensor of format [N, C, H, W] " + "or [N, H, W, C], but got %u.", + input_dims.size())); auto upscale_factor = ctx->Attrs().Get("upscale_factor"); - PADDLE_ENFORCE_EQ(input_dims[1] % (upscale_factor * upscale_factor), 0, - platform::errors::InvalidArgument( - "The square of upscale_factor[%u] should divide the " - "number of channel[%u]", - input_dims[1], upscale_factor * upscale_factor)); - + const std::string data_format = + ctx->Attrs().Get("data_format"); + const bool channel_last = (data_format == "NHWC"); + + if (!channel_last) { + PADDLE_ENFORCE_EQ( + input_dims[1] % (upscale_factor * upscale_factor), 0, + platform::errors::InvalidArgument( + "The square of upscale_factor[%u] should divide the " + "number of channel[%u]", + input_dims[1], upscale_factor * upscale_factor)); + } else { + PADDLE_ENFORCE_EQ( + input_dims[3] % (upscale_factor * upscale_factor), 0, + platform::errors::InvalidArgument( + "The square of upscale_factor[%u] should divide the " + "number of channel[%u]", + input_dims[3], upscale_factor * upscale_factor)); + } auto output_dims = input_dims; output_dims[0] = input_dims[0]; - output_dims[1] = input_dims[1] / (upscale_factor * upscale_factor); - output_dims[2] = input_dims[2] * upscale_factor; - output_dims[3] = input_dims[3] * upscale_factor; + if (!channel_last) { + output_dims[1] = input_dims[1] / (upscale_factor * upscale_factor); + output_dims[2] = input_dims[2] * upscale_factor; + output_dims[3] = input_dims[3] * upscale_factor; + } else { + output_dims[1] = input_dims[1] * upscale_factor; + output_dims[2] = input_dims[2] * upscale_factor; + output_dims[3] = input_dims[3] / (upscale_factor * upscale_factor); + } ctx->SetOutputDim("Out", output_dims); } }; @@ -54,14 +73,14 @@ class PixelShuffleOp : public framework::OperatorWithKernel { class PixelShuffleOpMaker : public framework::OpProtoAndCheckerMaker { public: void Make() override { - AddInput( - "X", - "(Tensor, default Tensor), " - "the input feature data of PixelShuffleOp, the layout is [N C H W]."); - AddOutput( - "Out", - "(Tensor, default Tensor), the output of " - "PixelShuffleOp. The layout is [N,C/factor^2,H*factor,W*factor]."); + AddInput("X", + "(Tensor, default Tensor), " + "the input feature data of PixelShuffleOp, the layout is [N, C, " + "H, W] or [N, H, W, C]."); + AddOutput("Out", + "(Tensor, default Tensor), the output of " + "PixelShuffleOp. The layout is [N, C/factor^2, H*factor, " + "W*factor] or [N, H*factor, W*factor, C/factor^2]."); AddAttr("upscale_factor", "the factor to increase spatial resolution by.") .SetDefault(1) @@ -70,6 +89,11 @@ class PixelShuffleOpMaker : public framework::OpProtoAndCheckerMaker { platform::errors::InvalidArgument( "upscale_factor should be larger than 0.")); }); + AddAttr( + "data_format", + "An optional string from: \"NHWC\", \"NCHW\". " + "Defaults to \"NHWC\", Specify the data format of the input data.") + .SetDefault("NCHW"); AddComment(R"DOC( Pixel Shuffle operator @@ -114,19 +138,30 @@ class PixelShuffleGradOp : public framework::OperatorWithKernel { platform::errors::NotFound("Output(X@Grad) should not be null")); auto do_dims = ctx->GetInputDim(framework::GradVarName("Out")); - PADDLE_ENFORCE_EQ( - do_dims.size(), 4, - platform::errors::InvalidArgument( - "Input should be a 4-D tensor of format [N, C, H, W], but got %u.", - do_dims.size())); + PADDLE_ENFORCE_EQ(do_dims.size(), 4, + platform::errors::InvalidArgument( + "Input should be a 4-D tensor of format [N, C, H, W] " + "or [N, H, W, C], but got %u.", + do_dims.size())); auto upscale_factor = ctx->Attrs().Get("upscale_factor"); + const std::string data_format = + ctx->Attrs().Get("data_format"); + const bool channel_last = (data_format == "NHWC"); + auto dx_dims = do_dims; dx_dims[0] = do_dims[0]; - dx_dims[1] = do_dims[1] * (upscale_factor * upscale_factor); - dx_dims[2] = do_dims[2] / upscale_factor; - dx_dims[3] = do_dims[3] / upscale_factor; + + if (!channel_last) { + dx_dims[1] = do_dims[1] * (upscale_factor * upscale_factor); + dx_dims[2] = do_dims[2] / upscale_factor; + dx_dims[3] = do_dims[3] / upscale_factor; + } else { + dx_dims[1] = do_dims[1] / upscale_factor; + dx_dims[2] = do_dims[2] / upscale_factor; + dx_dims[3] = do_dims[3] * (upscale_factor * upscale_factor); + } ctx->SetOutputDim(framework::GradVarName("X"), dx_dims); } }; diff --git a/paddle/fluid/operators/pixel_shuffle_op.h b/paddle/fluid/operators/pixel_shuffle_op.h index 1ae1c7e9d50cb9d701fd0e79337a1906f2f5d545..b2a0db0f838d5dcc3fed2ed9838f1c43240ce0e7 100644 --- a/paddle/fluid/operators/pixel_shuffle_op.h +++ b/paddle/fluid/operators/pixel_shuffle_op.h @@ -11,6 +11,7 @@ limitations under the License. */ #pragma once #include +#include #include #include "paddle/fluid/framework/op_registry.h" #include "paddle/fluid/operators/math/math_function.h" @@ -24,23 +25,33 @@ class PixelShuffleOpKernel : public framework::OpKernel { void Compute(const framework::ExecutionContext& ctx) const override { auto* in = ctx.Input("X"); auto* out = ctx.Output("Out"); + out->mutable_data(ctx.GetPlace()); int factor = ctx.Attr("upscale_factor"); + std::string data_format = ctx.Attr("data_format"); + bool channel_last = (data_format == "NHWC"); + auto in_dims = in->dims(); auto o_dims = out->dims(); framework::Tensor t; t.ShareDataWith(*in); - t.Resize({in_dims[0], o_dims[1], factor, factor, in_dims[2], in_dims[3]}); - + if (!channel_last) { + t.Resize({in_dims[0], o_dims[1], factor, factor, in_dims[2], in_dims[3]}); + } else { + t.Resize({in_dims[0], in_dims[1], in_dims[2], o_dims[3], factor, factor}); + } std::vector axis = {0, 1, 4, 2, 5, 3}; framework::Tensor o; o.ShareDataWith(*out); - o.Resize({in_dims[0], o_dims[1], in_dims[2], factor, in_dims[3], factor}); - + if (!channel_last) { + o.Resize({in_dims[0], o_dims[1], in_dims[2], factor, in_dims[3], factor}); + } else { + o.Resize({in_dims[0], in_dims[1], factor, in_dims[2], factor, o_dims[3]}); + } math::Transpose trans; auto& dev_ctx = ctx.template device_context(); trans(dev_ctx, t, &o, axis); @@ -58,19 +69,32 @@ class PixelShuffleGradOpKernel : public framework::OpKernel { int factor = ctx.Attr("upscale_factor"); + std::string data_format = ctx.Attr("data_format"); + bool channel_last = (data_format == "NHWC"); + auto do_dims = dout->dims(); auto dx_dims = dx->dims(); framework::Tensor t; t.ShareDataWith(*dout); - t.Resize({do_dims[0], do_dims[1], dx_dims[2], factor, dx_dims[3], factor}); - + if (!channel_last) { + t.Resize( + {do_dims[0], do_dims[1], dx_dims[2], factor, dx_dims[3], factor}); + } else { + t.Resize( + {do_dims[0], dx_dims[1], factor, dx_dims[2], factor, do_dims[3]}); + } std::vector axis = {0, 1, 3, 5, 2, 4}; framework::Tensor o; o.ShareDataWith(*dx); - o.Resize({do_dims[0], do_dims[1], factor, factor, dx_dims[2], dx_dims[3]}); - + if (!channel_last) { + o.Resize( + {do_dims[0], do_dims[1], factor, factor, dx_dims[2], dx_dims[3]}); + } else { + o.Resize( + {do_dims[0], dx_dims[1], dx_dims[2], do_dims[3], factor, factor}); + } math::Transpose trans; auto& dev_ctx = ctx.template device_context(); trans(dev_ctx, t, &o, axis); diff --git a/python/paddle/fluid/tests/unittests/test_pixel_shuffle.py b/python/paddle/fluid/tests/unittests/test_pixel_shuffle.py index 0bcb4be3b7fb9380932cf137ac8e4939dcd77288..cf93f39ab8c5c92aa075f2f0a7dca9a5c5d9f485 100644 --- a/python/paddle/fluid/tests/unittests/test_pixel_shuffle.py +++ b/python/paddle/fluid/tests/unittests/test_pixel_shuffle.py @@ -16,16 +16,17 @@ from __future__ import print_function import unittest import numpy as np + from op_test import OpTest +import paddle +import paddle.nn.functional as F +import paddle.fluid.core as core +import paddle.fluid as fluid -class TestPixelShuffle(OpTest): - def setUp(self): - self.op_type = "pixel_shuffle" - n, c, h, w = 2, 9, 4, 4 - up_factor = 3 - shape = [n, c, h, w] - x = np.random.random(shape).astype("float64") +def pixel_shuffle_np(x, up_factor, data_format="NCHW"): + if data_format == "NCHW": + n, c, h, w = x.shape new_shape = (n, c // (up_factor * up_factor), up_factor, up_factor, h, w) # reshape to (num,output_channel,upscale_factor,upscale_factor,h,w) @@ -34,10 +35,42 @@ class TestPixelShuffle(OpTest): npresult = npresult.transpose(0, 1, 4, 2, 5, 3) oshape = [n, c // (up_factor * up_factor), h * up_factor, w * up_factor] npresult = np.reshape(npresult, oshape) + return npresult + else: + n, h, w, c = x.shape + new_shape = (n, h, w, c // (up_factor * up_factor), up_factor, + up_factor) + # reshape to (num,h,w,output_channel,upscale_factor,upscale_factor) + npresult = np.reshape(x, new_shape) + # transpose to (num,h,upscale_factor,w,upscale_factor,output_channel) + npresult = npresult.transpose(0, 1, 4, 2, 5, 3) + oshape = [n, h * up_factor, w * up_factor, c // (up_factor * up_factor)] + npresult = np.reshape(npresult, oshape) + return npresult + + +class TestPixelShuffleOp(OpTest): + def setUp(self): + self.op_type = "pixel_shuffle" + self.init_data_format() + n, c, h, w = 2, 9, 4, 4 + + if self.format == "NCHW": + shape = [n, c, h, w] + if self.format == "NHWC": + shape = [n, h, w, c] + + up_factor = 3 + + x = np.random.random(shape).astype("float64") + npresult = pixel_shuffle_np(x, up_factor, self.format) self.inputs = {'X': x} self.outputs = {'Out': npresult} - self.attrs = {'upscale_factor': up_factor} + self.attrs = {'upscale_factor': up_factor, "data_format": self.format} + + def init_data_format(self): + self.format = "NCHW" def test_check_output(self): self.check_output() @@ -46,5 +79,141 @@ class TestPixelShuffle(OpTest): self.check_grad(['X'], 'Out') +class TestChannelLast(TestPixelShuffleOp): + def init_data_format(self): + self.format = "NHWC" + + +class TestPixelShuffleAPI(unittest.TestCase): + def setUp(self): + self.x_1_np = np.random.random([2, 9, 4, 4]).astype("float64") + self.x_2_np = np.random.random([2, 4, 4, 9]).astype("float64") + self.out_1_np = pixel_shuffle_np(self.x_1_np, 3) + self.out_2_np = pixel_shuffle_np(self.x_2_np, 3, "NHWC") + + def test_static_graph_functional(self): + for use_cuda in ([False, True] + if core.is_compiled_with_cuda() else [False]): + place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() + + paddle.enable_static() + x_1 = paddle.data(name="x", shape=[2, 9, 4, 4], dtype="float64") + x_2 = paddle.data(name="x2", shape=[2, 4, 4, 9], dtype="float64") + out_1 = F.pixel_shuffle(x_1, 3) + out_2 = F.pixel_shuffle(x_2, 3, "NHWC") + + exe = paddle.static.Executor(place=place) + res_1 = exe.run(fluid.default_main_program(), + feed={"x": self.x_1_np}, + fetch_list=out_1, + use_prune=True) + + res_2 = exe.run(fluid.default_main_program(), + feed={"x2": self.x_2_np}, + fetch_list=out_2, + use_prune=True) + + assert np.allclose(res_1, self.out_1_np) + assert np.allclose(res_2, self.out_2_np) + + # same test between layer and functional in this op. + def test_static_graph_layer(self): + for use_cuda in ([False, True] + if core.is_compiled_with_cuda() else [False]): + place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() + + paddle.enable_static() + x_1 = paddle.data(name="x", shape=[2, 9, 4, 4], dtype="float64") + x_2 = paddle.data(name="x2", shape=[2, 4, 4, 9], dtype="float64") + # init instance + ps_1 = paddle.nn.PixelShuffle(3) + ps_2 = paddle.nn.PixelShuffle(3, "NHWC") + out_1 = ps_1(x_1) + out_2 = ps_2(x_2) + out_1_np = pixel_shuffle_np(self.x_1_np, 3) + out_2_np = pixel_shuffle_np(self.x_2_np, 3, "NHWC") + + exe = paddle.static.Executor(place=place) + res_1 = exe.run(fluid.default_main_program(), + feed={"x": self.x_1_np}, + fetch_list=out_1, + use_prune=True) + + res_2 = exe.run(fluid.default_main_program(), + feed={"x2": self.x_2_np}, + fetch_list=out_2, + use_prune=True) + + assert np.allclose(res_1, out_1_np) + assert np.allclose(res_2, out_2_np) + + def run_dygraph(self, up_factor, data_format): + + n, c, h, w = 2, 9, 4, 4 + + if data_format == "NCHW": + shape = [n, c, h, w] + if data_format == "NHWC": + shape = [n, h, w, c] + + x = np.random.random(shape).astype("float64") + + npresult = pixel_shuffle_np(x, up_factor, data_format) + + for use_cuda in ([False, True] + if core.is_compiled_with_cuda() else [False]): + place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace() + + paddle.disable_static(place=place) + + pixel_shuffle = paddle.nn.PixelShuffle( + up_factor, data_format=data_format) + result = pixel_shuffle(paddle.to_tensor(x)) + + self.assertTrue(np.allclose(result.numpy(), npresult)) + + result_functional = F.pixel_shuffle( + paddle.to_tensor(x), 3, data_format) + self.assertTrue(np.allclose(result_functional.numpy(), npresult)) + + def test_dygraph1(self): + self.run_dygraph(3, "NCHW") + + def test_dygraph2(self): + self.run_dygraph(3, "NHWC") + + +class TestPixelShuffleError(unittest.TestCase): + def test_error_functional(self): + def error_upscale_factor(): + with paddle.fluid.dygraph.guard(): + x = np.random.random([2, 9, 4, 4]).astype("float64") + pixel_shuffle = F.pixel_shuffle(paddle.to_tensor(x), 3.33) + + self.assertRaises(TypeError, error_upscale_factor) + + def error_data_format(): + with paddle.fluid.dygraph.guard(): + x = np.random.random([2, 9, 4, 4]).astype("float64") + pixel_shuffle = F.pixel_shuffle(paddle.to_tensor(x), 3, "WOW") + + self.assertRaises(ValueError, error_data_format) + + def test_error_layer(self): + def error_upscale_factor_layer(): + with paddle.fluid.dygraph.guard(): + x = np.random.random([2, 9, 4, 4]).astype("float64") + ps = paddle.nn.PixelShuffle(3.33) + + self.assertRaises(TypeError, error_upscale_factor_layer) + + def error_data_format_layer(): + with paddle.fluid.dygraph.guard(): + x = np.random.random([2, 9, 4, 4]).astype("float64") + ps = paddle.nn.PixelShuffle(3, "MEOW") + + self.assertRaises(ValueError, error_data_format_layer) + + if __name__ == '__main__': unittest.main() diff --git a/python/paddle/nn/__init__.py b/python/paddle/nn/__init__.py index b262b945267c72136141d98b2b0d5b911bd08ca9..dd5d0d269a6cb6377b11b6c98e86eef4ee0f8b57 100644 --- a/python/paddle/nn/__init__.py +++ b/python/paddle/nn/__init__.py @@ -139,7 +139,10 @@ from .layer.transformer import TransformerDecoder from .layer.transformer import Transformer from .layer.distance import PairwiseDistance #DEFINE_ALIAS +from .layer.vision import PixelShuffle + from .layer import loss #DEFINE_ALIAS from .layer import conv #DEFINE_ALIAS +from .layer import vision #DEFINE_ALIAS from ..fluid.dygraph.layers import Layer #DEFINE_ALIAS from ..fluid.dygraph.container import LayerList, ParameterList, Sequential #DEFINE_ALIAS diff --git a/python/paddle/nn/functional/__init__.py b/python/paddle/nn/functional/__init__.py index 905849360e116341bdf366c6b600875af2357b11..afc1614732d06dcef4ca0e1e75cd93e28d6a2d3d 100644 --- a/python/paddle/nn/functional/__init__.py +++ b/python/paddle/nn/functional/__init__.py @@ -194,7 +194,7 @@ from .vision import box_clip #DEFINE_ALIAS from .vision import box_coder #DEFINE_ALIAS from .vision import box_decoder_and_assign #DEFINE_ALIAS from .vision import collect_fpn_proposals #DEFINE_ALIAS -# from .vision import deformable_conv #DEFINE_ALIAS +# from .vision import deformable_conv #DEFINE_ALIAS from .vision import deformable_roi_pooling #DEFINE_ALIAS from .vision import density_prior_box #DEFINE_ALIAS from .vision import detection_output #DEFINE_ALIAS @@ -206,7 +206,7 @@ from .vision import generate_proposals #DEFINE_ALIAS from .vision import grid_sample #DEFINE_ALIAS from .vision import image_resize #DEFINE_ALIAS from .vision import image_resize_short #DEFINE_ALIAS -# from .vision import multi_box_head #DEFINE_ALIAS +# from .vision import multi_box_head #DEFINE_ALIAS from .vision import pixel_shuffle #DEFINE_ALIAS from .vision import prior_box #DEFINE_ALIAS from .vision import prroi_pool #DEFINE_ALIAS diff --git a/python/paddle/nn/functional/vision.py b/python/paddle/nn/functional/vision.py index 23e45725a78299e7e67308400a8b9c1adbfebed7..6cb15afbfd2db0011c1c38840ed9bad21320e00f 100644 --- a/python/paddle/nn/functional/vision.py +++ b/python/paddle/nn/functional/vision.py @@ -12,7 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. -# TODO: define specitial functions used in computer vision task +from ...fluid.data_feeder import check_variable_and_dtype +from ...fluid.layer_helper import LayerHelper +from ...fluid.framework import core, in_dygraph_mode + from ...fluid.layers import affine_channel #DEFINE_ALIAS from ...fluid.layers import affine_grid #DEFINE_ALIAS from ...fluid.layers import anchor_generator #DEFINE_ALIAS @@ -28,6 +31,7 @@ from ...fluid.layers import distribute_fpn_proposals #DEFINE_ALIAS from ...fluid.layers import generate_mask_labels #DEFINE_ALIAS from ...fluid.layers import generate_proposal_labels #DEFINE_ALIAS from ...fluid.layers import generate_proposals #DEFINE_ALIAS +from ...fluid.layers import grid_sampler #DEFINE_ALIAS from ...fluid.layers import image_resize #DEFINE_ALIAS from ...fluid.layers import prior_box #DEFINE_ALIAS from ...fluid.layers import prroi_pool #DEFINE_ALIAS @@ -43,7 +47,7 @@ from ...fluid.layers import yolov3_loss #DEFINE_ALIAS from ...fluid.layers import fsp_matrix #DEFINE_ALIAS from ...fluid.layers import image_resize_short #DEFINE_ALIAS -from ...fluid.layers import pixel_shuffle #DEFINE_ALIAS +# from ...fluid.layers import pixel_shuffle #DEFINE_ALIAS from ...fluid.layers import retinanet_detection_output #DEFINE_ALIAS from ...fluid.layers import retinanet_target_assign #DEFINE_ALIAS from ...fluid.layers import roi_perspective_transform #DEFINE_ALIAS @@ -67,7 +71,7 @@ __all__ = [ 'generate_mask_labels', 'generate_proposal_labels', 'generate_proposals', - 'grid_sample', + 'grid_sampler', 'image_resize', 'image_resize_short', # 'multi_box_head', @@ -89,8 +93,6 @@ __all__ = [ 'yolov3_loss' ] -from ...fluid.layer_helper import LayerHelper -from ...fluid.data_feeder import check_variable_and_dtype from ...fluid import core, dygraph_utils from ...fluid.framework import Variable, in_dygraph_mode from ...device import get_cudnn_version @@ -112,22 +114,16 @@ def grid_sample(x, data x and y is indexing the 3rd dimension (in height dimension), finally results is the bilinear interpolation or nearest value of 4 nearest corner points. The output tensor shape will be [N, C, H, W]. - .. code-block:: text - Step 1: Get (x, y) grid coordinates and scale to [0, H-1/W-1]. - .. code-block:: text - grid_x = 0.5 * (grid[:, :, :, 0] + 1) * (W - 1) grid_y = 0.5 * (grid[:, :, :, 1] + 1) * (H - 1) - Step 2: Indices input data X with grid (x, y) in each [H, W] area, and bilinear interpolate point value by 4 nearest points or nearest interpolate point value by nearest point. - wn ------- y_n ------- en | | | | d_n | @@ -137,27 +133,21 @@ def grid_sample(x, | d_s | | | | ws ------- y_s ------- wn - For bilinear interpolation: - x_w = floor(x) // west side x coord x_e = x_w + 1 // east side x coord y_n = floor(y) // north side y coord y_s = y_s + 1 // south side y coord - d_w = grid_x - x_w // distance to west side d_e = x_e - grid_x // distance to east side d_n = grid_y - y_n // distance to north side d_s = y_s - grid_y // distance to south side - wn = X[:, :, y_n, x_w] // north-west point value en = X[:, :, y_n, x_e] // north-east point value ws = X[:, :, y_s, x_w] // south-east point value es = X[:, :, y_s, x_w] // north-east point value - output = wn * d_e * d_s + en * d_w * d_s + ws * d_e * d_n + es * d_w * d_n - Args: x(Tensor): The input tensor, which is a 4-d tensor with shape [N, C, H, W], N is the batch size, C is the channel @@ -176,14 +166,10 @@ def grid_sample(x, name(str, optional): For detailed information, please refer to :ref:`api_guide_Name`. Usually name is no need to set and None by default. - Returns: Tensor, The shape of output is [N, C, grid_H, grid_W] in which `grid_H` is the height of grid and `grid_W` is the width of grid. The data type is same as input tensor. - Examples: - .. code-block:: python - import paddle import paddle.nn.functional as F import numpy as np @@ -272,3 +258,57 @@ def grid_sample(x, attrs=attrs, outputs={'Output': out}) return out + + +def pixel_shuffle(x, upscale_factor, data_format="NCHW", name=None): + """ + This API implements pixel shuffle operation. + See more details in :ref:`api_nn_vision_PixelShuffle` . + Parameters: + x(Tensor): 4-D tensor, the data type should be float32 or float64. + upscale_factor(int): factor to increase spatial resolution. + data_format (str): The data format of the input and output data. An optional string from: "NCHW", "NHWC". The default is "NCHW". When it is "NCHW", the data is stored in the order of: [batch_size, input_channels, input_height, input_width]. + name (str, optional): The default value is None. Normally there is no need for user to set this property. + Returns: + Out(tensor): Reshaped tensor according to the new dimension. + Raises: + ValueError: If the square of upscale_factor cannot divide the channels of input. + Examples: + .. code-block:: python + import paddle + import paddle.nn.functional as F + import numpy as np + x = np.random.randn(2, 9, 4, 4).astype(np.float32) + paddle.disable_static() + x_var = paddle.to_tensor(x) + out_var = F.pixel_shuffle(x_var, 3) + out = out_var.numpy() + print(out.shape) + # (2, 1, 12, 12) + """ + if not in_dygraph_mode(): + check_variable_and_dtype(x, 'x', ['float32', 'float64'], + 'pixel_shuffle') + + if not isinstance(upscale_factor, int): + raise TypeError("upscale factor must be int type") + + if data_format not in ["NCHW", "NHWC"]: + raise ValueError("Attr(data_format) should be 'NCHW' or 'NHWC'." + "But recevie Attr(data_format): {} ".format( + data_format)) + + if in_dygraph_mode(): + return core.ops.pixel_shuffle(x, "upscale_factor", upscale_factor, + "data_format", data_format) + + helper = LayerHelper("pixel_shuffle", **locals()) + + out = helper.create_variable_for_type_inference(dtype=x.dtype) + helper.append_op( + type="pixel_shuffle", + inputs={"X": x}, + outputs={"Out": out}, + attrs={"upscale_factor": upscale_factor, + "data_format": data_format}) + return out diff --git a/python/paddle/nn/layer/__init__.py b/python/paddle/nn/layer/__init__.py index b7098aee423feeb3670206c1459a05401c86e5c0..b25350be601dd9e56d8268859b52a12d3745c44d 100644 --- a/python/paddle/nn/layer/__init__.py +++ b/python/paddle/nn/layer/__init__.py @@ -20,6 +20,7 @@ from . import conv from . import extension from . import activation from . import norm +from . import vision from . import distance from . import transformer @@ -29,6 +30,8 @@ from .conv import * from .extension import * from .activation import * from .norm import * +from .vision import * + from .transformer import * # from .activation import PReLU #DEFINE_ALIAS from .activation import ReLU #DEFINE_ALIAS @@ -104,4 +107,6 @@ from .norm import InstanceNorm #DEFINE_ALIAS # from .rnn import RNNCell #DEFINE_ALIAS # from .rnn import GRUCell #DEFINE_ALIAS # from .rnn import LSTMCell #DEFINE_ALIAS + +from .vision import PixelShuffle #DEFINE_ALIAS from .distance import PairwiseDistance #DEFINE_ALIAS diff --git a/python/paddle/nn/layer/vision.py b/python/paddle/nn/layer/vision.py new file mode 100644 index 0000000000000000000000000000000000000000..a5f360ec02e6d8b59b80db4602776e904cf0b499 --- /dev/null +++ b/python/paddle/nn/layer/vision.py @@ -0,0 +1,82 @@ +# Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +# TODO: define specitial functions used in computer vision task + +from ...fluid.dygraph import layers +from .. import functional + +__all__ = ['PixelShuffle'] + + +class PixelShuffle(layers.Layer): + """ + + PixelShuffle Layer + + This operator rearranges elements in a tensor of shape [N, C, H, W] + to a tensor of shape [N, C/upscale_factor**2, H*upscale_factor, W*upscale_factor], + or from shape [N, H, W, C] to [N, H*upscale_factor, W*upscale_factor, C/upscale_factor**2]. + This is useful for implementing efficient sub-pixel convolution + with a stride of 1/upscale_factor. + Please refer to the paper: `Real-Time Single Image and Video Super-Resolution + Using an Efficient Sub-Pixel Convolutional Neural Network `_ . + by Shi et. al (2016) for more details. + + Parameters: + + upscale_factor(int): factor to increase spatial resolution. + data_format (str): The data format of the input and output data. An optional string from: "NCHW", "NHWC". The default is "NCHW". When it is "NCHW", the data is stored in the order of: [batch_size, input_channels, input_height, input_width]. + name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`. + + Shape: + - x: 4-D tensor with shape: (N, C, H, W) or (N, H, W, C). + - out: 4-D tensor with shape: (N, C/upscale_factor**2, H*upscale_factor, W*upscale_factor) or (N, H*upscale_factor, W*upscale_factor, C/upscale_factor^2). + + + Examples: + .. code-block:: python + + import paddle + import paddle.nn as nn + import numpy as np + + paddle.disable_static() + x = np.random.randn(2, 9, 4, 4).astype(np.float32) + x_var = paddle.to_tensor(x) + pixel_shuffle = nn.PixelShuffle(3) + out_var = pixel_shuffle(x_var) + out = out_var.numpy() + print(out.shape) + # (2, 1, 12, 12) + + """ + + def __init__(self, upscale_factor, data_format="NCHW", name=None): + super(PixelShuffle, self).__init__() + + if not isinstance(upscale_factor, int): + raise TypeError("upscale factor must be int type") + + if data_format not in ["NCHW", "NHWC"]: + raise ValueError("Data format should be 'NCHW' or 'NHWC'." + "But recevie data format: {}".format(data_format)) + + self._upscale_factor = upscale_factor + self._data_format = data_format + self._name = name + + def forward(self, x): + return functional.pixel_shuffle(x, self._upscale_factor, + self._data_format, self._name)