diff --git a/paddle/fluid/operators/pixel_shuffle_op.cc b/paddle/fluid/operators/pixel_shuffle_op.cc
index 1ed7988dcfcc0831156c09a72e958852f3d45fb5..70d232ad6a51e21b863974e70920eb2d9da895e6 100644
--- a/paddle/fluid/operators/pixel_shuffle_op.cc
+++ b/paddle/fluid/operators/pixel_shuffle_op.cc
@@ -28,25 +28,44 @@ class PixelShuffleOp : public framework::OperatorWithKernel {
                           "Output(Out) of PixelShuffleOp should not be null."));
 
     auto input_dims = ctx->GetInputDim("X");
-    PADDLE_ENFORCE_EQ(
-        input_dims.size(), 4,
-        platform::errors::InvalidArgument(
-            "Input should be a 4-D tensor of format [N, C, H, W], but got %u.",
-            input_dims.size()));
+    PADDLE_ENFORCE_EQ(input_dims.size(), 4,
+                      platform::errors::InvalidArgument(
+                          "Input should be a 4-D tensor of format [N, C, H, W] "
+                          "or [N, H, W, C], but got %u.",
+                          input_dims.size()));
 
     auto upscale_factor = ctx->Attrs().Get<int>("upscale_factor");
 
-    PADDLE_ENFORCE_EQ(input_dims[1] % (upscale_factor * upscale_factor), 0,
-                      platform::errors::InvalidArgument(
-                          "The square of upscale_factor[%u] should divide the "
-                          "number of channel[%u]",
-                          input_dims[1], upscale_factor * upscale_factor));
-
+    const std::string data_format =
+        ctx->Attrs().Get<std::string>("data_format");
+    const bool channel_last = (data_format == "NHWC");
+
+    // The channel axis depends on the layout: index 1 for NCHW, index 3 for
+    // NHWC. Both layouts share a single divisibility check.
+    const int channel_axis = channel_last ? 3 : 1;
+    const int factor_square = upscale_factor * upscale_factor;
+
+    // The output keeps C / factor^2 channels, so C has to be an exact
+    // multiple of factor^2. The format arguments are passed in the order
+    // the message names them: the squared factor first, the channel count
+    // second.
+    PADDLE_ENFORCE_EQ(
+        input_dims[channel_axis] % factor_square, 0,
+        platform::errors::InvalidArgument(
+            "The square of upscale_factor[%u] should divide the "
+            "number of channel[%u]",
+            factor_square, input_dims[channel_axis]));
     auto output_dims = input_dims;
     output_dims[0] = input_dims[0];
-    output_dims[1] = input_dims[1] / (upscale_factor * upscale_factor);
-    output_dims[2] = input_dims[2] * upscale_factor;
-    output_dims[3] = input_dims[3] * upscale_factor;
+    if (!channel_last) {
+      output_dims[1] = input_dims[1] / (upscale_factor * upscale_factor);
+      output_dims[2] = input_dims[2] * upscale_factor;
+      output_dims[3] = input_dims[3] * upscale_factor;
+    } else {
+      output_dims[1] = input_dims[1] * upscale_factor;
+      output_dims[2] = input_dims[2] * upscale_factor;
+      output_dims[3] = input_dims[3] / (upscale_factor * upscale_factor);
+    }
     ctx->SetOutputDim("Out", output_dims);
   }
 };
@@ -54,14 +73,14 @@ class PixelShuffleOp : public framework::OperatorWithKernel {
 class PixelShuffleOpMaker : public framework::OpProtoAndCheckerMaker {
  public:
   void Make() override {
-    AddInput(
-        "X",
-        "(Tensor, default Tensor<float>), "
-        "the input feature data of PixelShuffleOp, the layout is [N C H W].");
-    AddOutput(
-        "Out",
-        "(Tensor, default Tensor<float>), the output of "
-        "PixelShuffleOp. The layout is [N,C/factor^2,H*factor,W*factor].");
+    AddInput("X",
+             "(Tensor, default Tensor<float>), "
+             "the input feature data of PixelShuffleOp, the layout is [N, C, "
+             "H, W] or [N, H, W, C].");
+    AddOutput("Out",
+              "(Tensor, default Tensor<float>), the output of "
+              "PixelShuffleOp. The layout is [N, C/factor^2, H*factor, "
+              "W*factor] or [N, H*factor, W*factor, C/factor^2].");
     AddAttr<int>("upscale_factor",
                  "the factor to increase spatial resolution by.")
         .SetDefault(1)
@@ -70,6 +89,11 @@ class PixelShuffleOpMaker : public framework::OpProtoAndCheckerMaker {
                             platform::errors::InvalidArgument(
                                 "upscale_factor should be larger than 0."));
         });
+    AddAttr<std::string>(
+        "data_format",
+        "An optional string from: \"NHWC\", \"NCHW\". "
+        "Defaults to \"NCHW\". Specify the data format of the input data.")
+        .SetDefault("NCHW");  // keep in sync with the description above
 
     AddComment(R"DOC(
 		Pixel Shuffle operator
@@ -114,19 +138,30 @@ class PixelShuffleGradOp : public framework::OperatorWithKernel {
         platform::errors::NotFound("Output(X@Grad) should not be null"));
 
     auto do_dims = ctx->GetInputDim(framework::GradVarName("Out"));
-    PADDLE_ENFORCE_EQ(
-        do_dims.size(), 4,
-        platform::errors::InvalidArgument(
-            "Input should be a 4-D tensor of format [N, C, H, W], but got %u.",
-            do_dims.size()));
+    PADDLE_ENFORCE_EQ(do_dims.size(), 4,
+                      platform::errors::InvalidArgument(
+                          "Input should be a 4-D tensor of format [N, C, H, W] "
+                          "or [N, H, W, C], but got %u.",
+                          do_dims.size()));
 
     auto upscale_factor = ctx->Attrs().Get<int>("upscale_factor");
 
+    const std::string data_format =
+        ctx->Attrs().Get<std::string>("data_format");
+    const bool channel_last = (data_format == "NHWC");
+
     auto dx_dims = do_dims;
     dx_dims[0] = do_dims[0];
-    dx_dims[1] = do_dims[1] * (upscale_factor * upscale_factor);
-    dx_dims[2] = do_dims[2] / upscale_factor;
-    dx_dims[3] = do_dims[3] / upscale_factor;
+
+    if (!channel_last) {
+      dx_dims[1] = do_dims[1] * (upscale_factor * upscale_factor);
+      dx_dims[2] = do_dims[2] / upscale_factor;
+      dx_dims[3] = do_dims[3] / upscale_factor;
+    } else {
+      dx_dims[1] = do_dims[1] / upscale_factor;
+      dx_dims[2] = do_dims[2] / upscale_factor;
+      dx_dims[3] = do_dims[3] * (upscale_factor * upscale_factor);
+    }
     ctx->SetOutputDim(framework::GradVarName("X"), dx_dims);
   }
 };
diff --git a/paddle/fluid/operators/pixel_shuffle_op.h b/paddle/fluid/operators/pixel_shuffle_op.h
index 1ae1c7e9d50cb9d701fd0e79337a1906f2f5d545..b2a0db0f838d5dcc3fed2ed9838f1c43240ce0e7 100644
--- a/paddle/fluid/operators/pixel_shuffle_op.h
+++ b/paddle/fluid/operators/pixel_shuffle_op.h
@@ -11,6 +11,7 @@ limitations under the License. */
 
 #pragma once
 #include <algorithm>
+#include <string>
 #include <vector>
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/operators/math/math_function.h"
@@ -24,23 +25,33 @@ class PixelShuffleOpKernel : public framework::OpKernel<T> {
   void Compute(const framework::ExecutionContext& ctx) const override {
     auto* in = ctx.Input<framework::Tensor>("X");
     auto* out = ctx.Output<framework::Tensor>("Out");
+
     out->mutable_data<T>(ctx.GetPlace());
 
     int factor = ctx.Attr<int>("upscale_factor");
 
+    std::string data_format = ctx.Attr<std::string>("data_format");
+    bool channel_last = (data_format == "NHWC");
+
     auto in_dims = in->dims();
     auto o_dims = out->dims();
 
     framework::Tensor t;
     t.ShareDataWith(*in);
-    t.Resize({in_dims[0], o_dims[1], factor, factor, in_dims[2], in_dims[3]});
-
+    if (!channel_last) {
+      t.Resize({in_dims[0], o_dims[1], factor, factor, in_dims[2], in_dims[3]});
+    } else {
+      t.Resize({in_dims[0], in_dims[1], in_dims[2], o_dims[3], factor, factor});
+    }
     std::vector<int> axis = {0, 1, 4, 2, 5, 3};
 
     framework::Tensor o;
     o.ShareDataWith(*out);
-    o.Resize({in_dims[0], o_dims[1], in_dims[2], factor, in_dims[3], factor});
-
+    if (!channel_last) {
+      o.Resize({in_dims[0], o_dims[1], in_dims[2], factor, in_dims[3], factor});
+    } else {
+      o.Resize({in_dims[0], in_dims[1], factor, in_dims[2], factor, o_dims[3]});
+    }
     math::Transpose<DeviceContext, T, 6> trans;
     auto& dev_ctx = ctx.template device_context<DeviceContext>();
     trans(dev_ctx, t, &o, axis);
@@ -58,19 +69,32 @@ class PixelShuffleGradOpKernel : public framework::OpKernel<T> {
 
     int factor = ctx.Attr<int>("upscale_factor");
 
+    std::string data_format = ctx.Attr<std::string>("data_format");
+    bool channel_last = (data_format == "NHWC");
+
     auto do_dims = dout->dims();
     auto dx_dims = dx->dims();
 
     framework::Tensor t;
     t.ShareDataWith(*dout);
-    t.Resize({do_dims[0], do_dims[1], dx_dims[2], factor, dx_dims[3], factor});
-
+    if (!channel_last) {
+      t.Resize(
+          {do_dims[0], do_dims[1], dx_dims[2], factor, dx_dims[3], factor});
+    } else {
+      t.Resize(
+          {do_dims[0], dx_dims[1], factor, dx_dims[2], factor, do_dims[3]});
+    }
     std::vector<int> axis = {0, 1, 3, 5, 2, 4};
 
     framework::Tensor o;
     o.ShareDataWith(*dx);
-    o.Resize({do_dims[0], do_dims[1], factor, factor, dx_dims[2], dx_dims[3]});
-
+    if (!channel_last) {
+      o.Resize(
+          {do_dims[0], do_dims[1], factor, factor, dx_dims[2], dx_dims[3]});
+    } else {
+      o.Resize(
+          {do_dims[0], dx_dims[1], dx_dims[2], do_dims[3], factor, factor});
+    }
     math::Transpose<DeviceContext, T, 6> trans;
     auto& dev_ctx = ctx.template device_context<DeviceContext>();
     trans(dev_ctx, t, &o, axis);
diff --git a/python/paddle/fluid/tests/unittests/test_pixel_shuffle.py b/python/paddle/fluid/tests/unittests/test_pixel_shuffle.py
index 0bcb4be3b7fb9380932cf137ac8e4939dcd77288..cf93f39ab8c5c92aa075f2f0a7dca9a5c5d9f485 100644
--- a/python/paddle/fluid/tests/unittests/test_pixel_shuffle.py
+++ b/python/paddle/fluid/tests/unittests/test_pixel_shuffle.py
@@ -16,16 +16,17 @@ from __future__ import print_function
 
 import unittest
 import numpy as np
+
 from op_test import OpTest
+import paddle
+import paddle.nn.functional as F
+import paddle.fluid.core as core
+import paddle.fluid as fluid
 
 
-class TestPixelShuffle(OpTest):
-    def setUp(self):
-        self.op_type = "pixel_shuffle"
-        n, c, h, w = 2, 9, 4, 4
-        up_factor = 3
-        shape = [n, c, h, w]
-        x = np.random.random(shape).astype("float64")
+def pixel_shuffle_np(x, up_factor, data_format="NCHW"):
+    if data_format == "NCHW":
+        n, c, h, w = x.shape
         new_shape = (n, c // (up_factor * up_factor), up_factor, up_factor, h,
                      w)
         # reshape to (num,output_channel,upscale_factor,upscale_factor,h,w)
@@ -34,10 +35,42 @@ class TestPixelShuffle(OpTest):
         npresult = npresult.transpose(0, 1, 4, 2, 5, 3)
         oshape = [n, c // (up_factor * up_factor), h * up_factor, w * up_factor]
         npresult = np.reshape(npresult, oshape)
+        return npresult
+    else:
+        n, h, w, c = x.shape
+        new_shape = (n, h, w, c // (up_factor * up_factor), up_factor,
+                     up_factor)
+        # reshape to (num,h,w,output_channel,upscale_factor,upscale_factor)
+        npresult = np.reshape(x, new_shape)
+        # transpose to (num,h,upscale_factor,w,upscale_factor,output_channel)
+        npresult = npresult.transpose(0, 1, 4, 2, 5, 3)
+        oshape = [n, h * up_factor, w * up_factor, c // (up_factor * up_factor)]
+        npresult = np.reshape(npresult, oshape)
+        return npresult
+
+
+class TestPixelShuffleOp(OpTest):
+    def setUp(self):
+        self.op_type = "pixel_shuffle"
+        self.init_data_format()
+        n, c, h, w = 2, 9, 4, 4
+
+        if self.format == "NCHW":
+            shape = [n, c, h, w]
+        if self.format == "NHWC":
+            shape = [n, h, w, c]
+
+        up_factor = 3
+
+        x = np.random.random(shape).astype("float64")
+        npresult = pixel_shuffle_np(x, up_factor, self.format)
 
         self.inputs = {'X': x}
         self.outputs = {'Out': npresult}
-        self.attrs = {'upscale_factor': up_factor}
+        self.attrs = {'upscale_factor': up_factor, "data_format": self.format}
+
+    def init_data_format(self):
+        self.format = "NCHW"
 
     def test_check_output(self):
         self.check_output()
@@ -46,5 +79,141 @@ class TestPixelShuffle(OpTest):
         self.check_grad(['X'], 'Out')
 
 
+class TestChannelLast(TestPixelShuffleOp):
+    def init_data_format(self):  # layout hook called by the inherited setUp
+        self.format = "NHWC"
+
+
+class TestPixelShuffleAPI(unittest.TestCase):
+    """Checks F.pixel_shuffle and nn.PixelShuffle against pixel_shuffle_np."""
+
+    def setUp(self):
+        # One random input per layout and its NumPy reference (factor 3).
+        self.x_1_np = np.random.random([2, 9, 4, 4]).astype("float64")
+        self.x_2_np = np.random.random([2, 4, 4, 9]).astype("float64")
+        self.out_1_np = pixel_shuffle_np(self.x_1_np, 3)
+        self.out_2_np = pixel_shuffle_np(self.x_2_np, 3, "NHWC")
+
+    def test_static_graph_functional(self):
+        for use_cuda in ([False, True]
+                         if core.is_compiled_with_cuda() else [False]):
+            place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
+
+            paddle.enable_static()
+            x_1 = paddle.data(name="x", shape=[2, 9, 4, 4], dtype="float64")
+            x_2 = paddle.data(name="x2", shape=[2, 4, 4, 9], dtype="float64")
+            out_1 = F.pixel_shuffle(x_1, 3)
+            out_2 = F.pixel_shuffle(x_2, 3, "NHWC")
+
+            exe = paddle.static.Executor(place=place)
+            res_1 = exe.run(fluid.default_main_program(),
+                            feed={"x": self.x_1_np},
+                            fetch_list=out_1,
+                            use_prune=True)
+            res_2 = exe.run(fluid.default_main_program(),
+                            feed={"x2": self.x_2_np},
+                            fetch_list=out_2,
+                            use_prune=True)
+
+            # Unlike a bare assert, assertTrue is not stripped by ``python -O``.
+            self.assertTrue(np.allclose(res_1, self.out_1_np))
+            self.assertTrue(np.allclose(res_2, self.out_2_np))
+
+    # same test between layer and functional in this op.
+    def test_static_graph_layer(self):
+        for use_cuda in ([False, True]
+                         if core.is_compiled_with_cuda() else [False]):
+            place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
+
+            paddle.enable_static()
+            x_1 = paddle.data(name="x", shape=[2, 9, 4, 4], dtype="float64")
+            x_2 = paddle.data(name="x2", shape=[2, 4, 4, 9], dtype="float64")
+            # init instance
+            ps_1 = paddle.nn.PixelShuffle(3)
+            ps_2 = paddle.nn.PixelShuffle(3, "NHWC")
+            out_1 = ps_1(x_1)
+            out_2 = ps_2(x_2)
+            out_1_np = pixel_shuffle_np(self.x_1_np, 3)
+            out_2_np = pixel_shuffle_np(self.x_2_np, 3, "NHWC")
+
+            exe = paddle.static.Executor(place=place)
+            res_1 = exe.run(fluid.default_main_program(),
+                            feed={"x": self.x_1_np},
+                            fetch_list=out_1,
+                            use_prune=True)
+            res_2 = exe.run(fluid.default_main_program(),
+                            feed={"x2": self.x_2_np},
+                            fetch_list=out_2,
+                            use_prune=True)
+
+            self.assertTrue(np.allclose(res_1, out_1_np))
+            self.assertTrue(np.allclose(res_2, out_2_np))
+
+    def run_dygraph(self, up_factor, data_format):
+        """Check dygraph layer/functional outputs against the reference."""
+        n, c, h, w = 2, 9, 4, 4
+        if data_format == "NCHW":
+            shape = [n, c, h, w]
+        if data_format == "NHWC":
+            shape = [n, h, w, c]
+
+        x = np.random.random(shape).astype("float64")
+        npresult = pixel_shuffle_np(x, up_factor, data_format)
+
+        for use_cuda in ([False, True]
+                         if core.is_compiled_with_cuda() else [False]):
+            place = paddle.CUDAPlace(0) if use_cuda else paddle.CPUPlace()
+
+            paddle.disable_static(place=place)
+
+            pixel_shuffle = paddle.nn.PixelShuffle(
+                up_factor, data_format=data_format)
+            result = pixel_shuffle(paddle.to_tensor(x))
+
+            self.assertTrue(np.allclose(result.numpy(), npresult))
+            # Use up_factor here too; the reference was built with it.
+            result_functional = F.pixel_shuffle(
+                paddle.to_tensor(x), up_factor, data_format)
+            self.assertTrue(np.allclose(result_functional.numpy(), npresult))
+
+    def test_dygraph1(self):
+        self.run_dygraph(3, "NCHW")
+
+    def test_dygraph2(self):
+        self.run_dygraph(3, "NHWC")
+
+
+class TestPixelShuffleError(unittest.TestCase):
+    """Invalid arguments must raise from both the function and the layer."""
+
+    def test_error_functional(self):
+        # Expect TypeError for a float upscale factor.
+        with self.assertRaises(TypeError):
+            with paddle.fluid.dygraph.guard():
+                data = np.random.random([2, 9, 4, 4]).astype("float64")
+                F.pixel_shuffle(paddle.to_tensor(data), 3.33)
+
+        # Expect ValueError for an unknown layout name.
+        with self.assertRaises(ValueError):
+            with paddle.fluid.dygraph.guard():
+                data = np.random.random([2, 9, 4, 4]).astype("float64")
+                F.pixel_shuffle(paddle.to_tensor(data), 3, "WOW")
+
+    def test_error_layer(self):
+        # Same two failure modes, triggered while constructing the layer.
+        # The random draw feeds nothing; it is kept so the NumPy RNG stream
+        # advances exactly as before.
+        with self.assertRaises(TypeError):
+            with paddle.fluid.dygraph.guard():
+                np.random.random([2, 9, 4, 4]).astype("float64")
+                paddle.nn.PixelShuffle(3.33)
+
+        # Unknown layout name passed to the constructor.
+        with self.assertRaises(ValueError):
+            with paddle.fluid.dygraph.guard():
+                np.random.random([2, 9, 4, 4]).astype("float64")
+                paddle.nn.PixelShuffle(3, "MEOW")
+
+
 if __name__ == '__main__':
     unittest.main()
diff --git a/python/paddle/nn/__init__.py b/python/paddle/nn/__init__.py
index b262b945267c72136141d98b2b0d5b911bd08ca9..dd5d0d269a6cb6377b11b6c98e86eef4ee0f8b57 100644
--- a/python/paddle/nn/__init__.py
+++ b/python/paddle/nn/__init__.py
@@ -139,7 +139,10 @@ from .layer.transformer import TransformerDecoder
 from .layer.transformer import Transformer
 from .layer.distance import PairwiseDistance  #DEFINE_ALIAS
 
+from .layer.vision import PixelShuffle
+
 from .layer import loss  #DEFINE_ALIAS
 from .layer import conv  #DEFINE_ALIAS
+from .layer import vision  #DEFINE_ALIAS
 from ..fluid.dygraph.layers import Layer  #DEFINE_ALIAS
 from ..fluid.dygraph.container import LayerList, ParameterList, Sequential  #DEFINE_ALIAS
diff --git a/python/paddle/nn/functional/__init__.py b/python/paddle/nn/functional/__init__.py
index 905849360e116341bdf366c6b600875af2357b11..afc1614732d06dcef4ca0e1e75cd93e28d6a2d3d 100644
--- a/python/paddle/nn/functional/__init__.py
+++ b/python/paddle/nn/functional/__init__.py
@@ -194,7 +194,7 @@ from .vision import box_clip  #DEFINE_ALIAS
 from .vision import box_coder  #DEFINE_ALIAS
 from .vision import box_decoder_and_assign  #DEFINE_ALIAS
 from .vision import collect_fpn_proposals  #DEFINE_ALIAS
-# from .vision import deformable_conv        #DEFINE_ALIAS
+# from .vision import deformable_conv  #DEFINE_ALIAS
 from .vision import deformable_roi_pooling  #DEFINE_ALIAS
 from .vision import density_prior_box  #DEFINE_ALIAS
 from .vision import detection_output  #DEFINE_ALIAS
@@ -206,7 +206,7 @@ from .vision import generate_proposals  #DEFINE_ALIAS
 from .vision import grid_sample  #DEFINE_ALIAS
 from .vision import image_resize  #DEFINE_ALIAS
 from .vision import image_resize_short  #DEFINE_ALIAS
-# from .vision import multi_box_head        #DEFINE_ALIAS
+# from .vision import multi_box_head  #DEFINE_ALIAS
 from .vision import pixel_shuffle  #DEFINE_ALIAS
 from .vision import prior_box  #DEFINE_ALIAS
 from .vision import prroi_pool  #DEFINE_ALIAS
diff --git a/python/paddle/nn/functional/vision.py b/python/paddle/nn/functional/vision.py
index 23e45725a78299e7e67308400a8b9c1adbfebed7..6cb15afbfd2db0011c1c38840ed9bad21320e00f 100644
--- a/python/paddle/nn/functional/vision.py
+++ b/python/paddle/nn/functional/vision.py
@@ -12,7 +12,10 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
-# TODO: define specitial functions used in computer vision task  
+from ...fluid.data_feeder import check_variable_and_dtype
+from ...fluid.layer_helper import LayerHelper
+from ...fluid.framework import core, in_dygraph_mode
+
 from ...fluid.layers import affine_channel  #DEFINE_ALIAS
 from ...fluid.layers import affine_grid  #DEFINE_ALIAS
 from ...fluid.layers import anchor_generator  #DEFINE_ALIAS
@@ -28,6 +31,7 @@ from ...fluid.layers import distribute_fpn_proposals  #DEFINE_ALIAS
 from ...fluid.layers import generate_mask_labels  #DEFINE_ALIAS
 from ...fluid.layers import generate_proposal_labels  #DEFINE_ALIAS
 from ...fluid.layers import generate_proposals  #DEFINE_ALIAS
+from ...fluid.layers import grid_sampler  #DEFINE_ALIAS
 from ...fluid.layers import image_resize  #DEFINE_ALIAS
 from ...fluid.layers import prior_box  #DEFINE_ALIAS
 from ...fluid.layers import prroi_pool  #DEFINE_ALIAS
@@ -43,7 +47,7 @@ from ...fluid.layers import yolov3_loss  #DEFINE_ALIAS
 
 from ...fluid.layers import fsp_matrix  #DEFINE_ALIAS
 from ...fluid.layers import image_resize_short  #DEFINE_ALIAS
-from ...fluid.layers import pixel_shuffle  #DEFINE_ALIAS
+# from ...fluid.layers import pixel_shuffle  #DEFINE_ALIAS
 from ...fluid.layers import retinanet_detection_output  #DEFINE_ALIAS
 from ...fluid.layers import retinanet_target_assign  #DEFINE_ALIAS
 from ...fluid.layers import roi_perspective_transform  #DEFINE_ALIAS
@@ -67,7 +71,7 @@ __all__ = [
     'generate_mask_labels',
     'generate_proposal_labels',
     'generate_proposals',
-    'grid_sample',
+    'grid_sampler',
     'image_resize',
     'image_resize_short',
     #       'multi_box_head',
@@ -89,8 +93,6 @@ __all__ = [
     'yolov3_loss'
 ]
 
-from ...fluid.layer_helper import LayerHelper
-from ...fluid.data_feeder import check_variable_and_dtype
 from ...fluid import core, dygraph_utils
 from ...fluid.framework import Variable, in_dygraph_mode
 from ...device import get_cudnn_version
@@ -112,22 +114,16 @@ def grid_sample(x,
     data x and y is indexing the 3rd dimension (in height dimension),
     finally results is the bilinear interpolation or nearest value of 4 nearest corner
     points. The output tensor shape will be [N, C, H, W].
-
     .. code-block:: text
-
         Step 1:
         Get (x, y) grid coordinates and scale to [0, H-1/W-1].
-
         .. code-block:: text
-
             grid_x = 0.5 * (grid[:, :, :, 0] + 1) * (W - 1)
             grid_y = 0.5 * (grid[:, :, :, 1] + 1) * (H - 1)
-
         Step 2:
         Indices input data X with grid (x, y) in each [H, W] area, and bilinear
         interpolate point value by 4 nearest points or nearest interpolate point value
         by nearest point.
-
           wn ------- y_n ------- en
           |           |           |
           |          d_n          |
@@ -137,27 +133,21 @@ def grid_sample(x,
           |          d_s          |
           |           |           |
           ws ------- y_s ------- wn
-
         For bilinear interpolation:
-
         x_w = floor(x)              // west side x coord
         x_e = x_w + 1               // east side x coord
         y_n = floor(y)              // north side y coord
         y_s = y_s + 1               // south side y coord
-
         d_w = grid_x - x_w          // distance to west side
         d_e = x_e - grid_x          // distance to east side
         d_n = grid_y - y_n          // distance to north side
         d_s = y_s - grid_y          // distance to south side
-
         wn = X[:, :, y_n, x_w]      // north-west point value
         en = X[:, :, y_n, x_e]      // north-east point value
         ws = X[:, :, y_s, x_w]      // south-east point value
         es = X[:, :, y_s, x_w]      // north-east point value
-
         output = wn * d_e * d_s + en * d_w * d_s
                + ws * d_e * d_n + es * d_w * d_n
-
     Args:
         x(Tensor): The input tensor, which is a 4-d tensor with shape
                      [N, C, H, W], N is the batch size, C is the channel
@@ -176,14 +166,10 @@ def grid_sample(x,
         name(str, optional): For detailed information, please refer
                              to :ref:`api_guide_Name`. Usually name is no need to set and
                              None by default.
-
     Returns: Tensor, The shape of output is [N, C, grid_H, grid_W] in which `grid_H` is the height of grid
                  and `grid_W` is the width of grid. The data type is same as input tensor.
-
     Examples:
-
         .. code-block:: python
-
             import paddle
             import paddle.nn.functional as F
             import numpy as np
@@ -272,3 +258,57 @@ def grid_sample(x,
             attrs=attrs,
             outputs={'Output': out})
     return out
+
+
+def pixel_shuffle(x, upscale_factor, data_format="NCHW", name=None):
+    """
+    This API implements pixel shuffle operation.
+    See more details in :ref:`api_nn_vision_PixelShuffle` .
+
+    Parameters:
+        x(Tensor): 4-D tensor, the data type should be float32 or float64.
+        upscale_factor(int): factor to increase spatial resolution.
+        data_format (str): The data format of the input and output data. An optional string from: "NCHW", "NHWC". The default is "NCHW". When it is "NCHW", the data is stored in the order of: [batch_size, input_channels, input_height, input_width]. When it is "NHWC", the order is: [batch_size, input_height, input_width, input_channels].
+        name (str, optional): The default value is None.  Normally there is no need for user to set this property.
+    Returns:
+        Out(tensor): Reshaped tensor according to the new dimension.
+    Raises:
+        TypeError: If upscale_factor is not an int.
+        ValueError: If data_format is neither "NCHW" nor "NHWC".
+        ValueError: If the square of upscale_factor cannot divide the channels of input.
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            import paddle.nn.functional as F
+            import numpy as np
+            x = np.random.randn(2, 9, 4, 4).astype(np.float32)
+            paddle.disable_static()
+            x_var = paddle.to_tensor(x)
+            out_var = F.pixel_shuffle(x_var, 3)
+            out = out_var.numpy()
+            print(out.shape)
+            # (2, 1, 12, 12)
+    """
+    if not in_dygraph_mode():
+        check_variable_and_dtype(x, 'x', ['float32', 'float64'],
+                                 'pixel_shuffle')
+    if not isinstance(upscale_factor, int):
+        raise TypeError("upscale factor must be int type")
+    if data_format not in ["NCHW", "NHWC"]:
+        raise ValueError("Attr(data_format) should be 'NCHW' or 'NHWC'. "
+                         "But received Attr(data_format): {} ".format(
+                             data_format))
+
+    if in_dygraph_mode():
+        return core.ops.pixel_shuffle(x, "upscale_factor", upscale_factor,
+                                      "data_format", data_format)
+
+    # locals() hands every argument, including ``name``, to the helper.
+    helper = LayerHelper("pixel_shuffle", **locals())
+    out = helper.create_variable_for_type_inference(dtype=x.dtype)
+    helper.append_op(
+        type="pixel_shuffle", inputs={"X": x}, outputs={"Out": out},
+        attrs={"upscale_factor": upscale_factor, "data_format": data_format})
+    return out
diff --git a/python/paddle/nn/layer/__init__.py b/python/paddle/nn/layer/__init__.py
index b7098aee423feeb3670206c1459a05401c86e5c0..b25350be601dd9e56d8268859b52a12d3745c44d 100644
--- a/python/paddle/nn/layer/__init__.py
+++ b/python/paddle/nn/layer/__init__.py
@@ -20,6 +20,7 @@ from . import conv
 from . import extension
 from . import activation
 from . import norm
+from . import vision
 from . import distance
 from . import transformer
 
@@ -29,6 +30,8 @@ from .conv import *
 from .extension import *
 from .activation import *
 from .norm import *
+from .vision import *
+
 from .transformer import *
 # from .activation import PReLU        #DEFINE_ALIAS
 from .activation import ReLU  #DEFINE_ALIAS
@@ -104,4 +107,6 @@ from .norm import InstanceNorm  #DEFINE_ALIAS
 # from .rnn import RNNCell        #DEFINE_ALIAS
 # from .rnn import GRUCell        #DEFINE_ALIAS
 # from .rnn import LSTMCell        #DEFINE_ALIAS
+
+from .vision import PixelShuffle  #DEFINE_ALIAS
 from .distance import PairwiseDistance  #DEFINE_ALIAS
diff --git a/python/paddle/nn/layer/vision.py b/python/paddle/nn/layer/vision.py
new file mode 100644
index 0000000000000000000000000000000000000000..a5f360ec02e6d8b59b80db4602776e904cf0b499
--- /dev/null
+++ b/python/paddle/nn/layer/vision.py
@@ -0,0 +1,82 @@
+#   Copyright (c) 2020 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+# TODO: define special functions used in computer vision task
+
+from ...fluid.dygraph import layers
+from .. import functional
+
+__all__ = ['PixelShuffle']
+
+
+class PixelShuffle(layers.Layer):
+    """
+    PixelShuffle Layer
+
+    This operator rearranges elements in a tensor of shape [N, C, H, W]
+    to a tensor of shape [N, C/upscale_factor**2, H*upscale_factor, W*upscale_factor],
+    or from shape [N, H, W, C] to [N, H*upscale_factor, W*upscale_factor, C/upscale_factor**2].
+    This is useful for implementing efficient sub-pixel convolution
+    with a stride of 1/upscale_factor.
+    Please refer to the paper: `Real-Time Single Image and Video Super-Resolution
+    Using an Efficient Sub-Pixel Convolutional Neural Network <https://arxiv.org/abs/1609.05158v2>`_
+    by Shi et al. (2016) for more details.
+
+    Parameters:
+        upscale_factor(int): factor to increase spatial resolution.
+        data_format (str): The data format of the input and output data. An optional string from: "NCHW", "NHWC". The default is "NCHW". When it is "NCHW", the data is stored in the order of: [batch_size, input_channels, input_height, input_width]. When it is "NHWC", the order is: [batch_size, input_height, input_width, input_channels].
+        name (str, optional): Name for the operation (optional, default is None). For more information, please refer to :ref:`api_guide_Name`.
+
+    Shape:
+        - x: 4-D tensor with shape: (N, C, H, W) or (N, H, W, C).
+        - out: 4-D tensor with shape: (N, C/upscale_factor**2, H*upscale_factor, W*upscale_factor) or (N, H*upscale_factor, W*upscale_factor, C/upscale_factor**2).
+
+    Raises:
+        TypeError: If upscale_factor is not an int.
+        ValueError: If data_format is neither "NCHW" nor "NHWC".
+
+    Examples:
+        .. code-block:: python
+
+            import paddle
+            import paddle.nn as nn
+            import numpy as np
+
+            paddle.disable_static()
+            x = np.random.randn(2, 9, 4, 4).astype(np.float32)
+            x_var = paddle.to_tensor(x)
+            pixel_shuffle = nn.PixelShuffle(3)
+            out_var = pixel_shuffle(x_var)
+            out = out_var.numpy()
+            print(out.shape)
+            # (2, 1, 12, 12)
+    """
+
+    def __init__(self, upscale_factor, data_format="NCHW", name=None):
+        super(PixelShuffle, self).__init__()
+        if not isinstance(upscale_factor, int):
+            raise TypeError("upscale factor must be int type")
+
+        if data_format not in ["NCHW", "NHWC"]:
+            raise ValueError("Data format should be 'NCHW' or 'NHWC'. "
+                             "But received data format: {}".format(
+                                 data_format))
+
+        self._upscale_factor = upscale_factor
+        self._data_format = data_format
+        self._name = name
+
+    def forward(self, x):
+        return functional.pixel_shuffle(x, self._upscale_factor,
+                                        self._data_format, self._name)