diff --git a/paddle/fluid/API.spec b/paddle/fluid/API.spec
index ed4e67879c795258683b094cfaeaff9063d66848..b6ae930b7155d15d24b287cc3eed50f2aeaa5599 100644
--- a/paddle/fluid/API.spec
+++ b/paddle/fluid/API.spec
@@ -170,6 +170,7 @@ paddle.fluid.layers.prelu ArgSpec(args=['x', 'mode', 'param_attr', 'name'], vara
 paddle.fluid.layers.flatten ArgSpec(args=['x', 'axis', 'name'], varargs=None, keywords=None, defaults=(1, None))
 paddle.fluid.layers.sequence_mask ArgSpec(args=['x', 'maxlen', 'dtype', 'name'], varargs=None, keywords=None, defaults=(None, 'int64', None))
 paddle.fluid.layers.stack ArgSpec(args=['x', 'axis'], varargs=None, keywords=None, defaults=(0,))
+paddle.fluid.layers.pad2d ArgSpec(args=['input', 'paddings', 'mode', 'pad_value', 'data_format', 'name'], varargs=None, keywords=None, defaults=([0, 0, 0, 0], 'constant', 0.0, 'NCHW', None))
 paddle.fluid.layers.unstack ArgSpec(args=['x', 'axis', 'num'], varargs=None, keywords=None, defaults=(0, None))
 paddle.fluid.layers.data ArgSpec(args=['name', 'shape', 'append_batch_size', 'dtype', 'lod_level', 'type', 'stop_gradient'], varargs=None, keywords=None, defaults=(True, 'float32', 0, VarType.LOD_TENSOR, True))
 paddle.fluid.layers.open_recordio_file ArgSpec(args=['filename', 'shapes', 'lod_levels', 'dtypes', 'pass_num', 'for_parallel'], varargs=None, keywords=None, defaults=(1, True))
diff --git a/paddle/fluid/operators/pad2d_op.cc b/paddle/fluid/operators/pad2d_op.cc
new file mode 100644
index 0000000000000000000000000000000000000000..a706d05fd7c35ef993f5199f0f893622cb863c5d
--- /dev/null
+++ b/paddle/fluid/operators/pad2d_op.cc
@@ -0,0 +1,584 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <algorithm>
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/math/math_function.h"
+
+namespace paddle {
+namespace operators {
+
+using framework::Tensor;
+
+template <typename T>
+void Pad2DConstNCHW(const T* in_data, const int num, const int channels,
+                    const int in_height, const int in_width,
+                    const int out_height, const int out_width,
+                    const int pad_top, const int pad_left, T value,
+                    T* out_data) {
+  for (int n = 0; n < num; ++n) {
+    for (int c = 0; c < channels; ++c) {
+      for (int out_h = 0; out_h < out_height; ++out_h) {
+        for (int out_w = 0; out_w < out_width; ++out_w) {
+          int in_h = out_h - pad_top;
+          int in_w = out_w - pad_left;
+          out_data[out_h * out_width + out_w] =
+              (in_h < 0 || in_w < 0 || in_h >= in_height || in_w >= in_width)
+                  ? value
+                  : in_data[in_h * in_width + in_w];
+        }
+      }
+      in_data += in_height * in_width;
+      out_data += out_height * out_width;
+    }
+  }
+}
+
+template <typename T>
+void Pad2DConstNHWC(const T* in_data, const int num, const int channels,
+                    const int in_height, const int in_width,
+                    const int out_height, const int out_width,
+                    const int pad_top, const int pad_left, T value,
+                    T* out_data) {
+  for (int n = 0; n < num; ++n) {
+    for (int out_h = 0; out_h < out_height; ++out_h) {
+      for (int out_w = 0; out_w < out_width; ++out_w) {
+        int in_h = out_h - pad_top;
+        int in_w = out_w - pad_left;
+        const int out_index = (out_h * out_width + out_w) * channels;
+        if (in_h < 0 || in_w < 0 || in_h >= in_height || in_w >= in_width) {
+          for (int c = 0; c < channels; ++c) {
+            out_data[out_index + c] = value;
+          }
+        } else {
+          const int in_index = (in_h * in_width + in_w) * channels;
+          for (int c = 0; c < channels; ++c) {
+            out_data[out_index + c] = in_data[in_index + c];
+          }
+        }
+      }
+    }
+    in_data += in_height * in_width * channels;
+    out_data += out_height * out_width * channels;
+  }
+}
+
+template <typename T>
+void Pad2DReflectNCHW(const T* in_data, const int num, const int channels,
+                      const int in_height, const int in_width,
+                      const int out_height, const int out_width,
+                      const int pad_top, const int pad_left, T* out_data) {
+  for (int n = 0; n < num; ++n) {
+    for (int c = 0; c < channels; ++c) {
+      for (int out_h = 0; out_h < out_height; ++out_h) {
+        for (int out_w = 0; out_w < out_width; ++out_w) {
+          int in_h = out_h - pad_top;
+          int in_w = out_w - pad_left;
+          in_h = std::max(in_h, -in_h);  // reflect by 0
+          in_h =
+              std::min(in_h, 2 * in_height - in_h - 2);  // reflect by in_height
+          in_w = std::max(in_w, -in_w);                  // reflect by 0
+          in_w =
+              std::min(in_w, 2 * in_width - in_w - 2);  // reflect by in_width
+          out_data[out_h * out_width + out_w] = in_data[in_h * in_width + in_w];
+        }
+      }
+      in_data += in_height * in_width;
+      out_data += out_height * out_width;
+    }
+  }
+}
+
+template <typename T>
+void Pad2DReflectNHWC(const T* in_data, const int num, const int channels,
+                      const int in_height, const int in_width,
+                      const int out_height, const int out_width,
+                      const int pad_top, const int pad_left, T* out_data) {
+  for (int n = 0; n < num; ++n) {
+    for (int out_h = 0; out_h < out_height; ++out_h) {
+      for (int out_w = 0; out_w < out_width; ++out_w) {
+        const int out_index = (out_h * out_width + out_w) * channels;
+        int in_h = out_h - pad_top;
+        int in_w = out_w - pad_left;
+        in_h = std::max(in_h, -in_h);
+        in_h = std::min(in_h, 2 * in_height - in_h - 2);
+        in_w = std::max(in_w, -in_w);
+        in_w = std::min(in_w, 2 * in_width - in_w - 2);
+        const int in_index = (in_h * in_width + in_w) * channels;
+
+        for (int c = 0; c < channels; ++c) {
+          out_data[out_index + c] = in_data[in_index + c];
+        }
+      }
+    }
+    in_data += in_height * in_width * channels;
+    out_data += out_height * out_width * channels;
+  }
+}
+
+template <typename T>
+void Pad2DEdgeNCHW(const T* in_data, const int num, const int channels,
+                   const int in_height, const int in_width,
+                   const int out_height, const int out_width, const int pad_top,
+                   const int pad_left, T* out_data) {
+  for (int n = 0; n < num; ++n) {
+    for (int c = 0; c < channels; ++c) {
+      for (int out_h = 0; out_h < out_height; ++out_h) {
+        for (int out_w = 0; out_w < out_width; ++out_w) {
+          int in_h = std::min(in_height - 1, std::max(out_h - pad_top, 0));
+          int in_w = std::min(in_width - 1, std::max(out_w - pad_left, 0));
+          out_data[out_h * out_width + out_w] = in_data[in_h * in_width + in_w];
+        }
+      }
+      in_data += in_height * in_width;
+      out_data += out_height * out_width;
+    }
+  }
+}
+
+template <typename T>
+void Pad2DEdgeNHWC(const T* in_data, const int num, const int channels,
+                   const int in_height, const int in_width,
+                   const int out_height, const int out_width, const int pad_top,
+                   const int pad_left, T* out_data) {
+  for (int n = 0; n < num; ++n) {
+    for (int out_h = 0; out_h < out_height; ++out_h) {
+      for (int out_w = 0; out_w < out_width; ++out_w) {
+        const int out_index = (out_h * out_width + out_w) * channels;
+        int in_h = std::min(in_height - 1, std::max(out_h - pad_top, 0));
+        int in_w = std::min(in_width - 1, std::max(out_w - pad_left, 0));
+        const int in_index = (in_h * in_width + in_w) * channels;
+        for (int c = 0; c < channels; ++c) {
+          out_data[out_index + c] = in_data[in_index + c];
+        }
+      }
+    }
+    in_data += in_height * in_width * channels;
+    out_data += out_height * out_width * channels;
+  }
+}
+
+template <typename T>
+void Pad2DGradConstNCHW(T* d_in_data, const int num, const int channels,
+                        const int in_height, const int in_width,
+                        const int out_height, const int out_width,
+                        const int pad_top, const int pad_left,
+                        const T* d_out_data) {
+  for (int n = 0; n < num; ++n) {
+    for (int c = 0; c < channels; ++c) {
+      for (int out_h = 0; out_h < out_height; ++out_h) {
+        for (int out_w = 0; out_w < out_width; ++out_w) {
+          int in_h = out_h - pad_top;
+          int in_w = out_w - pad_left;
+          if (!(in_h < 0 || in_w < 0 || in_h >= in_height ||
+                in_w >= in_width)) {
+            d_in_data[in_h * in_width + in_w] =
+                d_out_data[out_h * out_width + out_w];
+          }
+        }
+      }
+      d_in_data += in_height * in_width;
+      d_out_data += out_height * out_width;
+    }
+  }
+}
+
+template <typename T>
+void Pad2DGradConstNHWC(T* d_in_data, const int num, const int channels,
+                        const int in_height, const int in_width,
+                        const int out_height, const int out_width,
+                        const int pad_top, const int pad_left,
+                        const T* d_out_data) {
+  for (int n = 0; n < num; ++n) {
+    for (int out_h = 0; out_h < out_height; ++out_h) {
+      for (int out_w = 0; out_w < out_width; ++out_w) {
+        int in_h = out_h - pad_top;
+        int in_w = out_w - pad_left;
+        const int out_index = (out_h * out_width + out_w) * channels;
+        if (!(in_h < 0 || in_w < 0 || in_h >= in_height || in_w >= in_width)) {
+          const int in_index = (in_h * in_width + in_w) * channels;
+          for (int c = 0; c < channels; ++c) {
+            d_in_data[in_index + c] = d_out_data[out_index + c];
+          }
+        }
+      }
+    }
+    d_in_data += in_height * in_width * channels;
+    d_out_data += out_height * out_width * channels;
+  }
+}
+
+template <typename T>
+void Pad2DGradReflectNCHW(T* d_in_data, const int num, const int channels,
+                          const int in_height, const int in_width,
+                          const int out_height, const int out_width,
+                          const int pad_top, const int pad_left,
+                          const T* d_out_data) {
+  for (int n = 0; n < num; ++n) {
+    for (int c = 0; c < channels; ++c) {
+      for (int out_h = 0; out_h < out_height; ++out_h) {
+        for (int out_w = 0; out_w < out_width; ++out_w) {
+          int in_h = out_h - pad_top;
+          int in_w = out_w - pad_left;
+          in_h = std::max(in_h, -in_h);  // reflect over 0
+          in_h = std::min(in_h,
+                          2 * in_height - in_h - 2);  // reflect over in_height
+          in_w = std::max(in_w, -in_w);               // reflect over 0
+          in_w =
+              std::min(in_w, 2 * in_width - in_w - 2);  // reflect over in_width
+          d_in_data[in_h * in_width + in_w] +=
+              d_out_data[out_h * out_width + out_w];
+        }
+      }
+      d_in_data += in_height * in_width;
+      d_out_data += out_height * out_width;
+    }
+  }
+}
+
+template <typename T>
+void Pad2DGradReflectNHWC(T* d_in_data, const int num, const int channels,
+                          const int in_height, const int in_width,
+                          const int out_height, const int out_width,
+                          const int pad_top, const int pad_left,
+                          const T* d_out_data) {
+  for (int n = 0; n < num; ++n) {
+    for (int out_h = 0; out_h < out_height; ++out_h) {
+      for (int out_w = 0; out_w < out_width; ++out_w) {
+        const int out_index = (out_h * out_width + out_w) * channels;
+        int in_h = out_h - pad_top;
+        int in_w = out_w - pad_left;
+        in_h = std::max(in_h, -in_h);
+        in_h = std::min(in_h, 2 * in_height - in_h - 2);
+        in_w = std::max(in_w, -in_w);
+        in_w = std::min(in_w, 2 * in_width - in_w - 2);
+        const int in_index = (in_h * in_width + in_w) * channels;
+        for (int c = 0; c < channels; ++c) {
+          d_in_data[in_index + c] += d_out_data[out_index + c];
+        }
+      }
+    }
+    d_in_data += in_height * in_width * channels;
+    d_out_data += out_height * out_width * channels;
+  }
+}
+
+template <typename T>
+void Pad2DGradEdgeNCHW(T* d_in_data, const int num, const int channels,
+                       const int in_height, const int in_width,
+                       const int out_height, const int out_width,
+                       const int pad_top, const int pad_left,
+                       const T* d_out_data) {
+  for (int n = 0; n < num; ++n) {
+    for (int c = 0; c < channels; ++c) {
+      for (int out_h = 0; out_h < out_height; ++out_h) {
+        for (int out_w = 0; out_w < out_width; ++out_w) {
+          int in_h = std::min(in_height - 1, std::max(out_h - pad_top, 0));
+          int in_w = std::min(in_width - 1, std::max(out_w - pad_left, 0));
+          d_in_data[in_h * in_width + in_w] +=
+              d_out_data[out_h * out_width + out_w];
+        }
+      }
+      d_in_data += in_height * in_width;
+      d_out_data += out_height * out_width;
+    }
+  }
+}
+
+template <typename T>
+void Pad2DGradEdgeNHWC(T* d_in_data, const int num, const int channels,
+                       const int in_height, const int in_width,
+                       const int out_height, const int out_width,
+                       const int pad_top, const int pad_left,
+                       const T* d_out_data) {
+  for (int n = 0; n < num; ++n) {
+    for (int out_h = 0; out_h < out_height; ++out_h) {
+      for (int out_w = 0; out_w < out_width; ++out_w) {
+        const int out_index = (out_h * out_width + out_w) * channels;
+        int in_h = std::min(in_height - 1, std::max(out_h - pad_top, 0));
+        int in_w = std::min(in_width - 1, std::max(out_w - pad_left, 0));
+        const int in_index = (in_h * in_width + in_w) * channels;
+        for (int c = 0; c < channels; ++c) {
+          d_in_data[in_index + c] += d_out_data[out_index + c];
+        }
+      }
+    }
+    d_in_data += in_height * in_width * channels;
+    d_out_data += out_height * out_width * channels;
+  }
+}
+
+template <typename T>
+class Pad2dCPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto pads = context.Attr<std::vector<int>>("paddings");
+    auto mode = context.Attr<std::string>("mode");
+    auto data_format = context.Attr<std::string>("data_format");
+    T value = context.Attr<T>("pad_value");
+    auto* x = context.Input<Tensor>("X");
+    auto* out = context.Output<Tensor>("Out");
+    auto in_dims = x->dims();
+    auto out_dims = out->dims();
+    const T* in_data = x->data<T>();
+    T* out_data = out->mutable_data<T>(context.GetPlace());
+    const int pad_top = pads[0];
+    const int pad_left = pads[2];
+    const int num = in_dims[0];
+    if (data_format == "NCHW") {
+      const int channels = in_dims[1];
+      const int in_height = in_dims[2];
+      const int in_width = in_dims[3];
+      const int out_height = out_dims[2];
+      const int out_width = out_dims[3];
+      if (mode == "reflect") {
+        Pad2DReflectNCHW(in_data, num, channels, in_height, in_width,
+                         out_height, out_width, pad_top, pad_left, out_data);
+      } else if (mode == "edge") {
+        Pad2DEdgeNCHW(in_data, num, channels, in_height, in_width, out_height,
+                      out_width, pad_top, pad_left, out_data);
+      } else {
+        Pad2DConstNCHW(in_data, num, channels, in_height, in_width, out_height,
+                       out_width, pad_top, pad_left, value, out_data);
+      }
+    } else {
+      const int channels = in_dims[3];
+      const int in_height = in_dims[1];
+      const int in_width = in_dims[2];
+      const int out_height = out_dims[1];
+      const int out_width = out_dims[2];
+      if (mode == "reflect") {
+        Pad2DReflectNHWC(in_data, num, channels, in_height, in_width,
+                         out_height, out_width, pad_top, pad_left, out_data);
+      } else if (mode == "edge") {
+        Pad2DEdgeNHWC(in_data, num, channels, in_height, in_width, out_height,
+                      out_width, pad_top, pad_left, out_data);
+      } else {
+        Pad2DConstNHWC(in_data, num, channels, in_height, in_width, out_height,
+                       out_width, pad_top, pad_left, value, out_data);
+      }
+    }
+  }
+};
+
+template <typename T>
+class Pad2dGradCPUKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto pads = context.Attr<std::vector<int>>("paddings");
+    auto mode = context.Attr<std::string>("mode");
+    auto data_format = context.Attr<std::string>("data_format");
+    auto* d_out = context.Input<Tensor>(framework::GradVarName("Out"));
+    auto* d_in = context.Output<Tensor>(framework::GradVarName("X"));
+    auto d_in_dims = d_in->dims();
+    auto d_out_dims = d_out->dims();
+    const T* d_out_data = d_out->data<T>();
+    T* d_in_data = d_in->mutable_data<T>(context.GetPlace());
+    math::SetConstant<platform::CPUDeviceContext, T> set_zero;
+    set_zero(context.template device_context<platform::CPUDeviceContext>(),
+             d_in, static_cast<T>(0));
+    const int pad_top = pads[0];
+    const int pad_left = pads[2];
+    const int num = d_in_dims[0];
+    if (data_format == "NCHW") {
+      const int channels = d_in_dims[1];
+      const int in_height = d_in_dims[2];
+      const int in_width = d_in_dims[3];
+      const int out_height = d_out_dims[2];
+      const int out_width = d_out_dims[3];
+      if (mode == "reflect") {
+        Pad2DGradReflectNCHW(d_in_data, num, channels, in_height, in_width,
+                             out_height, out_width, pad_top, pad_left,
+                             d_out_data);
+      } else if (mode == "edge") {
+        Pad2DGradEdgeNCHW(d_in_data, num, channels, in_height, in_width,
+                          out_height, out_width, pad_top, pad_left, d_out_data);
+      } else {
+        Pad2DGradConstNCHW(d_in_data, num, channels, in_height, in_width,
+                           out_height, out_width, pad_top, pad_left,
+                           d_out_data);
+      }
+    } else {
+      const int channels = d_in_dims[3];
+      const int in_height = d_in_dims[1];
+      const int in_width = d_in_dims[2];
+      const int out_height = d_out_dims[1];
+      const int out_width = d_out_dims[2];
+      if (mode == "reflect") {
+        Pad2DGradReflectNHWC(d_in_data, num, channels, in_height, in_width,
+                             out_height, out_width, pad_top, pad_left,
+                             d_out_data);
+      } else if (mode == "edge") {
+        Pad2DGradEdgeNHWC(d_in_data, num, channels, in_height, in_width,
+                          out_height, out_width, pad_top, pad_left, d_out_data);
+      } else {
+        Pad2DGradConstNHWC(d_in_data, num, channels, in_height, in_width,
+                           out_height, out_width, pad_top, pad_left,
+                           d_out_data);
+      }
+    }
+  }
+};
+
+class Pad2dOp : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput("X"),
+                   "Input(X) of Pad2dOp should not be null.");
+    PADDLE_ENFORCE(ctx->HasOutput("Out"),
+                   "Output(Out) of Pad2dOp should not be null.");
+
+    auto x_dim = ctx->GetInputDim("X");
+    auto paddings = ctx->Attrs().Get<std::vector<int>>("paddings");
+    PADDLE_ENFORCE_EQ(x_dim.size(), 4,
+                      "Size of paddings should be equal to 4.");
+    std::vector<int64_t> out_dims(x_dim.size());
+
+    auto data_format = ctx->Attrs().Get<std::string>("data_format");
+    out_dims[0] = x_dim[0];
+    if (data_format == "NCHW") {
+      out_dims[1] = x_dim[1];
+      out_dims[2] = x_dim[2] + paddings[0] + paddings[1];  // height
+      out_dims[3] = x_dim[3] + paddings[2] + paddings[3];  // width
+    } else {                                               // NHWC
+      out_dims[3] = x_dim[3];
+      out_dims[1] = x_dim[1] + paddings[0] + paddings[1];
+      out_dims[2] = x_dim[2] + paddings[2] + paddings[3];
+    }
+
+    ctx->SetOutputDim("Out", framework::make_ddim(out_dims));
+    if (out_dims[0] == x_dim[0]) {
+      // Only pass LoD when the first dimension is equal between
+      // output and input.
+      ctx->ShareLoD("X", /*->*/ "Out");
+    }
+  }
+};
+
+class Pad2dOpMaker : public framework::OpProtoAndCheckerMaker {
+ public:
+  void Make() override {
+    AddInput("X",
+             "The input of pad2d op. "
+             "The input should be a 4-D tensor with formate NCHW or NHWC.");
+    AddOutput("Out",
+              "The output of pad2d op. "
+              "A tensor with the same shape as X.");
+    AddAttr<std::vector<int>>(
+        "paddings",
+        "(vector<int>) "
+        "A list<int> to describe the padding rules."
+        "paddings=[0, 1, 2, 3] means "
+        "padding 0 row to top, 1 row to bottom, 2 columns to left "
+        "and 3 columns to right. Size of paddings must be 4.");
+    AddAttr<float>("pad_value",
+                   "(float, default 0.0) "
+                   "The value to fill the padded areas in constant mode.")
+        .SetDefault(0.0f);
+    AddAttr<std::string>("mode",
+                         "(float, default constant) "
+                         "Three modes: constant(default), reflect, edge.")
+        .SetDefault("constant");
+    AddAttr<std::string>(
+        "data_format",
+        "(string, default NCHW) Only used in "
+        "An optional string from: \"NHWC\", \"NCHW\". "
+        "Defaults to \"NHWC\". Specify the data format of the input data.")
+        .SetDefault("NCHW");
+    AddComment(R"DOC(
+Pad2d Operator.
+Pad 2-d images accordding to 'paddings' and 'mode'. 
+If mode is 'reflect', paddings[0] and paddings[1] must be no greater
+than height-1. And the width dimension has the same condition.
+
+Given that X is a channel of image from input:
+
+X = [[1, 2, 3],
+     [4, 5, 6]]
+
+Case 0:
+
+paddings = [0, 1, 2, 3],
+mode = 'constant'
+pad_value = 0
+
+Out = [[0, 0, 1, 2, 3, 0, 0, 0]
+       [0, 0, 4, 5, 6, 0, 0, 0]
+       [0, 0, 0, 0, 0, 0, 0, 0]]
+
+Case 1:
+
+paddings = [0, 1, 2, 1],
+mode = 'reflect'
+
+Out = [[3, 2, 1, 2, 3, 2]
+       [6, 5, 4, 5, 6, 5]
+       [3, 2, 1, 2, 3, 2]]
+
+Case 2:
+
+paddings = [0, 1, 2, 1],
+mode = 'edge'
+
+Out = [[1, 1, 1, 2, 3, 3]
+       [4, 4, 4, 5, 6, 6]
+       [4, 4, 4, 5, 6, 6]]
+)DOC");
+  }
+};
+
+class Pad2dOpGrad : public framework::OperatorWithKernel {
+ public:
+  using framework::OperatorWithKernel::OperatorWithKernel;
+
+  void InferShape(framework::InferShapeContext* ctx) const override {
+    PADDLE_ENFORCE(ctx->HasInput("X"), "Input(X) should not be null");
+    PADDLE_ENFORCE(ctx->HasInput(framework::GradVarName("Out")),
+                   "Input(Out@GRAD) should not be null");
+    auto x_dims = ctx->GetInputDim("X");
+    auto x_grad_name = framework::GradVarName("X");
+    if (ctx->HasOutput(x_grad_name)) {
+      ctx->SetOutputDim(x_grad_name, x_dims);
+    }
+  }
+};
+
+class Pad2dOpGradMaker : public framework::SingleGradOpDescMaker {
+ public:
+  using framework::SingleGradOpDescMaker::SingleGradOpDescMaker;
+
+ protected:
+  std::unique_ptr<framework::OpDesc> Apply() const override {
+    auto* bind = new framework::OpDesc();
+    bind->SetInput("X", Input("X"));
+    bind->SetInput(framework::GradVarName("Out"), OutputGrad("Out"));
+    bind->SetOutput(framework::GradVarName("X"), InputGrad("X"));
+    bind->SetAttrMap(Attrs());
+    bind->SetType("pad2d_grad");
+    return std::unique_ptr<framework::OpDesc>(bind);
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+
+REGISTER_OPERATOR(pad2d, ops::Pad2dOp, ops::Pad2dOpMaker,
+                  ops::Pad2dOpGradMaker);
+REGISTER_OPERATOR(pad2d_grad, ops::Pad2dOpGrad);
+REGISTER_OP_CPU_KERNEL(pad2d, ops::Pad2dCPUKernel<float>);
+REGISTER_OP_CPU_KERNEL(pad2d_grad, ops::Pad2dGradCPUKernel<float>);
diff --git a/paddle/fluid/operators/pad2d_op.cu b/paddle/fluid/operators/pad2d_op.cu
new file mode 100644
index 0000000000000000000000000000000000000000..9ba0ddbd84a43cfd5f028ce072b5c7606fae343d
--- /dev/null
+++ b/paddle/fluid/operators/pad2d_op.cu
@@ -0,0 +1,432 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+    http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include <algorithm>
+#include "paddle/fluid/framework/op_registry.h"
+#include "paddle/fluid/operators/math/math_function.h"
+#include "paddle/fluid/platform/cuda_primitives.h"
+#include "paddle/fluid/platform/gpu_info.h"
+
+namespace paddle {
+namespace operators {
+
+using platform::PADDLE_CUDA_NUM_THREADS;
+
+#define CUDA_1D_KERNEL_LOOP(i, n)                              \
+  for (int i = blockIdx.x * blockDim.x + threadIdx.x; i < (n); \
+       i += blockDim.x * gridDim.x)
+
+using framework::Tensor;
+
+template <typename T>
+__global__ void Pad2DConstNCHW(const int nthreads, const T* in_data,
+                               const int num, const int channels,
+                               const int in_height, const int in_width,
+                               const int out_height, const int out_width,
+                               const int pad_top, const int pad_left, T value,
+                               T* out_data) {
+  CUDA_1D_KERNEL_LOOP(index, nthreads) {
+    int nc = index / out_width;
+    const int out_w = index % out_width;
+    const int out_h = nc % out_height;
+    nc /= out_height;
+    int in_h = out_h - pad_top;
+    int in_w = out_w - pad_left;
+    out_data[index] =
+        (in_h < 0 || in_w < 0 || in_h >= in_height || in_w >= in_width)
+            ? value
+            : in_data[(nc * in_height + in_h) * in_width + in_w];
+  }
+}
+
+template <typename T>
+__global__ void Pad2DConstNHWC(const int nthreads, const T* in_data,
+                               const int num, const int channels,
+                               const int in_height, const int in_width,
+                               const int out_height, const int out_width,
+                               const int pad_top, const int pad_left, T value,
+                               T* out_data) {
+  CUDA_1D_KERNEL_LOOP(index, nthreads) {
+    int n = index / channels;
+    const int c = index % channels;
+    const int out_w = n % out_width;
+    n /= out_width;
+    const int out_h = n % out_height;
+    n /= out_height;
+    const int in_h = out_h - pad_top;
+    const int in_w = out_w - pad_left;
+    out_data[index] =
+        (in_h < 0 || in_w < 0 || in_h >= in_height || in_w >= in_width)
+            ? value
+            : in_data[((n * in_height + in_h) * in_width + in_w) * channels +
+                      c];
+  }
+}
+
+template <typename T>
+__global__ void Pad2DReflectNCHW(const int nthreads, const T* in_data,
+                                 const int num, const int channels,
+                                 const int in_height, const int in_width,
+                                 const int out_height, const int out_width,
+                                 const int pad_top, const int pad_left,
+                                 T* out_data) {
+  CUDA_1D_KERNEL_LOOP(index, nthreads) {
+    int nc = index / out_width;
+    const int out_w = index % out_width;
+    const int out_h = nc % out_height;
+    nc /= out_height;
+    int in_h = out_h - pad_top;
+    int in_w = out_w - pad_left;
+    in_h = max(in_h, -in_h);                     // reflect by 0
+    in_h = min(in_h, 2 * in_height - in_h - 2);  // reflect by in_height
+    in_w = max(in_w, -in_w);                     // reflect by 0
+    in_w = min(in_w, 2 * in_width - in_w - 2);   // reflect by in_width
+    out_data[index] = in_data[(nc * in_height + in_h) * in_width + in_w];
+  }
+}
+
+template <typename T>
+__global__ void Pad2DReflectNHWC(const int nthreads, const T* in_data,
+                                 const int num, const int channels,
+                                 const int in_height, const int in_width,
+                                 const int out_height, const int out_width,
+                                 const int pad_top, const int pad_left,
+                                 T* out_data) {
+  CUDA_1D_KERNEL_LOOP(index, nthreads) {
+    int n = index / channels;
+    const int c = index % channels;
+    const int out_w = n % out_width;
+    n /= out_width;
+    const int out_h = n % out_height;
+    n /= out_height;
+    int in_h = out_h - pad_top;
+    int in_w = out_w - pad_left;
+    in_h = max(in_h, -in_h);
+    in_h = min(in_h, 2 * in_height - in_h - 2);
+    in_w = max(in_w, -in_w);
+    in_w = min(in_w, 2 * in_width - in_w - 2);
+    out_data[index] =
+        in_data[((n * in_height + in_h) * in_width + in_w) * channels + c];
+  }
+}
+
+template <typename T>
+__global__ void Pad2DEdgeNCHW(const int nthreads, const T* in_data,
+                              const int num, const int channels,
+                              const int in_height, const int in_width,
+                              const int out_height, const int out_width,
+                              const int pad_top, const int pad_left,
+                              T* out_data) {
+  CUDA_1D_KERNEL_LOOP(index, nthreads) {
+    int nc = index / out_width;
+    const int out_w = index % out_width;
+    const int out_h = nc % out_height;
+    nc /= out_height;
+    int in_h = min(in_height - 1, max(out_h - pad_top, 0));
+    int in_w = min(in_width - 1, max(out_w - pad_left, 0));
+    out_data[index] = in_data[(nc * in_height + in_h) * in_width + in_w];
+  }
+}
+
+template <typename T>
+__global__ void Pad2DEdgeNHWC(const int nthreads, const T* in_data,
+                              const int num, const int channels,
+                              const int in_height, const int in_width,
+                              const int out_height, const int out_width,
+                              const int pad_top, const int pad_left,
+                              T* out_data) {
+  CUDA_1D_KERNEL_LOOP(index, nthreads) {
+    int n = index / channels;
+    const int c = index % channels;
+    const int out_w = n % out_width;
+    n /= out_width;
+    const int out_h = n % out_height;
+    n /= out_height;
+    int in_h = min(in_height - 1, max(out_h - pad_top, 0));
+    int in_w = min(in_width - 1, max(out_w - pad_left, 0));
+    out_data[index] =
+        in_data[((n * in_height + in_h) * in_width + in_w) * channels + c];
+  }
+}
+
+template <typename T>
+__global__ void Pad2DGradConstNCHW(const int in_size, T* d_in_data,
+                                   const int num, const int channels,
+                                   const int in_height, const int in_width,
+                                   const int out_height, const int out_width,
+                                   const int pad_top, const int pad_left,
+                                   const T* d_out_data) {
+  CUDA_1D_KERNEL_LOOP(in_index, in_size) {
+    int nc = in_index / in_width;
+    const int out_w = in_index % in_width + pad_left;
+    const int out_h = nc % in_height + pad_top;
+    nc /= in_height;
+    d_in_data[in_index] =
+        d_out_data[(nc * out_height + out_h) * out_width + out_w];
+  }
+}
+
+template <typename T>
+__global__ void Pad2DGradConstNHWC(const int in_size, T* d_in_data,
+                                   const int num, const int channels,
+                                   const int in_height, const int in_width,
+                                   const int out_height, const int out_width,
+                                   const int pad_top, const int pad_left,
+                                   const T* d_out_data) {
+  CUDA_1D_KERNEL_LOOP(in_index, in_size) {
+    int n = in_index / channels;
+    const int c = in_index % channels;
+    const int out_w = n % in_width + pad_left;
+    n /= in_width;
+    const int out_h = n % in_height + pad_top;
+    n /= in_height;
+    d_in_data[in_index] =
+        d_out_data[((n * out_height + out_h) * out_width + out_w) * channels +
+                   c];
+  }
+}
+
+template <typename T>
+__global__ void Pad2DGradReflectNCHW(const int out_size, T* d_in_data,
+                                     const int num, const int channels,
+                                     const int in_height, const int in_width,
+                                     const int out_height, const int out_width,
+                                     const int pad_top, const int pad_left,
+                                     const T* d_out_data) {
+  CUDA_1D_KERNEL_LOOP(out_index, out_size) {
+    int nc = out_index / out_width;
+    const int out_w = out_index % out_width;
+    const int out_h = nc % out_height;
+    nc /= out_height;
+    int in_h = out_h - pad_top;
+    int in_w = out_w - pad_left;
+    in_h = max(in_h, -in_h);
+    in_w = max(in_w, -in_w);
+    in_h = min(in_h, 2 * in_height - in_h - 2);
+    in_w = min(in_w, 2 * in_width - in_w - 2);
+    atomicAdd(&d_in_data[(nc * in_height + in_h) * in_width + in_w],
+              d_out_data[out_index]);
+  }
+}
+
+template <typename T>
+__global__ void Pad2DGradReflectNHWC(const int out_size, T* d_in_data,
+                                     const int num, const int channels,
+                                     const int in_height, const int in_width,
+                                     const int out_height, const int out_width,
+                                     const int pad_top, const int pad_left,
+                                     const T* d_out_data) {
+  CUDA_1D_KERNEL_LOOP(out_index, out_size) {
+    const int c = out_index % channels;
+    int n = out_index / channels;
+    const int out_w = n % out_width;
+    n /= out_width;
+    const int out_h = n % out_height;
+    n /= out_height;
+    int in_h = out_h - pad_top;
+    int in_w = out_w - pad_left;
+    in_h = max(in_h, -in_h);
+    in_w = max(in_w, -in_w);
+    in_h = min(in_h, in_height * 2 - in_h - 2);
+    in_w = min(in_w, in_width * 2 - in_w - 2);
+    atomicAdd(
+        &d_in_data[((n * in_height + in_h) * in_width + in_w) * channels + c],
+        d_out_data[out_index]);
+  }
+}
+
+template <typename T>
+__global__ void Pad2DGradEdgeNCHW(const int out_size, T* d_in_data,
+                                  const int num, const int channels,
+                                  const int in_height, const int in_width,
+                                  const int out_height, const int out_width,
+                                  const int pad_top, const int pad_left,
+                                  const T* d_out_data) {
+  CUDA_1D_KERNEL_LOOP(out_index, out_size) {
+    int nc = out_index / out_width;
+    const int out_w = out_index % out_width;
+    const int out_h = nc % out_height;
+    nc /= out_height;
+    const int in_h = min(in_height - 1, max(out_h - pad_top, 0));
+    const int in_w = min(in_width - 1, max(out_w - pad_left, 0));
+    atomicAdd(&d_in_data[(nc * in_height + in_h) * in_width + in_w],
+              d_out_data[out_index]);
+  }
+}
+
+template <typename T>
+__global__ void Pad2DGradEdgeNHWC(const int out_size, T* d_in_data,
+                                  const int num, const int channels,
+                                  const int in_height, const int in_width,
+                                  const int out_height, const int out_width,
+                                  const int pad_top, const int pad_left,
+                                  const T* d_out_data) {
+  CUDA_1D_KERNEL_LOOP(out_index, out_size) {
+    const int c = out_index % channels;
+    int n = out_index / channels;
+    const int out_w = n % out_width;
+    n /= out_width;
+    const int out_h = n % out_height;
+    n /= out_height;
+    const int in_h = min(in_height - 1, max(out_h - pad_top, 0));
+    const int in_w = min(in_width - 1, max(out_w - pad_left, 0));
+    atomicAdd(
+        &d_in_data[((n * in_height + in_h) * in_width + in_w) * channels + c],
+        d_out_data[out_index]);
+  }
+}
+
+template <typename T>
+class Pad2dCUDAKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto pads = context.Attr<std::vector<int>>("paddings");
+    auto mode = context.Attr<std::string>("mode");
+    auto data_format = context.Attr<std::string>("data_format");
+    T value = context.Attr<T>("pad_value");
+    auto* x = context.Input<Tensor>("X");
+    auto* out = context.Output<Tensor>("Out");
+    auto in_dims = x->dims();
+    auto out_dims = out->dims();
+    const T* in_data = x->data<T>();
+    T* out_data = out->mutable_data<T>(context.GetPlace());
+    const int pad_top = pads[0];
+    const int pad_left = pads[2];
+    const int num = in_dims[0];
+
+    auto stream = context.cuda_device_context().stream();
+    int block = PADDLE_CUDA_NUM_THREADS;
+    const int out_size = out->numel();
+    int grid = (out_size + block - 1) / block;
+
+    if (data_format == "NCHW") {
+      const int channels = in_dims[1];
+      const int in_height = in_dims[2];
+      const int in_width = in_dims[3];
+      const int out_height = out_dims[2];
+      const int out_width = out_dims[3];
+      if (mode == "reflect") {
+        Pad2DReflectNCHW<T><<<grid, block, 0, stream>>>(
+            out_size, in_data, num, channels, in_height, in_width, out_height,
+            out_width, pad_top, pad_left, out_data);
+      } else if (mode == "edge") {
+        Pad2DEdgeNCHW<T><<<grid, block, 0, stream>>>(
+            out_size, in_data, num, channels, in_height, in_width, out_height,
+            out_width, pad_top, pad_left, out_data);
+      } else {
+        Pad2DConstNCHW<T><<<grid, block, 0, stream>>>(
+            out_size, in_data, num, channels, in_height, in_width, out_height,
+            out_width, pad_top, pad_left, value, out_data);
+      }
+    } else {
+      const int channels = in_dims[3];
+      const int in_height = in_dims[1];
+      const int in_width = in_dims[2];
+      const int out_height = out_dims[1];
+      const int out_width = out_dims[2];
+      if (mode == "reflect") {
+        Pad2DReflectNHWC<T><<<grid, block, 0, stream>>>(
+            out_size, in_data, num, channels, in_height, in_width, out_height,
+            out_width, pad_top, pad_left, out_data);
+      } else if (mode == "edge") {
+        Pad2DEdgeNHWC<T><<<grid, block, 0, stream>>>(
+            out_size, in_data, num, channels, in_height, in_width, out_height,
+            out_width, pad_top, pad_left, out_data);
+      } else {
+        Pad2DConstNHWC<T><<<grid, block, 0, stream>>>(
+            out_size, in_data, num, channels, in_height, in_width, out_height,
+            out_width, pad_top, pad_left, value, out_data);
+      }
+    }
+  }
+};
+
+template <typename T>
+class Pad2dGradCUDAKernel : public framework::OpKernel<T> {
+ public:
+  void Compute(const framework::ExecutionContext& context) const override {
+    auto pads = context.Attr<std::vector<int>>("paddings");
+    auto mode = context.Attr<std::string>("mode");
+    auto data_format = context.Attr<std::string>("data_format");
+    auto* d_out = context.Input<Tensor>(framework::GradVarName("Out"));
+    auto* d_in = context.Output<Tensor>(framework::GradVarName("X"));
+    auto d_in_dims = d_in->dims();
+    auto d_out_dims = d_out->dims();
+    const T* d_out_data = d_out->data<T>();
+    T* d_in_data = d_in->mutable_data<T>(context.GetPlace());
+
+    math::SetConstant<platform::CUDADeviceContext, T> set_zero;
+    set_zero(context.template device_context<platform::CUDADeviceContext>(),
+             d_in, static_cast<T>(0));
+
+    const int pad_top = pads[0];
+    const int pad_left = pads[2];
+    const int num = d_in_dims[0];
+
+    auto stream = context.cuda_device_context().stream();
+    int block = PADDLE_CUDA_NUM_THREADS;
+    const int out_size = d_out->numel();
+    const int in_size = d_in->numel();
+    int grid = (out_size + block - 1) / block;
+
+    if (data_format == "NCHW") {
+      const int channels = d_in_dims[1];
+      const int in_height = d_in_dims[2];
+      const int in_width = d_in_dims[3];
+      const int out_height = d_out_dims[2];
+      const int out_width = d_out_dims[3];
+      if (mode == "reflect") {
+        Pad2DGradReflectNCHW<T><<<grid, block, 0, stream>>>(
+            out_size, d_in_data, num, channels, in_height, in_width, out_height,
+            out_width, pad_top, pad_left, d_out_data);
+      } else if (mode == "edge") {
+        Pad2DGradEdgeNCHW<T><<<grid, block, 0, stream>>>(
+            out_size, d_in_data, num, channels, in_height, in_width, out_height,
+            out_width, pad_top, pad_left, d_out_data);
+      } else {
+        grid = (in_size + block - 1) / block;
+        Pad2DGradConstNCHW<T><<<grid, block, 0, stream>>>(
+            in_size, d_in_data, num, channels, in_height, in_width, out_height,
+            out_width, pad_top, pad_left, d_out_data);
+      }
+    } else {
+      const int channels = d_in_dims[3];
+      const int in_height = d_in_dims[1];
+      const int in_width = d_in_dims[2];
+      const int out_height = d_out_dims[1];
+      const int out_width = d_out_dims[2];
+      if (mode == "reflect") {
+        Pad2DGradReflectNHWC<T><<<grid, block, 0, stream>>>(
+            out_size, d_in_data, num, channels, in_height, in_width, out_height,
+            out_width, pad_top, pad_left, d_out_data);
+      } else if (mode == "edge") {
+        Pad2DGradEdgeNHWC<T><<<grid, block, 0, stream>>>(
+            out_size, d_in_data, num, channels, in_height, in_width, out_height,
+            out_width, pad_top, pad_left, d_out_data);
+      } else {
+        grid = (in_size + block - 1) / block;
+        Pad2DGradConstNHWC<T><<<grid, block, 0, stream>>>(
+            in_size, d_in_data, num, channels, in_height, in_width, out_height,
+            out_width, pad_top, pad_left, d_out_data);
+      }
+    }
+  }
+};
+
+}  // namespace operators
+}  // namespace paddle
+
+namespace ops = paddle::operators;
+REGISTER_OP_CUDA_KERNEL(pad2d, ops::Pad2dCUDAKernel<float>);
+REGISTER_OP_CUDA_KERNEL(pad2d_grad, ops::Pad2dGradCUDAKernel<float>);
diff --git a/python/paddle/fluid/layers/nn.py b/python/paddle/fluid/layers/nn.py
index 8ef7444a1a3b8ffd38c177ad943c008bd0aaf084..0ecfc958a3b89c85ef00574d630042d410c3fa0a 100644
--- a/python/paddle/fluid/layers/nn.py
+++ b/python/paddle/fluid/layers/nn.py
@@ -109,6 +109,7 @@ __all__ = [
     'flatten',
     'sequence_mask',
     'stack',
+    'pad2d',
     'unstack',
 ]
 
@@ -5614,6 +5615,94 @@ def rank_loss(label, left, right, name=None):
     return out
 
 
+def pad2d(input,
+          paddings=[0, 0, 0, 0],
+          mode='constant',
+          pad_value=0.0,
+          data_format="NCHW",
+          name=None):
+    """
+    Pad 2-d images accordding to 'paddings' and 'mode'.
+    If mode is 'reflect', paddings[0] and paddings[1] must be no greater
+    than height-1. And the width dimension has the same condition.
+
+    Example:
+
+      Given that X is a channel of image from input:
+      
+      X = [[1, 2, 3],
+           [4, 5, 6]]
+      
+      Case 0:
+      
+        paddings = [0, 1, 2, 3],
+        mode = 'constant'
+        pad_value = 0
+        
+        Out = [[0, 0, 1, 2, 3, 0, 0, 0]
+               [0, 0, 4, 5, 6, 0, 0, 0]
+               [0, 0, 0, 0, 0, 0, 0, 0]]
+      
+      Case 1:
+      
+        paddings = [0, 1, 2, 1],
+        mode = 'reflect'
+        
+        Out = [[3, 2, 1, 2, 3, 2]
+               [6, 5, 4, 5, 6, 5]
+               [3, 2, 1, 2, 3, 2]]
+        
+      Case 2:
+      
+        paddings = [0, 1, 2, 1],
+        mode = 'edge'
+        
+        Out = [[1, 1, 1, 2, 3, 3]
+               [4, 4, 4, 5, 6, 6]
+               [4, 4, 4, 5, 6, 6]]
+    
+  
+    Args:
+        input (Variable): The input image with [N, C, H, W] format or [N, H, W, C] format.
+        paddings (tuple|list): The padding size. If padding is a tuple, it must
+            contain four integers, (padding_top, padding_bottom, padding_left, padding_right).
+            Default: padding = [0, 0, 0, 0].
+        mode (str): Three modes: constant(default), reflect, edge. Default: constant
+        pad_value (float32): The value to fill the padded areas in constant mode. Default: 0
+        data_format (str): An optional string from: "NHWC", "NCHW". Specify the data format of
+                           the input data.
+                           Default: "NCHW"
+        name (str|None): A name for this layer(optional). If set None, the layer
+            will be named automatically.
+
+    Returns:
+        Variable: The tensor variable padded accordding to paddings and mode.
+
+
+    Examples:
+        .. code-block:: python
+
+          data = fluid.layers.data(name='data', shape=[3, 32, 32], dtype='float32')
+          result = fluid.layers.pad2d(input=data, padding=[1,2,3,4], mode='reflect')
+    """
+
+    helper = LayerHelper('pad2d', **locals())
+    dtype = helper.input_dtype(input_param_name='input')
+    out = helper.create_tmp_variable(dtype)
+    helper.append_op(
+        type='pad2d',
+        inputs={'X': input},
+        outputs={"Out": out},
+        attrs={
+            'paddings': paddings,
+            'mode': mode,
+            'pad_value': pad_value,
+            'data_frmat': data_format
+        })
+
+    return out
+
+
 def prelu(x, mode, param_attr=None, name=None):
     """
     Equation:
@@ -5628,8 +5717,8 @@ def prelu(x, mode, param_attr=None, name=None):
 		       all: all elements share same weight
  		       channel:elements in a channel share same weight
  		       element:each element has a weight
-	  name(str|None): A name for this layer(optional). If set None, the layer
-                        will be named automatically.
+	name(str|None): A name for this layer(optional). If set None, the layer
+                        will be named automatically. 
 
     Returns:
         Variable: The output tensor with the same shape as input.
diff --git a/python/paddle/fluid/tests/unittests/test_layers.py b/python/paddle/fluid/tests/unittests/test_layers.py
index f2fccd5d76777b183823c684d5e5ace317925bb0..ecdf32524afb1357b192ce14674b7073972dee9f 100644
--- a/python/paddle/fluid/tests/unittests/test_layers.py
+++ b/python/paddle/fluid/tests/unittests/test_layers.py
@@ -521,6 +521,20 @@ class TestBook(unittest.TestCase):
             self.assertIsNotNone(out)
         print(str(program))
 
+    def test_pad2d(self):
+        program = Program()
+        with program_guard(program):
+            input = layers.data(
+                name="input", shape=[3, 100, 100], dtype="float32")
+            out = layers.pad2d(
+                input,
+                paddings=[1, 2, 3, 4],
+                mode='reflect',
+                data_format='NCHW',
+                name="shape")
+            self.assertIsNotNone(out)
+        print(str(program))
+
     def test_prelu(self):
         program = Program()
         with program_guard(program):
diff --git a/python/paddle/fluid/tests/unittests/test_pad2d_op.py b/python/paddle/fluid/tests/unittests/test_pad2d_op.py
new file mode 100644
index 0000000000000000000000000000000000000000..728b8c181a4410d7df7f304bcc8d2816e91ea6d8
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_pad2d_op.py
@@ -0,0 +1,102 @@
+#   Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+import numpy as np
+from op_test import OpTest
+
+
+class TestPad2dOp(OpTest):
+    def setUp(self):
+        self.pad_value = 0.0
+        self.initTestCase()
+        self.op_type = "pad2d"
+        self.inputs = {'X': np.random.random(self.shape).astype("float32"), }
+        self.attrs = {}
+        self.attrs['paddings'] = np.array(self.paddings).flatten()
+        self.attrs['pad_value'] = self.pad_value
+        self.attrs['mode'] = self.mode
+        self.attrs['data_format'] = self.data_format
+        if self.data_format == "NCHW":
+            paddings = [(0, 0), (0, 0), (self.paddings[0], self.paddings[1]),
+                        (self.paddings[2], self.paddings[3])]
+        else:
+            paddings = [(0, 0), (self.paddings[0], self.paddings[1]),
+                        (self.paddings[2], self.paddings[3]), (0, 0)]
+        if self.mode == "constant":
+            out = np.pad(self.inputs['X'],
+                         paddings,
+                         mode=self.mode,
+                         constant_values=self.pad_value)
+        else:
+            out = np.pad(self.inputs['X'], paddings, mode=self.mode)
+        self.outputs = {'Out': out}
+
+    def test_check_output(self):
+        self.check_output()
+
+    def test_check_grad_normal(self):
+        self.check_grad(['X'], 'Out', max_relative_error=0.006)
+
+    def initTestCase(self):
+        self.shape = (2, 3, 4, 4)
+        self.paddings = [0, 1, 2, 3]
+        self.mode = "constant"
+        self.data_format = "NCHW"
+        self.pad_value = 0.0
+
+
+class TestCase1(TestPad2dOp):
+    def initTestCase(self):
+        self.shape = (2, 3, 4, 4)
+        self.paddings = [0, 1, 2, 3]
+        self.mode = "reflect"
+        self.data_format = "NCHW"
+
+
+class TestCase2(TestPad2dOp):
+    def initTestCase(self):
+        self.shape = (2, 3, 4, 4)
+        self.paddings = [0, 1, 2, 3]
+        self.mode = "edge"
+        self.data_format = "NCHW"
+
+
+class TestCase3(TestPad2dOp):
+    def initTestCase(self):
+        self.shape = (2, 4, 4, 2)
+        self.paddings = [0, 1, 2, 3]
+        self.mode = "reflect"
+        self.data_format = "NHWC"
+
+
+class TestCase4(TestPad2dOp):
+    def initTestCase(self):
+        self.shape = (2, 4, 4, 2)
+        self.paddings = [0, 1, 2, 3]
+        self.mode = "edge"
+        self.data_format = "NHWC"
+
+
+class TestCase5(TestPad2dOp):
+    def initTestCase(self):
+        self.shape = (2, 4, 4, 2)
+        self.paddings = [0, 1, 2, 3]
+        self.mode = "constant"
+        self.pad_value = 1.2
+        self.data_format = "NHWC"
+
+
+if __name__ == '__main__':
+    unittest.main()