Added REFLECT and SYMMETRIC modes to the pad() operator.

230f1425 · Wiktor Adamski · 19bf36b7 · 230f1425 · 230f1425 · 230f1425
8 changed files
--- a/mace/ops/opencl/cl/pad.cl
+++ b/mace/ops/opencl/cl/pad.cl
@@ -4,7 +4,9 @@ __kernel void pad(OUT_OF_RANGE_PARAMS
                  GLOBAL_WORK_GROUP_SIZE_DIM3
                  __read_only image2d_t input,
                  __write_only image2d_t output,
+#if PAD_TYPE == 0
                  __private const float constant_value,
+#endif
                  __private const int input_height,
                  __private const int input_width,
                  __private const int output_height,
@@ -26,6 +28,7 @@ __kernel void pad(OUT_OF_RANGE_PARAMS
 #endif
  const int width = global_size_dim1;

+#if PAD_TYPE == 0
  DATA_TYPE4 data = constant_value;
  if ((height_padding <= height_idx && height_idx < input_padded_height) &&
      (width_padding <= width_idx && width_idx < input_padded_width)) {
@@ -37,6 +40,59 @@ __kernel void pad(OUT_OF_RANGE_PARAMS
                                    width_idx - width_padding),
                              in_hb_idx));
  }
+#elif PAD_TYPE == 1 || PAD_TYPE == 2
+  const int diff_left = width_padding - width_idx;
+  int w;
+
+  if (diff_left > 0) {
+#if PAD_TYPE == 1
+    w = diff_left;
+#else
+    w = diff_left - 1;
+#endif
+  } else {
+    const int diff_right = width_idx - input_padded_width;
+
+    if (diff_right >= 0) {
+#if PAD_TYPE == 1
+      w = input_width - diff_right - 2;
+#else
+      w = input_width - diff_right - 1;
+#endif
+
+    } else {
+      w = -diff_left;
+    }
+  }
+
+  const int diff_up = height_padding - height_idx;
+  int h;
+
+  if (diff_up > 0) {
+#if PAD_TYPE == 1
+    h = diff_up;
+#else
+    h = diff_up - 1;
+#endif
+  } else {
+    const int diff_down = height_idx - input_padded_height;
+
+    if (diff_down >= 0) {
+#if PAD_TYPE == 1
+      h = input_height - diff_down - 2;
+#else
+      h = input_height - diff_down - 1;
+#endif
+
+    } else {
+      h = -diff_up;
+    }
+  }
+
+  const int in_hb_idx = mad24(batch_idx, input_height, h);
+  const DATA_TYPE4 data = READ_IMAGET(input, SAMPLER,
+      (int2)(mad24(chan_blk_idx, input_width, w), in_hb_idx));
+#endif

  const int pos = mad24(chan_blk_idx, width, width_idx);


--- a/mace/ops/opencl/image/pad.h
+++ b/mace/ops/opencl/image/pad.h
@@ -23,6 +23,7 @@

 #include "mace/core/op_context.h"
 #include "mace/core/tensor.h"
+#include "mace/ops/pad.h"
 #include "mace/ops/opencl/helper.h"

 namespace mace {
@@ -33,9 +34,10 @@ namespace image {
 template <typename T>
 class PadKernel : public OpenCLPadKernel {
 public:
-  PadKernel(const std::vector<int> &paddings,
+  PadKernel(const PadType type,
+            const std::vector<int> &paddings,
            const float constant_value)
-      : paddings_(paddings), constant_value_(constant_value) {}
+      : type_(type), paddings_(paddings), constant_value_(constant_value) {}

  MaceStatus Compute(
      OpContext *context,
@@ -43,6 +45,7 @@ class PadKernel : public OpenCLPadKernel {
      Tensor *output) override;

 private:
+  PadType type_;
  std::vector<int> paddings_;
  float constant_value_;
  cl::Kernel kernel_;
@@ -60,7 +63,23 @@ MaceStatus PadKernel<T>::Compute(
  MACE_CHECK((this->paddings_[0] == 0) && (this->paddings_[1] == 0) &&
      (this->paddings_[6] == 0) && (this->paddings_[7] == 0))
    << "Mace only support height/width dimension now";
+  for (int i = 2; i <= 5; ++i) {
+    MACE_CHECK(paddings_[i] >= 0);
+  }
  auto input_shape = input->shape();
+  if (type_ == PadType::REFLECT) {
+    MACE_CHECK(paddings_[2] < input_shape[1] &&
+               paddings_[3] < input_shape[1] &&
+               paddings_[4] < input_shape[2] &&
+               paddings_[5] < input_shape[2]);
+  } else if (type_ == PadType::SYMMETRIC) {
+    MACE_CHECK(paddings_[2] <= input_shape[1] &&
+               paddings_[3] <= input_shape[1] &&
+               paddings_[4] <= input_shape[2] &&
+               paddings_[5] <= input_shape[2]);
+  } else {
+    MACE_CHECK(type_ == PadType::CONSTANT);
+  }
  std::vector<index_t> output_shape = {
      input_shape[0] + this->paddings_[0] + this->paddings_[1],
      input_shape[1] + this->paddings_[2] + this->paddings_[3],
@@ -92,6 +111,7 @@ MaceStatus PadKernel<T>::Compute(
    auto dt = DataTypeToEnum<T>::value;
    built_options.emplace("-DDATA_TYPE=" + DtToCLDt(dt));
    built_options.emplace("-DCMD_DATA_TYPE=" + DtToCLCMDDt(dt));
+    built_options.emplace(MakeString("-DPAD_TYPE=", type_));
    MACE_RETURN_IF_ERROR(runtime->BuildKernel("pad", kernel_name,
                                              built_options, &kernel_));

@@ -110,7 +130,9 @@ MaceStatus PadKernel<T>::Compute(
    MACE_SET_3D_GWS_ARGS(kernel_, gws);
    kernel_.setArg(idx++, *(input->opencl_image()));
    kernel_.setArg(idx++, *(output->opencl_image()));
-    kernel_.setArg(idx++, this->constant_value_);
+    if (type_ == PadType::CONSTANT) {
+      kernel_.setArg(idx++, this->constant_value_);
+    }
    kernel_.setArg(idx++, static_cast<int32_t>(input_shape[1]));
    kernel_.setArg(idx++, static_cast<int32_t>(input_shape[2]));
    kernel_.setArg(idx++, static_cast<int32_t>(output_shape[1]));

--- a/mace/ops/pad.cc
+++ b/mace/ops/pad.cc
@@ -16,6 +16,7 @@
 #include <memory>

 #include "mace/core/operator.h"
+#include "mace/ops/pad.h"
 #ifdef MACE_ENABLE_OPENCL
 #include "mace/ops/opencl/image/pad.h"
 #endif  // MACE_ENABLE_OPENCL
@@ -31,6 +32,9 @@ class PadOp<DeviceType::CPU, T> : public Operation {
 public:
  explicit PadOp(OpConstructContext *context)
      : Operation(context),
+        type_(
+            static_cast<PadType>(Operation::GetOptionalArg<int>(
+                "pad_type", static_cast<int>(PadType::CONSTANT)))),
        paddings_(Operation::GetRepeatedArgs<int>("paddings")),
        constant_value_(Operation::GetOptionalArg<float>(
            "constant_value", 0.0)) {
@@ -49,6 +53,15 @@ class PadOp<DeviceType::CPU, T> : public Operation {
    MACE_CHECK(
        this->paddings_.size() == static_cast<size_t>(input->dim_size()) * 2);
    auto input_shape = input->shape();
+    for (size_t i = 0; i < paddings_.size(); ++i) {
+      if (type_ == PadType::REFLECT) {
+        MACE_CHECK(paddings_[i] < input_shape[i / 2]);
+
+      } else if (type_ == PadType::SYMMETRIC) {
+        MACE_CHECK(paddings_[i] <= input_shape[i / 2]);
+      }
+      MACE_CHECK(paddings_[i] >= 0);
+    }
    MACE_RETURN_IF_ERROR(output->Resize({input_shape[0] + this->paddings_[0]
                                             + this->paddings_[1],
                                         input_shape[1] + this->paddings_[2]
@@ -62,32 +75,97 @@ class PadOp<DeviceType::CPU, T> : public Operation {
    Tensor::MappingGuard output_guard(output);
    auto input_ptr = input->data<T>();
    T *output_ptr = output->mutable_data<T>();
-    std::fill(output_ptr, output_ptr + output->size(), this->constant_value_);

    const index_t batch = input->dim(0);
    const index_t channel = input->dim(1);
    const index_t height = input->dim(2);
    const index_t width = input->dim(3);
+
+    if (type_ == PadType::CONSTANT) {
+      std::fill(output_ptr, output_ptr + output->size(), this->constant_value_);
+
 #pragma omp parallel for collapse(3)
-    for (index_t b = 0; b < batch; ++b) {
-      for (index_t c = 0; c < channel; ++c) {
-        for (index_t h = 0; h < height; ++h) {
-          const index_t in_offset = (((b * channel + c) * height) + h) * width;
-          const index_t out_offset = (((b + this->paddings_[0]) * output->dim(1)
-              + (c + this->paddings_[2])) * output->dim(2)
-              + (h + this->paddings_[4])) * output->dim(3)
-              + this->paddings_[6];
-          memcpy(output_ptr + out_offset,
-                 input_ptr + in_offset,
-                 width * sizeof(T));
+      for (index_t b = 0; b < batch; ++b) {
+        for (index_t c = 0; c < channel; ++c) {
+          for (index_t h = 0; h < height; ++h) {
+            const index_t in_offset = (((b * channel + c) * height) +
+                                      h) * width;
+            const index_t out_offset =
+                  (((b + this->paddings_[0]) * output->dim(1)
+                + (c + this->paddings_[2])) * output->dim(2)
+                + (h + this->paddings_[4])) * output->dim(3)
+                + this->paddings_[6];
+            memcpy(output_ptr + out_offset,
+                   input_ptr + in_offset,
+                   width * sizeof(T));
+          }
        }
      }
+    } else if (type_ == PadType::REFLECT || type_ == PadType::SYMMETRIC) {
+      const index_t o_batch   = output->dim(0);
+      const index_t o_channel = output->dim(1);
+      const index_t o_height  = output->dim(2);
+      const index_t o_width   = output->dim(3);
+      const int l_add = type_ == PadType::REFLECT ?  0 : -1;
+      const int r_add = type_ == PadType::REFLECT ? -2 : -1;
+
+#pragma omp parallel for collapse(1)
+      for (index_t h = 0; h < o_height; ++h) {
+        index_t h_in = get_src_idx(h, height, paddings_[4], l_add, r_add);
+
+        for (index_t b = 0; b < o_batch; ++b) {
+          index_t b_in = get_src_idx(b, batch, paddings_[0], l_add, r_add);
+
+          for (index_t c = 0; c < o_channel; ++c) {
+            index_t c_in = get_src_idx(c, channel, paddings_[2], l_add, r_add);
+            const index_t in_offset = (((b_in * channel + c_in) * height) +
+                                      h_in) * width;
+            index_t out_offset = (((b * o_channel + c) * o_height) +
+                                 h) * o_width;
+
+            for (index_t i = 0, j = paddings_[6] + l_add;
+                 i < paddings_[6]; ++i, --j) {
+              output_ptr[out_offset++] = input_ptr[in_offset + j];
+            }
+            memcpy(output_ptr + out_offset, input_ptr + in_offset,
+                   width * sizeof(T));
+            out_offset += width;
+            for (index_t i = 0, j = width + r_add; i < paddings_[7]; ++i, --j) {
+              output_ptr[out_offset++] = input_ptr[in_offset + j];
+            }
+          }
+        }
+      }
+    } else {
+      LOG(FATAL) << "Pad op doesn't support type " << type_;
    }

    return MaceStatus::MACE_SUCCESS;
  }

 private:
+  int get_src_idx(int out, int in_size, int pad, int l_add, int r_add) {
+    const int diff_left = pad - out;  // > 0: `out` is in the leading pad
+    int in;
+    // Maps index `out` on a padded axis to the input index it mirrors.
+    if (diff_left > 0) {
+      in = diff_left + l_add;  // l_add: 0 for REFLECT, -1 for SYMMETRIC
+
+    } else {
+      const int diff_right = out - (in_size + pad);
+      // diff_right >= 0: `out` is in the trailing pad.
+      if (diff_right >= 0) {
+        in = in_size - diff_right + r_add;  // r_add: -2 REFLECT, -1 SYMMETRIC
+
+      } else {
+        in = -diff_left;  // interior: shift back by the leading pad
+      }
+    }
+
+    return in;
+  }
+
+  PadType type_;
  std::vector<int> paddings_;
  float constant_value_;
 };
@@ -98,11 +176,14 @@ class PadOp<DeviceType::GPU, T> : public Operation {
 public:
  explicit PadOp(OpConstructContext *context)
      : Operation(context) {
+    PadType type = static_cast<PadType>(Operation::GetOptionalArg<int>(
+      "pad_type", static_cast<int>(PadType::CONSTANT)));
    std::vector<int> paddings = Operation::GetRepeatedArgs<int>("paddings");
    float constant_value = Operation::GetOptionalArg<float>(
        "constant_value", 0.0);
    if (context->device()->gpu_runtime()->UseImageMemory()) {
-      kernel_.reset(new opencl::image::PadKernel<T>(paddings, constant_value));
+      kernel_.reset(new opencl::image::PadKernel<T>(
+          type, paddings, constant_value));
    } else {
      MACE_NOT_IMPLEMENTED;
    }

--- a/mace/ops/pad.h
+++ b/mace/ops/pad.h
+// Copyright 2018 The MACE Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef MACE_OPS_PAD_H_
+#define MACE_OPS_PAD_H_
+
+namespace mace {
+namespace ops {
+
+enum PadType {    // Border-fill strategy of the Pad op.
+  CONSTANT  = 0,  // Fill with the op's `constant_value`.
+  REFLECT   = 1,  // Mirror the input, excluding the edge element.
+  SYMMETRIC = 2,  // Mirror the input, including the edge element.
+};
+
+}  // namespace ops
+}  // namespace mace
+
+#endif  // MACE_OPS_PAD_H_
--- a/mace/ops/pad_benchmark.cc
+++ b/mace/ops/pad_benchmark.cc
@@ -14,6 +14,7 @@

 #include "mace/core/testing/test_benchmark.h"
 #include "mace/ops/ops_test_util.h"
+#include "mace/ops/pad.h"

 namespace mace {
 namespace ops {
@@ -22,7 +23,7 @@ namespace test {
 namespace {
 template <DeviceType D, typename T>
 void Pad(int iters, int batch, int height,
-         int width, int channels, int pad) {
+         int width, int channels, int pad, int pad_type) {
  mace::testing::StopTiming();

  OpsTestNet net;
@@ -35,6 +36,7 @@ void Pad(int iters, int batch, int height,
      .Input("Input")
      .Output("Output")
      .AddIntsArg("paddings", paddings)
+      .AddIntArg("pad_type", pad_type)
      .AddFloatArg("constant_value", 1.0)
      .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
      .Finalize(net.NewOperatorDef());
@@ -53,19 +55,26 @@ void Pad(int iters, int batch, int height,
 }
 }  // namespace

-#define MACE_BM_PAD_MACRO(N, H, W, C, PAD, TYPE, DEVICE)                     \
-  static void MACE_BM_PAD_##N##_##H##_##W##_##C##_##PAD##_##TYPE##_##DEVICE( \
+#define MACE_BM_PAD_MACRO(N, H, W, C, PAD, MODE, TYPE, DEVICE)               \
+  static void MACE_BM_PAD_##N##_##H##_##W##_##C##_##PAD##_##MODE##_##TYPE    \
+              ##_##DEVICE(                                                   \
      int iters) {                                                           \
    const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W;         \
    mace::testing::BytesProcessed(tot *(sizeof(TYPE)));                      \
-    Pad<DEVICE, TYPE>(iters, N, H, W, C, PAD);                               \
+    Pad<DEVICE, TYPE>(iters, N, H, W, C, PAD, MODE);                         \
  }                                                                          \
-  MACE_BENCHMARK(MACE_BM_PAD_##N##_##H##_##W##_##C##_##PAD##_##TYPE##_##DEVICE)
+  MACE_BENCHMARK(MACE_BM_PAD_##N##_##H##_##W##_##C##_##PAD##_##MODE##_##TYPE \
+                 ##_##DEVICE)

-#define MACE_BM_PAD(N, H, W, C, PAD)                 \
-  MACE_BM_PAD_MACRO(N, H, W, C, PAD, float, CPU);    \
-  MACE_BM_PAD_MACRO(N, H, W, C, PAD, float, GPU);    \
-  MACE_BM_PAD_MACRO(N, H, W, C, PAD, half, GPU);
+#define MACE_BM_PAD_MODE(N, H, W, C, PAD, MODE)            \
+  MACE_BM_PAD_MACRO(N, H, W, C, PAD, MODE, float, CPU);    \
+  MACE_BM_PAD_MACRO(N, H, W, C, PAD, MODE, float, GPU);    \
+  MACE_BM_PAD_MACRO(N, H, W, C, PAD, MODE, half, GPU);
+// MACE_BM_PAD: benchmark all three pad modes on each type/device combo.
+#define MACE_BM_PAD(N, H, W, C, PAD)              \
+  MACE_BM_PAD_MODE(N, H, W, C, PAD, CONSTANT);    \
+  MACE_BM_PAD_MODE(N, H, W, C, PAD, REFLECT);     \
+  MACE_BM_PAD_MODE(N, H, W, C, PAD, SYMMETRIC);

 MACE_BM_PAD(1, 512, 512, 1, 2);
 MACE_BM_PAD(1, 112, 112, 64, 1);

--- a/mace/ops/pad_test.cc
+++ b/mace/ops/pad_test.cc
@@ -13,6 +13,7 @@
 // limitations under the License.

 #include "mace/ops/ops_test_util.h"
+#include "mace/ops/pad.h"

 namespace mace {
 namespace ops {
@@ -109,7 +110,7 @@ TEST_F(PadTest, ComplexCPU) {
 namespace {
 template <typename T>
 void Complex(const std::vector<index_t> &input_shape,
-             const std::vector<int> &paddings) {
+             const std::vector<int> &paddings, const int pad_type) {
  // Construct graph
  OpsTestNet net;

@@ -122,6 +123,7 @@ void Complex(const std::vector<index_t> &input_shape,
      .Input("TInput")
      .Output("TOutput")
      .AddIntsArg("paddings", paddings)
+      .AddIntArg("pad_type", pad_type)
      .AddFloatArg("constant_value", 1.0)
      .AddIntArg("data_format", DataFormat::NHWC)
      .Finalize(net.NewOperatorDef());
@@ -138,6 +140,7 @@ void Complex(const std::vector<index_t> &input_shape,
      .Input("Input")
      .Output("Output")
      .AddIntsArg("paddings", paddings)
+      .AddIntArg("pad_type", pad_type)
      .AddFloatArg("constant_value", 1.0)
      .AddIntArg("data_format", DataFormat::NHWC)
      .Finalize(net.NewOperatorDef());
@@ -156,15 +159,311 @@ void Complex(const std::vector<index_t> &input_shape,
 }  // namespace

 TEST_F(PadTest, ComplexFloat) {
-  Complex<float>({1, 32, 32, 4}, {0, 0, 2, 2, 1, 1, 0, 0});
-  Complex<float>({1, 31, 37, 16}, {0, 0, 2, 0, 1, 0, 0, 0});
-  Complex<float>({1, 128, 128, 32}, {0, 0, 0, 1, 0, 2, 0, 0});
+  for (int i = PadType::CONSTANT; i <= PadType::SYMMETRIC; i++) {
+    Complex<float>({1, 32, 32, 4}, {0, 0, 2, 2, 1, 1, 0, 0}, i);  // i: pad_type
+    Complex<float>({1, 31, 37, 16}, {0, 0, 2, 0, 1, 0, 0, 0}, i);
+    Complex<float>({1, 128, 128, 32}, {0, 0, 0, 1, 0, 2, 0, 0}, i);
+  }
 }

 TEST_F(PadTest, ComplexHalf) {
-  Complex<half>({1, 32, 32, 4}, {0, 0, 2, 2, 1, 1, 0, 0});
-  Complex<half>({1, 31, 37, 16}, {0, 0, 2, 0, 1, 0, 0, 0});
-  Complex<half>({1, 128, 128, 32}, {0, 0, 0, 1, 0, 2, 0, 0});
+  for (int i = PadType::CONSTANT; i <= PadType::SYMMETRIC; i++) {
+    Complex<half>({1, 32, 32, 4}, {0, 0, 2, 2, 1, 1, 0, 0}, i);  // i: pad_type
+    Complex<half>({1, 31, 37, 16}, {0, 0, 2, 0, 1, 0, 0, 0}, i);
+    Complex<half>({1, 128, 128, 32}, {0, 0, 0, 1, 0, 2, 0, 0}, i);
+  }
+}
+
+namespace {
+template <DeviceType D, typename T>
+void Result(const std::vector<index_t> &input_shape,
+            const std::vector<float> &input_data,
+            const std::vector<index_t> &expected_shape,
+            const std::vector<float> &expected_data,
+            const std::vector<int> &paddings,
+            const PadType pad_type) {
+  // Runs Pad on device D and compares the NHWC output with expected_data.
+  OpsTestNet net;
+  std::string input("Input");
+  std::string t_input(input);
+  std::string output("Output");
+  std::string t_output(output);
+
+  // Add input data
+  net.AddInputFromArray<D, float>(input, input_shape, input_data);
+  // CPU runs the op on an NCHW copy; other devices use the NHWC tensor.
+  if (D == DeviceType::CPU) {
+    t_input = "TInput";
+    t_output = "TOutput";
+    net.TransformDataFormat<DeviceType::CPU, T>(input, NHWC, t_input, NCHW);
+  }
+  // NOTE(review): no "T" arg here; half may not reach the GPU op - confirm.
+  OpDefBuilder("Pad", "PadTest")
+  .Input(t_input)
+  .Output(t_output)
+  .AddIntsArg("paddings", paddings)
+  .AddIntArg("pad_type", static_cast<int>(pad_type))
+  .AddIntArg("data_format", DataFormat::NHWC)
+  .Finalize(net.NewOperatorDef());
+
+  // Run
+  net.RunOp(D);
+  // Bring the CPU result back to NHWC before comparing.
+  if (D == DeviceType::CPU) {
+    net.TransformDataFormat<DeviceType::CPU, T>(t_output, NCHW, output, NHWC);
+  }
+
+  auto actual = net.GetTensor(output.c_str());
+  auto expected = net.CreateTensor<float>(expected_shape, expected_data);
+
+  ExpectTensorNear<float>(*expected, *actual, 1e-5);
+}
+}  // namespace
+
+TEST_F(PadTest, ReflectCPU) {  // REFLECT on all four axes, CPU only
+  std::vector<index_t> input_shape{2, 2, 2, 2};  // NHWC
+  int size = std::accumulate(input_shape.begin(), input_shape.end(),
+                             1, std::multiplies<index_t>());
+  std::vector<float> input_data;
+  std::vector<index_t> expected_shape{4, 4, 4, 4};  // 2 + 1 + 1 per axis
+  std::vector<float> expected_data{
+          16, 15, 16, 15,
+          14, 13, 14, 13,
+          16, 15, 16, 15,
+          14, 13, 14, 13,
+
+          12, 11, 12, 11,
+          10,  9, 10,  9,
+          12, 11, 12, 11,
+          10,  9, 10,  9,
+
+          16, 15, 16, 15,
+          14, 13, 14, 13,
+          16, 15, 16, 15,
+          14, 13, 14, 13,
+
+          12, 11, 12, 11,
+          10,  9, 10,  9,
+          12, 11, 12, 11,
+          10,  9, 10,  9,
+
+
+           8,  7,  8,  7,
+           6,  5,  6,  5,
+           8,  7,  8,  7,
+           6,  5,  6,  5,
+
+           4,  3,  4,  3,
+           2,  1,  2,  1,
+           4,  3,  4,  3,
+           2,  1,  2,  1,
+
+           8,  7,  8,  7,
+           6,  5,  6,  5,
+           8,  7,  8,  7,
+           6,  5,  6,  5,
+
+           4,  3,  4,  3,
+           2,  1,  2,  1,
+           4,  3,  4,  3,
+           2,  1,  2,  1,
+
+
+          16, 15, 16, 15,
+          14, 13, 14, 13,
+          16, 15, 16, 15,
+          14, 13, 14, 13,
+
+          12, 11, 12, 11,
+          10,  9, 10,  9,
+          12, 11, 12, 11,
+          10,  9, 10,  9,
+
+          16, 15, 16, 15,
+          14, 13, 14, 13,
+          16, 15, 16, 15,
+          14, 13, 14, 13,
+
+          12, 11, 12, 11,
+          10,  9, 10,  9,
+          12, 11, 12, 11,
+          10,  9, 10,  9,
+
+
+           8,  7,  8,  7,
+           6,  5,  6,  5,
+           8,  7,  8,  7,
+           6,  5,  6,  5,
+
+           4,  3,  4,  3,
+           2,  1,  2,  1,
+           4,  3,  4,  3,
+           2,  1,  2,  1,
+
+           8,  7,  8,  7,
+           6,  5,  6,  5,
+           8,  7,  8,  7,
+           6,  5,  6,  5,
+
+           4,  3,  4,  3,
+           2,  1,  2,  1,
+           4,  3,  4,  3,
+           2,  1,  2,  1,
+  };
+  const std::vector<int> paddings{1, 1, 1, 1, 1, 1, 1, 1};  // 1 per side
+  // Fill the input with the values 1..size.
+  input_data.reserve(size);
+  for (int i = 1; i <= size; i++) {
+    input_data.push_back(i);
+  }
+
+  Result<DeviceType::CPU, float>(input_shape, input_data, expected_shape,
+      expected_data, paddings, PadType::REFLECT);
+}
+
+TEST_F(PadTest, SymmetricCPU) {  // SYMMETRIC on all four axes, CPU only
+  std::vector<index_t> input_shape{2, 2, 2, 2};  // NHWC
+  int size = std::accumulate(input_shape.begin(), input_shape.end(),
+                             1, std::multiplies<index_t>());
+  std::vector<float> input_data;
+  std::vector<index_t> expected_shape{4, 4, 4, 4};  // 2 + 1 + 1 per axis
+  std::vector<float> expected_data{
+           1,  1,  2,  2,
+           1,  1,  2,  2,
+           3,  3,  4,  4,
+           3,  3,  4,  4,
+
+           1,  1,  2,  2,
+           1,  1,  2,  2,
+           3,  3,  4,  4,
+           3,  3,  4,  4,
+
+           5,  5,  6,  6,
+           5,  5,  6,  6,
+           7,  7,  8,  8,
+           7,  7,  8,  8,
+
+           5,  5,  6,  6,
+           5,  5,  6,  6,
+           7,  7,  8,  8,
+           7,  7,  8,  8,
+
+
+           1,  1,  2,  2,
+           1,  1,  2,  2,
+           3,  3,  4,  4,
+           3,  3,  4,  4,
+
+           1,  1,  2,  2,
+           1,  1,  2,  2,
+           3,  3,  4,  4,
+           3,  3,  4,  4,
+
+           5,  5,  6,  6,
+           5,  5,  6,  6,
+           7,  7,  8,  8,
+           7,  7,  8,  8,
+
+           5,  5,  6,  6,
+           5,  5,  6,  6,
+           7,  7,  8,  8,
+           7,  7,  8,  8,
+
+
+           9,  9, 10, 10,
+           9,  9, 10, 10,
+          11, 11, 12, 12,
+          11, 11, 12, 12,
+
+           9,  9, 10, 10,
+           9,  9, 10, 10,
+          11, 11, 12, 12,
+          11, 11, 12, 12,
+
+          13, 13, 14, 14,
+          13, 13, 14, 14,
+          15, 15, 16, 16,
+          15, 15, 16, 16,
+
+          13, 13, 14, 14,
+          13, 13, 14, 14,
+          15, 15, 16, 16,
+          15, 15, 16, 16,
+
+
+           9,  9, 10, 10,
+           9,  9, 10, 10,
+          11, 11, 12, 12,
+          11, 11, 12, 12,
+
+           9,  9, 10, 10,
+           9,  9, 10, 10,
+          11, 11, 12, 12,
+          11, 11, 12, 12,
+
+          13, 13, 14, 14,
+          13, 13, 14, 14,
+          15, 15, 16, 16,
+          15, 15, 16, 16,
+
+          13, 13, 14, 14,
+          13, 13, 14, 14,
+          15, 15, 16, 16,
+          15, 15, 16, 16,
+  };
+  const std::vector<int> paddings{1, 1, 1, 1, 1, 1, 1, 1};  // 1 per side
+  // Fill the input with the values 1..size.
+  input_data.reserve(size);
+  for (int i = 1; i <= size; i++) {
+    input_data.push_back(i);
+  }
+
+  Result<DeviceType::CPU, float>(input_shape, input_data, expected_shape,
+      expected_data, paddings, PadType::SYMMETRIC);
+}
+
+TEST_F(PadTest, Result) {  // REFLECT/SYMMETRIC on H and W, CPU and GPU
+  std::vector<index_t> input_shape{1, 3, 4, 1};  // NHWC
+  int size = std::accumulate(input_shape.begin(), input_shape.end(),
+                             1, std::multiplies<index_t>());
+  std::vector<float> input_data;
+  std::vector<index_t> expected_shape{1, 6, 7, 1};  // H: 3+1+2, W: 4+3+0
+  std::vector<float> expected_reflect{
+     8,  7,  6, 5,  6,  7,  8,
+     4,  3,  2, 1,  2,  3,  4,
+     8,  7,  6, 5,  6,  7,  8,
+    12, 11, 10, 9, 10, 11, 12,
+     8,  7,  6, 5,  6,  7,  8,
+     4,  3,  2, 1,  2,  3,  4,
+  };
+  std::vector<float> expected_symmetric{
+     3,  2, 1, 1,  2,  3,  4,
+     3,  2, 1, 1,  2,  3,  4,
+     7,  6, 5, 5,  6,  7,  8,
+    11, 10, 9, 9, 10, 11, 12,
+    11, 10, 9, 9, 10, 11, 12,
+     7,  6, 5, 5,  6,  7,  8,
+  };
+  const std::vector<int> paddings{0, 0, 1, 2, 3, 0, 0, 0};  // H: 1/2, W: 3/0
+  // Fill the input with the values 1..size.
+  input_data.reserve(size);
+  for (int i = 1; i <= size; i++) {
+    input_data.push_back(i);
+  }
+
+  Result<DeviceType::CPU, float>(input_shape, input_data, expected_shape,
+      expected_reflect, paddings, PadType::REFLECT);
+  Result<DeviceType::GPU, float>(input_shape, input_data, expected_shape,
+      expected_reflect, paddings, PadType::REFLECT);
+  Result<DeviceType::GPU, half>(input_shape, input_data, expected_shape,
+      expected_reflect, paddings, PadType::REFLECT);
+
+  Result<DeviceType::CPU, float>(input_shape, input_data, expected_shape,
+      expected_symmetric, paddings, PadType::SYMMETRIC);
+  Result<DeviceType::GPU, float>(input_shape, input_data, expected_shape,
+      expected_symmetric, paddings, PadType::SYMMETRIC);
+  Result<DeviceType::GPU, half>(input_shape, input_data, expected_shape,
+      expected_symmetric, paddings, PadType::SYMMETRIC);
 }

 }  // namespace test

--- a/mace/python/tools/converter_tool/base_converter.py
+++ b/mace/python/tools/converter_tool/base_converter.py
@@ -89,6 +89,12 @@ class ReduceType(Enum):
    PROD = 3


+class PadType(Enum):  # values match PadType in mace/ops/pad.h
+    CONSTANT = 0  # fill with a constant value
+    REFLECT = 1  # mirror, edge element not repeated
+    SYMMETRIC = 2  # mirror, edge element repeated
+
+
 class FrameworkType(Enum):
    TENSORFLOW = 0
    CAFFE = 1
@@ -221,6 +227,7 @@ class MaceKeyword(object):
    mace_step_h_str = 'step_h'
    mace_step_w_str = 'step_w'
    mace_find_range_every_time = 'find_range_every_time'
+    mace_pad_type_str = 'pad_type'


 class TransformerRule(Enum):

--- a/mace/python/tools/converter_tool/tensorflow_converter.py
+++ b/mace/python/tools/converter_tool/tensorflow_converter.py
@@ -25,6 +25,7 @@ from mace.python.tools.converter_tool.base_converter import PoolingType
 from mace.python.tools.converter_tool.base_converter import PaddingMode
 from mace.python.tools.converter_tool.base_converter import ActivationType
 from mace.python.tools.converter_tool.base_converter import EltwiseType
+from mace.python.tools.converter_tool.base_converter import PadType
 from mace.python.tools.converter_tool.base_converter import FrameworkType
 from mace.python.tools.converter_tool.base_converter import ReduceType
 from mace.python.tools.converter_tool.base_converter import DataFormat
@@ -115,6 +116,7 @@ TFSupportedOps = [
    'FakeQuantWithMinMaxVars',
    'FloorDiv',
    'Sqrt',
+    'MirrorPad',
 ]

 TFOpType = Enum('TFOpType', [(op, op) for op in TFSupportedOps], type=str)
@@ -202,6 +204,12 @@ class TensorflowConverter(base_converter.ConverterInterface):
        TFOpType.LeakyRelu.name: ActivationType.LEAKYRELU,
    }

+    pad_type = {
+        'CONSTANT' : PadType.CONSTANT,
+        'REFLECT'  : PadType.REFLECT,
+        'SYMMETRIC': PadType.SYMMETRIC
+    }
+
    def __init__(self, option, src_model_file):
        self._op_converters = {
            TFOpType.Conv2D.name: self.convert_conv2d,
@@ -268,6 +276,7 @@ class TensorflowConverter(base_converter.ConverterInterface):
            TFOpType.FakeQuantWithMinMaxVars.name: self.convert_fake_quantize,
            TFOpType.FloorDiv.name: self.convert_elementwise,
            TFOpType.Sqrt.name: self.convert_elementwise,
+            TFOpType.MirrorPad.name: self.convert_pad,
        }
        self._option = option
        self._mace_net_def = mace_pb2.NetDef()
@@ -724,12 +733,21 @@ class TensorflowConverter(base_converter.ConverterInterface):
        paddings_arg.ints.extend(paddings_value)
        self._skip_tensor.add(tf_op.inputs[1].name)

-        if len(tf_op.inputs) == 3:
-            constant_value_arg = op.arg.add()
-            constant_value_arg.name = MaceKeyword.mace_constant_value_str
-            constant_value = tf_op.inputs[2].eval().astype(np.int32).flat[0]
-            constant_value_arg.i = constant_value
-            self._skip_tensor.add(tf_op.inputs[2].name)
+        pad_type_arg = op.arg.add()
+        pad_type_arg.name = MaceKeyword.mace_pad_type_str
+
+        if tf_op.type == TFOpType.Pad:
+            if len(tf_op.inputs) == 3:
+                constant_value_arg = op.arg.add()
+                constant_value_arg.name = MaceKeyword.mace_constant_value_str
+                constant_value = tf_op.inputs[2].eval().astype(np.int32).flat[0]
+                constant_value_arg.i = constant_value
+                self._skip_tensor.add(tf_op.inputs[2].name)
+
+            pad_type_arg.i = PadType.CONSTANT.value
+
+        elif tf_op.type == TFOpType.MirrorPad:
+            pad_type_arg.i = self.pad_type[tf_op.get_attr('mode')].value

    def convert_concat(self, tf_op):
        op = self.convert_general_op(tf_op)