From 230f1425cb2e4d8ffa8a48541ca7679ff7284e05 Mon Sep 17 00:00:00 2001
From: Wiktor Adamski
Date: Fri, 15 Feb 2019 12:44:35 +0100
Subject: [PATCH] Added REFLECT and SYMMETRIC modes to the pad() operator.

---
 mace/ops/opencl/cl/pad.cl                          |  56 ++++
 mace/ops/opencl/image/pad.h                        |  28 +-
 mace/ops/pad.cc                                    | 107 +++++-
 mace/ops/pad.h                                     |  30 ++
 mace/ops/pad_benchmark.cc                          |  27 +-
 mace/ops/pad_test.cc                               | 313 +++++++++++++++++-
 .../tools/converter_tool/base_converter.py         |   7 +
 .../converter_tool/tensorflow_converter.py         |  30 +-
 8 files changed, 560 insertions(+), 38 deletions(-)
 create mode 100644 mace/ops/pad.h

diff --git a/mace/ops/opencl/cl/pad.cl b/mace/ops/opencl/cl/pad.cl
index d482cf1d..f62c8007 100644
--- a/mace/ops/opencl/cl/pad.cl
+++ b/mace/ops/opencl/cl/pad.cl
@@ -4,7 +4,9 @@ __kernel void pad(OUT_OF_RANGE_PARAMS
                   GLOBAL_WORK_GROUP_SIZE_DIM3
                   __read_only image2d_t input,
                   __write_only image2d_t output,
+#if PAD_TYPE == 0
                   __private const float constant_value,
+#endif
                   __private const int input_height,
                   __private const int input_width,
                   __private const int output_height,
@@ -26,6 +28,7 @@ __kernel void pad(OUT_OF_RANGE_PARAMS
 #endif
   const int width = global_size_dim1;
 
+#if PAD_TYPE == 0
   DATA_TYPE4 data = constant_value;
   if ((height_padding <= height_idx && height_idx < input_padded_height) &&
       (width_padding <= width_idx && width_idx < input_padded_width)) {
@@ -37,6 +40,59 @@ __kernel void pad(OUT_OF_RANGE_PARAMS
                                width_idx - width_padding),
                          in_hb_idx));
   }
+#elif PAD_TYPE == 1 || PAD_TYPE == 2
+  const int diff_left = width_padding - width_idx;
+  int w;
+
+  if (diff_left > 0) {
+#if PAD_TYPE == 1
+    w = diff_left;
+#else
+    w = diff_left - 1;
+#endif
+  } else {
+    const int diff_right = width_idx - input_padded_width;
+
+    if (diff_right >= 0) {
+#if PAD_TYPE == 1
+      w = input_width - diff_right - 2;
+#else
+      w = input_width - diff_right - 1;
+#endif
+
+    } else {
+      w = -diff_left;
+    }
+  }
+
+  const int diff_up = height_padding - height_idx;
+  int h;
+
+  if (diff_up > 0) {
+#if PAD_TYPE == 1
+    h = diff_up;
+#else
+    h = diff_up - 1;
+#endif
+  } else {
+    const int diff_down = height_idx - input_padded_height;
+
+    if (diff_down >= 0) {
+#if PAD_TYPE == 1
+      h = input_height - diff_down - 2;
+#else
+      h = input_height - diff_down - 1;
+#endif
+
+    } else {
+      h = -diff_up;
+    }
+  }
+
+  const int in_hb_idx = mad24(batch_idx, input_height, h);
+  const DATA_TYPE4 data = READ_IMAGET(input, SAMPLER,
+      (int2)(mad24(chan_blk_idx, input_width, w), in_hb_idx));
+#endif
 
   const int pos = mad24(chan_blk_idx, width, width_idx);
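A quick trace of the mirror arithmetic added above, using the shapes exercised later in pad_test.cc (input_width = 4, width_padding = 3): diff_left runs 3, 2, 1, 0, -1, -2, -3 across the seven output columns, so REFLECT (PAD_TYPE == 1) reads source columns 3, 2, 1, 0, 1, 2, 3 while SYMMETRIC (PAD_TYPE == 2) reads 2, 1, 0, 0, 1, 2, 3. On the right-hand side, a column landing i places past input_padded_width reads input_width - i - 2 for REFLECT and input_width - i - 1 for SYMMETRIC: the same mirror, without and with the edge element repeated.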
diff --git a/mace/ops/opencl/image/pad.h b/mace/ops/opencl/image/pad.h
index c723d9b8..a8a52123 100644
--- a/mace/ops/opencl/image/pad.h
+++ b/mace/ops/opencl/image/pad.h
@@ -23,6 +23,7 @@
 
 #include "mace/core/op_context.h"
 #include "mace/core/tensor.h"
+#include "mace/ops/pad.h"
 #include "mace/ops/opencl/helper.h"
 
 namespace mace {
@@ -33,9 +34,10 @@ namespace image {
 template <typename T>
 class PadKernel : public OpenCLPadKernel {
  public:
-  PadKernel(const std::vector<int> &paddings,
+  PadKernel(const PadType type,
+            const std::vector<int> &paddings,
             const float constant_value)
-      : paddings_(paddings), constant_value_(constant_value) {}
+      : type_(type), paddings_(paddings), constant_value_(constant_value) {}
 
   MaceStatus Compute(
       OpContext *context,
@@ -43,6 +45,7 @@ class PadKernel : public OpenCLPadKernel {
       Tensor *output) override;
 
  private:
+  PadType type_;
   std::vector<int> paddings_;
   float constant_value_;
   cl::Kernel kernel_;
@@ -60,7 +63,23 @@ MaceStatus PadKernel<T>::Compute(
   MACE_CHECK((this->paddings_[0] == 0) && (this->paddings_[1] == 0)
                  && (this->paddings_[6] == 0) && (this->paddings_[7] == 0))
       << "Mace only support height/width dimension now";
+  for (int i = 2; i <= 5; ++i) {
+    MACE_CHECK(paddings_[i] >= 0);
+  }
   auto input_shape = input->shape();
+  if (type_ == PadType::REFLECT) {
+    MACE_CHECK(paddings_[2] < input_shape[1] &&
+               paddings_[3] < input_shape[1] &&
+               paddings_[4] < input_shape[2] &&
+               paddings_[5] < input_shape[2]);
+  } else if (type_ == PadType::SYMMETRIC) {
+    MACE_CHECK(paddings_[2] <= input_shape[1] &&
+               paddings_[3] <= input_shape[1] &&
+               paddings_[4] <= input_shape[2] &&
+               paddings_[5] <= input_shape[2]);
+  } else {
+    MACE_CHECK(type_ == PadType::CONSTANT);
+  }
   std::vector<index_t> output_shape = {
       input_shape[0] + this->paddings_[0] + this->paddings_[1],
       input_shape[1] + this->paddings_[2] + this->paddings_[3],
@@ -92,6 +111,7 @@ MaceStatus PadKernel<T>::Compute(
     auto dt = DataTypeToEnum<T>::value;
     built_options.emplace("-DDATA_TYPE=" + DtToCLDt(dt));
     built_options.emplace("-DCMD_DATA_TYPE=" + DtToCLCMDDt(dt));
+    built_options.emplace(MakeString("-DPAD_TYPE=", type_));
     MACE_RETURN_IF_ERROR(runtime->BuildKernel("pad", kernel_name,
                                               built_options, &kernel_));
@@ -110,7 +130,9 @@ MaceStatus PadKernel<T>::Compute(
     MACE_SET_3D_GWS_ARGS(kernel_, gws);
     kernel_.setArg(idx++, *(input->opencl_image()));
     kernel_.setArg(idx++, *(output->opencl_image()));
-    kernel_.setArg(idx++, this->constant_value_);
+    if (type_ == PadType::CONSTANT) {
+      kernel_.setArg(idx++, this->constant_value_);
+    }
     kernel_.setArg(idx++, static_cast<int32_t>(input_shape[1]));
     kernel_.setArg(idx++, static_cast<int32_t>(input_shape[2]));
     kernel_.setArg(idx++, static_cast<int32_t>(output_shape[1]));
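Note that the pad type is baked into the kernel at build time through the -DPAD_TYPE define, so each mode gets its own compiled kernel with the other branches stripped by the preprocessor. That is also why constant_value is only set as a kernel argument, and only declared in the kernel signature, when the type is CONSTANT.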
diff --git a/mace/ops/pad.cc b/mace/ops/pad.cc
index 3576543b..0dfdf673 100644
--- a/mace/ops/pad.cc
+++ b/mace/ops/pad.cc
@@ -16,6 +16,7 @@
 #include <algorithm>
 
 #include "mace/core/operator.h"
+#include "mace/ops/pad.h"
 #ifdef MACE_ENABLE_OPENCL
 #include "mace/ops/opencl/image/pad.h"
 #endif  // MACE_ENABLE_OPENCL
@@ -31,6 +32,9 @@ class PadOp<DeviceType::CPU, T> : public Operation {
  public:
   explicit PadOp(OpConstructContext *context)
       : Operation(context),
+        type_(
+            static_cast<PadType>(Operation::GetOptionalArg<int>(
+                "pad_type", static_cast<int>(PadType::CONSTANT)))),
         paddings_(Operation::GetRepeatedArgs<int>("paddings")),
         constant_value_(Operation::GetOptionalArg<float>(
             "constant_value", 0.0)) {
@@ -49,6 +53,15 @@ class PadOp<DeviceType::CPU, T> : public Operation {
     MACE_CHECK(
         this->paddings_.size() == static_cast<size_t>(input->dim_size()) * 2);
     auto input_shape = input->shape();
+    for (size_t i = 0; i < paddings_.size(); ++i) {
+      if (type_ == PadType::REFLECT) {
+        MACE_CHECK(paddings_[i] < input_shape[i / 2]);
+      } else if (type_ == PadType::SYMMETRIC) {
+        MACE_CHECK(paddings_[i] <= input_shape[i / 2]);
+      }
+      MACE_CHECK(paddings_[i] >= 0);
+    }
     MACE_RETURN_IF_ERROR(output->Resize({input_shape[0] + this->paddings_[0]
                                              + this->paddings_[1],
                                          input_shape[1] + this->paddings_[2]
@@ -62,32 +75,97 @@
     Tensor::MappingGuard output_guard(output);
     auto input_ptr = input->data<T>();
     T *output_ptr = output->mutable_data<T>();
-    std::fill(output_ptr, output_ptr + output->size(), this->constant_value_);
 
     const index_t batch = input->dim(0);
     const index_t channel = input->dim(1);
     const index_t height = input->dim(2);
     const index_t width = input->dim(3);
+
+    if (type_ == PadType::CONSTANT) {
+      std::fill(output_ptr, output_ptr + output->size(),
+                this->constant_value_);
+
 #pragma omp parallel for collapse(3)
-    for (index_t b = 0; b < batch; ++b) {
-      for (index_t c = 0; c < channel; ++c) {
-        for (index_t h = 0; h < height; ++h) {
-          const index_t in_offset = (((b * channel +
-              c) * height) + h) * width;
-          const index_t out_offset = (((b + this->paddings_[0]) * output->dim(1)
-              + (c + this->paddings_[2])) * output->dim(2)
-              + (h + this->paddings_[4])) * output->dim(3)
-              + this->paddings_[6];
-          memcpy(output_ptr + out_offset,
-                 input_ptr + in_offset,
-                 width * sizeof(T));
+      for (index_t b = 0; b < batch; ++b) {
+        for (index_t c = 0; c < channel; ++c) {
+          for (index_t h = 0; h < height; ++h) {
+            const index_t in_offset = (((b * channel + c) * height)
+                + h) * width;
+            const index_t out_offset =
+                (((b + this->paddings_[0]) * output->dim(1)
+                + (c + this->paddings_[2])) * output->dim(2)
+                + (h + this->paddings_[4])) * output->dim(3)
+                + this->paddings_[6];
+            memcpy(output_ptr + out_offset,
+                   input_ptr + in_offset,
+                   width * sizeof(T));
+          }
         }
       }
+    } else if (type_ == PadType::REFLECT || type_ == PadType::SYMMETRIC) {
+      const index_t o_batch = output->dim(0);
+      const index_t o_channel = output->dim(1);
+      const index_t o_height = output->dim(2);
+      const index_t o_width = output->dim(3);
+      const int l_add = type_ == PadType::REFLECT ? 0 : -1;
+      const int r_add = type_ == PadType::REFLECT ? -2 : -1;
+
+#pragma omp parallel for collapse(1)
+      for (index_t h = 0; h < o_height; ++h) {
+        index_t h_in = get_src_idx(h, height, paddings_[4], l_add, r_add);
+
+        for (index_t b = 0; b < o_batch; ++b) {
+          index_t b_in = get_src_idx(b, batch, paddings_[0], l_add, r_add);
+
+          for (index_t c = 0; c < o_channel; ++c) {
+            index_t c_in = get_src_idx(c, channel, paddings_[2], l_add,
+                                       r_add);
+            const index_t in_offset = (((b_in * channel + c_in) * height)
+                + h_in) * width;
+            index_t out_offset = (((b * o_channel + c) * o_height)
+                + h) * o_width;
+
+            // Left padding: walk backwards through the mirrored columns.
+            for (index_t i = 0, j = paddings_[6] + l_add;
+                 i < paddings_[6]; ++i, --j) {
+              output_ptr[out_offset++] = input_ptr[in_offset + j];
+            }
+            // Interior: direct copy of the source row.
+            memcpy(output_ptr + out_offset, input_ptr + in_offset,
+                   width * sizeof(T));
+            out_offset += width;
+            // Right padding: mirror back from the end of the source row.
+            for (index_t i = 0, j = width + r_add;
+                 i < paddings_[7]; ++i, --j) {
+              output_ptr[out_offset++] = input_ptr[in_offset + j];
+            }
+          }
+        }
+      }
+    } else {
+      LOG(FATAL) << "Pad op doesn't support type " << type_;
     }
 
     return MaceStatus::MACE_SUCCESS;
   }
 
  private:
+  // Maps an output index to its source index along one axis, given the
+  // padding on the low side. l_add/r_add select REFLECT (0, -2) or
+  // SYMMETRIC (-1, -1) behaviour.
+  int get_src_idx(int out, int in_size, int pad, int l_add, int r_add) {
+    const int diff_left = pad - out;
+    int in;
+
+    if (diff_left > 0) {
+      in = diff_left + l_add;
+    } else {
+      const int diff_right = out - (in_size + pad);
+
+      if (diff_right >= 0) {
+        in = in_size - diff_right + r_add;
+      } else {
+        in = -diff_left;
+      }
+    }
+
+    return in;
+  }
+
+  PadType type_;
   std::vector<int> paddings_;
   float constant_value_;
 };
@@ -98,11 +176,14 @@ class PadOp<DeviceType::GPU, T> : public Operation {
  public:
   explicit PadOp(OpConstructContext *context)
       : Operation(context) {
+    PadType type = static_cast<PadType>(Operation::GetOptionalArg<int>(
+        "pad_type", static_cast<int>(PadType::CONSTANT)));
     std::vector<int> paddings = Operation::GetRepeatedArgs<int>("paddings");
     float constant_value = Operation::GetOptionalArg<float>(
         "constant_value", 0.0);
     if (context->device()->gpu_runtime()->UseImageMemory()) {
-      kernel_.reset(new opencl::image::PadKernel<T>(paddings, constant_value));
+      kernel_.reset(new opencl::image::PadKernel<T>(
+          type, paddings, constant_value));
     } else {
       MACE_NOT_IMPLEMENTED;
     }
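The asymmetry between the two bounds checks above can be seen on a length-2 axis holding [a, b]: REFLECT mirrors about the edge element without repeating it, so the largest usable padding is 1 (giving [b, a, b] with one element of left padding), which is why REFLECT requires paddings_[i] < input_shape[i / 2]. SYMMETRIC repeats the edge, so a padding equal to the axis size is still well defined ([b, a, a, b] for a left padding of 2), matching the <= check.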
diff --git a/mace/ops/pad.h b/mace/ops/pad.h
new file mode 100644
index 00000000..e2139e27
--- /dev/null
+++ b/mace/ops/pad.h
@@ -0,0 +1,30 @@
+// Copyright 2018 The MACE Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+//     http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#ifndef MACE_OPS_PAD_H_
+#define MACE_OPS_PAD_H_
+
+namespace mace {
+namespace ops {
+
+enum PadType {
+  CONSTANT = 0,
+  REFLECT = 1,
+  SYMMETRIC = 2,
+};
+
+}  // namespace ops
+}  // namespace mace
+
+#endif  // MACE_OPS_PAD_H_
diff --git a/mace/ops/pad_benchmark.cc b/mace/ops/pad_benchmark.cc
index 69aac4dc..0466aa6b 100644
--- a/mace/ops/pad_benchmark.cc
+++ b/mace/ops/pad_benchmark.cc
@@ -14,6 +14,7 @@
 
 #include "mace/core/testing/test_benchmark.h"
 #include "mace/ops/ops_test_util.h"
+#include "mace/ops/pad.h"
 
 namespace mace {
 namespace ops {
@@ -22,7 +23,7 @@ namespace test {
 namespace {
 template <DeviceType D, typename T>
 void Pad(int iters, int batch, int height,
-         int width, int channels, int pad) {
+         int width, int channels, int pad, int pad_type) {
   mace::testing::StopTiming();
 
   OpsTestNet net;
@@ -35,6 +36,7 @@ void Pad(int iters, int batch, int height,
       .Input("Input")
       .Output("Output")
       .AddIntsArg("paddings", paddings)
+      .AddIntArg("pad_type", pad_type)
       .AddFloatArg("constant_value", 1.0)
       .AddIntArg("T", static_cast<int>(DataTypeToEnum<T>::value))
       .Finalize(net.NewOperatorDef());
@@ -53,19 +55,26 @@ void Pad(int iters, int batch, int height,
 }
 }  // namespace
 
-#define MACE_BM_PAD_MACRO(N, H, W, C, PAD, TYPE, DEVICE)                     \
-  static void MACE_BM_PAD_##N##_##H##_##W##_##C##_##PAD##_##TYPE##_##DEVICE( \
+#define MACE_BM_PAD_MACRO(N, H, W, C, PAD, MODE, TYPE, DEVICE)               \
+  static void MACE_BM_PAD_##N##_##H##_##W##_##C##_##PAD##_##MODE##_##TYPE    \
+      ##_##DEVICE(                                                           \
       int iters) {                                                           \
     const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W;         \
     mace::testing::BytesProcessed(tot *(sizeof(TYPE)));                      \
-    Pad<DEVICE, TYPE>(iters, N, H, W, C, PAD);                               \
+    Pad<DEVICE, TYPE>(iters, N, H, W, C, PAD, MODE);                         \
  }                                                                           \
-  MACE_BENCHMARK(MACE_BM_PAD_##N##_##H##_##W##_##C##_##PAD##_##TYPE##_##DEVICE)
+  MACE_BENCHMARK(MACE_BM_PAD_##N##_##H##_##W##_##C##_##PAD##_##MODE##_##TYPE \
+      ##_##DEVICE)
 
-#define MACE_BM_PAD(N, H, W, C, PAD)              \
-  MACE_BM_PAD_MACRO(N, H, W, C, PAD, float, CPU); \
-  MACE_BM_PAD_MACRO(N, H, W, C, PAD, float, GPU); \
-  MACE_BM_PAD_MACRO(N, H, W, C, PAD, half, GPU);
+#define MACE_BM_PAD_MODE(N, H, W, C, PAD, MODE)         \
+  MACE_BM_PAD_MACRO(N, H, W, C, PAD, MODE, float, CPU); \
+  MACE_BM_PAD_MACRO(N, H, W, C, PAD, MODE, float, GPU); \
+  MACE_BM_PAD_MACRO(N, H, W, C, PAD, MODE, half, GPU);
+
+#define MACE_BM_PAD(N, H, W, C, PAD)           \
+  MACE_BM_PAD_MODE(N, H, W, C, PAD, CONSTANT); \
+  MACE_BM_PAD_MODE(N, H, W, C, PAD, REFLECT);  \
+  MACE_BM_PAD_MODE(N, H, W, C, PAD, SYMMETRIC);
 
 MACE_BM_PAD(1, 512, 512, 1, 2);
 MACE_BM_PAD(1, 112, 112, 64, 1);
#include "mace/ops/ops_test_util.h" +#include "mace/ops/pad.h" namespace mace { namespace ops { @@ -109,7 +110,7 @@ TEST_F(PadTest, ComplexCPU) { namespace { template void Complex(const std::vector &input_shape, - const std::vector &paddings) { + const std::vector &paddings, const int pad_type) { // Construct graph OpsTestNet net; @@ -122,6 +123,7 @@ void Complex(const std::vector &input_shape, .Input("TInput") .Output("TOutput") .AddIntsArg("paddings", paddings) + .AddIntArg("pad_type", pad_type) .AddFloatArg("constant_value", 1.0) .AddIntArg("data_format", DataFormat::NHWC) .Finalize(net.NewOperatorDef()); @@ -138,6 +140,7 @@ void Complex(const std::vector &input_shape, .Input("Input") .Output("Output") .AddIntsArg("paddings", paddings) + .AddIntArg("pad_type", pad_type) .AddFloatArg("constant_value", 1.0) .AddIntArg("data_format", DataFormat::NHWC) .Finalize(net.NewOperatorDef()); @@ -156,15 +159,311 @@ void Complex(const std::vector &input_shape, } // namespace TEST_F(PadTest, ComplexFloat) { - Complex({1, 32, 32, 4}, {0, 0, 2, 2, 1, 1, 0, 0}); - Complex({1, 31, 37, 16}, {0, 0, 2, 0, 1, 0, 0, 0}); - Complex({1, 128, 128, 32}, {0, 0, 0, 1, 0, 2, 0, 0}); + for (int i = PadType::CONSTANT; i <= PadType::SYMMETRIC; i++) { + Complex({1, 32, 32, 4}, {0, 0, 2, 2, 1, 1, 0, 0}, i); + Complex({1, 31, 37, 16}, {0, 0, 2, 0, 1, 0, 0, 0}, i); + Complex({1, 128, 128, 32}, {0, 0, 0, 1, 0, 2, 0, 0}, i); + } } TEST_F(PadTest, ComplexHalf) { - Complex({1, 32, 32, 4}, {0, 0, 2, 2, 1, 1, 0, 0}); - Complex({1, 31, 37, 16}, {0, 0, 2, 0, 1, 0, 0, 0}); - Complex({1, 128, 128, 32}, {0, 0, 0, 1, 0, 2, 0, 0}); + for (int i = PadType::CONSTANT; i <= PadType::SYMMETRIC; i++) { + Complex({1, 32, 32, 4}, {0, 0, 2, 2, 1, 1, 0, 0}, i); + Complex({1, 31, 37, 16}, {0, 0, 2, 0, 1, 0, 0, 0}, i); + Complex({1, 128, 128, 32}, {0, 0, 0, 1, 0, 2, 0, 0}, i); + } +} + +namespace { +template +void Result(const std::vector &input_shape, + const std::vector &input_data, + const std::vector &expected_shape, + const std::vector &expected_data, + const std::vector &paddings, + const PadType pad_type) { + // Construct graph + OpsTestNet net; + std::string input("Input"); + std::string t_input(input); + std::string output("Output"); + std::string t_output(output); + + // Add input data + net.AddInputFromArray(input, input_shape, input_data); + + if (D == DeviceType::CPU) { + t_input = "TInput"; + t_output = "TOutput"; + net.TransformDataFormat(input, NHWC, t_input, NCHW); + } + + OpDefBuilder("Pad", "PadTest") + .Input(t_input) + .Output(t_output) + .AddIntsArg("paddings", paddings) + .AddIntArg("pad_type", static_cast(pad_type)) + .AddIntArg("data_format", DataFormat::NHWC) + .Finalize(net.NewOperatorDef()); + + // Run + net.RunOp(D); + + if (D == DeviceType::CPU) { + net.TransformDataFormat(t_output, NCHW, output, NHWC); + } + + auto actual = net.GetTensor(output.c_str()); + auto expected = net.CreateTensor(expected_shape, expected_data); + + ExpectTensorNear(*expected, *actual, 1e-5); +} +} // namespace + +TEST_F(PadTest, ReflectCPU) { + std::vector input_shape{2, 2, 2, 2}; + int size = std::accumulate(input_shape.begin(), input_shape.end(), + 1, std::multiplies()); + std::vector input_data; + std::vector expected_shape{4, 4, 4, 4}; + std::vector expected_data{ + 16, 15, 16, 15, + 14, 13, 14, 13, + 16, 15, 16, 15, + 14, 13, 14, 13, + + 12, 11, 12, 11, + 10, 9, 10, 9, + 12, 11, 12, 11, + 10, 9, 10, 9, + + 16, 15, 16, 15, + 14, 13, 14, 13, + 16, 15, 16, 15, + 14, 13, 14, 13, + + 12, 11, 12, 11, + 10, 9, 10, 9, + 12, 11, 12, 11, + 10, 9, 
+
+TEST_F(PadTest, SymmetricCPU) {
+  std::vector<index_t> input_shape{2, 2, 2, 2};
+  int size = std::accumulate(input_shape.begin(), input_shape.end(),
+                             1, std::multiplies<int>());
+  std::vector<float> input_data;
+  std::vector<index_t> expected_shape{4, 4, 4, 4};
+  std::vector<float> expected_data{
+       1,  1,  2,  2,
+       1,  1,  2,  2,
+       3,  3,  4,  4,
+       3,  3,  4,  4,
+
+       1,  1,  2,  2,
+       1,  1,  2,  2,
+       3,  3,  4,  4,
+       3,  3,  4,  4,
+
+       5,  5,  6,  6,
+       5,  5,  6,  6,
+       7,  7,  8,  8,
+       7,  7,  8,  8,
+
+       5,  5,  6,  6,
+       5,  5,  6,  6,
+       7,  7,  8,  8,
+       7,  7,  8,  8,
+
+
+       1,  1,  2,  2,
+       1,  1,  2,  2,
+       3,  3,  4,  4,
+       3,  3,  4,  4,
+
+       1,  1,  2,  2,
+       1,  1,  2,  2,
+       3,  3,  4,  4,
+       3,  3,  4,  4,
+
+       5,  5,  6,  6,
+       5,  5,  6,  6,
+       7,  7,  8,  8,
+       7,  7,  8,  8,
+
+       5,  5,  6,  6,
+       5,  5,  6,  6,
+       7,  7,  8,  8,
+       7,  7,  8,  8,
+
+
+       9,  9, 10, 10,
+       9,  9, 10, 10,
+      11, 11, 12, 12,
+      11, 11, 12, 12,
+
+       9,  9, 10, 10,
+       9,  9, 10, 10,
+      11, 11, 12, 12,
+      11, 11, 12, 12,
+
+      13, 13, 14, 14,
+      13, 13, 14, 14,
+      15, 15, 16, 16,
+      15, 15, 16, 16,
+
+      13, 13, 14, 14,
+      13, 13, 14, 14,
+      15, 15, 16, 16,
+      15, 15, 16, 16,
+
+
+       9,  9, 10, 10,
+       9,  9, 10, 10,
+      11, 11, 12, 12,
+      11, 11, 12, 12,
+
+       9,  9, 10, 10,
+       9,  9, 10, 10,
+      11, 11, 12, 12,
+      11, 11, 12, 12,
+
+      13, 13, 14, 14,
+      13, 13, 14, 14,
+      15, 15, 16, 16,
+      15, 15, 16, 16,
+
+      13, 13, 14, 14,
+      13, 13, 14, 14,
+      15, 15, 16, 16,
+      15, 15, 16, 16,
+  };
+  const std::vector<int> paddings{1, 1, 1, 1, 1, 1, 1, 1};
+
+  input_data.reserve(size);
+  for (int i = 1; i <= size; i++) {
+    input_data.push_back(i);
+  }
+
+  Result<DeviceType::CPU, float>(input_shape, input_data, expected_shape,
+                                 expected_data, paddings,
+                                 PadType::SYMMETRIC);
+}
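+
+// The expectations in the Result test below follow from the 1-D rule: for
+// the row [1 2 3 4] with three columns of left padding,
+//   REFLECT   produces [4 3 2 | 1 2 3 4]  (mirror, edge not repeated),
+//   SYMMETRIC produces [3 2 1 | 1 2 3 4]  (mirror, edge repeated),
+// and the height axis (one row above, two below) mirrors the same way.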
+
+TEST_F(PadTest, Result) {
+  std::vector<index_t> input_shape{1, 3, 4, 1};
+  int size = std::accumulate(input_shape.begin(), input_shape.end(),
+                             1, std::multiplies<int>());
+  std::vector<float> input_data;
+  std::vector<index_t> expected_shape{1, 6, 7, 1};
+  std::vector<float> expected_reflect{
+       8,  7,  6, 5,  6,  7,  8,
+       4,  3,  2, 1,  2,  3,  4,
+       8,  7,  6, 5,  6,  7,  8,
+      12, 11, 10, 9, 10, 11, 12,
+       8,  7,  6, 5,  6,  7,  8,
+       4,  3,  2, 1,  2,  3,  4,
+  };
+  std::vector<float> expected_symmetric{
+       3,  2, 1, 1,  2,  3,  4,
+       3,  2, 1, 1,  2,  3,  4,
+       7,  6, 5, 5,  6,  7,  8,
+      11, 10, 9, 9, 10, 11, 12,
+      11, 10, 9, 9, 10, 11, 12,
+       7,  6, 5, 5,  6,  7,  8,
+  };
+  const std::vector<int> paddings{0, 0, 1, 2, 3, 0, 0, 0};
+
+  input_data.reserve(size);
+  for (int i = 1; i <= size; i++) {
+    input_data.push_back(i);
+  }
+
+  Result<DeviceType::CPU, float>(input_shape, input_data, expected_shape,
+                                 expected_reflect, paddings,
+                                 PadType::REFLECT);
+  Result<DeviceType::GPU, float>(input_shape, input_data, expected_shape,
+                                 expected_reflect, paddings,
+                                 PadType::REFLECT);
+  Result<DeviceType::GPU, half>(input_shape, input_data, expected_shape,
+                                expected_reflect, paddings,
+                                PadType::REFLECT);
+
+  Result<DeviceType::CPU, float>(input_shape, input_data, expected_shape,
+                                 expected_symmetric, paddings,
+                                 PadType::SYMMETRIC);
+  Result<DeviceType::GPU, float>(input_shape, input_data, expected_shape,
+                                 expected_symmetric, paddings,
+                                 PadType::SYMMETRIC);
+  Result<DeviceType::GPU, half>(input_shape, input_data, expected_shape,
+                                expected_symmetric, paddings,
+                                PadType::SYMMETRIC);
+}
 
 }  // namespace test
diff --git a/mace/python/tools/converter_tool/base_converter.py b/mace/python/tools/converter_tool/base_converter.py
index d4d326ef..fa7a7a23 100644
--- a/mace/python/tools/converter_tool/base_converter.py
+++ b/mace/python/tools/converter_tool/base_converter.py
@@ -89,6 +89,12 @@ class ReduceType(Enum):
     PROD = 3
 
 
+class PadType(Enum):
+    CONSTANT = 0
+    REFLECT = 1
+    SYMMETRIC = 2
+
+
 class FrameworkType(Enum):
     TENSORFLOW = 0
     CAFFE = 1
@@ -221,6 +227,7 @@ class MaceKeyword(object):
     mace_step_h_str = 'step_h'
     mace_step_w_str = 'step_w'
     mace_find_range_every_time = 'find_range_every_time'
+    mace_pad_type_str = 'pad_type'
 
 
 class TransformerRule(Enum):
diff --git a/mace/python/tools/converter_tool/tensorflow_converter.py b/mace/python/tools/converter_tool/tensorflow_converter.py
index d1958b25..fa14eefd 100644
--- a/mace/python/tools/converter_tool/tensorflow_converter.py
+++ b/mace/python/tools/converter_tool/tensorflow_converter.py
@@ -25,6 +25,7 @@ from mace.python.tools.converter_tool.base_converter import PoolingType
 from mace.python.tools.converter_tool.base_converter import PaddingMode
 from mace.python.tools.converter_tool.base_converter import ActivationType
 from mace.python.tools.converter_tool.base_converter import EltwiseType
+from mace.python.tools.converter_tool.base_converter import PadType
 from mace.python.tools.converter_tool.base_converter import FrameworkType
 from mace.python.tools.converter_tool.base_converter import ReduceType
 from mace.python.tools.converter_tool.base_converter import DataFormat
@@ -115,6 +116,7 @@ TFSupportedOps = [
     'FakeQuantWithMinMaxVars',
     'FloorDiv',
     'Sqrt',
+    'MirrorPad',
 ]
 
 TFOpType = Enum('TFOpType', [(op, op) for op in TFSupportedOps], type=str)
@@ -202,6 +204,12 @@ class TensorflowConverter(base_converter.ConverterInterface):
         TFOpType.LeakyRelu.name: ActivationType.LEAKYRELU,
     }
 
+    pad_type = {
+        'CONSTANT': PadType.CONSTANT,
+        'REFLECT': PadType.REFLECT,
+        'SYMMETRIC': PadType.SYMMETRIC
+    }
+
     def __init__(self, option, src_model_file):
         self._op_converters = {
             TFOpType.Conv2D.name: self.convert_conv2d,
@@ -268,6 +276,7 @@ class TensorflowConverter(base_converter.ConverterInterface):
             TFOpType.FakeQuantWithMinMaxVars.name: self.convert_fake_quantize,
             TFOpType.FloorDiv.name: self.convert_elementwise,
             TFOpType.Sqrt.name: self.convert_elementwise,
+            TFOpType.MirrorPad.name: self.convert_pad,
         }
         self._option = option
         self._mace_net_def = mace_pb2.NetDef()
@@ -724,12 +733,21 @@ class TensorflowConverter(base_converter.ConverterInterface):
         paddings_arg.ints.extend(paddings_value)
         self._skip_tensor.add(tf_op.inputs[1].name)
 
-        if len(tf_op.inputs) == 3:
-            constant_value_arg = op.arg.add()
-            constant_value_arg.name = MaceKeyword.mace_constant_value_str
-            constant_value = tf_op.inputs[2].eval().astype(np.int32).flat[0]
-            constant_value_arg.i = constant_value
-            self._skip_tensor.add(tf_op.inputs[2].name)
+        pad_type_arg = op.arg.add()
+        pad_type_arg.name = MaceKeyword.mace_pad_type_str
+
+        if tf_op.type == TFOpType.Pad:
+            if len(tf_op.inputs) == 3:
+                constant_value_arg = op.arg.add()
+                constant_value_arg.name = MaceKeyword.mace_constant_value_str
+                constant_value = \
+                    tf_op.inputs[2].eval().astype(np.int32).flat[0]
+                constant_value_arg.i = constant_value
+                self._skip_tensor.add(tf_op.inputs[2].name)
+
+            pad_type_arg.i = PadType.CONSTANT.value
+        elif tf_op.type == TFOpType.MirrorPad:
+            pad_type_arg.i = self.pad_type[tf_op.get_attr('mode')].value
 
     def convert_concat(self, tf_op):
         op = self.convert_general_op(tf_op)
-- 
GitLab
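For readers who want to sanity-check the expected tensors in pad_test.cc outside of MACE, here is a minimal, self-contained sketch of the 1-D mirror-pad rule the patch implements, using the same l_add/r_add trick as PadOp::get_src_idx. It was written for this review; src_idx and mirror_pad_1d are hypothetical names, not MACE APIs.

#include <cassert>
#include <vector>

enum PadType { CONSTANT = 0, REFLECT = 1, SYMMETRIC = 2 };

// Maps one output index to a source index along a single axis.
// REFLECT uses (l_add, r_add) = (0, -2); SYMMETRIC uses (-1, -1).
int src_idx(int out, int in_size, int pad, PadType type) {
  const int l_add = type == REFLECT ? 0 : -1;
  const int r_add = type == REFLECT ? -2 : -1;
  const int diff_left = pad - out;
  if (diff_left > 0) return diff_left + l_add;               // left padding
  const int diff_right = out - (in_size + pad);
  if (diff_right >= 0) return in_size - diff_right + r_add;  // right padding
  return -diff_left;                                         // interior copy
}

std::vector<float> mirror_pad_1d(const std::vector<float> &in,
                                 int pad_left, int pad_right, PadType type) {
  std::vector<float> out;
  const int n = static_cast<int>(in.size());
  for (int o = 0; o < pad_left + n + pad_right; ++o)
    out.push_back(in[src_idx(o, n, pad_left, type)]);
  return out;
}

int main() {
  // Matches the PadTest.Result expectations: row [1 2 3 4], 3 left pads.
  std::vector<float> row{1, 2, 3, 4};
  assert((mirror_pad_1d(row, 3, 0, REFLECT) ==
          std::vector<float>{4, 3, 2, 1, 2, 3, 4}));
  assert((mirror_pad_1d(row, 3, 0, SYMMETRIC) ==
          std::vector<float>{3, 2, 1, 1, 2, 3, 4}));
  return 0;
}

Applying src_idx independently per axis, as the CPU implementation does with get_src_idx for batch, channel, and height (and the inner mirrored-column loops for width), reproduces the full 4-D expected tensors in the tests.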