Commit a95ce07c authored by liuqi

Add pad op benchmark and conversion.

Parent 478b21f3
......@@ -5,6 +5,7 @@
#define MACE_KERNELS_PAD_H_
#include <algorithm>
#include <memory>
#include <vector>
#include "mace/core/future.h"
......@@ -49,6 +50,7 @@ struct PadFunctor : public PadFunctorBase {
    const index_t height = input->dim(1);
    const index_t width = input->dim(2);
    const index_t channel = input->dim(3);

#pragma omp parallel for collapse(3)
    for (index_t b = 0; b < batch; ++b) {
      for (index_t h = 0; h < height; ++h) {
        for (index_t w = 0; w < width; ++w) {
......
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/core/operator.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/core/testing/test_benchmark.h"
#include "mace/ops/ops_test_util.h"
namespace mace {
namespace ops {
namespace test {
template <DeviceType D, typename T>
static void Pad(int iters, int batch, int height,
                int width, int channels, int pad) {
  mace::testing::StopTiming();

  OpsTestNet net;

  // Add input data
  net.AddRandomInput<D, T>("Input", {batch, height, width, channels});

  const std::vector<int> paddings = {0, 0, pad, pad, pad, pad, 0, 0};
  if (D == DeviceType::OPENCL) {
    BufferToImage<D, T>(&net, "Input", "InputImage",
                        kernels::BufferType::IN_OUT_CHANNEL);
    OpDefBuilder("Pad", "PadTest")
        .Input("InputImage")
        .Output("OutputImage")
        .AddIntsArg("paddings", paddings)
        .AddFloatArg("constant_value", 1.0)
        .Finalize(net.NewOperatorDef());
  } else {
    OpDefBuilder("Pad", "PadTest")
        .Input("Input")
        .Output("Output")
        .AddIntsArg("paddings", paddings)
        .AddFloatArg("constant_value", 1.0)
        .Finalize(net.NewOperatorDef());
  }

  // Warm-up
  for (int i = 0; i < 5; ++i) {
    net.RunOp(D);
  }
  net.Sync();

  mace::testing::StartTiming();
  while (iters--) {
    net.Run();
  }
  net.Sync();
}
#define BM_PAD_MACRO(N, H, W, C, PAD, TYPE, DEVICE)                     \
  static void BM_PAD_##N##_##H##_##W##_##C##_##PAD##_##TYPE##_##DEVICE( \
      int iters) {                                                      \
    const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W;    \
    mace::testing::MaccProcessed(tot);                                  \
    mace::testing::BytesProcessed(tot * (sizeof(TYPE)));                \
    Pad<DEVICE, TYPE>(iters, N, H, W, C, PAD);                          \
  }                                                                     \
  BENCHMARK(BM_PAD_##N##_##H##_##W##_##C##_##PAD##_##TYPE##_##DEVICE)

#define BM_PAD(N, H, W, C, PAD) \
  BM_PAD_MACRO(N, H, W, C, PAD, float, CPU); \
  BM_PAD_MACRO(N, H, W, C, PAD, float, OPENCL); \
  BM_PAD_MACRO(N, H, W, C, PAD, half, OPENCL);
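// Each BM_PAD(N, H, W, C, PAD) invocation below registers three benchmarks:
// float on CPU, float on OPENCL and half on OPENCL, e.g.
// BM_PAD_1_512_512_1_2_float_CPU.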
BM_PAD(1, 512, 512, 1, 2);
BM_PAD(1, 112, 112, 64, 1);
BM_PAD(1, 256, 256, 32, 2);
BM_PAD(1, 512, 512, 16, 2);
} // namespace test
} // namespace ops
} // namespace mace
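For reference, a minimal sketch (not part of this commit) of the kind of TensorFlow graph node the converter change below consumes, assuming a TensorFlow 1.x graph; the tensor name and shape are illustrative only:

import tensorflow as tf

# NHWC input; tf.pad with a constant paddings tensor emits a node of type
# 'Pad' whose second input is the paddings constant that convert_pad reads.
inp = tf.placeholder(tf.float32, [1, 112, 112, 64], name='Input')
paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]], dtype=tf.int32)
padded = tf.pad(inp, paddings, mode='CONSTANT')  # pads with zeros by default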
......@@ -992,6 +992,29 @@ class TFConverter(object):
    self.add_output_shape([shape], op_def)
    self.resolved_ops[reshape_op.name] = 1

  def convert_pad(self, op):
    op_def = self.net_def.op.add()
    arg = op_def.arg.add()
    arg.name = 'T'
    arg.i = self.dt
    op_def.name = op.name
    op_def.type = "Pad"
    op_def.input.extend([op.inputs[0].name])
    op_def.output.extend([output.name for output in op.outputs])
    paddings_arg = op_def.arg.add()
    paddings_arg.name = 'paddings'
    paddings_arg.ints.extend(
        get_input_tensor(op, 1).eval().astype(np.int32).flat)
    self.unused_tensor.add(get_input_tensor(op, 1).name)
    if len(op.inputs) == 3:
      constant_value_arg = op_def.arg.add()
      constant_value_arg.name = 'constant_value'
      constant_value_arg.i = \
          get_input_tensor(op, 2).eval().astype(np.int32).flat[0]
      self.unused_tensor.add(get_input_tensor(op, 2).name)
    self.add_output_shape(op.outputs, op_def)
    self.resolved_ops[op.name] = 1

  def convert_normal_op(self, op):
    op_def = self.net_def.op.add()
    arg = op_def.arg.add()
......@@ -1084,6 +1107,8 @@ class TFConverter(object):
        else:
          raise Exception('Unknown Op: %s, type: %s' % (op.name,
                                                        op.type))
      elif op.type == 'Pad':
        self.convert_pad(op)
      # elif op.type in ['']:
      #   self.convert_normal_op(op)
      else:
......
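A quick sketch of the flattening convert_pad performs on the paddings tensor (the values here are illustrative): numpy's .flat walks the [4, 2] array in row-major order, producing the same layout the benchmark above builds by hand.

import numpy as np

paddings = np.array([[0, 0], [2, 2], [2, 2], [0, 0]], dtype=np.int32)
flattened = list(paddings.flat)   # row-major: [0, 0, 2, 2, 2, 2, 0, 0]
assert flattened == [0, 0, 2, 2, 2, 2, 0, 0]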