From a95ce07c205604aa1626ec45602e843c98f545af Mon Sep 17 00:00:00 2001 From: liuqi Date: Wed, 11 Apr 2018 17:51:25 +0800 Subject: [PATCH] Add pad op benchmark and conversion. --- mace/kernels/pad.h | 2 + mace/ops/pad_benchmark.cc | 78 +++++++++++++++++++++++++++ mace/python/tools/tf_converter_lib.py | 25 +++++++++ 3 files changed, 105 insertions(+) create mode 100644 mace/ops/pad_benchmark.cc diff --git a/mace/kernels/pad.h b/mace/kernels/pad.h index bb354ac2..6fbb1c76 100644 --- a/mace/kernels/pad.h +++ b/mace/kernels/pad.h @@ -5,6 +5,7 @@ #define MACE_KERNELS_PAD_H_ #include +#include #include #include "mace/core/future.h" @@ -49,6 +50,7 @@ struct PadFunctor : public PadFunctorBase { const index_t height = input->dim(1); const index_t width = input->dim(2); const index_t channel = input->dim(3); +#pragma omp parallel for collapse(3) for (index_t b = 0; b < batch; ++b) { for (index_t h = 0; h < height; ++h) { for (index_t w = 0; w < width; ++w) { diff --git a/mace/ops/pad_benchmark.cc b/mace/ops/pad_benchmark.cc new file mode 100644 index 00000000..947c7aa8 --- /dev/null +++ b/mace/ops/pad_benchmark.cc @@ -0,0 +1,78 @@ +// +// Copyright (c) 2017 XiaoMi All rights reserved. 
+// + +#include "mace/core/operator.h" +#include "mace/core/runtime/opencl/opencl_runtime.h" +#include "mace/core/testing/test_benchmark.h" +#include "mace/ops/ops_test_util.h" + +namespace mace { +namespace ops { +namespace test { + +template +static void Pad(int iters, int batch, int height, + int width, int channels, int pad) { + mace::testing::StopTiming(); + + OpsTestNet net; + + // Add input data + net.AddRandomInput("Input", {batch, height, width, channels}); + + const std::vector paddings = {0, 0, pad, pad, pad, pad, 0, 0}; + if (D == DeviceType::OPENCL) { + BufferToImage(&net, "Input", "InputImage", + kernels::BufferType::IN_OUT_CHANNEL); + OpDefBuilder("Pad", "PadTest") + .Input("InputImage") + .Output("OutputImage") + .AddIntsArg("paddings", paddings) + .AddFloatArg("constant_value", 1.0) + .Finalize(net.NewOperatorDef()); + } else { + OpDefBuilder("Pad", "PadTest") + .Input("Input") + .Output("Output") + .AddIntsArg("paddings", paddings) + .AddFloatArg("constant_value", 1.0) + .Finalize(net.NewOperatorDef()); + } + + // Warm-up + for (int i = 0; i < 5; ++i) { + net.RunOp(D); + } + net.Sync(); + + mace::testing::StartTiming(); + while (iters--) { + net.Run(); + } + net.Sync(); +} + +#define BM_PAD_MACRO(N, H, W, C, PAD, TYPE, DEVICE) \ + static void BM_PAD_##N##_##H##_##W##_##C##_##PAD##_##TYPE##_##DEVICE( \ + int iters) { \ + const int64_t tot = static_cast(iters) * N * C * H * W; \ + mace::testing::MaccProcessed(tot); \ + mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ + Pad(iters, N, H, W, C, PAD); \ + } \ + BENCHMARK(BM_PAD_##N##_##H##_##W##_##C##_##PAD##_##TYPE##_##DEVICE) + +#define BM_PAD(N, H, W, C, PAD) \ + BM_PAD_MACRO(N, H, W, C, PAD, float, CPU); \ + BM_PAD_MACRO(N, H, W, C, PAD, float, OPENCL); \ + BM_PAD_MACRO(N, H, W, C, PAD, half, OPENCL); + +BM_PAD(1, 512, 512, 1, 2); +BM_PAD(1, 112, 112, 64, 1); +BM_PAD(1, 256, 256, 32, 2); +BM_PAD(1, 512, 512, 16, 2); + +} // namespace test +} // namespace ops +} // namespace mace diff --git 
a/mace/python/tools/tf_converter_lib.py b/mace/python/tools/tf_converter_lib.py index a721e0fd..56b3f04d 100644 --- a/mace/python/tools/tf_converter_lib.py +++ b/mace/python/tools/tf_converter_lib.py @@ -992,6 +992,29 @@ class TFConverter(object): self.add_output_shape([shape], op_def) self.resolved_ops[reshape_op.name] = 1 + def convert_pad(self, op): + op_def = self.net_def.op.add() + arg = op_def.arg.add() + arg.name = 'T' + arg.i = self.dt + op_def.name = op.name + op_def.type = "Pad" + op_def.input.extend([op.inputs[0].name]) + op_def.output.extend([output.name for output in op.outputs]) + paddings_arg = op_def.arg.add() + paddings_arg.name = 'paddings' + paddings_arg.ints.extend( + get_input_tensor(op, 1).eval().astype(np.int32).flat) + self.unused_tensor.add(get_input_tensor(op, 1).name) + if len(op.inputs) == 3: + constant_value_arg = op_def.arg.add() + constant_value_arg.name = 'constant_value' + constant_value_arg.f = \ + float(get_input_tensor(op, 2).eval().flat[0]) + self.unused_tensor.add(get_input_tensor(op, 2).name) + self.add_output_shape(op.outputs, op_def) + self.resolved_ops[op.name] = 1 + def convert_normal_op(self, op): op_def = self.net_def.op.add() arg = op_def.arg.add() @@ -1084,6 +1107,8 @@ class TFConverter(object): else: raise Exception('Unknown Op: %s, type: %s' % (op.name, op.type)) + elif op.type == 'Pad': + self.convert_pad(op) # elif op.type in ['']: # self.convert_normal_op(op) else: -- GitLab