Pad: parallelize the CPU kernel, add a benchmark, convert TensorFlow 'Pad' ops.

* mace/kernels/pad.h: the batch/height/width copy loops of PadFunctor run
  under "#pragma omp parallel for collapse(3)".
* mace/ops/pad_benchmark.cc: new benchmark for the Pad op covering float on
  CPU, and float and half on OPENCL.
* mace/python/tools/tf_converter_lib.py: new TFConverter.convert_pad and its
  dispatch branch for op.type == 'Pad'. The optional third input is stored
  in the float field of the 'constant_value' argument: the benchmark in this
  patch passes that argument with AddFloatArg, so the op presumably reads it
  as a float, and writing it to the int field truncated fractional values.

NOTE(review): this patch was rebuilt from a copy whose line breaks and
angle-bracket contents had been lost. The #include names in pad.h, the
template argument lists in pad_benchmark.cc and the indentation of context
lines are reconstructed from usage and hunk line counts - confirm them
against the tree before applying. The post-image blob id on the
tf_converter_lib.py "index" line predates the constant_value change.

diff --git a/mace/kernels/pad.h b/mace/kernels/pad.h
index bb354ac207a2614851f9880a543f5612fadc21b8..6fbb1c7663388dcb7dcd9845c07274747f5f0165 100644
--- a/mace/kernels/pad.h
+++ b/mace/kernels/pad.h
@@ -5,6 +5,7 @@
 #define MACE_KERNELS_PAD_H_
 
 #include <algorithm>
+#include <memory>
 #include <vector>
 
 #include "mace/core/future.h"
@@ -49,6 +50,7 @@ struct PadFunctor : public PadFunctorBase {
     const index_t height = input->dim(1);
     const index_t width = input->dim(2);
     const index_t channel = input->dim(3);
+#pragma omp parallel for collapse(3)
     for (index_t b = 0; b < batch; ++b) {
       for (index_t h = 0; h < height; ++h) {
         for (index_t w = 0; w < width; ++w) {
diff --git a/mace/ops/pad_benchmark.cc b/mace/ops/pad_benchmark.cc
new file mode 100644
index 0000000000000000000000000000000000000000..947c7aa8d83e7dc55271f6985f8c1a38ddc2e050
--- /dev/null
+++ b/mace/ops/pad_benchmark.cc
@@ -0,0 +1,78 @@
+//
+// Copyright (c) 2017 XiaoMi All rights reserved.
+//
+
+#include "mace/core/operator.h"
+#include "mace/core/runtime/opencl/opencl_runtime.h"
+#include "mace/core/testing/test_benchmark.h"
+#include "mace/ops/ops_test_util.h"
+
+namespace mace {
+namespace ops {
+namespace test {
+
+template <DeviceType D, typename T>
+static void Pad(int iters, int batch, int height,
+                int width, int channels, int pad) {
+  mace::testing::StopTiming();
+
+  OpsTestNet net;
+
+  // Add input data
+  net.AddRandomInput<D, T>("Input", {batch, height, width, channels});
+
+  const std::vector<int> paddings = {0, 0, pad, pad, pad, pad, 0, 0};
+  if (D == DeviceType::OPENCL) {
+    BufferToImage<D, T>(&net, "Input", "InputImage",
+                        kernels::BufferType::IN_OUT_CHANNEL);
+    OpDefBuilder("Pad", "PadTest")
+        .Input("InputImage")
+        .Output("OutputImage")
+        .AddIntsArg("paddings", paddings)
+        .AddFloatArg("constant_value", 1.0)
+        .Finalize(net.NewOperatorDef());
+  } else {
+    OpDefBuilder("Pad", "PadTest")
+        .Input("Input")
+        .Output("Output")
+        .AddIntsArg("paddings", paddings)
+        .AddFloatArg("constant_value", 1.0)
+        .Finalize(net.NewOperatorDef());
+  }
+
+  // Warm-up
+  for (int i = 0; i < 5; ++i) {
+    net.RunOp(D);
+  }
+  net.Sync();
+
+  mace::testing::StartTiming();
+  while (iters--) {
+    net.Run();
+  }
+  net.Sync();
+}
+
+#define BM_PAD_MACRO(N, H, W, C, PAD, TYPE, DEVICE)                        \
+  static void BM_PAD_##N##_##H##_##W##_##C##_##PAD##_##TYPE##_##DEVICE(    \
+      int iters) {                                                         \
+    const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W;       \
+    mace::testing::MaccProcessed(tot);                                     \
+    mace::testing::BytesProcessed(tot *(sizeof(TYPE)));                    \
+    Pad<DEVICE, TYPE>(iters, N, H, W, C, PAD);                             \
+  }                                                                        \
+  BENCHMARK(BM_PAD_##N##_##H##_##W##_##C##_##PAD##_##TYPE##_##DEVICE)
+
+#define BM_PAD(N, H, W, C, PAD)                 \
+  BM_PAD_MACRO(N, H, W, C, PAD, float, CPU);    \
+  BM_PAD_MACRO(N, H, W, C, PAD, float, OPENCL); \
+  BM_PAD_MACRO(N, H, W, C, PAD, half, OPENCL);
+
+BM_PAD(1, 512, 512, 1, 2);
+BM_PAD(1, 112, 112, 64, 1);
+BM_PAD(1, 256, 256, 32, 2);
+BM_PAD(1, 512, 512, 16, 2);
+
+}  // namespace test
+}  // namespace ops
+}  // namespace mace
diff --git a/mace/python/tools/tf_converter_lib.py b/mace/python/tools/tf_converter_lib.py
index a721e0fd2b849bea84a9c0e07a557078c5a6e217..56b3f04d07fbdfaae284236f601110e41b6c9dba 100644
--- a/mace/python/tools/tf_converter_lib.py
+++ b/mace/python/tools/tf_converter_lib.py
@@ -992,6 +992,29 @@ class TFConverter(object):
         self.add_output_shape([shape], op_def)
         self.resolved_ops[reshape_op.name] = 1
 
+    def convert_pad(self, op):
+        op_def = self.net_def.op.add()
+        arg = op_def.arg.add()
+        arg.name = 'T'
+        arg.i = self.dt
+        op_def.name = op.name
+        op_def.type = "Pad"
+        op_def.input.extend([op.inputs[0].name])
+        op_def.output.extend([output.name for output in op.outputs])
+        paddings_arg = op_def.arg.add()
+        paddings_arg.name = 'paddings'
+        paddings_arg.ints.extend(
+            get_input_tensor(op, 1).eval().astype(np.int32).flat)
+        self.unused_tensor.add(get_input_tensor(op, 1).name)
+        if len(op.inputs) == 3:
+            constant_value_arg = op_def.arg.add()
+            constant_value_arg.name = 'constant_value'
+            constant_value_arg.f = float(
+                get_input_tensor(op, 2).eval().astype(np.float32).flat[0])
+            self.unused_tensor.add(get_input_tensor(op, 2).name)
+        self.add_output_shape(op.outputs, op_def)
+        self.resolved_ops[op.name] = 1
+
     def convert_normal_op(self, op):
         op_def = self.net_def.op.add()
         arg = op_def.arg.add()
@@ -1084,6 +1107,8 @@ class TFConverter(object):
                 else:
                     raise Exception('Unknown Op: %s, type: %s' % (op.name,
                                                                   op.type))
+            elif op.type == 'Pad':
+                self.convert_pad(op)
             # elif op.type in ['']:
             #     self.convert_normal_op(op)
             else: