Commit a95ce07c authored by liuqi

Add pad op benchmark and conversion.

Parent 478b21f3
......@@ -5,6 +5,7 @@
#define MACE_KERNELS_PAD_H_
#include <algorithm>
#include <memory>
#include <vector>
#include "mace/core/future.h"
......@@ -49,6 +50,7 @@ struct PadFunctor : public PadFunctorBase {
    const index_t height = input->dim(1);
    const index_t width = input->dim(2);
    const index_t channel = input->dim(3);

#pragma omp parallel for collapse(3)
    for (index_t b = 0; b < batch; ++b) {
      for (index_t h = 0; h < height; ++h) {
        for (index_t w = 0; w < width; ++w) {
......
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/core/operator.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/core/testing/test_benchmark.h"
#include "mace/ops/ops_test_util.h"
namespace mace {
namespace ops {
namespace test {
template <DeviceType D, typename T>
static void Pad(int iters, int batch, int height,
                int width, int channels, int pad) {
  mace::testing::StopTiming();

  OpsTestNet net;

  // Add input data
  net.AddRandomInput<D, T>("Input", {batch, height, width, channels});

  const std::vector<int> paddings = {0, 0, pad, pad, pad, pad, 0, 0};
  if (D == DeviceType::OPENCL) {
    BufferToImage<D, T>(&net, "Input", "InputImage",
                        kernels::BufferType::IN_OUT_CHANNEL);
    OpDefBuilder("Pad", "PadTest")
        .Input("InputImage")
        .Output("OutputImage")
        .AddIntsArg("paddings", paddings)
        .AddFloatArg("constant_value", 1.0)
        .Finalize(net.NewOperatorDef());
  } else {
    OpDefBuilder("Pad", "PadTest")
        .Input("Input")
        .Output("Output")
        .AddIntsArg("paddings", paddings)
        .AddFloatArg("constant_value", 1.0)
        .Finalize(net.NewOperatorDef());
  }

  // Warm-up
  for (int i = 0; i < 5; ++i) {
    net.RunOp(D);
  }
  net.Sync();

  mace::testing::StartTiming();
  while (iters--) {
    net.Run();
  }
  net.Sync();
}
#define BM_PAD_MACRO(N, H, W, C, PAD, TYPE, DEVICE)                     \
  static void BM_PAD_##N##_##H##_##W##_##C##_##PAD##_##TYPE##_##DEVICE( \
      int iters) {                                                      \
    const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W;    \
    mace::testing::MaccProcessed(tot);                                  \
    mace::testing::BytesProcessed(tot * (sizeof(TYPE)));                \
    Pad<DEVICE, TYPE>(iters, N, H, W, C, PAD);                          \
  }                                                                     \
  BENCHMARK(BM_PAD_##N##_##H##_##W##_##C##_##PAD##_##TYPE##_##DEVICE)

#define BM_PAD(N, H, W, C, PAD) \
  BM_PAD_MACRO(N, H, W, C, PAD, float, CPU); \
  BM_PAD_MACRO(N, H, W, C, PAD, float, OPENCL); \
  BM_PAD_MACRO(N, H, W, C, PAD, half, OPENCL);
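// Each BM_PAD(N, H, W, C, PAD) invocation below registers three benchmarks:
// float on CPU, float on OPENCL and half on OPENCL, e.g.
// BM_PAD_1_512_512_1_2_float_CPU.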
BM_PAD(1, 512, 512, 1, 2);
BM_PAD(1, 112, 112, 64, 1);
BM_PAD(1, 256, 256, 32, 2);
BM_PAD(1, 512, 512, 16, 2);
} // namespace test
} // namespace ops
} // namespace mace
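For reference, a minimal sketch (not part of this commit) of the kind of TensorFlow graph node the converter change below consumes, assuming a TensorFlow 1.x graph; the tensor name and shape are illustrative only:

import tensorflow as tf

# NHWC input; tf.pad with a constant paddings tensor emits a node of type
# 'Pad' whose second input is the paddings constant that convert_pad reads.
inp = tf.placeholder(tf.float32, [1, 112, 112, 64], name='Input')
paddings = tf.constant([[0, 0], [1, 1], [1, 1], [0, 0]], dtype=tf.int32)
padded = tf.pad(inp, paddings, mode='CONSTANT')  # pads with zeros by default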
......@@ -992,6 +992,29 @@ class TFConverter(object):
    self.add_output_shape([shape], op_def)
    self.resolved_ops[reshape_op.name] = 1

  def convert_pad(self, op):
    op_def = self.net_def.op.add()
    arg = op_def.arg.add()
    arg.name = 'T'
    arg.i = self.dt
    op_def.name = op.name
    op_def.type = "Pad"
    op_def.input.extend([op.inputs[0].name])
    op_def.output.extend([output.name for output in op.outputs])
    paddings_arg = op_def.arg.add()
    paddings_arg.name = 'paddings'
    paddings_arg.ints.extend(
        get_input_tensor(op, 1).eval().astype(np.int32).flat)
    self.unused_tensor.add(get_input_tensor(op, 1).name)
    if len(op.inputs) == 3:
      constant_value_arg = op_def.arg.add()
      constant_value_arg.name = 'constant_value'
      constant_value_arg.i = \
          get_input_tensor(op, 2).eval().astype(np.int32).flat[0]
      self.unused_tensor.add(get_input_tensor(op, 2).name)
    self.add_output_shape(op.outputs, op_def)
    self.resolved_ops[op.name] = 1

  def convert_normal_op(self, op):
    op_def = self.net_def.op.add()
    arg = op_def.arg.add()
......@@ -1084,6 +1107,8 @@ class TFConverter(object):
        else:
          raise Exception('Unknown Op: %s, type: %s' % (op.name,
                                                        op.type))
      elif op.type == 'Pad':
        self.convert_pad(op)
      # elif op.type in ['']:
      #   self.convert_normal_op(op)
      else:
......
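A quick sketch of the flattening convert_pad performs on the paddings tensor (the values here are illustrative): numpy's .flat walks the [4, 2] array in row-major order, producing the same layout the benchmark above builds by hand.

import numpy as np

paddings = np.array([[0, 0], [2, 2], [2, 2], [0, 0]], dtype=np.int32)
flattened = list(paddings.flat)   # row-major: [0, 0, 2, 2, 2, 2, 0, 0]
assert flattened == [0, 0, 2, 2, 2, 2, 0, 0]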