Commit 0461beb5 authored by Liangliang He

Merge branch 'opencl' into 'master'

Update conv2d benchmark test

See merge request !81
......@@ -12,11 +12,11 @@ load("//mace:mace.bzl", "if_android")
cc_library(
name = "opencl_runtime",
srcs = glob([
"runtime/opencl/cl.hpp",
"runtime/opencl/cl2.hpp",
"runtime/opencl/*.cc",
]),
hdrs = glob([
"runtime/opencl/cl.hpp",
"runtime/opencl/cl2.hpp",
"runtime/opencl/*.h",
]),
copts = ["-std=c++11"],
......
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#ifndef MACE_CORE_RUNTIME_OPENCL_CL2_HEADER_H_
#define MACE_CORE_RUNTIME_OPENCL_CL2_HEADER_H_
#define CL_HPP_TARGET_OPENCL_VERSION 200
#include "mace/core/runtime/opencl/cl2.hpp"
#endif // MACE_CORE_RUNTIME_OPENCL_CL2_HEADER_H_
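This wrapper exists so every translation unit sees the same CL_HPP_TARGET_OPENCL_VERSION before cl2.hpp is parsed; the rest of this commit switches direct cl2.hpp includes over to it. A minimal sketch of the intended usage (include paths as in this repository):

// Correct: the wrapper pins the target version first.
#include "mace/core/runtime/opencl/cl2_header.h"

// Risky: including cl2.hpp directly relies on its fallback default
// version and can diverge from other translation units.
// #include "mace/core/runtime/opencl/cl2.hpp"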
......@@ -2,8 +2,8 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/opencl_allocator.h"
#include "mace/core/runtime/opencl/cl2.hpp"
#include "mace/core/runtime/opencl/opencl_runtime.h"
namespace mace {
......
......@@ -26,6 +26,7 @@ bool ReadSourceFile(const std::string &filename, std::string *content) {
std::string line;
while (std::getline(ifs, line)) {
*content += line;
*content += "\n";
}
ifs.close();
return true;
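std::getline strips the trailing '\n', so without the appended newline the whole .cl file collapses onto one line and any // comment swallows the rest of the source. A self-contained sketch of the fixed reader, assuming the open/close logic of the elided context:

#include <fstream>
#include <string>

bool ReadSourceFile(const std::string &filename, std::string *content) {
  std::ifstream ifs(filename, std::ifstream::in);
  if (!ifs.is_open()) return false;
  std::string line;
  while (std::getline(ifs, line)) {  // getline drops the delimiter
    *content += line;
    *content += "\n";  // restore it, or "// comment" eats the next line
  }
  ifs.close();
  return true;
}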
......@@ -66,14 +67,15 @@ bool BuildProgram(OpenCLRuntime *runtime,
*program = cl::Program(runtime->context(), sources);
std::string build_options = "-Werror -cl-mad-enable -cl-fast-relaxed-math -I" + path;
// TODO(heliangliang) -cl-unsafe-math-optimizations -cl-fast-relaxed-math
if (program->build({runtime->device()}, build_options.c_str()) != CL_SUCCESS) {
cl_int ret = program->build({runtime->device()}, build_options.c_str());
if (ret != CL_SUCCESS) {
if (program->getBuildInfo<CL_PROGRAM_BUILD_STATUS>(runtime->device()) ==
CL_BUILD_ERROR) {
std::string build_log =
program->getBuildInfo<CL_PROGRAM_BUILD_LOG>(runtime->device());
LOG(INFO) << "Program build log: " << build_log;
}
LOG(FATAL) << "Build program failed";
LOG(FATAL) << "Build program failed: " << ret;
}
return true;
......
......@@ -5,14 +5,10 @@
#ifndef MACE_CORE_RUNTIME_OPENCL_OPENCL_RUNTIME_H_
#define MACE_CORE_RUNTIME_OPENCL_OPENCL_RUNTIME_H_
#ifndef CL_HPP_TARGET_OPENCL_VERSION
#define CL_HPP_TARGET_OPENCL_VERSION 200
#endif
#include <map>
#include <mutex>
#include "mace/core/runtime/opencl/cl2.hpp"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/opencl_wrapper.h"
namespace mace {
......
......@@ -126,6 +126,12 @@ class OpenCLLibraryImpl final {
using clRetainKernelFunc = cl_int (*)(cl_kernel kernel);
using clCreateBufferFunc =
cl_mem (*)(cl_context, cl_mem_flags, size_t, void *, cl_int *);
using clCreateImageFunc = cl_mem (*)(cl_context,
cl_mem_flags,
const cl_image_format *,
const cl_image_desc *,
void *,
cl_int *);
using clCreateProgramWithSourceFunc = cl_program (*)(
cl_context, cl_uint, const char **, const size_t *, cl_int *);
using clReleaseKernelFunc = cl_int (*)(cl_kernel kernel);
......@@ -136,8 +142,12 @@ class OpenCLLibraryImpl final {
using clRetainDeviceFunc = cl_int (*)(cl_device_id);
using clReleaseDeviceFunc = cl_int (*)(cl_device_id);
using clRetainEventFunc = cl_int (*)(cl_event);
using clGetKernelWorkGroupInfoFunc =
cl_int (*)(cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void *, size_t *);
using clGetKernelWorkGroupInfoFunc = cl_int (*)(cl_kernel,
cl_device_id,
cl_kernel_work_group_info,
size_t,
void *,
size_t *);
#define DEFINE_FUNC_PTR(func) func##Func func = nullptr
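For reference, one expansion of the macro (type alias as declared above):

// DEFINE_FUNC_PTR(clCreateImage) expands to:
clCreateImageFunc clCreateImage = nullptr;
// i.e. a null member pointer until LoadFromPath resolves the symbol.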
......@@ -149,6 +159,7 @@ class OpenCLLibraryImpl final {
DEFINE_FUNC_PTR(clReleaseKernel);
DEFINE_FUNC_PTR(clCreateProgramWithSource);
DEFINE_FUNC_PTR(clCreateBuffer);
DEFINE_FUNC_PTR(clCreateImage);
DEFINE_FUNC_PTR(clRetainKernel);
DEFINE_FUNC_PTR(clCreateKernel);
DEFINE_FUNC_PTR(clGetProgramInfo);
......@@ -269,6 +280,7 @@ void *OpenCLLibraryImpl::LoadFromPath(const std::string &path) {
ASSIGN_FROM_DLSYM(clReleaseKernel);
ASSIGN_FROM_DLSYM(clCreateProgramWithSource);
ASSIGN_FROM_DLSYM(clCreateBuffer);
ASSIGN_FROM_DLSYM(clCreateImage);
ASSIGN_FROM_DLSYM(clRetainKernel);
ASSIGN_FROM_DLSYM(clCreateKernel);
ASSIGN_FROM_DLSYM(clGetProgramInfo);
......@@ -708,6 +720,24 @@ cl_mem clCreateBuffer(cl_context context,
}
}
cl_mem clCreateImage(cl_context context,
cl_mem_flags flags,
const cl_image_format *image_format,
const cl_image_desc *image_desc,
void *host_ptr,
cl_int *errcode_ret) {
auto func = mace::OpenCLLibraryImpl::Get().clCreateImage;
if (func != nullptr) {
return func(context, flags, image_format, image_desc, host_ptr,
errcode_ret);
} else {
if (errcode_ret != nullptr) {
*errcode_ret = CL_OUT_OF_RESOURCES;
}
return nullptr;
}
}
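A hedged caller-side sketch (the context and image extents are hypothetical) showing how the stub's CL_OUT_OF_RESOURCES fallback surfaces when the vendor library could not be loaded:

extern cl_context context;  // hypothetical: obtained from the runtime
cl_int err = CL_SUCCESS;
cl_image_format fmt = {CL_RGBA, CL_FLOAT};
cl_image_desc desc = {};
desc.image_type = CL_MEM_OBJECT_TYPE_IMAGE3D;
desc.image_width = 64;   // hypothetical extents
desc.image_height = 64;
desc.image_depth = 8;
cl_mem image = clCreateImage(context, CL_MEM_READ_ONLY, &fmt, &desc,
                             nullptr, &err);
if (image == nullptr) {
  // err == CL_OUT_OF_RESOURCES when the symbol was not resolved.
}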
cl_program clCreateProgramWithSource(cl_context context,
cl_uint count,
const char **strings,
......@@ -795,8 +825,8 @@ cl_int clGetKernelWorkGroupInfo(cl_kernel kernel,
size_t *param_value_size_ret) {
auto func = mace::OpenCLLibraryImpl::Get().clGetKernelWorkGroupInfo;
if (func != nullptr) {
return func(kernel, device, param_name, param_value_size,
param_value, param_value_size_ret);
return func(kernel, device, param_name, param_value_size, param_value,
param_value_size_ret);
} else {
return CL_OUT_OF_RESOURCES;
}
......
......@@ -3,7 +3,7 @@
//
#include "mace/kernels/batch_norm.h"
#include "mace/core/runtime/opencl/cl2.hpp"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/utils/tuner.h"
......@@ -79,4 +79,4 @@ void BatchNormFunctor<DeviceType::OPENCL, float>::operator()(
}
} // namespace kernels
} // namespace mace
\ No newline at end of file
} // namespace mace
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
void kernel conv_2d_1x1_naive(global const float *input, /* n, c, h, w */
global const float *filter, /* o, i, kh, kw */
global const float *bias, /* o */
global float *output, /* n, c, h, w */
private const int input_channels) {
#include <common.h>
__kernel void conv_2d_1x1_naive(__global const float *input, /* n, c, h, w */
__global const float *filter, /* o, i, kh, kw */
__global const float *bias, /* o */
__global float *output, /* n, c, h, w */
__private const int in_chan_num) {
const int batch = get_global_id(0);
const int channel = get_global_id(1);
const int channels = get_global_size(1);
const int pixel = get_global_id(2);
const int pixels = get_global_size(2);
float *output_ptr = output + (batch * channels + channel) * pixels;
output_ptr[pixel] = bias[channel];
for (int inc = 0; inc < input_channels; ++inc) {
const float *input_ptr = input + (batch * input_channels + inc) * pixels + pixel;
const float weights = filter[channel * input_channels + inc];
for (int inc = 0; inc < in_chan_num; ++inc) {
const float *input_ptr = input + (batch * in_chan_num + inc) * pixels + pixel;
const float weights = filter[channel * in_chan_num + inc];
float in = input_ptr[0];
float out = output_ptr[0];
out += in * weights;
......@@ -23,17 +24,19 @@ void kernel conv_2d_1x1_naive(global const float *input, /* n, c, h, w */
}
}
void kernel conv_2d_1x1_v2(global const float *input, /* n, c, h, w */
global const float *filter, /* o, i, kh, kw */
global const float *bias, /* o */
global float *output, /* n, c, h, w */
private const int in_chan_num,
private const int out_chan_num,
private const int pixel_num) {
__kernel void conv_2d_1x1_v2(__global const float *input, /* n, c, h, w */
__global const float *filter, /* o, i, kh, kw */
__global const float *bias, /* o */
__global float *output, /* n, c, h, w */
__private const int in_chan_num,
__private const int out_chan_num,
__private const int pixel_num) {
int batch = get_global_id(0);
int out_chan_blk = get_global_id(1);
int out_pixel_blk = get_global_id(2);
__private float output_slice[4 * 4];
const int out_chan_begin = out_chan_blk * 4;
const int out_chan_end = min(out_chan_begin + 4, out_chan_num);
const int out_pixel_begin = out_pixel_blk * 4;
......@@ -41,21 +44,23 @@ void kernel conv_2d_1x1_v2(global const float *input, /* n, c, h, w */
const int in_offset = batch * in_chan_num * pixel_num;
const int out_offset = batch * out_chan_num * pixel_num;
const float *input_base = input + in_offset + out_pixel_begin;
float *output_base = output + out_offset + out_pixel_begin;
int pixels = out_pixel_end - out_pixel_begin;
int out_chan_len = out_chan_end - out_chan_begin;
int pixel_len = out_pixel_end - out_pixel_begin;
for (int out_chan = out_chan_begin; out_chan < out_chan_end; ++out_chan) {
float bias_value = bias[out_chan];
float *output_ptr = output_base + out_chan * pixel_num;
for (int p = 0; p < pixels; ++p) {
output_ptr[p] = bias_value;
int out_chan_offset = out_chan - out_chan_begin;
for (int p = 0; p < pixel_len; ++p) {
output_slice[out_chan_offset * 4 + p] = bias_value;
}
}
int in_chan = 0;
if (pixels == 4) {
if (pixel_len == 4) {
for (; in_chan + 3 < in_chan_num; in_chan += 4) {
const float *input_ptr = input_base + in_chan * pixel_num;
int out_chan = out_chan_begin;
......@@ -66,30 +71,32 @@ void kernel conv_2d_1x1_v2(global const float *input, /* n, c, h, w */
float4 in1 = vload4(0, input_ptr + pixel_num);
float4 in2 = vload4(0, input_ptr + 2 * pixel_num);
float4 in3 = vload4(0, input_ptr + 3 * pixel_num);
#pragma unroll
for (int oc = 0; oc < 4; ++oc) {
float4 weights = vload4(0, filter_ptr + oc * in_chan_num);
float4 out = vload4(0, output_ptr + oc * pixel_num);
float4 out = vload4(oc, output_slice);
out += in0 * weights.x;
out += in1 * weights.y;
out += in2 * weights.z;
out += in3 * weights.w;
vstore4(out, 0, output_ptr + oc * pixel_num);
vstore4(out, oc, output_slice);
}
}
for (; out_chan < out_chan_end; ++out_chan) {
const float* filter_ptr = filter + out_chan * in_chan_num + in_chan;
float *output_ptr = output_base + out_chan * pixel_num;
int out_chan_offset = out_chan - out_chan_begin;
float4 weights = vload4(0, filter_ptr);
float4 in0 = vload4(0, input_ptr);
float4 in1 = vload4(0, input_ptr + pixel_num);
float4 in2 = vload4(0, input_ptr + 2 * pixel_num);
float4 in3 = vload4(0, input_ptr + 3 * pixel_num);
float4 out = vload4(0, output_ptr);
float4 out = vload4(out_chan_offset, output_slice);
out += in0 * weights.x;
out += in1 * weights.y;
out += in2 * weights.z;
out += in3 * weights.w;
vstore4(out, 0, output_ptr);
vstore4(out, out_chan_offset, output_slice);
}
}
}
......@@ -99,13 +106,71 @@ void kernel conv_2d_1x1_v2(global const float *input, /* n, c, h, w */
for (int out_chan = out_chan_begin; out_chan < out_chan_end; ++out_chan) {
float weights = filter[out_chan * in_chan_num + in_chan];
float *output_ptr = output_base + out_chan * pixel_num;
int out_chan_offset = out_chan - out_chan_begin;
for (int p = 0; p < pixels; ++p) {
for (int p = 0; p < pixel_len; ++p) {
float in = input_ptr[p];
float out = output_ptr[p];
out += in * weights;
output_ptr[p] = out;
output_slice[out_chan_offset * 4 + p] += in * weights;
}
}
}
for (int out_chan_offset = 0; out_chan_offset < out_chan_len; ++out_chan_offset) {
int out_chan = out_chan_begin + out_chan_offset;
float *output_ptr = output_base + out_chan * pixel_num;
if (pixel_len == 4) {
float4 out = vload4(out_chan_offset, output_slice);
vstore4(out, 0, output_ptr);
} else {
int offset = out_chan_offset << 2;
for (int p = 0; p < pixel_len; ++p) {
output_ptr[p] = output_slice[offset + p];
}
}
}
}
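The v2 rewrite accumulates into output_slice, a 4-channel-by-4-pixel private (register) tile, and touches global memory only in the final store loop. Since vload4(n, p) / vstore4(v, n, p) address p[4*n .. 4*n+3], tile element (oc, p) lives at output_slice[oc * 4 + p], which is why the scalar tail loop and the float4 fast path agree. A host-side C++ sketch of the write-back (names mirror the kernel):

#include <algorithm>

// Flush the 4x4 private tile to global memory, one row per output
// channel; pixel_len <= 4 handles the ragged right edge.
void WriteBackTile(const float output_slice[16], float *output_base,
                   int out_chan_begin, int out_chan_len,
                   int pixel_num, int pixel_len) {
  for (int oc = 0; oc < out_chan_len; ++oc) {
    float *output_ptr = output_base + (out_chan_begin + oc) * pixel_num;
    std::copy_n(output_slice + oc * 4, pixel_len, output_ptr);
  }
}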
/* FIXME this is incomplete */
__kernel void conv_2d_1x1_v3(__read_only image3d_t input, /* n, c/4, h, w, 4 */
__global const float *filter, /* o, i, kh, kw */
__global const float *bias, /* o */
__write_only image3d_t output, /* n, c/4, h, w, 4 */
__private const int batch_num,
__private const int in_chan_num,
__private const int out_chan_num,
__private const int height,
__private const int width) {
int out_chan_blk = get_global_id(0);
int h = get_global_id(1);
int w = get_global_id(2);
int in_chan_blk_num = (in_chan_num + 3) / 4;
int out_chan_blk_num = (out_chan_num + 3) / 4;
const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
for (int batch = 0; batch < batch_num; ++batch) {
float4 bias_value = vload4(out_chan_blk, bias);
__private float4 out = bias_value;
for (int in_chan_blk = 0; in_chan_blk < in_chan_blk_num; ++in_chan_blk) {
int in_d = batch * in_chan_blk_num + in_chan_blk;
float4 in = read_imagef(input, sampler, (int4)(in_d, h, w, 0));
const float *filter_base = filter + (out_chan_blk << 2) * in_chan_num;
float4 weights = vload4(in_chan_blk, filter_base);
out.x += dot(in, weights);
weights = vload4(in_chan_blk, filter_base + in_chan_num);
out.y += dot(in, weights);
weights = vload4(in_chan_blk, filter_base + in_chan_num * 2);
out.z += dot(in, weights);
weights = vload4(in_chan_blk, filter_base + in_chan_num * 3);
out.w += dot(in, weights);
}
int out_d = batch * out_chan_blk_num + out_chan_blk;
int4 out_coord = (int4)(out_d, h, w, 0);
write_imagef(output, out_coord, out);
}
}
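Per the "n, c/4, h, w, 4" comments, the image's depth axis packs batch and channel blocks together. A small sketch of the coordinate math, assuming the image was created with depth = batch_num * chan_blk_num:

// Depth coordinate for (batch, chan_blk); matches in_d / out_d above.
inline int DepthIndex(int batch, int chan_blk, int chan_blk_num) {
  return batch * chan_blk_num + chan_blk;
}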
......@@ -2,9 +2,10 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/kernels/conv_2d.h"
#include "mace/core/common.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/kernels/conv_2d.h"
#include "mace/utils/utils.h"
namespace mace {
......@@ -22,22 +23,22 @@ void Conv1x1Naive(const Tensor *input,
auto runtime = OpenCLRuntime::Get();
auto program = runtime->program();
auto conv_2d = cl::KernelFunctor<cl::Buffer, cl::Buffer, cl::Buffer, cl::Buffer,
int, int>(program, "conv_2d_1x1_naive");
auto conv_2d =
cl::KernelFunctor<cl::Buffer, cl::Buffer, cl::Buffer, cl::Buffer, int,
int>(program, "conv_2d_1x1_naive");
const index_t pixels = height * width;
cl_int error;
conv_2d(cl::EnqueueArgs(runtime->command_queue(),
cl::NDRange(static_cast<int>(batch),
static_cast<int>(channels),
static_cast<int>(pixels)),
cl::NDRange(1, 1, 128)),
conv_2d(cl::EnqueueArgs(
runtime->command_queue(),
cl::NDRange(static_cast<int>(batch), static_cast<int>(channels),
static_cast<int>(pixels)),
cl::NDRange(1, 1, 128)),
*(static_cast<cl::Buffer *>(input->buffer())),
*(static_cast<cl::Buffer *>(filter->buffer())),
*(static_cast<cl::Buffer *>(bias->buffer())),
*(static_cast<cl::Buffer *>(output->buffer())),
static_cast<int>(input_channels),
error);
static_cast<int>(input_channels), error);
MACE_CHECK(error == CL_SUCCESS);
}
......@@ -53,31 +54,126 @@ void Conv1x1V2(const Tensor *input,
auto runtime = OpenCLRuntime::Get();
auto program = runtime->program();
auto conv_2d = cl::KernelFunctor<cl::Buffer, cl::Buffer, cl::Buffer, cl::Buffer,
int, int, int, int>(program, "conv_2d_1x1_v2");
const index_t pixels = height * width;
const index_t channel_blocks = (channels + 3) / 4;
const index_t pixel_blocks = (pixels + 3) / 4;
// TODO KernelFunctor has an extra clReleaseCommandQueue due to a copy
// TODO check weird clReleaseCommandQueue latency
// The KernelFunctor can cause segmentation faults in cb_retain_event
auto conv_2d_kernel = cl::Kernel(program, "conv_2d_1x1_v2");
const uint32_t kwg_size = runtime->GetKernelMaxWorkGroupSize(conv_2d_kernel);
uint32_t idx = 0;
conv_2d_kernel.setArg(idx++,
*(static_cast<const cl::Buffer *>(input->buffer())));
conv_2d_kernel.setArg(idx++,
*(static_cast<const cl::Buffer *>(filter->buffer())));
conv_2d_kernel.setArg(idx++,
*(static_cast<const cl::Buffer *>(bias->buffer())));
conv_2d_kernel.setArg(idx++, *(static_cast<cl::Buffer *>(output->buffer())));
conv_2d_kernel.setArg(idx++, static_cast<int>(input_channels));
conv_2d_kernel.setArg(idx++, static_cast<int>(channels));
conv_2d_kernel.setArg(idx++, static_cast<int>(pixels));
auto command_queue = runtime->command_queue();
cl_int error = command_queue.enqueueNDRangeKernel(
conv_2d_kernel, cl::NullRange,
cl::NDRange(static_cast<int>(batch), static_cast<int>(channel_blocks),
static_cast<int>(pixel_blocks)),
cl::NDRange(1, 2, kwg_size / 2));
MACE_CHECK(error == CL_SUCCESS, error);
}
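The local size cl::NDRange(1, 2, kwg_size / 2) appears chosen so the work-group exactly fills the kernel's reported maximum: 1 * 2 * (kwg_size / 2) == kwg_size whenever kwg_size is even. A hedged sketch of that sizing rule:

// kwg_size is CL_KERNEL_WORK_GROUP_SIZE for this kernel on this device;
// splitting it as (1, 2, kwg_size / 2) keeps the product at the maximum
// while giving the pixel-block dimension the widest stride.
const uint32_t kwg_size = runtime->GetKernelMaxWorkGroupSize(conv_2d_kernel);
const cl::NDRange local_size(1, 2, kwg_size / 2);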
void Conv1x1V3(const Tensor *input,
const Tensor *filter,
const Tensor *bias,
Tensor *output) {
const index_t batch = output->shape()[0];
const index_t channels = output->shape()[1];
const index_t height = output->shape()[2];
const index_t width = output->shape()[3];
const index_t input_channels = input->shape()[1];
auto runtime = OpenCLRuntime::Get();
auto program = runtime->program();
const index_t pixels = height * width;
const index_t pixel_blocks = (pixels + 3) / 4;
const index_t channel_blocks = (channels + 3) / 4;
const index_t input_channel_blocks = (input_channels + 3) / 4;
// FIXME temp hacking
static std::map<std::ptrdiff_t, cl::Image3D> input_image_map;
static std::map<std::ptrdiff_t, cl::Image3D> output_image_map;
cl::Image3D input_image;
cl::Image3D output_image;
auto input_iter =
input_image_map.find(reinterpret_cast<std::ptrdiff_t>(input->buffer()));
if (input_iter != input_image_map.end()) {
input_image = input_iter->second;
} else {
// The batch dimension is collapsed with the channel-block dimension
cl_int error;
cl::Image3D image =
cl::Image3D(OpenCLRuntime::Get()->context(),
CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
cl::ImageFormat(CL_RGBA, CL_FLOAT), height, width,
batch * input_channel_blocks, 0, 0, nullptr, &error);
MACE_CHECK(error == CL_SUCCESS);
input_image = image;
input_image_map.clear();
input_image_map.emplace(reinterpret_cast<std::ptrdiff_t>(input->buffer()),
image);
}
auto output_iter =
output_image_map.find(reinterpret_cast<std::ptrdiff_t>(output->buffer()));
if (output_iter != output_image_map.end()) {
output_image = output_iter->second;
} else {
cl_int error;
cl::Image3D image =
cl::Image3D(OpenCLRuntime::Get()->context(),
CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
cl::ImageFormat(CL_RGBA, CL_FLOAT), height, width,
batch * channel_blocks, 0, 0, nullptr, &error);
MACE_CHECK(error == CL_SUCCESS);
output_image = image;
output_image_map.clear();
output_image_map.emplace(reinterpret_cast<std::ptrdiff_t>(output->buffer()),
image);
}
auto conv_2d_kernel = cl::Kernel(program, "conv_2d_1x1_v3");
const uint32_t kwg_size = runtime->GetKernelMaxWorkGroupSize(conv_2d_kernel);
uint32_t idx = 0;
conv_2d_kernel.setArg(idx++, input_image);
conv_2d_kernel.setArg(idx++,
*(static_cast<const cl::Buffer *>(filter->buffer())));
conv_2d_kernel.setArg(idx++,
*(static_cast<const cl::Buffer *>(bias->buffer())));
conv_2d_kernel.setArg(idx++, output_image);
conv_2d_kernel.setArg(idx++, static_cast<int>(batch));
conv_2d_kernel.setArg(idx++, static_cast<int>(input_channels));
conv_2d_kernel.setArg(idx++, static_cast<int>(channels));
conv_2d_kernel.setArg(idx++, static_cast<int>(height));
conv_2d_kernel.setArg(idx++, static_cast<int>(width));
auto command_queue = runtime->command_queue();
cl_int error;
conv_2d(cl::EnqueueArgs(runtime->command_queue(),
cl::NDRange(static_cast<int>(batch),
static_cast<int>(channel_blocks),
static_cast<int>(pixel_blocks)),
cl::NDRange(1, 1, 256)),
*(static_cast<cl::Buffer *>(input->buffer())),
*(static_cast<cl::Buffer *>(filter->buffer())),
*(static_cast<cl::Buffer *>(bias->buffer())),
*(static_cast<cl::Buffer *>(output->buffer())),
static_cast<int>(input_channels),
static_cast<int>(channels),
static_cast<int>(pixels),
error);
MACE_CHECK(error == CL_SUCCESS);
error = command_queue.enqueueNDRangeKernel(
conv_2d_kernel, cl::NullRange,
cl::NDRange(static_cast<int>(channel_blocks), static_cast<int>(height),
static_cast<int>(width)),
cl::NDRange(1, 2, kwg_size / 2));
MACE_CHECK(error == CL_SUCCESS, error);
}
extern void Conv2dOpenclK1x1S1(const Tensor *input, const Tensor *filter,
const Tensor *bias, Tensor *output) {
extern void Conv2dOpenclK1x1S1(const Tensor *input,
const Tensor *filter,
const Tensor *bias,
Tensor *output) {
const index_t batch = output->shape()[0];
const index_t height = output->shape()[2];
const index_t width = output->shape()[3];
......@@ -89,7 +185,6 @@ extern void Conv2dOpenclK1x1S1(const Tensor *input, const Tensor *filter,
MACE_CHECK(input_batch == batch && input_height == height &&
input_width == width);
// Conv1x1Naive(input, filter, bias, output);
Conv1x1V2(input, filter, bias, output);
};
......
......@@ -17,7 +17,7 @@ static void AddNBenchmark(int iters, int n, int size) {
for (int i = 0; i < n; ++i) {
op_def_builder.Input(internal::MakeString("Input", i).c_str());
}
op_def_builder.Output("Output").Finalize(net.operator_def());
op_def_builder.Output("Output").Finalize(net.NewOperatorDef());
// Add input data
for (int i = 0; i < n; ++i) {
......
......@@ -17,7 +17,7 @@ TEST_F(AddnOpTest, AddnOp) {
.Input("Input2")
.Input("Input3")
.Output("Output")
.Finalize(net.operator_def());
.Finalize(net.NewOperatorDef());
// Add input data
net.AddRandomInput<DeviceType::CPU, float>("Input1", {1, 2, 3, 4});
......
......@@ -21,7 +21,7 @@ static void BatchNorm(
.Input("Var")
.Input("Epsilon")
.Output("Output")
.Finalize(net.operator_def());
.Finalize(net.NewOperatorDef());
// Add input data
net.AddRandomInput<D, T>("Input", {batch, channels, height, width});
......
......@@ -21,7 +21,7 @@ void Simple() {
.Input("Var")
.Input("Epsilon")
.Output("Output")
.Finalize(net.operator_def());
.Finalize(net.NewOperatorDef());
// Add input data
net.AddInputFromArray<D, float>("Input", {1, 1, 6, 2},
......@@ -73,7 +73,7 @@ TEST_F(BatchNormOpTest, SimpleRandomNeon) {
.Input("Var")
.Input("Epsilon")
.Output("Output")
.Finalize(net.operator_def());
.Finalize(net.NewOperatorDef());
// Add input data
net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, channels, height, width});
......@@ -114,7 +114,7 @@ TEST_F(BatchNormOpTest, ComplexRandomNeon) {
.Input("Var")
.Input("Epsilon")
.Output("Output")
.Finalize(net.operator_def());
.Finalize(net.NewOperatorDef());
// Add input data
net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, channels, height, width});
......@@ -155,7 +155,7 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
.Input("Var")
.Input("Epsilon")
.Output("Output")
.Finalize(net.operator_def());
.Finalize(net.NewOperatorDef());
// Add input data
net.AddRandomInput<DeviceType::OPENCL, float>("Input", {batch, channels, height, width});
......@@ -201,7 +201,7 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) {
.Input("Var")
.Input("Epsilon")
.Output("Output")
.Finalize(net.operator_def());
.Finalize(net.NewOperatorDef());
// Add input data
net.AddRandomInput<DeviceType::OPENCL, float>("Input", {batch, channels, height, width});
......
......@@ -19,10 +19,10 @@ static void ChannelShuffle(
OpDefBuilder("GlobalAvgPooling", "GlobalAvgPoolingTest")
.Input("Input")
.Output("Output")
.Finalize(net.operator_def());
.AddIntArg("group", group)
.Finalize(net.NewOperatorDef());
// Add input data
net.AddIntArg("group", group);
net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, channels, height, width});
// Warm-up
......
......@@ -14,9 +14,9 @@ TEST_F(ChannelShuffleOpTest, C8G4) {
OpDefBuilder("ChannelShuffle", "ChannelShuffleTest")
.Input("Input")
.Output("Output")
.Finalize(net.operator_def());
.AddIntArg("group", 4)
.Finalize(net.NewOperatorDef());
net.AddIntArg("group", 4);
// Add input data
net.AddInputFromArray<DeviceType::CPU, float>(
......
......@@ -17,7 +17,7 @@ static void ConcatHelper(int iters, int concat_dim, int dim1) {
.Input("Input1")
.Input("Axis")
.Output("Output")
.Finalize(net.operator_def());
.Finalize(net.NewOperatorDef());
// Add input data
const int kDim0 = 100;
......
......@@ -18,7 +18,7 @@ TEST_F(ConcatOpTest, Simple_Horizon) {
.Input("Input1")
.Input("Axis")
.Output("Output")
.Finalize(net.operator_def());
.Finalize(net.NewOperatorDef());
std::vector<index_t> input_shape = {4, 4};
std::vector<float> input0;
......@@ -56,7 +56,7 @@ TEST_F(ConcatOpTest, Simple_Vertical) {
.Input("Input1")
.Input("Axis")
.Output("Output")
.Finalize(net.operator_def());
.Finalize(net.NewOperatorDef());
std::vector<index_t> input_shape = {4, 4};
std::vector<float> input0;
......@@ -99,7 +99,7 @@ TEST_F(ConcatOpTest, Random) {
for (int i = 0; i < num_inputs; ++i) {
builder = builder.Input(("Input" + ToString(i)).c_str());
}
builder.Input("Axis").Output("Output").Finalize(net.operator_def());
builder.Input("Axis").Output("Output").Finalize(net.NewOperatorDef());
std::vector<index_t> shape_data;
GenerateRandomIntTypeData<index_t>({dim}, shape_data, 1, dim);
......
......@@ -3,6 +3,7 @@
//
#include <algorithm>
#include <sstream>
#include "mace/core/operator.h"
#include "mace/core/testing/test_benchmark.h"
......@@ -13,6 +14,7 @@ namespace mace {
template <DeviceType D, typename T>
static void Conv2d(int iters,
int iters_to_sync,
int batch,
int channels,
int height,
......@@ -30,17 +32,15 @@ static void Conv2d(int iters,
.Input("Filter")
.Input("Bias")
.Output("Output")
.Finalize(net.operator_def());
// Add args
net.AddIntsArg("strides", {stride, stride});
net.AddIntArg("padding", padding);
net.AddIntsArg("dilations", {1, 1});
.AddIntsArg("strides", {stride, stride})
.AddIntArg("padding", padding)
.AddIntsArg("dilations", {1, 1})
.Finalize(net.NewOperatorDef());
// Add input data
net.AddRandomInput<D, float>("Input", {batch, channels, height, width});
net.AddRandomInput<D, float>("Filter",
{output_channels, channels, kernel_h, kernel_w});
{output_channels, channels, kernel_h, kernel_w});
net.AddRandomInput<D, float>("Bias", {output_channels});
// Warm-up
......@@ -52,10 +52,17 @@ static void Conv2d(int iters,
mace::testing::StartTiming();
while (iters--) {
net.RunOp(D);
net.Sync();
if (iters % iters_to_sync == 0) {
net.Sync();
}
}
}
// A common network has more than one layer, so syncing only every few
// iterations approximates the amortized per-layer latency. The OpenCL
// runtime for Mali/Adreno is in-order.
constexpr int kItersToSync = 10;
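With an in-order queue, a sync every kItersToSync enqueues drains all previously queued kernels at once, so the measured per-iteration cost is roughly kernel_time + finish_overhead / kItersToSync instead of paying the full blocking cost each iteration. The benchmark loop above implements this; a condensed view, assuming net.Sync() wraps clFinish:

while (iters--) {
  net.RunOp(D);              // enqueue only; returns immediately
  if (iters % iters_to_sync == 0) {
    net.Sync();              // blocks until the whole queue completes
  }
}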
#define BM_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, OC, TYPE, DEVICE) \
static void \
BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE( \
......@@ -63,8 +70,8 @@ static void Conv2d(int iters,
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
Conv2d<DEVICE, TYPE>(iters, N, C, H, W, KH, KW, STRIDE, mace::Padding::P, \
OC); \
Conv2d<DEVICE, TYPE>(iters, kItersToSync, N, C, H, W, KH, KW, STRIDE, \
mace::Padding::P, OC); \
} \
BENCHMARK( \
BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE)
......
......@@ -18,12 +18,12 @@ TEST_F(Conv2dOpTest, Simple_VALID) {
.Input("Filter")
.Input("Bias")
.Output("Output")
.Finalize(net.operator_def());
.AddIntsArg("strides", {1, 1})
.AddIntArg("padding", Padding::VALID)
.AddIntsArg("dilations", {1, 1})
.Finalize(net.NewOperatorDef());
// Add args
net.AddIntsArg("strides", {1, 1});
net.AddIntArg("padding", Padding::VALID);
net.AddIntsArg("dilations", {1, 1});
// Add input data
net.AddInputFromArray<DeviceType::CPU, float>(
......@@ -52,12 +52,10 @@ TEST_F(Conv2dOpTest, Simple_SAME) {
.Input("Filter")
.Input("Bias")
.Output("Output")
.Finalize(net.operator_def());
// Add args
net.AddIntsArg("strides", {1, 1});
net.AddIntArg("padding", Padding::SAME);
net.AddIntsArg("dilations", {1, 1});
.AddIntsArg("strides", {1, 1})
.AddIntArg("padding", Padding::SAME)
.AddIntsArg("dilations", {1, 1})
.Finalize(net.NewOperatorDef());
// Add input data
net.AddInputFromArray<DeviceType::CPU, float>(
......@@ -88,12 +86,10 @@ TEST_F(Conv2dOpTest, Combined) {
.Input("Filter")
.Input("Bias")
.Output("Output")
.Finalize(net.operator_def());
// Add args
net.AddIntsArg("strides", {2, 2});
net.AddIntArg("padding", Padding::SAME);
net.AddIntsArg("dilations", {1, 1});
.AddIntsArg("strides", {2, 2})
.AddIntArg("padding", Padding::SAME)
.AddIntsArg("dilations", {1, 1})
.Finalize(net.NewOperatorDef());
// Add input data
net.AddInputFromArray<DeviceType::CPU, float>(
......@@ -127,12 +123,10 @@ void TestConv1x1() {
.Input("Filter")
.Input("Bias")
.Output("Output")
.Finalize(net.operator_def());
// Add args
net.AddIntsArg("strides", {1, 1});
net.AddIntArg("padding", Padding::VALID);
net.AddIntsArg("dilations", {1, 1});
.AddIntsArg("strides", {1, 1})
.AddIntArg("padding", Padding::VALID)
.AddIntsArg("dilations", {1, 1})
.Finalize(net.NewOperatorDef());
// Add input data
net.AddInputFromArray<D, float>(
......@@ -190,12 +184,10 @@ TEST_F(Conv2dOpTest, AlignedConvNxNS12) {
.Input("Filter")
.Input("Bias")
.Output("Output")
.Finalize(net.operator_def());
// Add args
net.AddIntsArg("strides", {stride_h, stride_w});
net.AddIntArg("padding", type);
net.AddIntsArg("dilations", {1, 1});
.AddIntsArg("strides", {stride_h, stride_w})
.AddIntArg("padding", type)
.AddIntsArg("dilations", {1, 1})
.Finalize(net.NewOperatorDef());
// Add input data
net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, input_channels, height, width});
......@@ -241,12 +233,10 @@ TEST_F(Conv2dOpTest, UnalignedConvNxNS12) {
.Input("Filter")
.Input("Bias")
.Output("Output")
.Finalize(net.operator_def());
// Add args
net.AddIntsArg("strides", {stride_h, stride_w});
net.AddIntArg("padding", type);
net.AddIntsArg("dilations", {1, 1});
.AddIntsArg("strides", {stride_h, stride_w})
.AddIntArg("padding", type)
.AddIntsArg("dilations", {1, 1})
.Finalize(net.NewOperatorDef());
// Add input data
net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, input_channels, height, width});
......
......@@ -19,12 +19,10 @@ void SimpleValidTest() {
.Input("Filter")
.Input("Bias")
.Output("Output")
.Finalize(net.operator_def());
// Add args
net.AddIntsArg("strides", {1, 1});
net.AddIntArg("padding", Padding::VALID);
net.AddIntsArg("dilations", {1, 1});
.AddIntsArg("strides", {1, 1})
.AddIntArg("padding", Padding::VALID)
.AddIntsArg("dilations", {1, 1})
.Finalize(net.NewOperatorDef());
// Add input data
net.AddInputFromArray<D, float>("Input", {1, 2, 2, 3},
......@@ -68,12 +66,10 @@ void TestNxNS12(const index_t height, const index_t width) {
.Input("Filter")
.Input("Bias")
.Output("Output")
.Finalize(net.operator_def());
// Add args
net.AddIntsArg("strides", {stride_h, stride_w});
net.AddIntArg("padding", type);
net.AddIntsArg("dilations", {1, 1});
.AddIntsArg("strides", {stride_h, stride_w})
.AddIntArg("padding", type)
.AddIntsArg("dilations", {1, 1})
.Finalize(net.NewOperatorDef());
// Add input data
net.AddRandomInput<D, float>("Input", {batch, input_channels, height, width});
......
......@@ -30,12 +30,10 @@ static void DepthwiseConv2d(int iters,
.Input("Filter")
.Input("Bias")
.Output("Output")
.Finalize(net.operator_def());
// Add args
net.AddIntsArg("strides", {stride, stride});
net.AddIntArg("padding", padding);
net.AddIntsArg("dilations", {1, 1});
.AddIntsArg("strides", {stride, stride})
.AddIntArg("padding", padding)
.AddIntsArg("dilations", {1, 1})
.Finalize(net.NewOperatorDef());
// Add input data
net.AddRandomInput<D, float>("Input", {batch, channels, height, width});
......
......@@ -19,7 +19,7 @@ static void GlobalAvgPooling(
OpDefBuilder("GlobalAvgPooling", "GlobalAvgPoolingTest")
.Input("Input")
.Output("Output")
.Finalize(net.operator_def());
.Finalize(net.NewOperatorDef());
// Add input data
net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, channels, height, width});
......
......@@ -14,7 +14,7 @@ TEST_F(GlobalAvgPoolingOpTest, 3x7x7_CPU) {
OpDefBuilder("GlobalAvgPooling", "GlobalAvgPoolingTest")
.Input("Input")
.Output("Output")
.Finalize(net.operator_def());
.Finalize(net.NewOperatorDef());
// Add input data
std::vector<float> input(147);
......@@ -38,7 +38,7 @@ TEST_F(GlobalAvgPoolingOpTest, 3x7x7_NEON) {
OpDefBuilder("GlobalAvgPooling", "GlobalAvgPoolingTest")
.Input("Input")
.Output("Output")
.Finalize(net.operator_def());
.Finalize(net.NewOperatorDef());
// Add input data
std::vector<float> input(147);
......
......@@ -17,21 +17,70 @@ namespace mace {
class OpDefBuilder {
public:
OpDefBuilder(const char *type, const char *name) {
OpDefBuilder(const char *type, const std::string &name) {
op_def_.set_type(type);
op_def_.set_name(name);
}
OpDefBuilder &Input(const char *input_name) {
OpDefBuilder &Input(const std::string &input_name) {
op_def_.add_input(input_name);
return *this;
}
OpDefBuilder &Output(const char *output_name) {
OpDefBuilder &Output(const std::string &output_name) {
op_def_.add_output(output_name);
return *this;
}
OpDefBuilder &AddIntArg(const std::string &name, const int value) {
auto arg = op_def_.add_arg();
arg->set_name(name);
arg->set_i(value);
return *this;
}
OpDefBuilder &AddFloatArg(const std::string &name, const float value) {
auto arg = op_def_.add_arg();
arg->set_name(name);
arg->set_f(value);
return *this;
}
OpDefBuilder &AddStringArg(const std::string &name, const char *value) {
auto arg = op_def_.add_arg();
arg->set_name(name);
arg->set_s(value);
return *this;
}
OpDefBuilder &AddIntsArg(const std::string &name, const std::vector<int> &values) {
auto arg = op_def_.add_arg();
arg->set_name(name);
for (auto value : values) {
arg->add_ints(value);
}
return *this;
}
OpDefBuilder &AddFloatsArg(const std::string &name, const std::vector<float> &values) {
auto arg = op_def_.add_arg();
arg->set_name(name);
for (auto value : values) {
arg->add_floats(value);
}
return *this;
}
OpDefBuilder &AddStringsArg(const std::string &name,
const std::vector<const char *> &values) {
auto arg = op_def_.add_arg();
arg->set_name(name);
for (auto value : values) {
arg->add_strings(value);
}
return *this;
}
void Finalize(OperatorDef *op_def) const {
MACE_CHECK(op_def != nullptr, "input should not be null.");
*op_def = op_def_;
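With args moved onto the builder, a typical test now configures everything in one chain (this pattern appears throughout the updated tests below):

OpDefBuilder("Conv2d", "Conv2dTest")
    .Input("Input")
    .Input("Filter")
    .Input("Bias")
    .Output("Output")
    .AddIntsArg("strides", {1, 1})
    .AddIntArg("padding", Padding::VALID)
    .AddIntsArg("dilations", {1, 1})
    .Finalize(net.NewOperatorDef());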
......@@ -45,7 +94,7 @@ class OpsTestNet {
OpsTestNet() {}
template <DeviceType D, typename T>
void AddInputFromArray(const char *name,
void AddInputFromArray(const std::string &name,
const std::vector<index_t> &shape,
const std::vector<T> &data) {
Tensor *input =
......@@ -58,7 +107,7 @@ class OpsTestNet {
}
template <DeviceType D, typename T>
void AddRepeatedInput(const char *name,
void AddRepeatedInput(const std::string &name,
const std::vector<index_t> &shape,
const T data) {
Tensor *input =
......@@ -70,7 +119,7 @@ class OpsTestNet {
}
template <DeviceType D, typename T>
void AddRandomInput(const char *name,
void AddRandomInput(const std::string &name,
const std::vector<index_t> &shape,
bool positive = false) {
Tensor *input =
......@@ -89,56 +138,18 @@ class OpsTestNet {
});
}
void AddIntArg(const char *name, const int value) {
auto arg = op_def_.add_arg();
arg->set_name(name);
arg->set_i(value);
}
void AddFloatArg(const char *name, const float value) {
auto arg = op_def_.add_arg();
arg->set_name(name);
arg->set_f(value);
}
void AddStringArg(const char *name, const char *value) {
auto arg = op_def_.add_arg();
arg->set_name(name);
arg->set_s(value);
}
void AddIntsArg(const char *name, const std::vector<int> &values) {
auto arg = op_def_.add_arg();
arg->set_name(name);
for (auto value : values) {
arg->add_ints(value);
}
}
void AddFloatsArg(const char *name, const std::vector<float> &values) {
auto arg = op_def_.add_arg();
arg->set_name(name);
for (auto value : values) {
arg->add_floats(value);
}
}
void AddStringsArg(const char *name,
const std::vector<const char *> &values) {
auto arg = op_def_.add_arg();
arg->set_name(name);
for (auto value : values) {
arg->add_strings(value);
}
OperatorDef *NewOperatorDef() {
op_defs_.emplace_back(OperatorDef());
return &op_defs_[op_defs_.size() - 1];
}
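// Note: the returned pointer aliases storage inside op_defs_; a later
// emplace_back can reallocate the vector and invalidate earlier
// pointers, so each OperatorDef should be finalized before the next
// NewOperatorDef call.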
OperatorDef *operator_def() { return &op_def_; }
Workspace *ws() { return &ws_; }
bool RunOp(DeviceType device) {
NetDef net_def;
net_def.add_op()->CopyFrom(op_def_);
for (auto &op_def_ : op_defs_) {
net_def.add_op()->CopyFrom(op_def_);
}
VLOG(3) << net_def.DebugString();
net_ = CreateNet(net_def, &ws_, device);
device_ = device;
......@@ -159,7 +170,7 @@ class OpsTestNet {
public:
Workspace ws_;
OperatorDef op_def_;
std::vector<OperatorDef> op_defs_;
std::unique_ptr<NetBase> net_;
DeviceType device_;
};
......
......@@ -27,14 +27,12 @@ static void Pooling(int iters,
OpDefBuilder("Pooling", "PoolingTest")
.Input("Input")
.Output("Output")
.Finalize(net.operator_def());
// Add args
net.AddIntArg("pooling_type", pooling_type);
net.AddIntsArg("kernels", {kernel, kernel});
net.AddIntsArg("strides", {stride, stride});
net.AddIntArg("padding", padding);
net.AddIntsArg("dilations", {1, 1});
.AddIntArg("pooling_type", pooling_type)
.AddIntsArg("kernels", {kernel, kernel})
.AddIntsArg("strides", {stride, stride})
.AddIntArg("padding", padding)
.AddIntsArg("dilations", {1, 1})
.Finalize(net.NewOperatorDef());
// Add input data
net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, channels, height, width});
......
......@@ -19,14 +19,12 @@ TEST_F(PoolingOpTest, MAX_VALID) {
OpDefBuilder("Pooling", "PoolingTest")
.Input("Input")
.Output("Output")
.Finalize(net.operator_def());
// Add args
net.AddIntsArg("kernels", {2, 2});
net.AddIntsArg("strides", {2, 2});
net.AddIntArg("padding", Padding::VALID);
net.AddIntsArg("dilations", {1, 1});
net.AddIntArg("pooling_type", PoolingType::MAX);
.AddIntsArg("kernels", {2, 2})
.AddIntsArg("strides", {2, 2})
.AddIntArg("padding", Padding::VALID)
.AddIntsArg("dilations", {1, 1})
.AddIntArg("pooling_type", PoolingType::MAX)
.Finalize(net.NewOperatorDef());
// Add input data
net.AddInputFromArray<DeviceType::CPU, float>(
......@@ -50,14 +48,12 @@ TEST_F(PoolingOpTest, AVG_VALID) {
OpDefBuilder("Pooling", "PoolingTest")
.Input("Input")
.Output("Output")
.Finalize(net.operator_def());
// Add args
net.AddIntsArg("kernels", {2, 2});
net.AddIntsArg("strides", {2, 2});
net.AddIntArg("padding", Padding::VALID);
net.AddIntsArg("dilations", {1, 1});
net.AddIntArg("pooling_type", PoolingType::AVG);
.AddIntsArg("kernels", {2, 2})
.AddIntsArg("strides", {2, 2})
.AddIntArg("padding", Padding::VALID)
.AddIntsArg("dilations", {1, 1})
.AddIntArg("pooling_type", PoolingType::AVG)
.Finalize(net.NewOperatorDef());
// Add input data
net.AddInputFromArray<DeviceType::CPU, float>(
......@@ -81,14 +77,12 @@ TEST_F(PoolingOpTest, MAX_SAME) {
OpDefBuilder("Pooling", "PoolingTest")
.Input("Input")
.Output("Output")
.Finalize(net.operator_def());
// Add args
net.AddIntsArg("kernels", {2, 2});
net.AddIntsArg("strides", {2, 2});
net.AddIntArg("padding", Padding::SAME);
net.AddIntsArg("dilations", {1, 1});
net.AddIntArg("pooling_type", PoolingType::MAX);
.AddIntsArg("kernels", {2, 2})
.AddIntsArg("strides", {2, 2})
.AddIntArg("padding", Padding::SAME)
.AddIntsArg("dilations", {1, 1})
.AddIntArg("pooling_type", PoolingType::MAX)
.Finalize(net.NewOperatorDef());
// Add input data
net.AddInputFromArray<DeviceType::CPU, float>("Input", {1, 1, 3, 3},
......@@ -109,14 +103,12 @@ TEST_F(PoolingOpTest, MAX_VALID_DILATION) {
OpDefBuilder("Pooling", "PoolingTest")
.Input("Input")
.Output("Output")
.Finalize(net.operator_def());
// Add args
net.AddIntsArg("kernels", {2, 2});
net.AddIntsArg("strides", {1, 1});
net.AddIntArg("padding", Padding::VALID);
net.AddIntsArg("dilations", {2, 2});
net.AddIntArg("pooling_type", PoolingType::MAX);
.AddIntsArg("kernels", {2, 2})
.AddIntsArg("strides", {1, 1})
.AddIntArg("padding", Padding::VALID)
.AddIntsArg("dilations", {2, 2})
.AddIntArg("pooling_type", PoolingType::MAX)
.Finalize(net.NewOperatorDef());
// Add input data
net.AddInputFromArray<DeviceType::CPU, float>(
......@@ -138,14 +130,12 @@ TEST_F(PoolingOpTest, MAX_k2x2s2x2) {
OpDefBuilder("Pooling", "PoolingTest")
.Input("Input")
.Output("Output")
.Finalize(net.operator_def());
// Add args
net.AddIntArg("pooling_type", PoolingType::MAX);
net.AddIntsArg("kernels", {2, 2});
net.AddIntsArg("strides", {2, 2});
net.AddIntArg("padding", Padding::SAME);
net.AddIntsArg("dilations", {1, 1});
.AddIntArg("pooling_type", PoolingType::MAX)
.AddIntsArg("kernels", {2, 2})
.AddIntsArg("strides", {2, 2})
.AddIntArg("padding", Padding::SAME)
.AddIntsArg("dilations", {1, 1})
.Finalize(net.NewOperatorDef());
// Add input data
net.AddInputFromArray<DeviceType::CPU, float>(
......@@ -166,14 +156,12 @@ TEST_F(PoolingOpTest, MAX_k3x3s2x2) {
OpDefBuilder("Pooling", "PoolingTest")
.Input("Input")
.Output("Output")
.Finalize(net.operator_def());
// Add args
net.AddIntArg("pooling_type", PoolingType::MAX);
net.AddIntsArg("kernels", {3, 3});
net.AddIntsArg("strides", {2, 2});
net.AddIntArg("padding", Padding::VALID);
net.AddIntsArg("dilations", {1, 1});
.AddIntArg("pooling_type", PoolingType::MAX)
.AddIntsArg("kernels", {3, 3})
.AddIntsArg("strides", {2, 2})
.AddIntArg("padding", Padding::VALID)
.AddIntsArg("dilations", {1, 1})
.Finalize(net.NewOperatorDef());
// Add input data
net.AddInputFromArray<DeviceType::CPU, float>(
......@@ -195,14 +183,12 @@ TEST_F(PoolingOpTest, AVG_k2x2s2x2) {
OpDefBuilder("Pooling", "PoolingTest")
.Input("Input")
.Output("Output")
.Finalize(net.operator_def());
// Add args
net.AddIntArg("pooling_type", PoolingType::AVG);
net.AddIntsArg("kernels", {2, 2});
net.AddIntsArg("strides", {2, 2});
net.AddIntArg("padding", Padding::SAME);
net.AddIntsArg("dilations", {1, 1});
.AddIntArg("pooling_type", PoolingType::AVG)
.AddIntsArg("kernels", {2, 2})
.AddIntsArg("strides", {2, 2})
.AddIntArg("padding", Padding::SAME)
.AddIntsArg("dilations", {1, 1})
.Finalize(net.NewOperatorDef());
// Add input data
net.AddInputFromArray<DeviceType::CPU, float>(
......
......@@ -16,7 +16,7 @@ static void ReluBenchmark(int iters, int size) {
OpDefBuilder("Relu", "ReluBM")
.Input("Input")
.Output("Output")
.Finalize(net.operator_def());
.Finalize(net.NewOperatorDef());
// Add input data
net.AddRandomInput<DeviceType::CPU, float>("Input", {size});
......
......@@ -15,7 +15,7 @@ TEST_F(ReluOpTest, ReluOp) {
OpDefBuilder("Relu", "ReluTest")
.Input("Input")
.Output("Output")
.Finalize(net.operator_def());
.Finalize(net.NewOperatorDef());
// Add input data
net.AddRandomInput<DeviceType::CPU, float>("Input", {1, 2, 3, 5});
......@@ -38,11 +38,11 @@ TEST_F(ReluOpTest, ReluOpWithMax) {
OpDefBuilder("Relu", "ReluTestWithMax")
.Input("Input")
.Output("Output")
.Finalize(net.operator_def());
.AddFloatArg("max_limit", 0.5)
.Finalize(net.NewOperatorDef());
// Add input data
net.AddRandomInput<DeviceType::CPU, float>("Input", {1, 2, 3, 5});
net.AddFloatArg("max_limit", 0.5);
// Run
net.RunOp();
......
......@@ -18,7 +18,7 @@ TEST_F(ResizeBilinearTest, ResizeBilinearWOAlignCorners) {
.Input("Input")
.Input("OutSize")
.Output("Output")
.Finalize(net.operator_def());
.Finalize(net.NewOperatorDef());
// Add input data
vector<float> input(24);
......@@ -43,9 +43,8 @@ TEST_F(ResizeBilinearTest, ResizeBilinearWAlignCorners) {
.Input("Input")
.Input("OutSize")
.Output("Output")
.Finalize(net.operator_def());
net.AddIntArg("align_corners", 1);
.AddIntArg("align_corners", 1)
.Finalize(net.NewOperatorDef());
// Add input data
vector<float> input(24);
......