提交 0461beb5 编写于 作者: L Liangliang He

Merge branch 'opencl' into 'master'

Update conv2d benchmark test

See merge request !81
...@@ -12,11 +12,11 @@ load("//mace:mace.bzl", "if_android") ...@@ -12,11 +12,11 @@ load("//mace:mace.bzl", "if_android")
cc_library( cc_library(
name = "opencl_runtime", name = "opencl_runtime",
srcs = glob([ srcs = glob([
"runtime/opencl/cl.hpp",
"runtime/opencl/cl2.hpp",
"runtime/opencl/*.cc", "runtime/opencl/*.cc",
]), ]),
hdrs = glob([ hdrs = glob([
"runtime/opencl/cl.hpp",
"runtime/opencl/cl2.hpp",
"runtime/opencl/*.h", "runtime/opencl/*.h",
]), ]),
copts = ["-std=c++11"], copts = ["-std=c++11"],
......
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#ifndef MACE_CORE_RUNTIME_OPENCL_CL2_HEADER_H_
#define MACE_CORE_RUNTIME_OPENCL_CL2_HEADER_H_
#define CL_HPP_TARGET_OPENCL_VERSION 200
#include "mace/core/runtime/opencl/cl2.hpp"
#endif // MACE_CORE_RUNTIME_OPENCL_CL2_HEADER_H_
...@@ -2,8 +2,8 @@ ...@@ -2,8 +2,8 @@
// Copyright (c) 2017 XiaoMi All rights reserved. // Copyright (c) 2017 XiaoMi All rights reserved.
// //
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/opencl_allocator.h" #include "mace/core/runtime/opencl/opencl_allocator.h"
#include "mace/core/runtime/opencl/cl2.hpp"
#include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/runtime/opencl/opencl_runtime.h"
namespace mace { namespace mace {
......
...@@ -26,6 +26,7 @@ bool ReadSourceFile(const std::string &filename, std::string *content) { ...@@ -26,6 +26,7 @@ bool ReadSourceFile(const std::string &filename, std::string *content) {
std::string line; std::string line;
while (std::getline(ifs, line)) { while (std::getline(ifs, line)) {
*content += line; *content += line;
*content += "\n";
} }
ifs.close(); ifs.close();
return true; return true;
...@@ -66,14 +67,15 @@ bool BuildProgram(OpenCLRuntime *runtime, ...@@ -66,14 +67,15 @@ bool BuildProgram(OpenCLRuntime *runtime,
*program = cl::Program(runtime->context(), sources); *program = cl::Program(runtime->context(), sources);
std::string build_options = "-Werror -cl-mad-enable -cl-fast-relaxed-math -I" + path; std::string build_options = "-Werror -cl-mad-enable -cl-fast-relaxed-math -I" + path;
// TODO(heliangliang) -cl-unsafe-math-optimizations -cl-fast-relaxed-math // TODO(heliangliang) -cl-unsafe-math-optimizations -cl-fast-relaxed-math
if (program->build({runtime->device()}, build_options.c_str()) != CL_SUCCESS) { cl_int ret = program->build({runtime->device()}, build_options.c_str());
if (ret != CL_SUCCESS) {
if (program->getBuildInfo<CL_PROGRAM_BUILD_STATUS>(runtime->device()) == if (program->getBuildInfo<CL_PROGRAM_BUILD_STATUS>(runtime->device()) ==
CL_BUILD_ERROR) { CL_BUILD_ERROR) {
std::string build_log = std::string build_log =
program->getBuildInfo<CL_PROGRAM_BUILD_LOG>(runtime->device()); program->getBuildInfo<CL_PROGRAM_BUILD_LOG>(runtime->device());
LOG(INFO) << "Program build log: " << build_log; LOG(INFO) << "Program build log: " << build_log;
} }
LOG(FATAL) << "Build program failed"; LOG(FATAL) << "Build program failed: " << ret;
} }
return true; return true;
......
...@@ -5,14 +5,10 @@ ...@@ -5,14 +5,10 @@
#ifndef MACE_CORE_RUNTIME_OPENCL_OPENCL_RUNTIME_H_ #ifndef MACE_CORE_RUNTIME_OPENCL_OPENCL_RUNTIME_H_
#define MACE_CORE_RUNTIME_OPENCL_OPENCL_RUNTIME_H_ #define MACE_CORE_RUNTIME_OPENCL_OPENCL_RUNTIME_H_
#ifndef CL_HPP_TARGET_OPENCL_VERSION
#define CL_HPP_TARGET_OPENCL_VERSION 200
#endif
#include <map> #include <map>
#include <mutex> #include <mutex>
#include "mace/core/runtime/opencl/cl2.hpp" #include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/opencl_wrapper.h" #include "mace/core/runtime/opencl/opencl_wrapper.h"
namespace mace { namespace mace {
......
...@@ -126,6 +126,12 @@ class OpenCLLibraryImpl final { ...@@ -126,6 +126,12 @@ class OpenCLLibraryImpl final {
using clRetainKernelFunc = cl_int (*)(cl_kernel kernel); using clRetainKernelFunc = cl_int (*)(cl_kernel kernel);
using clCreateBufferFunc = using clCreateBufferFunc =
cl_mem (*)(cl_context, cl_mem_flags, size_t, void *, cl_int *); cl_mem (*)(cl_context, cl_mem_flags, size_t, void *, cl_int *);
using clCreateImageFunc = cl_mem (*)(cl_context,
cl_mem_flags,
const cl_image_format *,
const cl_image_desc *,
void *,
cl_int *);
using clCreateProgramWithSourceFunc = cl_program (*)( using clCreateProgramWithSourceFunc = cl_program (*)(
cl_context, cl_uint, const char **, const size_t *, cl_int *); cl_context, cl_uint, const char **, const size_t *, cl_int *);
using clReleaseKernelFunc = cl_int (*)(cl_kernel kernel); using clReleaseKernelFunc = cl_int (*)(cl_kernel kernel);
...@@ -136,8 +142,12 @@ class OpenCLLibraryImpl final { ...@@ -136,8 +142,12 @@ class OpenCLLibraryImpl final {
using clRetainDeviceFunc = cl_int (*)(cl_device_id); using clRetainDeviceFunc = cl_int (*)(cl_device_id);
using clReleaseDeviceFunc = cl_int (*)(cl_device_id); using clReleaseDeviceFunc = cl_int (*)(cl_device_id);
using clRetainEventFunc = cl_int (*)(cl_event); using clRetainEventFunc = cl_int (*)(cl_event);
using clGetKernelWorkGroupInfoFunc = using clGetKernelWorkGroupInfoFunc = cl_int (*)(cl_kernel,
cl_int (*)(cl_kernel, cl_device_id, cl_kernel_work_group_info, size_t, void *, size_t *); cl_device_id,
cl_kernel_work_group_info,
size_t,
void *,
size_t *);
#define DEFINE_FUNC_PTR(func) func##Func func = nullptr #define DEFINE_FUNC_PTR(func) func##Func func = nullptr
...@@ -149,6 +159,7 @@ class OpenCLLibraryImpl final { ...@@ -149,6 +159,7 @@ class OpenCLLibraryImpl final {
DEFINE_FUNC_PTR(clReleaseKernel); DEFINE_FUNC_PTR(clReleaseKernel);
DEFINE_FUNC_PTR(clCreateProgramWithSource); DEFINE_FUNC_PTR(clCreateProgramWithSource);
DEFINE_FUNC_PTR(clCreateBuffer); DEFINE_FUNC_PTR(clCreateBuffer);
DEFINE_FUNC_PTR(clCreateImage);
DEFINE_FUNC_PTR(clRetainKernel); DEFINE_FUNC_PTR(clRetainKernel);
DEFINE_FUNC_PTR(clCreateKernel); DEFINE_FUNC_PTR(clCreateKernel);
DEFINE_FUNC_PTR(clGetProgramInfo); DEFINE_FUNC_PTR(clGetProgramInfo);
...@@ -269,6 +280,7 @@ void *OpenCLLibraryImpl::LoadFromPath(const std::string &path) { ...@@ -269,6 +280,7 @@ void *OpenCLLibraryImpl::LoadFromPath(const std::string &path) {
ASSIGN_FROM_DLSYM(clReleaseKernel); ASSIGN_FROM_DLSYM(clReleaseKernel);
ASSIGN_FROM_DLSYM(clCreateProgramWithSource); ASSIGN_FROM_DLSYM(clCreateProgramWithSource);
ASSIGN_FROM_DLSYM(clCreateBuffer); ASSIGN_FROM_DLSYM(clCreateBuffer);
ASSIGN_FROM_DLSYM(clCreateImage);
ASSIGN_FROM_DLSYM(clRetainKernel); ASSIGN_FROM_DLSYM(clRetainKernel);
ASSIGN_FROM_DLSYM(clCreateKernel); ASSIGN_FROM_DLSYM(clCreateKernel);
ASSIGN_FROM_DLSYM(clGetProgramInfo); ASSIGN_FROM_DLSYM(clGetProgramInfo);
...@@ -708,6 +720,24 @@ cl_mem clCreateBuffer(cl_context context, ...@@ -708,6 +720,24 @@ cl_mem clCreateBuffer(cl_context context,
} }
} }
cl_mem clCreateImage(cl_context context,
cl_mem_flags flags,
const cl_image_format *image_format,
const cl_image_desc *image_desc,
void *host_ptr,
cl_int *errcode_ret) {
auto func = mace::OpenCLLibraryImpl::Get().clCreateImage;
if (func != nullptr) {
return func(context, flags, image_format, image_desc, host_ptr,
errcode_ret);
} else {
if (errcode_ret != nullptr) {
*errcode_ret = CL_OUT_OF_RESOURCES;
}
return nullptr;
}
}
cl_program clCreateProgramWithSource(cl_context context, cl_program clCreateProgramWithSource(cl_context context,
cl_uint count, cl_uint count,
const char **strings, const char **strings,
...@@ -795,8 +825,8 @@ cl_int clGetKernelWorkGroupInfo(cl_kernel kernel, ...@@ -795,8 +825,8 @@ cl_int clGetKernelWorkGroupInfo(cl_kernel kernel,
size_t *param_value_size_ret) { size_t *param_value_size_ret) {
auto func = mace::OpenCLLibraryImpl::Get().clGetKernelWorkGroupInfo; auto func = mace::OpenCLLibraryImpl::Get().clGetKernelWorkGroupInfo;
if (func != nullptr) { if (func != nullptr) {
return func(kernel, device, param_name, param_value_size, return func(kernel, device, param_name, param_value_size, param_value,
param_value, param_value_size_ret); param_value_size_ret);
} else { } else {
return CL_OUT_OF_RESOURCES; return CL_OUT_OF_RESOURCES;
} }
......
...@@ -3,7 +3,7 @@ ...@@ -3,7 +3,7 @@
// //
#include "mace/kernels/batch_norm.h" #include "mace/kernels/batch_norm.h"
#include "mace/core/runtime/opencl/cl2.hpp" #include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/utils/tuner.h" #include "mace/utils/tuner.h"
...@@ -79,4 +79,4 @@ void BatchNormFunctor<DeviceType::OPENCL, float>::operator()( ...@@ -79,4 +79,4 @@ void BatchNormFunctor<DeviceType::OPENCL, float>::operator()(
} }
} // namespace kernels } // namespace kernels
} // namespace mace } // namespace mace
\ No newline at end of file
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
void kernel conv_2d_1x1_naive(global const float *input, /* n, c, h, w */ #include <common.h>
global const float *filter, /* o, i, kh, kw */
global const float *bias, /* o */ __kernel void conv_2d_1x1_naive(__global const float *input, /* n, c, h, w */
global float *output, /* n, c, h, w */ __global const float *filter, /* o, i, kh, kw */
private const int input_channels) { __global const float *bias, /* o */
__global float *output, /* n, c, h, w */
__private const int in_chan_num) {
const int batch = get_global_id(0); const int batch = get_global_id(0);
const int channel = get_global_id(1); const int channel = get_global_id(1);
const int channels = get_global_size(1); const int channels = get_global_size(1);
const int pixel = get_global_id(2); const int pixel = get_global_id(2);
const int pixels = get_global_size(2); const int pixels = get_global_size(2);
float *output_ptr = output + (batch * channels + channel) * pixels; float *output_ptr = output + (batch * channels + channel) * pixels;
output_ptr[pixel] = bias[channel]; output_ptr[pixel] = bias[channel];
for (int inc = 0; inc < input_channels; ++inc) { for (int inc = 0; inc < in_chan_num; ++inc) {
const float *input_ptr = input + (batch * input_channels + inc) * pixels + pixel; const float *input_ptr = input + (batch * in_chan_num + inc) * pixels + pixel;
const float weights = filter[channel * input_channels + inc]; const float weights = filter[channel * in_chan_num + inc];
float in = input_ptr[0]; float in = input_ptr[0];
float out = output_ptr[0]; float out = output_ptr[0];
out += in * weights; out += in * weights;
...@@ -23,17 +24,19 @@ void kernel conv_2d_1x1_naive(global const float *input, /* n, c, h, w */ ...@@ -23,17 +24,19 @@ void kernel conv_2d_1x1_naive(global const float *input, /* n, c, h, w */
} }
} }
void kernel conv_2d_1x1_v2(global const float *input, /* n, c, h, w */ __kernel void conv_2d_1x1_v2(__global const float *input, /* n, c, h, w */
global const float *filter, /* o, i, kh, kw */ __global const float *filter, /* o, i, kh, kw */
global const float *bias, /* o */ __global const float *bias, /* o */
global float *output, /* n, c, h, w */ __global float *output, /* n, c, h, w */
private const int in_chan_num, __private const int in_chan_num,
private const int out_chan_num, __private const int out_chan_num,
private const int pixel_num) { __private const int pixel_num) {
int batch = get_global_id(0); int batch = get_global_id(0);
int out_chan_blk = get_global_id(1); int out_chan_blk = get_global_id(1);
int out_pixel_blk = get_global_id(2); int out_pixel_blk = get_global_id(2);
__private float output_slice[4 * 4];
const int out_chan_begin = out_chan_blk * 4; const int out_chan_begin = out_chan_blk * 4;
const int out_chan_end = min(out_chan_begin + 4, out_chan_num); const int out_chan_end = min(out_chan_begin + 4, out_chan_num);
const int out_pixel_begin = out_pixel_blk * 4; const int out_pixel_begin = out_pixel_blk * 4;
...@@ -41,21 +44,23 @@ void kernel conv_2d_1x1_v2(global const float *input, /* n, c, h, w */ ...@@ -41,21 +44,23 @@ void kernel conv_2d_1x1_v2(global const float *input, /* n, c, h, w */
const int in_offset = batch * in_chan_num * pixel_num; const int in_offset = batch * in_chan_num * pixel_num;
const int out_offset = batch * out_chan_num * pixel_num; const int out_offset = batch * out_chan_num * pixel_num;
const float *input_base = input + in_offset + out_pixel_begin; const float *input_base = input + in_offset + out_pixel_begin;
float *output_base = output + out_offset + out_pixel_begin; float *output_base = output + out_offset + out_pixel_begin;
int pixels = out_pixel_end - out_pixel_begin; int out_chan_len = out_chan_end - out_chan_begin;
int pixel_len = out_pixel_end - out_pixel_begin;
for (int out_chan = out_chan_begin; out_chan < out_chan_end; ++out_chan) { for (int out_chan = out_chan_begin; out_chan < out_chan_end; ++out_chan) {
float bias_value = bias[out_chan]; float bias_value = bias[out_chan];
float *output_ptr = output_base + out_chan * pixel_num; int out_chan_offset = out_chan - out_chan_begin;
for (int p = 0; p < pixels; ++p) { for (int p = 0; p < pixel_len; ++p) {
output_ptr[p] = bias_value; output_slice[out_chan_offset * 4 + p] = bias_value;
} }
} }
int in_chan = 0; int in_chan = 0;
if (pixels == 4) { if (pixel_len == 4) {
for (; in_chan + 3 < in_chan_num; in_chan += 4) { for (; in_chan + 3 < in_chan_num; in_chan += 4) {
const float *input_ptr = input_base + in_chan * pixel_num; const float *input_ptr = input_base + in_chan * pixel_num;
int out_chan = out_chan_begin; int out_chan = out_chan_begin;
...@@ -66,30 +71,32 @@ void kernel conv_2d_1x1_v2(global const float *input, /* n, c, h, w */ ...@@ -66,30 +71,32 @@ void kernel conv_2d_1x1_v2(global const float *input, /* n, c, h, w */
float4 in1 = vload4(0, input_ptr + pixel_num); float4 in1 = vload4(0, input_ptr + pixel_num);
float4 in2 = vload4(0, input_ptr + 2 * pixel_num); float4 in2 = vload4(0, input_ptr + 2 * pixel_num);
float4 in3 = vload4(0, input_ptr + 3 * pixel_num); float4 in3 = vload4(0, input_ptr + 3 * pixel_num);
#pragma unroll
for (int oc = 0; oc < 4; ++oc) { for (int oc = 0; oc < 4; ++oc) {
float4 weights = vload4(0, filter_ptr + oc * in_chan_num); float4 weights = vload4(0, filter_ptr + oc * in_chan_num);
float4 out = vload4(0, output_ptr + oc * pixel_num); float4 out = vload4(oc, output_slice);
out += in0 * weights.x; out += in0 * weights.x;
out += in1 * weights.y; out += in1 * weights.y;
out += in2 * weights.z; out += in2 * weights.z;
out += in3 * weights.w; out += in3 * weights.w;
vstore4(out, 0, output_ptr + oc * pixel_num); vstore4(out, oc, output_slice);
} }
} }
for (; out_chan < out_chan_end; ++out_chan) { for (; out_chan < out_chan_end; ++out_chan) {
const float* filter_ptr = filter + out_chan * in_chan_num + in_chan; const float* filter_ptr = filter + out_chan * in_chan_num + in_chan;
float *output_ptr = output_base + out_chan * pixel_num; float *output_ptr = output_base + out_chan * pixel_num;
int out_chan_offset = out_chan - out_chan_begin;
float4 weights = vload4(0, filter_ptr); float4 weights = vload4(0, filter_ptr);
float4 in0 = vload4(0, input_ptr); float4 in0 = vload4(0, input_ptr);
float4 in1 = vload4(0, input_ptr + pixel_num); float4 in1 = vload4(0, input_ptr + pixel_num);
float4 in2 = vload4(0, input_ptr + 2 * pixel_num); float4 in2 = vload4(0, input_ptr + 2 * pixel_num);
float4 in3 = vload4(0, input_ptr + 3 * pixel_num); float4 in3 = vload4(0, input_ptr + 3 * pixel_num);
float4 out = vload4(0, output_ptr); float4 out = vload4(out_chan_offset, output_slice);
out += in0 * weights.x; out += in0 * weights.x;
out += in1 * weights.y; out += in1 * weights.y;
out += in2 * weights.z; out += in2 * weights.z;
out += in3 * weights.w; out += in3 * weights.w;
vstore4(out, 0, output_ptr); vstore4(out, out_chan_offset, output_slice);
} }
} }
} }
...@@ -99,13 +106,71 @@ void kernel conv_2d_1x1_v2(global const float *input, /* n, c, h, w */ ...@@ -99,13 +106,71 @@ void kernel conv_2d_1x1_v2(global const float *input, /* n, c, h, w */
for (int out_chan = out_chan_begin; out_chan < out_chan_end; ++out_chan) { for (int out_chan = out_chan_begin; out_chan < out_chan_end; ++out_chan) {
float weights = filter[out_chan * in_chan_num + in_chan]; float weights = filter[out_chan * in_chan_num + in_chan];
float *output_ptr = output_base + out_chan * pixel_num; float *output_ptr = output_base + out_chan * pixel_num;
int out_chan_offset = out_chan - out_chan_begin;
for (int p = 0; p < pixels; ++p) { for (int p = 0; p < pixel_len; ++p) {
float in = input_ptr[p]; float in = input_ptr[p];
float out = output_ptr[p]; output_slice[out_chan_offset * 4 + p] += in * weights;
out += in * weights;
output_ptr[p] = out;
} }
} }
} }
for (int out_chan_offset = 0; out_chan_offset < out_chan_len; ++out_chan_offset) {
int out_chan = out_chan_begin + out_chan_offset;
float *output_ptr = output_base + out_chan * pixel_num;
if (pixel_len == 4) {
float4 out = vload4(out_chan_offset, output_slice);
vstore4(out, 0, output_ptr);
} else {
int offset = out_chan_offset << 2;
for (int p = 0; p < pixel_len; ++p) {
output_ptr[p] = output_slice[offset + p];
}
}
}
}
/* FIXME this is incomplete */
__kernel void conv_2d_1x1_v3(__read_only image3d_t input, /* n, c/4, h, w, 4 */
__global const float *filter, /* o, i, kh, kw */
__global const float *bias, /* o */
__write_only image3d_t output, /* n, c/4, h, w, 4 */
__private const int batch_num,
__private const int in_chan_num,
__private const int out_chan_num,
__private const int height,
__private const int width) {
int out_chan_blk = get_global_id(0);
int h = get_global_id(1);
int w = get_global_id(2);
int in_chan_blk_num = (in_chan_num + 3) / 4;
int out_chan_blk_num = (out_chan_num + 3) / 4;
const sampler_t sampler = CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST;
for (int batch = 0; batch < batch_num; ++batch) {
float4 bias_value = vload4(out_chan_blk, bias);
__private float4 out = bias_value;
for (int in_chan_blk = 0; in_chan_blk < in_chan_blk_num; ++in_chan_blk) {
int in_d = batch * in_chan_blk_num + in_chan_blk;
float4 in = read_imagef(input, sampler, (int4)(in_d, h, w, 0));
const float *filter_base = filter + (out_chan_blk << 2) * in_chan_num;
float4 weights = vload4(in_chan_blk, filter_base);
out.x += dot(in, weights);
weights = vload4(in_chan_blk, filter_base + in_chan_num);
out.y += dot(in, weights);
weights = vload4(in_chan_blk, filter_base + in_chan_num * 2);
out.z += dot(in, weights);
weights = vload4(in_chan_blk, filter_base + in_chan_num * 3);
out.w += dot(in, weights);
}
int out_d = batch * out_chan_blk_num + out_chan_blk;
int4 out_coord = (int4)(out_d, h, w, 0);
write_imagef(output, out_coord, out);
}
} }
...@@ -2,9 +2,10 @@ ...@@ -2,9 +2,10 @@
// Copyright (c) 2017 XiaoMi All rights reserved. // Copyright (c) 2017 XiaoMi All rights reserved.
// //
#include "mace/kernels/conv_2d.h"
#include "mace/core/common.h" #include "mace/core/common.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/opencl_runtime.h" #include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/kernels/conv_2d.h"
#include "mace/utils/utils.h" #include "mace/utils/utils.h"
namespace mace { namespace mace {
...@@ -22,22 +23,22 @@ void Conv1x1Naive(const Tensor *input, ...@@ -22,22 +23,22 @@ void Conv1x1Naive(const Tensor *input,
auto runtime = OpenCLRuntime::Get(); auto runtime = OpenCLRuntime::Get();
auto program = runtime->program(); auto program = runtime->program();
auto conv_2d = cl::KernelFunctor<cl::Buffer, cl::Buffer, cl::Buffer, cl::Buffer, auto conv_2d =
int, int>(program, "conv_2d_1x1_naive"); cl::KernelFunctor<cl::Buffer, cl::Buffer, cl::Buffer, cl::Buffer, int,
int>(program, "conv_2d_1x1_naive");
const index_t pixels = height * width; const index_t pixels = height * width;
cl_int error; cl_int error;
conv_2d(cl::EnqueueArgs(runtime->command_queue(), conv_2d(cl::EnqueueArgs(
cl::NDRange(static_cast<int>(batch), runtime->command_queue(),
static_cast<int>(channels), cl::NDRange(static_cast<int>(batch), static_cast<int>(channels),
static_cast<int>(pixels)), static_cast<int>(pixels)),
cl::NDRange(1, 1, 128)), cl::NDRange(1, 1, 128)),
*(static_cast<cl::Buffer *>(input->buffer())), *(static_cast<cl::Buffer *>(input->buffer())),
*(static_cast<cl::Buffer *>(filter->buffer())), *(static_cast<cl::Buffer *>(filter->buffer())),
*(static_cast<cl::Buffer *>(bias->buffer())), *(static_cast<cl::Buffer *>(bias->buffer())),
*(static_cast<cl::Buffer *>(output->buffer())), *(static_cast<cl::Buffer *>(output->buffer())),
static_cast<int>(input_channels), static_cast<int>(input_channels), error);
error);
MACE_CHECK(error == CL_SUCCESS); MACE_CHECK(error == CL_SUCCESS);
} }
...@@ -53,31 +54,126 @@ void Conv1x1V2(const Tensor *input, ...@@ -53,31 +54,126 @@ void Conv1x1V2(const Tensor *input,
auto runtime = OpenCLRuntime::Get(); auto runtime = OpenCLRuntime::Get();
auto program = runtime->program(); auto program = runtime->program();
auto conv_2d = cl::KernelFunctor<cl::Buffer, cl::Buffer, cl::Buffer, cl::Buffer,
int, int, int, int>(program, "conv_2d_1x1_v2");
const index_t pixels = height * width; const index_t pixels = height * width;
const index_t channel_blocks = (channels + 3) / 4; const index_t channel_blocks = (channels + 3) / 4;
const index_t pixel_blocks = (pixels + 3) / 4; const index_t pixel_blocks = (pixels + 3) / 4;
// TODO KernelFunctor has an extra clReleaseCommandQueue due to a copy
// TODO check wired clReleaseCommandQueue latency
// The KernelFunctor can cause segment faults in cb_retain_event
auto conv_2d_kernel = cl::Kernel(program, "conv_2d_1x1_v2");
const uint32_t kwg_size = runtime->GetKernelMaxWorkGroupSize(conv_2d_kernel);
uint32_t idx = 0;
conv_2d_kernel.setArg(idx++,
*(static_cast<const cl::Buffer *>(input->buffer())));
conv_2d_kernel.setArg(idx++,
*(static_cast<const cl::Buffer *>(filter->buffer())));
conv_2d_kernel.setArg(idx++,
*(static_cast<const cl::Buffer *>(bias->buffer())));
conv_2d_kernel.setArg(idx++, *(static_cast<cl::Buffer *>(output->buffer())));
conv_2d_kernel.setArg(idx++, static_cast<int>(input_channels));
conv_2d_kernel.setArg(idx++, static_cast<int>(channels));
conv_2d_kernel.setArg(idx++, static_cast<int>(pixels));
auto command_queue = runtime->command_queue();
cl_int error = command_queue.enqueueNDRangeKernel(
conv_2d_kernel, cl::NullRange,
cl::NDRange(static_cast<int>(batch), static_cast<int>(channel_blocks),
static_cast<int>(pixel_blocks)),
cl::NDRange(1, 2, kwg_size / 2));
MACE_CHECK(error == CL_SUCCESS, error);
}
void Conv1x1V3(const Tensor *input,
const Tensor *filter,
const Tensor *bias,
Tensor *output) {
const index_t batch = output->shape()[0];
const index_t channels = output->shape()[1];
const index_t height = output->shape()[2];
const index_t width = output->shape()[3];
const index_t input_channels = input->shape()[1];
auto runtime = OpenCLRuntime::Get();
auto program = runtime->program();
const index_t pixels = height * width;
const index_t pixel_blocks = (pixels + 3) / 4;
const index_t channel_blocks = (channels + 3) / 4;
const index_t input_channel_blocks = (input_channels + 3) / 4;
// FIXME temp hacking
static std::map<std::ptrdiff_t, cl::Image3D> input_image_map;
static std::map<std::ptrdiff_t, cl::Image3D> output_image_map;
cl::Image3D input_image;
cl::Image3D output_image;
auto input_iter =
input_image_map.find(reinterpret_cast<std::ptrdiff_t>(input->buffer()));
if (input_iter != input_image_map.end()) {
input_image = input_iter->second;
} else {
// The batch dimension is collapsed with channel
cl_int error;
cl::Image3D image =
cl::Image3D(OpenCLRuntime::Get()->context(),
CL_MEM_READ_ONLY | CL_MEM_ALLOC_HOST_PTR,
cl::ImageFormat(CL_RGBA, CL_FLOAT), height, width,
batch * input_channel_blocks, 0, 0, nullptr, &error);
MACE_CHECK(error == CL_SUCCESS);
input_image = image;
input_image_map.clear();
input_image_map.emplace(reinterpret_cast<std::ptrdiff_t>(input->buffer()),
image);
}
auto output_iter =
output_image_map.find(reinterpret_cast<std::ptrdiff_t>(output->buffer()));
if (output_iter != output_image_map.end()) {
output_image = output_iter->second;
} else {
cl_int error;
cl::Image3D image =
cl::Image3D(OpenCLRuntime::Get()->context(),
CL_MEM_WRITE_ONLY | CL_MEM_ALLOC_HOST_PTR,
cl::ImageFormat(CL_RGBA, CL_FLOAT), height, width,
batch * channel_blocks, 0, 0, nullptr, &error);
MACE_CHECK(error == CL_SUCCESS);
output_image = image;
output_image_map.clear();
output_image_map.emplace(reinterpret_cast<std::ptrdiff_t>(output->buffer()),
image);
}
auto conv_2d_kernel = cl::Kernel(program, "conv_2d_1x1_v3");
const uint32_t kwg_size = runtime->GetKernelMaxWorkGroupSize(conv_2d_kernel);
uint32_t idx = 0;
conv_2d_kernel.setArg(idx++, input_image);
conv_2d_kernel.setArg(idx++,
*(static_cast<const cl::Buffer *>(filter->buffer())));
conv_2d_kernel.setArg(idx++,
*(static_cast<const cl::Buffer *>(bias->buffer())));
conv_2d_kernel.setArg(idx++, output_image);
conv_2d_kernel.setArg(idx++, static_cast<int>(batch));
conv_2d_kernel.setArg(idx++, static_cast<int>(input_channels));
conv_2d_kernel.setArg(idx++, static_cast<int>(channels));
conv_2d_kernel.setArg(idx++, static_cast<int>(height));
conv_2d_kernel.setArg(idx++, static_cast<int>(width));
auto command_queue = runtime->command_queue();
cl_int error; cl_int error;
conv_2d(cl::EnqueueArgs(runtime->command_queue(), error = command_queue.enqueueNDRangeKernel(
cl::NDRange(static_cast<int>(batch), conv_2d_kernel, cl::NullRange,
static_cast<int>(channel_blocks), cl::NDRange(static_cast<int>(channel_blocks), static_cast<int>(height),
static_cast<int>(pixel_blocks)), static_cast<int>(width)),
cl::NDRange(1, 1, 256)), cl::NDRange(1, 2, kwg_size / 2));
*(static_cast<cl::Buffer *>(input->buffer())), MACE_CHECK(error == CL_SUCCESS, error);
*(static_cast<cl::Buffer *>(filter->buffer())),
*(static_cast<cl::Buffer *>(bias->buffer())),
*(static_cast<cl::Buffer *>(output->buffer())),
static_cast<int>(input_channels),
static_cast<int>(channels),
static_cast<int>(pixels),
error);
MACE_CHECK(error == CL_SUCCESS);
} }
extern void Conv2dOpenclK1x1S1(const Tensor *input, const Tensor *filter, extern void Conv2dOpenclK1x1S1(const Tensor *input,
const Tensor *bias, Tensor *output) { const Tensor *filter,
const Tensor *bias,
Tensor *output) {
const index_t batch = output->shape()[0]; const index_t batch = output->shape()[0];
const index_t height = output->shape()[2]; const index_t height = output->shape()[2];
const index_t width = output->shape()[3]; const index_t width = output->shape()[3];
...@@ -89,7 +185,6 @@ extern void Conv2dOpenclK1x1S1(const Tensor *input, const Tensor *filter, ...@@ -89,7 +185,6 @@ extern void Conv2dOpenclK1x1S1(const Tensor *input, const Tensor *filter,
MACE_CHECK(input_batch == batch && input_height == height && MACE_CHECK(input_batch == batch && input_height == height &&
input_width == width); input_width == width);
// Conv1x1Naive(input, filter, bias, output);
Conv1x1V2(input, filter, bias, output); Conv1x1V2(input, filter, bias, output);
}; };
......
...@@ -17,7 +17,7 @@ static void AddNBenchmark(int iters, int n, int size) { ...@@ -17,7 +17,7 @@ static void AddNBenchmark(int iters, int n, int size) {
for (int i = 0; i < n; ++i) { for (int i = 0; i < n; ++i) {
op_def_builder.Input(internal::MakeString("Input", i).c_str()); op_def_builder.Input(internal::MakeString("Input", i).c_str());
} }
op_def_builder.Output("Output").Finalize(net.operator_def()); op_def_builder.Output("Output").Finalize(net.NewOperatorDef());
// Add input data // Add input data
for (int i = 0; i < n; ++i) { for (int i = 0; i < n; ++i) {
......
...@@ -17,7 +17,7 @@ TEST_F(AddnOpTest, AddnOp) { ...@@ -17,7 +17,7 @@ TEST_F(AddnOpTest, AddnOp) {
.Input("Input2") .Input("Input2")
.Input("Input3") .Input("Input3")
.Output("Output") .Output("Output")
.Finalize(net.operator_def()); .Finalize(net.NewOperatorDef());
// Add input data // Add input data
net.AddRandomInput<DeviceType::CPU, float>("Input1", {1, 2, 3, 4}); net.AddRandomInput<DeviceType::CPU, float>("Input1", {1, 2, 3, 4});
......
...@@ -21,7 +21,7 @@ static void BatchNorm( ...@@ -21,7 +21,7 @@ static void BatchNorm(
.Input("Var") .Input("Var")
.Input("Epsilon") .Input("Epsilon")
.Output("Output") .Output("Output")
.Finalize(net.operator_def()); .Finalize(net.NewOperatorDef());
// Add input data // Add input data
net.AddRandomInput<D, T>("Input", {batch, channels, height, width}); net.AddRandomInput<D, T>("Input", {batch, channels, height, width});
......
...@@ -21,7 +21,7 @@ void Simple() { ...@@ -21,7 +21,7 @@ void Simple() {
.Input("Var") .Input("Var")
.Input("Epsilon") .Input("Epsilon")
.Output("Output") .Output("Output")
.Finalize(net.operator_def()); .Finalize(net.NewOperatorDef());
// Add input data // Add input data
net.AddInputFromArray<D, float>("Input", {1, 1, 6, 2}, net.AddInputFromArray<D, float>("Input", {1, 1, 6, 2},
...@@ -73,7 +73,7 @@ TEST_F(BatchNormOpTest, SimpleRandomNeon) { ...@@ -73,7 +73,7 @@ TEST_F(BatchNormOpTest, SimpleRandomNeon) {
.Input("Var") .Input("Var")
.Input("Epsilon") .Input("Epsilon")
.Output("Output") .Output("Output")
.Finalize(net.operator_def()); .Finalize(net.NewOperatorDef());
// Add input data // Add input data
net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, channels, height, width}); net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, channels, height, width});
...@@ -114,7 +114,7 @@ TEST_F(BatchNormOpTest, ComplexRandomNeon) { ...@@ -114,7 +114,7 @@ TEST_F(BatchNormOpTest, ComplexRandomNeon) {
.Input("Var") .Input("Var")
.Input("Epsilon") .Input("Epsilon")
.Output("Output") .Output("Output")
.Finalize(net.operator_def()); .Finalize(net.NewOperatorDef());
// Add input data // Add input data
net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, channels, height, width}); net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, channels, height, width});
...@@ -155,7 +155,7 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) { ...@@ -155,7 +155,7 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
.Input("Var") .Input("Var")
.Input("Epsilon") .Input("Epsilon")
.Output("Output") .Output("Output")
.Finalize(net.operator_def()); .Finalize(net.NewOperatorDef());
// Add input data // Add input data
net.AddRandomInput<DeviceType::OPENCL, float>("Input", {batch, channels, height, width}); net.AddRandomInput<DeviceType::OPENCL, float>("Input", {batch, channels, height, width});
...@@ -201,7 +201,7 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) { ...@@ -201,7 +201,7 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) {
.Input("Var") .Input("Var")
.Input("Epsilon") .Input("Epsilon")
.Output("Output") .Output("Output")
.Finalize(net.operator_def()); .Finalize(net.NewOperatorDef());
// Add input data // Add input data
net.AddRandomInput<DeviceType::OPENCL, float>("Input", {batch, channels, height, width}); net.AddRandomInput<DeviceType::OPENCL, float>("Input", {batch, channels, height, width});
......
...@@ -19,10 +19,10 @@ static void ChannelShuffle( ...@@ -19,10 +19,10 @@ static void ChannelShuffle(
OpDefBuilder("GlobalAvgPooling", "GlobalAvgPoolingTest") OpDefBuilder("GlobalAvgPooling", "GlobalAvgPoolingTest")
.Input("Input") .Input("Input")
.Output("Output") .Output("Output")
.Finalize(net.operator_def()); .AddIntArg("group", group)
.Finalize(net.NewOperatorDef());
// Add input data // Add input data
net.AddIntArg("group", group);
net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, channels, height, width}); net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, channels, height, width});
// Warm-up // Warm-up
......
...@@ -14,9 +14,9 @@ TEST_F(ChannelShuffleOpTest, C8G4) { ...@@ -14,9 +14,9 @@ TEST_F(ChannelShuffleOpTest, C8G4) {
OpDefBuilder("ChannelShuffle", "ChannelShuffleTest") OpDefBuilder("ChannelShuffle", "ChannelShuffleTest")
.Input("Input") .Input("Input")
.Output("Output") .Output("Output")
.Finalize(net.operator_def()); .AddIntArg("group", 4)
.Finalize(net.NewOperatorDef());
net.AddIntArg("group", 4);
// Add input data // Add input data
net.AddInputFromArray<DeviceType::CPU, float>( net.AddInputFromArray<DeviceType::CPU, float>(
......
...@@ -17,7 +17,7 @@ static void ConcatHelper(int iters, int concat_dim, int dim1) { ...@@ -17,7 +17,7 @@ static void ConcatHelper(int iters, int concat_dim, int dim1) {
.Input("Input1") .Input("Input1")
.Input("Axis") .Input("Axis")
.Output("Output") .Output("Output")
.Finalize(net.operator_def()); .Finalize(net.NewOperatorDef());
// Add input data // Add input data
const int kDim0 = 100; const int kDim0 = 100;
......
...@@ -18,7 +18,7 @@ TEST_F(ConcatOpTest, Simple_Horizon) { ...@@ -18,7 +18,7 @@ TEST_F(ConcatOpTest, Simple_Horizon) {
.Input("Input1") .Input("Input1")
.Input("Axis") .Input("Axis")
.Output("Output") .Output("Output")
.Finalize(net.operator_def()); .Finalize(net.NewOperatorDef());
std::vector<index_t> input_shape = {4, 4}; std::vector<index_t> input_shape = {4, 4};
std::vector<float> input0; std::vector<float> input0;
...@@ -56,7 +56,7 @@ TEST_F(ConcatOpTest, Simple_Vertical) { ...@@ -56,7 +56,7 @@ TEST_F(ConcatOpTest, Simple_Vertical) {
.Input("Input1") .Input("Input1")
.Input("Axis") .Input("Axis")
.Output("Output") .Output("Output")
.Finalize(net.operator_def()); .Finalize(net.NewOperatorDef());
std::vector<index_t> input_shape = {4, 4}; std::vector<index_t> input_shape = {4, 4};
std::vector<float> input0; std::vector<float> input0;
...@@ -99,7 +99,7 @@ TEST_F(ConcatOpTest, Random) { ...@@ -99,7 +99,7 @@ TEST_F(ConcatOpTest, Random) {
for (int i = 0; i < num_inputs; ++i) { for (int i = 0; i < num_inputs; ++i) {
builder = builder.Input(("Input" + ToString(i)).c_str()); builder = builder.Input(("Input" + ToString(i)).c_str());
} }
builder.Input("Axis").Output("Output").Finalize(net.operator_def()); builder.Input("Axis").Output("Output").Finalize(net.NewOperatorDef());
std::vector<index_t> shape_data; std::vector<index_t> shape_data;
GenerateRandomIntTypeData<index_t>({dim}, shape_data, 1, dim); GenerateRandomIntTypeData<index_t>({dim}, shape_data, 1, dim);
......
...@@ -3,6 +3,7 @@ ...@@ -3,6 +3,7 @@
// //
#include <algorithm> #include <algorithm>
#include <sstream>
#include "mace/core/operator.h" #include "mace/core/operator.h"
#include "mace/core/testing/test_benchmark.h" #include "mace/core/testing/test_benchmark.h"
...@@ -13,6 +14,7 @@ namespace mace { ...@@ -13,6 +14,7 @@ namespace mace {
template <DeviceType D, typename T> template <DeviceType D, typename T>
static void Conv2d(int iters, static void Conv2d(int iters,
int iters_to_sync,
int batch, int batch,
int channels, int channels,
int height, int height,
...@@ -30,17 +32,15 @@ static void Conv2d(int iters, ...@@ -30,17 +32,15 @@ static void Conv2d(int iters,
.Input("Filter") .Input("Filter")
.Input("Bias") .Input("Bias")
.Output("Output") .Output("Output")
.Finalize(net.operator_def()); .AddIntsArg("strides", {stride, stride})
.AddIntArg("padding", padding)
// Add args .AddIntsArg("dilations", {1, 1})
net.AddIntsArg("strides", {stride, stride}); .Finalize(net.NewOperatorDef());
net.AddIntArg("padding", padding);
net.AddIntsArg("dilations", {1, 1});
// Add input data // Add input data
net.AddRandomInput<D, float>("Input", {batch, channels, height, width}); net.AddRandomInput<D, float>("Input", {batch, channels, height, width});
net.AddRandomInput<D, float>("Filter", net.AddRandomInput<D, float>("Filter",
{output_channels, channels, kernel_h, kernel_w}); {output_channels, channels, kernel_h, kernel_w});
net.AddRandomInput<D, float>("Bias", {output_channels}); net.AddRandomInput<D, float>("Bias", {output_channels});
// Warm-up // Warm-up
...@@ -52,10 +52,17 @@ static void Conv2d(int iters, ...@@ -52,10 +52,17 @@ static void Conv2d(int iters,
mace::testing::StartTiming(); mace::testing::StartTiming();
while (iters--) { while (iters--) {
net.RunOp(D); net.RunOp(D);
net.Sync(); if (iters % iters_to_sync == 0) {
net.Sync();
}
} }
} }
// In common network, there are usually more than 1 layers, this is used to
// approximate the amortized latency. The OpenCL runtime for Mali/Adreno is
// in-order.
constexpr int kItersToSync = 10;
#define BM_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, OC, TYPE, DEVICE) \ #define BM_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, OC, TYPE, DEVICE) \
static void \ static void \
BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE( \ BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE( \
...@@ -63,8 +70,8 @@ static void Conv2d(int iters, ...@@ -63,8 +70,8 @@ static void Conv2d(int iters,
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \ const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot); \ mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \ mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
Conv2d<DEVICE, TYPE>(iters, N, C, H, W, KH, KW, STRIDE, mace::Padding::P, \ Conv2d<DEVICE, TYPE>(iters, kItersToSync, N, C, H, W, KH, KW, STRIDE, \
OC); \ mace::Padding::P, OC); \
} \ } \
BENCHMARK( \ BENCHMARK( \
BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE) BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE)
......
...@@ -18,12 +18,12 @@ TEST_F(Conv2dOpTest, Simple_VALID) { ...@@ -18,12 +18,12 @@ TEST_F(Conv2dOpTest, Simple_VALID) {
.Input("Filter") .Input("Filter")
.Input("Bias") .Input("Bias")
.Output("Output") .Output("Output")
.Finalize(net.operator_def()); .AddIntsArg("strides", {1, 1})
.AddIntArg("padding", Padding::VALID)
.AddIntsArg("dilations", {1, 1})
.Finalize(net.NewOperatorDef());
// Add args // Add args
net.AddIntsArg("strides", {1, 1});
net.AddIntArg("padding", Padding::VALID);
net.AddIntsArg("dilations", {1, 1});
// Add input data // Add input data
net.AddInputFromArray<DeviceType::CPU, float>( net.AddInputFromArray<DeviceType::CPU, float>(
...@@ -52,12 +52,10 @@ TEST_F(Conv2dOpTest, Simple_SAME) { ...@@ -52,12 +52,10 @@ TEST_F(Conv2dOpTest, Simple_SAME) {
.Input("Filter") .Input("Filter")
.Input("Bias") .Input("Bias")
.Output("Output") .Output("Output")
.Finalize(net.operator_def()); .AddIntsArg("strides", {1, 1})
.AddIntArg("padding", Padding::SAME)
// Add args .AddIntsArg("dilations", {1, 1})
net.AddIntsArg("strides", {1, 1}); .Finalize(net.NewOperatorDef());
net.AddIntArg("padding", Padding::SAME);
net.AddIntsArg("dilations", {1, 1});
// Add input data // Add input data
net.AddInputFromArray<DeviceType::CPU, float>( net.AddInputFromArray<DeviceType::CPU, float>(
...@@ -88,12 +86,10 @@ TEST_F(Conv2dOpTest, Combined) { ...@@ -88,12 +86,10 @@ TEST_F(Conv2dOpTest, Combined) {
.Input("Filter") .Input("Filter")
.Input("Bias") .Input("Bias")
.Output("Output") .Output("Output")
.Finalize(net.operator_def()); .AddIntsArg("strides", {2, 2})
.AddIntArg("padding", Padding::SAME)
// Add args .AddIntsArg("dilations", {1, 1})
net.AddIntsArg("strides", {2, 2}); .Finalize(net.NewOperatorDef());
net.AddIntArg("padding", Padding::SAME);
net.AddIntsArg("dilations", {1, 1});
// Add input data // Add input data
net.AddInputFromArray<DeviceType::CPU, float>( net.AddInputFromArray<DeviceType::CPU, float>(
...@@ -127,12 +123,10 @@ void TestConv1x1() { ...@@ -127,12 +123,10 @@ void TestConv1x1() {
.Input("Filter") .Input("Filter")
.Input("Bias") .Input("Bias")
.Output("Output") .Output("Output")
.Finalize(net.operator_def()); .AddIntsArg("strides", {1, 1})
.AddIntArg("padding", Padding::VALID)
// Add args .AddIntsArg("dilations", {1, 1})
net.AddIntsArg("strides", {1, 1}); .Finalize(net.NewOperatorDef());
net.AddIntArg("padding", Padding::VALID);
net.AddIntsArg("dilations", {1, 1});
// Add input data // Add input data
net.AddInputFromArray<D, float>( net.AddInputFromArray<D, float>(
...@@ -190,12 +184,10 @@ TEST_F(Conv2dOpTest, AlignedConvNxNS12) { ...@@ -190,12 +184,10 @@ TEST_F(Conv2dOpTest, AlignedConvNxNS12) {
.Input("Filter") .Input("Filter")
.Input("Bias") .Input("Bias")
.Output("Output") .Output("Output")
.Finalize(net.operator_def()); .AddIntsArg("strides", {stride_h, stride_w})
.AddIntArg("padding", type)
// Add args .AddIntsArg("dilations", {1, 1})
net.AddIntsArg("strides", {stride_h, stride_w}); .Finalize(net.NewOperatorDef());
net.AddIntArg("padding", type);
net.AddIntsArg("dilations", {1, 1});
// Add input data // Add input data
net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, input_channels, height, width}); net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, input_channels, height, width});
...@@ -241,12 +233,10 @@ TEST_F(Conv2dOpTest, UnalignedConvNxNS12) { ...@@ -241,12 +233,10 @@ TEST_F(Conv2dOpTest, UnalignedConvNxNS12) {
.Input("Filter") .Input("Filter")
.Input("Bias") .Input("Bias")
.Output("Output") .Output("Output")
.Finalize(net.operator_def()); .AddIntsArg("strides", {stride_h, stride_w})
.AddIntArg("padding", type)
// Add args .AddIntsArg("dilations", {1, 1})
net.AddIntsArg("strides", {stride_h, stride_w}); .Finalize(net.NewOperatorDef());
net.AddIntArg("padding", type);
net.AddIntsArg("dilations", {1, 1});
// Add input data // Add input data
net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, input_channels, height, width}); net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, input_channels, height, width});
......
...@@ -19,12 +19,10 @@ void SimpleValidTest() { ...@@ -19,12 +19,10 @@ void SimpleValidTest() {
.Input("Filter") .Input("Filter")
.Input("Bias") .Input("Bias")
.Output("Output") .Output("Output")
.Finalize(net.operator_def()); .AddIntsArg("strides", {1, 1})
.AddIntArg("padding", Padding::VALID)
// Add args .AddIntsArg("dilations", {1, 1})
net.AddIntsArg("strides", {1, 1}); .Finalize(net.NewOperatorDef());
net.AddIntArg("padding", Padding::VALID);
net.AddIntsArg("dilations", {1, 1});
// Add input data // Add input data
net.AddInputFromArray<D, float>("Input", {1, 2, 2, 3}, net.AddInputFromArray<D, float>("Input", {1, 2, 2, 3},
...@@ -68,12 +66,10 @@ void TestNxNS12(const index_t height, const index_t width) { ...@@ -68,12 +66,10 @@ void TestNxNS12(const index_t height, const index_t width) {
.Input("Filter") .Input("Filter")
.Input("Bias") .Input("Bias")
.Output("Output") .Output("Output")
.Finalize(net.operator_def()); .AddIntsArg("strides", {stride_h, stride_w})
.AddIntArg("padding", type)
// Add args .AddIntsArg("dilations", {1, 1})
net.AddIntsArg("strides", {stride_h, stride_w}); .Finalize(net.NewOperatorDef());
net.AddIntArg("padding", type);
net.AddIntsArg("dilations", {1, 1});
// Add input data // Add input data
net.AddRandomInput<D, float>("Input", {batch, input_channels, height, width}); net.AddRandomInput<D, float>("Input", {batch, input_channels, height, width});
......
...@@ -30,12 +30,10 @@ static void DepthwiseConv2d(int iters, ...@@ -30,12 +30,10 @@ static void DepthwiseConv2d(int iters,
.Input("Filter") .Input("Filter")
.Input("Bias") .Input("Bias")
.Output("Output") .Output("Output")
.Finalize(net.operator_def()); .AddIntsArg("strides", {stride, stride})
.AddIntArg("padding", padding)
// Add args .AddIntsArg("dilations", {1, 1})
net.AddIntsArg("strides", {stride, stride}); .Finalize(net.NewOperatorDef());
net.AddIntArg("padding", padding);
net.AddIntsArg("dilations", {1, 1});
// Add input data // Add input data
net.AddRandomInput<D, float>("Input", {batch, channels, height, width}); net.AddRandomInput<D, float>("Input", {batch, channels, height, width});
......
...@@ -19,7 +19,7 @@ static void GlobalAvgPooling( ...@@ -19,7 +19,7 @@ static void GlobalAvgPooling(
OpDefBuilder("GlobalAvgPooling", "GlobalAvgPoolingTest") OpDefBuilder("GlobalAvgPooling", "GlobalAvgPoolingTest")
.Input("Input") .Input("Input")
.Output("Output") .Output("Output")
.Finalize(net.operator_def()); .Finalize(net.NewOperatorDef());
// Add input data // Add input data
net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, channels, height, width}); net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, channels, height, width});
......
...@@ -14,7 +14,7 @@ TEST_F(GlobalAvgPoolingOpTest, 3x7x7_CPU) { ...@@ -14,7 +14,7 @@ TEST_F(GlobalAvgPoolingOpTest, 3x7x7_CPU) {
OpDefBuilder("GlobalAvgPooling", "GlobalAvgPoolingTest") OpDefBuilder("GlobalAvgPooling", "GlobalAvgPoolingTest")
.Input("Input") .Input("Input")
.Output("Output") .Output("Output")
.Finalize(net.operator_def()); .Finalize(net.NewOperatorDef());
// Add input data // Add input data
std::vector<float> input(147); std::vector<float> input(147);
...@@ -38,7 +38,7 @@ TEST_F(GlobalAvgPoolingOpTest, 3x7x7_NEON) { ...@@ -38,7 +38,7 @@ TEST_F(GlobalAvgPoolingOpTest, 3x7x7_NEON) {
OpDefBuilder("GlobalAvgPooling", "GlobalAvgPoolingTest") OpDefBuilder("GlobalAvgPooling", "GlobalAvgPoolingTest")
.Input("Input") .Input("Input")
.Output("Output") .Output("Output")
.Finalize(net.operator_def()); .Finalize(net.NewOperatorDef());
// Add input data // Add input data
std::vector<float> input(147); std::vector<float> input(147);
......
...@@ -17,21 +17,70 @@ namespace mace { ...@@ -17,21 +17,70 @@ namespace mace {
class OpDefBuilder { class OpDefBuilder {
public: public:
OpDefBuilder(const char *type, const char *name) { OpDefBuilder(const char *type, const std::string &name) {
op_def_.set_type(type); op_def_.set_type(type);
op_def_.set_name(name); op_def_.set_name(name);
} }
OpDefBuilder &Input(const char *input_name) { OpDefBuilder &Input(const std::string &input_name) {
op_def_.add_input(input_name); op_def_.add_input(input_name);
return *this; return *this;
} }
OpDefBuilder &Output(const char *output_name) { OpDefBuilder &Output(const std::string &output_name) {
op_def_.add_output(output_name); op_def_.add_output(output_name);
return *this; return *this;
} }
OpDefBuilder AddIntArg(const std::string &name, const int value) {
auto arg = op_def_.add_arg();
arg->set_name(name);
arg->set_i(value);
return *this;
}
OpDefBuilder AddFloatArg(const std::string &name, const float value) {
auto arg = op_def_.add_arg();
arg->set_name(name);
arg->set_f(value);
return *this;
}
OpDefBuilder AddStringArg(const std::string &name, const char *value) {
auto arg = op_def_.add_arg();
arg->set_name(name);
arg->set_s(value);
return *this;
}
OpDefBuilder AddIntsArg(const std::string &name, const std::vector<int> &values) {
auto arg = op_def_.add_arg();
arg->set_name(name);
for (auto value : values) {
arg->add_ints(value);
}
return *this;
}
OpDefBuilder AddFloatsArg(const std::string &name, const std::vector<float> &values) {
auto arg = op_def_.add_arg();
arg->set_name(name);
for (auto value : values) {
arg->add_floats(value);
}
return *this;
}
OpDefBuilder AddStringsArg(const std::string &name,
const std::vector<const char *> &values) {
auto arg = op_def_.add_arg();
arg->set_name(name);
for (auto value : values) {
arg->add_strings(value);
}
return *this;
}
void Finalize(OperatorDef *op_def) const { void Finalize(OperatorDef *op_def) const {
MACE_CHECK(op_def != nullptr, "input should not be null."); MACE_CHECK(op_def != nullptr, "input should not be null.");
*op_def = op_def_; *op_def = op_def_;
...@@ -45,7 +94,7 @@ class OpsTestNet { ...@@ -45,7 +94,7 @@ class OpsTestNet {
OpsTestNet() {} OpsTestNet() {}
template <DeviceType D, typename T> template <DeviceType D, typename T>
void AddInputFromArray(const char *name, void AddInputFromArray(const std::string &name,
const std::vector<index_t> &shape, const std::vector<index_t> &shape,
const std::vector<T> &data) { const std::vector<T> &data) {
Tensor *input = Tensor *input =
...@@ -58,7 +107,7 @@ class OpsTestNet { ...@@ -58,7 +107,7 @@ class OpsTestNet {
} }
template <DeviceType D, typename T> template <DeviceType D, typename T>
void AddRepeatedInput(const char *name, void AddRepeatedInput(const std::string &name,
const std::vector<index_t> &shape, const std::vector<index_t> &shape,
const T data) { const T data) {
Tensor *input = Tensor *input =
...@@ -70,7 +119,7 @@ class OpsTestNet { ...@@ -70,7 +119,7 @@ class OpsTestNet {
} }
template <DeviceType D, typename T> template <DeviceType D, typename T>
void AddRandomInput(const char *name, void AddRandomInput(const std::string &name,
const std::vector<index_t> &shape, const std::vector<index_t> &shape,
bool positive = false) { bool positive = false) {
Tensor *input = Tensor *input =
...@@ -89,56 +138,18 @@ class OpsTestNet { ...@@ -89,56 +138,18 @@ class OpsTestNet {
}); });
} }
void AddIntArg(const char *name, const int value) { OperatorDef *NewOperatorDef() {
auto arg = op_def_.add_arg(); op_defs_.emplace_back(OperatorDef());
arg->set_name(name); return &op_defs_[op_defs_.size() - 1];
arg->set_i(value);
}
void AddFloatArg(const char *name, const float value) {
auto arg = op_def_.add_arg();
arg->set_name(name);
arg->set_f(value);
}
void AddStringArg(const char *name, const char *value) {
auto arg = op_def_.add_arg();
arg->set_name(name);
arg->set_s(value);
}
void AddIntsArg(const char *name, const std::vector<int> &values) {
auto arg = op_def_.add_arg();
arg->set_name(name);
for (auto value : values) {
arg->add_ints(value);
}
}
void AddFloatsArg(const char *name, const std::vector<float> &values) {
auto arg = op_def_.add_arg();
arg->set_name(name);
for (auto value : values) {
arg->add_floats(value);
}
}
void AddStringsArg(const char *name,
const std::vector<const char *> &values) {
auto arg = op_def_.add_arg();
arg->set_name(name);
for (auto value : values) {
arg->add_strings(value);
}
} }
OperatorDef *operator_def() { return &op_def_; }
Workspace *ws() { return &ws_; } Workspace *ws() { return &ws_; }
bool RunOp(DeviceType device) { bool RunOp(DeviceType device) {
NetDef net_def; NetDef net_def;
net_def.add_op()->CopyFrom(op_def_); for (auto &op_def_ : op_defs_) {
net_def.add_op()->CopyFrom(op_def_);
}
VLOG(3) << net_def.DebugString(); VLOG(3) << net_def.DebugString();
net_ = CreateNet(net_def, &ws_, device); net_ = CreateNet(net_def, &ws_, device);
device_ = device; device_ = device;
...@@ -159,7 +170,7 @@ class OpsTestNet { ...@@ -159,7 +170,7 @@ class OpsTestNet {
public: public:
Workspace ws_; Workspace ws_;
OperatorDef op_def_; std::vector<OperatorDef> op_defs_;
std::unique_ptr<NetBase> net_; std::unique_ptr<NetBase> net_;
DeviceType device_; DeviceType device_;
}; };
......
...@@ -27,14 +27,12 @@ static void Pooling(int iters, ...@@ -27,14 +27,12 @@ static void Pooling(int iters,
OpDefBuilder("Pooling", "PoolingTest") OpDefBuilder("Pooling", "PoolingTest")
.Input("Input") .Input("Input")
.Output("Output") .Output("Output")
.Finalize(net.operator_def()); .AddIntArg("pooling_type", pooling_type)
.AddIntsArg("kernels", {kernel, kernel})
// Add args .AddIntsArg("strides", {stride, stride})
net.AddIntArg("pooling_type", pooling_type); .AddIntArg("padding", padding)
net.AddIntsArg("kernels", {kernel, kernel}); .AddIntsArg("dilations", {1, 1})
net.AddIntsArg("strides", {stride, stride}); .Finalize(net.NewOperatorDef());
net.AddIntArg("padding", padding);
net.AddIntsArg("dilations", {1, 1});
// Add input data // Add input data
net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, channels, height, width}); net.AddRandomInput<DeviceType::CPU, float>("Input", {batch, channels, height, width});
......
...@@ -19,14 +19,12 @@ TEST_F(PoolingOpTest, MAX_VALID) { ...@@ -19,14 +19,12 @@ TEST_F(PoolingOpTest, MAX_VALID) {
OpDefBuilder("Pooling", "PoolingTest") OpDefBuilder("Pooling", "PoolingTest")
.Input("Input") .Input("Input")
.Output("Output") .Output("Output")
.Finalize(net.operator_def()); .AddIntsArg("kernels", {2, 2})
.AddIntsArg("strides", {2, 2})
// Add args .AddIntArg("padding", Padding::VALID)
net.AddIntsArg("kernels", {2, 2}); .AddIntsArg("dilations", {1, 1})
net.AddIntsArg("strides", {2, 2}); .AddIntArg("pooling_type", PoolingType::MAX)
net.AddIntArg("padding", Padding::VALID); .Finalize(net.NewOperatorDef());
net.AddIntsArg("dilations", {1, 1});
net.AddIntArg("pooling_type", PoolingType::MAX);
// Add input data // Add input data
net.AddInputFromArray<DeviceType::CPU, float>( net.AddInputFromArray<DeviceType::CPU, float>(
...@@ -50,14 +48,12 @@ TEST_F(PoolingOpTest, AVG_VALID) { ...@@ -50,14 +48,12 @@ TEST_F(PoolingOpTest, AVG_VALID) {
OpDefBuilder("Pooling", "PoolingTest") OpDefBuilder("Pooling", "PoolingTest")
.Input("Input") .Input("Input")
.Output("Output") .Output("Output")
.Finalize(net.operator_def()); .AddIntsArg("kernels", {2, 2})
.AddIntsArg("strides", {2, 2})
// Add args .AddIntArg("padding", Padding::VALID)
net.AddIntsArg("kernels", {2, 2}); .AddIntsArg("dilations", {1, 1})
net.AddIntsArg("strides", {2, 2}); .AddIntArg("pooling_type", PoolingType::AVG)
net.AddIntArg("padding", Padding::VALID); .Finalize(net.NewOperatorDef());
net.AddIntsArg("dilations", {1, 1});
net.AddIntArg("pooling_type", PoolingType::AVG);
// Add input data // Add input data
net.AddInputFromArray<DeviceType::CPU, float>( net.AddInputFromArray<DeviceType::CPU, float>(
...@@ -81,14 +77,12 @@ TEST_F(PoolingOpTest, MAX_SAME) { ...@@ -81,14 +77,12 @@ TEST_F(PoolingOpTest, MAX_SAME) {
OpDefBuilder("Pooling", "PoolingTest") OpDefBuilder("Pooling", "PoolingTest")
.Input("Input") .Input("Input")
.Output("Output") .Output("Output")
.Finalize(net.operator_def()); .AddIntsArg("kernels", {2, 2})
.AddIntsArg("strides", {2, 2})
// Add args .AddIntArg("padding", Padding::SAME)
net.AddIntsArg("kernels", {2, 2}); .AddIntsArg("dilations", {1, 1})
net.AddIntsArg("strides", {2, 2}); .AddIntArg("pooling_type", PoolingType::MAX)
net.AddIntArg("padding", Padding::SAME); .Finalize(net.NewOperatorDef());
net.AddIntsArg("dilations", {1, 1});
net.AddIntArg("pooling_type", PoolingType::MAX);
// Add input data // Add input data
net.AddInputFromArray<DeviceType::CPU, float>("Input", {1, 1, 3, 3}, net.AddInputFromArray<DeviceType::CPU, float>("Input", {1, 1, 3, 3},
...@@ -109,14 +103,12 @@ TEST_F(PoolingOpTest, MAX_VALID_DILATION) { ...@@ -109,14 +103,12 @@ TEST_F(PoolingOpTest, MAX_VALID_DILATION) {
OpDefBuilder("Pooling", "PoolingTest") OpDefBuilder("Pooling", "PoolingTest")
.Input("Input") .Input("Input")
.Output("Output") .Output("Output")
.Finalize(net.operator_def()); .AddIntsArg("kernels", {2, 2})
.AddIntsArg("strides", {1, 1})
// Add args .AddIntArg("padding", Padding::VALID)
net.AddIntsArg("kernels", {2, 2}); .AddIntsArg("dilations", {2, 2})
net.AddIntsArg("strides", {1, 1}); .AddIntArg("pooling_type", PoolingType::MAX)
net.AddIntArg("padding", Padding::VALID); .Finalize(net.NewOperatorDef());
net.AddIntsArg("dilations", {2, 2});
net.AddIntArg("pooling_type", PoolingType::MAX);
// Add input data // Add input data
net.AddInputFromArray<DeviceType::CPU, float>( net.AddInputFromArray<DeviceType::CPU, float>(
...@@ -138,14 +130,12 @@ TEST_F(PoolingOpTest, MAX_k2x2s2x2) { ...@@ -138,14 +130,12 @@ TEST_F(PoolingOpTest, MAX_k2x2s2x2) {
OpDefBuilder("Pooling", "PoolingTest") OpDefBuilder("Pooling", "PoolingTest")
.Input("Input") .Input("Input")
.Output("Output") .Output("Output")
.Finalize(net.operator_def()); .AddIntArg("pooling_type", PoolingType::MAX)
.AddIntsArg("kernels", {2, 2})
// Add args .AddIntsArg("strides", {2, 2})
net.AddIntArg("pooling_type", PoolingType::MAX); .AddIntArg("padding", Padding::SAME)
net.AddIntsArg("kernels", {2, 2}); .AddIntsArg("dilations", {1, 1})
net.AddIntsArg("strides", {2, 2}); .Finalize(net.NewOperatorDef());
net.AddIntArg("padding", Padding::SAME);
net.AddIntsArg("dilations", {1, 1});
// Add input data // Add input data
net.AddInputFromArray<DeviceType::CPU, float>( net.AddInputFromArray<DeviceType::CPU, float>(
...@@ -166,14 +156,12 @@ TEST_F(PoolingOpTest, MAX_k3x3s2x2) { ...@@ -166,14 +156,12 @@ TEST_F(PoolingOpTest, MAX_k3x3s2x2) {
OpDefBuilder("Pooling", "PoolingTest") OpDefBuilder("Pooling", "PoolingTest")
.Input("Input") .Input("Input")
.Output("Output") .Output("Output")
.Finalize(net.operator_def()); .AddIntArg("pooling_type", PoolingType::MAX)
.AddIntsArg("kernels", {3, 3})
// Add args .AddIntsArg("strides", {2, 2})
net.AddIntArg("pooling_type", PoolingType::MAX); .AddIntArg("padding", Padding::VALID)
net.AddIntsArg("kernels", {3, 3}); .AddIntsArg("dilations", {1, 1})
net.AddIntsArg("strides", {2, 2}); .Finalize(net.NewOperatorDef());
net.AddIntArg("padding", Padding::VALID);
net.AddIntsArg("dilations", {1, 1});
// Add input data // Add input data
net.AddInputFromArray<DeviceType::CPU, float>( net.AddInputFromArray<DeviceType::CPU, float>(
...@@ -195,14 +183,12 @@ TEST_F(PoolingOpTest, AVG_k2x2s2x2) { ...@@ -195,14 +183,12 @@ TEST_F(PoolingOpTest, AVG_k2x2s2x2) {
OpDefBuilder("Pooling", "PoolingTest") OpDefBuilder("Pooling", "PoolingTest")
.Input("Input") .Input("Input")
.Output("Output") .Output("Output")
.Finalize(net.operator_def()); .AddIntArg("pooling_type", PoolingType::AVG)
.AddIntsArg("kernels", {2, 2})
// Add args .AddIntsArg("strides", {2, 2})
net.AddIntArg("pooling_type", PoolingType::AVG); .AddIntArg("padding", Padding::SAME)
net.AddIntsArg("kernels", {2, 2}); .AddIntsArg("dilations", {1, 1})
net.AddIntsArg("strides", {2, 2}); .Finalize(net.NewOperatorDef());
net.AddIntArg("padding", Padding::SAME);
net.AddIntsArg("dilations", {1, 1});
// Add input data // Add input data
net.AddInputFromArray<DeviceType::CPU, float>( net.AddInputFromArray<DeviceType::CPU, float>(
......
...@@ -16,7 +16,7 @@ static void ReluBenchmark(int iters, int size) { ...@@ -16,7 +16,7 @@ static void ReluBenchmark(int iters, int size) {
OpDefBuilder("Relu", "ReluBM") OpDefBuilder("Relu", "ReluBM")
.Input("Input") .Input("Input")
.Output("Output") .Output("Output")
.Finalize(net.operator_def()); .Finalize(net.NewOperatorDef());
// Add input data // Add input data
net.AddRandomInput<DeviceType::CPU, float>("Input", {size}); net.AddRandomInput<DeviceType::CPU, float>("Input", {size});
......
...@@ -15,7 +15,7 @@ TEST_F(ReluOpTest, ReluOp) { ...@@ -15,7 +15,7 @@ TEST_F(ReluOpTest, ReluOp) {
OpDefBuilder("Relu", "ReluTest") OpDefBuilder("Relu", "ReluTest")
.Input("Input") .Input("Input")
.Output("Output") .Output("Output")
.Finalize(net.operator_def()); .Finalize(net.NewOperatorDef());
// Add input data // Add input data
net.AddRandomInput<DeviceType::CPU, float>("Input", {1, 2, 3, 5}); net.AddRandomInput<DeviceType::CPU, float>("Input", {1, 2, 3, 5});
...@@ -38,11 +38,11 @@ TEST_F(ReluOpTest, ReluOpWithMax) { ...@@ -38,11 +38,11 @@ TEST_F(ReluOpTest, ReluOpWithMax) {
OpDefBuilder("Relu", "ReluTestWithMax") OpDefBuilder("Relu", "ReluTestWithMax")
.Input("Input") .Input("Input")
.Output("Output") .Output("Output")
.Finalize(net.operator_def()); .AddFloatArg("max_limit", 0.5)
.Finalize(net.NewOperatorDef());
// Add input data // Add input data
net.AddRandomInput<DeviceType::CPU, float>("Input", {1, 2, 3, 5}); net.AddRandomInput<DeviceType::CPU, float>("Input", {1, 2, 3, 5});
net.AddFloatArg("max_limit", 0.5);
// Run // Run
net.RunOp(); net.RunOp();
......
...@@ -18,7 +18,7 @@ TEST_F(ResizeBilinearTest, ResizeBilinearWOAlignCorners) { ...@@ -18,7 +18,7 @@ TEST_F(ResizeBilinearTest, ResizeBilinearWOAlignCorners) {
.Input("Input") .Input("Input")
.Input("OutSize") .Input("OutSize")
.Output("Output") .Output("Output")
.Finalize(net.operator_def()); .Finalize(net.NewOperatorDef());
// Add input data // Add input data
vector<float> input(24); vector<float> input(24);
...@@ -43,9 +43,8 @@ TEST_F(ResizeBilinearTest, ResizeBilinearWAlignCorners) { ...@@ -43,9 +43,8 @@ TEST_F(ResizeBilinearTest, ResizeBilinearWAlignCorners) {
.Input("Input") .Input("Input")
.Input("OutSize") .Input("OutSize")
.Output("Output") .Output("Output")
.Finalize(net.operator_def()); .AddIntArg("align_corners", 1)
.Finalize(net.NewOperatorDef());
net.AddIntArg("align_corners", 1);
// Add input data // Add input data
vector<float> input(24); vector<float> input(24);
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册