Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
0461beb5
Mace
项目概览
Xiaomi
/
Mace
通知
107
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
提交
0461beb5
编写于
11月 02, 2017
作者:
L
Liangliang He
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'opencl' into 'master'
Update conv2d benchmark test See merge request !81
上级
de992bf8
e4455322
变更
30
隐藏空白更改
内联
并排
Showing
30 changed file
with
465 addition
and
278 deletion
+465
-278
mace/core/BUILD
mace/core/BUILD
+2
-2
mace/core/runtime/opencl/cl2_header.h
mace/core/runtime/opencl/cl2_header.h
+12
-0
mace/core/runtime/opencl/opencl_allocator.cc
mace/core/runtime/opencl/opencl_allocator.cc
+1
-1
mace/core/runtime/opencl/opencl_runtime.cc
mace/core/runtime/opencl/opencl_runtime.cc
+4
-2
mace/core/runtime/opencl/opencl_runtime.h
mace/core/runtime/opencl/opencl_runtime.h
+1
-5
mace/core/runtime/opencl/opencl_wrapper.cc
mace/core/runtime/opencl/opencl_wrapper.cc
+34
-4
mace/kernels/opencl/batch_norm_opencl.cc
mace/kernels/opencl/batch_norm_opencl.cc
+2
-2
mace/kernels/opencl/cl/common.h
mace/kernels/opencl/cl/common.h
+2
-0
mace/kernels/opencl/cl/conv_2d_1x1.cl
mace/kernels/opencl/cl/conv_2d_1x1.cl
+94
-29
mace/kernels/opencl/conv_2d_opencl_1x1.cc
mace/kernels/opencl/conv_2d_opencl_1x1.cc
+124
-29
mace/ops/addn_benchmark.cc
mace/ops/addn_benchmark.cc
+1
-1
mace/ops/addn_test.cc
mace/ops/addn_test.cc
+1
-1
mace/ops/batch_norm_benchmark.cc
mace/ops/batch_norm_benchmark.cc
+1
-1
mace/ops/batch_norm_test.cc
mace/ops/batch_norm_test.cc
+5
-5
mace/ops/channel_shuffle_benchmark.cc
mace/ops/channel_shuffle_benchmark.cc
+2
-2
mace/ops/channel_shuffle_test.cc
mace/ops/channel_shuffle_test.cc
+2
-2
mace/ops/concat_benchmark.cc
mace/ops/concat_benchmark.cc
+1
-1
mace/ops/concat_test.cc
mace/ops/concat_test.cc
+3
-3
mace/ops/conv_2d_benchmark.cc
mace/ops/conv_2d_benchmark.cc
+17
-10
mace/ops/conv_2d_test.cc
mace/ops/conv_2d_test.cc
+24
-34
mace/ops/depthwise_conv2d_test.cc
mace/ops/depthwise_conv2d_test.cc
+8
-12
mace/ops/depthwise_conv_2d_benchmark.cc
mace/ops/depthwise_conv_2d_benchmark.cc
+4
-6
mace/ops/global_avg_pooling_benchmark.cc
mace/ops/global_avg_pooling_benchmark.cc
+1
-1
mace/ops/global_avg_pooling_test.cc
mace/ops/global_avg_pooling_test.cc
+2
-2
mace/ops/ops_test_util.h
mace/ops/ops_test_util.h
+62
-51
mace/ops/pooling_benchmark.cc
mace/ops/pooling_benchmark.cc
+6
-8
mace/ops/pooling_test.cc
mace/ops/pooling_test.cc
+42
-56
mace/ops/relu_benchmark.cc
mace/ops/relu_benchmark.cc
+1
-1
mace/ops/relu_test.cc
mace/ops/relu_test.cc
+3
-3
mace/ops/resize_bilinear_test.cc
mace/ops/resize_bilinear_test.cc
+3
-4
未找到文件。
mace/core/BUILD
浏览文件 @
0461beb5
...
...
@@ -12,11 +12,11 @@ load("//mace:mace.bzl", "if_android")
cc_library
(
name
=
"opencl_runtime"
,
srcs
=
glob
([
"runtime/opencl/cl.hpp"
,
"runtime/opencl/cl2.hpp"
,
"runtime/opencl/*.cc"
,
]),
hdrs
=
glob
([
"runtime/opencl/cl.hpp"
,
"runtime/opencl/cl2.hpp"
,
"runtime/opencl/*.h"
,
]),
copts
=
[
"-std=c++11"
],
...
...
mace/core/runtime/opencl/cl2_header.h
0 → 100644
浏览文件 @
0461beb5
//
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#ifndef MACE_CORE_RUNTIME_OPENCL_CL2_HEADER_H_
#define MACE_CORE_RUNTIME_OPENCL_CL2_HEADER_H_
#define CL_HPP_TARGET_OPENCL_VERSION 200
#include "mace/core/runtime/opencl/cl2.hpp"
#endif // MACE_CORE_RUNTIME_OPENCL_CL2_HEADER_H_
mace/core/runtime/opencl/opencl_allocator.cc
浏览文件 @
0461beb5
...
...
@@ -2,8 +2,8 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/opencl_allocator.h"
#include "mace/core/runtime/opencl/cl2.hpp"
#include "mace/core/runtime/opencl/opencl_runtime.h"
namespace
mace
{
...
...
mace/core/runtime/opencl/opencl_runtime.cc
浏览文件 @
0461beb5
...
...
@@ -26,6 +26,7 @@ bool ReadSourceFile(const std::string &filename, std::string *content) {
std
::
string
line
;
while
(
std
::
getline
(
ifs
,
line
))
{
*
content
+=
line
;
*
content
+=
"
\n
"
;
}
ifs
.
close
();
return
true
;
...
...
@@ -66,14 +67,15 @@ bool BuildProgram(OpenCLRuntime *runtime,
*
program
=
cl
::
Program
(
runtime
->
context
(),
sources
);
std
::
string
build_options
=
"-Werror -cl-mad-enable -cl-fast-relaxed-math -I"
+
path
;
// TODO(heliangliang) -cl-unsafe-math-optimizations -cl-fast-relaxed-math
if
(
program
->
build
({
runtime
->
device
()},
build_options
.
c_str
())
!=
CL_SUCCESS
)
{
cl_int
ret
=
program
->
build
({
runtime
->
device
()},
build_options
.
c_str
());
if
(
ret
!=
CL_SUCCESS
)
{
if
(
program
->
getBuildInfo
<
CL_PROGRAM_BUILD_STATUS
>
(
runtime
->
device
())
==
CL_BUILD_ERROR
)
{
std
::
string
build_log
=
program
->
getBuildInfo
<
CL_PROGRAM_BUILD_LOG
>
(
runtime
->
device
());
LOG
(
INFO
)
<<
"Program build log: "
<<
build_log
;
}
LOG
(
FATAL
)
<<
"Build program failed
"
;
LOG
(
FATAL
)
<<
"Build program failed
: "
<<
ret
;
}
return
true
;
...
...
mace/core/runtime/opencl/opencl_runtime.h
浏览文件 @
0461beb5
...
...
@@ -5,14 +5,10 @@
#ifndef MACE_CORE_RUNTIME_OPENCL_OPENCL_RUNTIME_H_
#define MACE_CORE_RUNTIME_OPENCL_OPENCL_RUNTIME_H_
#ifndef CL_HPP_TARGET_OPENCL_VERSION
#define CL_HPP_TARGET_OPENCL_VERSION 200
#endif
#include <map>
#include <mutex>
#include "mace/core/runtime/opencl/cl2
.hpp
"
#include "mace/core/runtime/opencl/cl2
_header.h
"
#include "mace/core/runtime/opencl/opencl_wrapper.h"
namespace
mace
{
...
...
mace/core/runtime/opencl/opencl_wrapper.cc
浏览文件 @
0461beb5
...
...
@@ -126,6 +126,12 @@ class OpenCLLibraryImpl final {
using
clRetainKernelFunc
=
cl_int
(
*
)(
cl_kernel
kernel
);
using
clCreateBufferFunc
=
cl_mem
(
*
)(
cl_context
,
cl_mem_flags
,
size_t
,
void
*
,
cl_int
*
);
using
clCreateImageFunc
=
cl_mem
(
*
)(
cl_context
,
cl_mem_flags
,
const
cl_image_format
*
,
const
cl_image_desc
*
,
void
*
,
cl_int
*
);
using
clCreateProgramWithSourceFunc
=
cl_program
(
*
)(
cl_context
,
cl_uint
,
const
char
**
,
const
size_t
*
,
cl_int
*
);
using
clReleaseKernelFunc
=
cl_int
(
*
)(
cl_kernel
kernel
);
...
...
@@ -136,8 +142,12 @@ class OpenCLLibraryImpl final {
using
clRetainDeviceFunc
=
cl_int
(
*
)(
cl_device_id
);
using
clReleaseDeviceFunc
=
cl_int
(
*
)(
cl_device_id
);
using
clRetainEventFunc
=
cl_int
(
*
)(
cl_event
);
using
clGetKernelWorkGroupInfoFunc
=
cl_int
(
*
)(
cl_kernel
,
cl_device_id
,
cl_kernel_work_group_info
,
size_t
,
void
*
,
size_t
*
);
using
clGetKernelWorkGroupInfoFunc
=
cl_int
(
*
)(
cl_kernel
,
cl_device_id
,
cl_kernel_work_group_info
,
size_t
,
void
*
,
size_t
*
);
#define DEFINE_FUNC_PTR(func) func##Func func = nullptr
...
...
@@ -149,6 +159,7 @@ class OpenCLLibraryImpl final {
DEFINE_FUNC_PTR
(
clReleaseKernel
);
DEFINE_FUNC_PTR
(
clCreateProgramWithSource
);
DEFINE_FUNC_PTR
(
clCreateBuffer
);
DEFINE_FUNC_PTR
(
clCreateImage
);
DEFINE_FUNC_PTR
(
clRetainKernel
);
DEFINE_FUNC_PTR
(
clCreateKernel
);
DEFINE_FUNC_PTR
(
clGetProgramInfo
);
...
...
@@ -269,6 +280,7 @@ void *OpenCLLibraryImpl::LoadFromPath(const std::string &path) {
ASSIGN_FROM_DLSYM
(
clReleaseKernel
);
ASSIGN_FROM_DLSYM
(
clCreateProgramWithSource
);
ASSIGN_FROM_DLSYM
(
clCreateBuffer
);
ASSIGN_FROM_DLSYM
(
clCreateImage
);
ASSIGN_FROM_DLSYM
(
clRetainKernel
);
ASSIGN_FROM_DLSYM
(
clCreateKernel
);
ASSIGN_FROM_DLSYM
(
clGetProgramInfo
);
...
...
@@ -708,6 +720,24 @@ cl_mem clCreateBuffer(cl_context context,
}
}
cl_mem
clCreateImage
(
cl_context
context
,
cl_mem_flags
flags
,
const
cl_image_format
*
image_format
,
const
cl_image_desc
*
image_desc
,
void
*
host_ptr
,
cl_int
*
errcode_ret
)
{
auto
func
=
mace
::
OpenCLLibraryImpl
::
Get
().
clCreateImage
;
if
(
func
!=
nullptr
)
{
return
func
(
context
,
flags
,
image_format
,
image_desc
,
host_ptr
,
errcode_ret
);
}
else
{
if
(
errcode_ret
!=
nullptr
)
{
*
errcode_ret
=
CL_OUT_OF_RESOURCES
;
}
return
nullptr
;
}
}
cl_program
clCreateProgramWithSource
(
cl_context
context
,
cl_uint
count
,
const
char
**
strings
,
...
...
@@ -795,8 +825,8 @@ cl_int clGetKernelWorkGroupInfo(cl_kernel kernel,
size_t
*
param_value_size_ret
)
{
auto
func
=
mace
::
OpenCLLibraryImpl
::
Get
().
clGetKernelWorkGroupInfo
;
if
(
func
!=
nullptr
)
{
return
func
(
kernel
,
device
,
param_name
,
param_value_size
,
param_value
,
param_value
_size_ret
);
return
func
(
kernel
,
device
,
param_name
,
param_value_size
,
param_value
,
param_value_size_ret
);
}
else
{
return
CL_OUT_OF_RESOURCES
;
}
...
...
mace/kernels/opencl/batch_norm_opencl.cc
浏览文件 @
0461beb5
...
...
@@ -3,7 +3,7 @@
//
#include "mace/kernels/batch_norm.h"
#include "mace/core/runtime/opencl/cl2
.hpp
"
#include "mace/core/runtime/opencl/cl2
_header.h
"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/utils/tuner.h"
...
...
@@ -79,4 +79,4 @@ void BatchNormFunctor<DeviceType::OPENCL, float>::operator()(
}
}
// namespace kernels
}
// namespace mace
\ No newline at end of file
}
// namespace mace
mace/kernels/opencl/cl/common.h
0 → 100644
浏览文件 @
0461beb5
#pragma OPENCL EXTENSION cl_khr_fp16 : enable
#pragma OPENCL EXTENSION cl_khr_3d_image_writes : enable
mace/kernels/opencl/cl/conv_2d_1x1.cl
浏览文件 @
0461beb5
void
kernel
conv_2d_1x1_naive
(
global
const
float
*input,
/*
n,
c,
h,
w
*/
global
const
float
*filter,
/*
o,
i,
kh,
kw
*/
global
const
float
*bias,
/*
o
*/
global
float
*output,
/*
n,
c,
h,
w
*/
private
const
int
input_channels
)
{
#
include
<common.h>
__kernel
void
conv_2d_1x1_naive
(
__global
const
float
*input,
/*
n,
c,
h,
w
*/
__global
const
float
*filter,
/*
o,
i,
kh,
kw
*/
__global
const
float
*bias,
/*
o
*/
__global
float
*output,
/*
n,
c,
h,
w
*/
__private
const
int
in_chan_num
)
{
const
int
batch
=
get_global_id
(
0
)
;
const
int
channel
=
get_global_id
(
1
)
;
const
int
channels
=
get_global_size
(
1
)
;
const
int
pixel
=
get_global_id
(
2
)
;
const
int
pixels
=
get_global_size
(
2
)
;
float
*output_ptr
=
output
+
(
batch
*
channels
+
channel
)
*
pixels
;
output_ptr[pixel]
=
bias[channel]
;
for
(
int
inc
=
0
; inc < in
put_channels
; ++inc) {
const
float
*input_ptr
=
input
+
(
batch
*
in
put_channels
+
inc
)
*
pixels
+
pixel
;
const
float
weights
=
filter[channel
*
in
put_channels
+
inc]
;
for
(
int
inc
=
0
; inc < in
_chan_num
; ++inc) {
const
float
*input_ptr
=
input
+
(
batch
*
in
_chan_num
+
inc
)
*
pixels
+
pixel
;
const
float
weights
=
filter[channel
*
in
_chan_num
+
inc]
;
float
in
=
input_ptr[0]
;
float
out
=
output_ptr[0]
;
out
+=
in
*
weights
;
...
...
@@ -23,17 +24,19 @@ void kernel conv_2d_1x1_naive(global const float *input, /* n, c, h, w */
}
}
void
kernel
conv_2d_1x1_v2
(
global
const
float
*input,
/*
n,
c,
h,
w
*/
global
const
float
*filter,
/*
o,
i,
kh,
kw
*/
global
const
float
*bias,
/*
o
*/
global
float
*output,
/*
n,
c,
h,
w
*/
private
const
int
in_chan_num,
private
const
int
out_chan_num,
private
const
int
pixel_num
)
{
__kernel
void
conv_2d_1x1_v2
(
__
global
const
float
*input,
/*
n,
c,
h,
w
*/
__
global
const
float
*filter,
/*
o,
i,
kh,
kw
*/
__
global
const
float
*bias,
/*
o
*/
__
global
float
*output,
/*
n,
c,
h,
w
*/
__
private
const
int
in_chan_num,
__
private
const
int
out_chan_num,
__
private
const
int
pixel_num
)
{
int
batch
=
get_global_id
(
0
)
;
int
out_chan_blk
=
get_global_id
(
1
)
;
int
out_pixel_blk
=
get_global_id
(
2
)
;
__private
float
output_slice[4
*
4]
;
const
int
out_chan_begin
=
out_chan_blk
*
4
;
const
int
out_chan_end
=
min
(
out_chan_begin
+
4
,
out_chan_num
)
;
const
int
out_pixel_begin
=
out_pixel_blk
*
4
;
...
...
@@ -41,21 +44,23 @@ void kernel conv_2d_1x1_v2(global const float *input, /* n, c, h, w */
const
int
in_offset
=
batch
*
in_chan_num
*
pixel_num
;
const
int
out_offset
=
batch
*
out_chan_num
*
pixel_num
;
const
float
*input_base
=
input
+
in_offset
+
out_pixel_begin
;
float
*output_base
=
output
+
out_offset
+
out_pixel_begin
;
int
pixels
=
out_pixel_end
-
out_pixel_begin
;
int
out_chan_len
=
out_chan_end
-
out_chan_begin
;
int
pixel_len
=
out_pixel_end
-
out_pixel_begin
;
for
(
int
out_chan
=
out_chan_begin
; out_chan < out_chan_end; ++out_chan) {
float
bias_value
=
bias[out_chan]
;
float
*output_ptr
=
output_base
+
out_chan
*
pixel_num
;
for
(
int
p
=
0
; p < pixel
s
; ++p) {
output_
ptr[
p]
=
bias_value
;
int
out_chan_offset
=
out_chan
-
out_chan_begin
;
for
(
int
p
=
0
; p < pixel
_len
; ++p) {
output_
slice[out_chan_offset
*
4
+
p]
=
bias_value
;
}
}
int
in_chan
=
0
;
if
(
pixel
s
==
4
)
{
if
(
pixel
_len
==
4
)
{
for
(
; in_chan + 3 < in_chan_num; in_chan += 4) {
const
float
*input_ptr
=
input_base
+
in_chan
*
pixel_num
;
int
out_chan
=
out_chan_begin
;
...
...
@@ -66,30 +71,32 @@ void kernel conv_2d_1x1_v2(global const float *input, /* n, c, h, w */
float4
in1
=
vload4
(
0
,
input_ptr
+
pixel_num
)
;
float4
in2
=
vload4
(
0
,
input_ptr
+
2
*
pixel_num
)
;
float4
in3
=
vload4
(
0
,
input_ptr
+
3
*
pixel_num
)
;
#
pragma
unroll
for
(
int
oc
=
0
; oc < 4; ++oc) {
float4
weights
=
vload4
(
0
,
filter_ptr
+
oc
*
in_chan_num
)
;
float4
out
=
vload4
(
0
,
output_ptr
+
oc
*
pixel_num
)
;
float4
out
=
vload4
(
oc,
output_slice
)
;
out
+=
in0
*
weights.x
;
out
+=
in1
*
weights.y
;
out
+=
in2
*
weights.z
;
out
+=
in3
*
weights.w
;
vstore4
(
out,
0
,
output_ptr
+
oc
*
pixel_num
)
;
vstore4
(
out,
oc,
output_slice
)
;
}
}
for
(
; out_chan < out_chan_end; ++out_chan) {
const
float*
filter_ptr
=
filter
+
out_chan
*
in_chan_num
+
in_chan
;
float
*output_ptr
=
output_base
+
out_chan
*
pixel_num
;
int
out_chan_offset
=
out_chan
-
out_chan_begin
;
float4
weights
=
vload4
(
0
,
filter_ptr
)
;
float4
in0
=
vload4
(
0
,
input_ptr
)
;
float4
in1
=
vload4
(
0
,
input_ptr
+
pixel_num
)
;
float4
in2
=
vload4
(
0
,
input_ptr
+
2
*
pixel_num
)
;
float4
in3
=
vload4
(
0
,
input_ptr
+
3
*
pixel_num
)
;
float4
out
=
vload4
(
0
,
output_ptr
)
;
float4
out
=
vload4
(
out_chan_offset,
output_slice
)
;
out
+=
in0
*
weights.x
;
out
+=
in1
*
weights.y
;
out
+=
in2
*
weights.z
;
out
+=
in3
*
weights.w
;
vstore4
(
out,
0
,
output_ptr
)
;
vstore4
(
out,
out_chan_offset,
output_slice
)
;
}
}
}
...
...
@@ -99,13 +106,71 @@ void kernel conv_2d_1x1_v2(global const float *input, /* n, c, h, w */
for
(
int
out_chan
=
out_chan_begin
; out_chan < out_chan_end; ++out_chan) {
float
weights
=
filter[out_chan
*
in_chan_num
+
in_chan]
;
float
*output_ptr
=
output_base
+
out_chan
*
pixel_num
;
int
out_chan_offset
=
out_chan
-
out_chan_begin
;
for
(
int
p
=
0
; p < pixel
s
; ++p) {
for
(
int
p
=
0
; p < pixel
_len
; ++p) {
float
in
=
input_ptr[p]
;
float
out
=
output_ptr[p]
;
out
+=
in
*
weights
;
output_ptr[p]
=
out
;
output_slice[out_chan_offset
*
4
+
p]
+=
in
*
weights
;
}
}
}
for
(
int
out_chan_offset
=
0
; out_chan_offset < out_chan_len; ++out_chan_offset) {
int
out_chan
=
out_chan_begin
+
out_chan_offset
;
float
*output_ptr
=
output_base
+
out_chan
*
pixel_num
;
if
(
pixel_len
==
4
)
{
float4
out
=
vload4
(
out_chan_offset,
output_slice
)
;
vstore4
(
out,
0
,
output_ptr
)
;
}
else
{
int
offset
=
out_chan_offset
<<
2
;
for
(
int
p
=
0
; p < pixel_len; ++p) {
output_ptr[p]
=
output_slice[offset
+
p]
;
}
}
}
}
/*
FIXME
this
is
incomplete
*/
__kernel
void
conv_2d_1x1_v3
(
__read_only
image3d_t
input,
/*
n,
c/4,
h,
w,
4
*/
__global
const
float
*filter,
/*
o,
i,
kh,
kw
*/
__global
const
float
*bias,
/*
o
*/
__write_only
image3d_t
output,
/*
n,
c/4,
h,
w,
4
*/
__private
const
int
batch_num,
__private
const
int
in_chan_num,
__private
const
int
out_chan_num,
__private
const
int
height,
__private
const
int
width
)
{
int
out_chan_blk
=
get_global_id
(
0
)
;
int
h
=
get_global_id
(
1
)
;
int
w
=
get_global_id
(
2
)
;
int
in_chan_blk_num
=
(
in_chan_num
+
3
)
/
4
;
int
out_chan_blk_num
=
(
out_chan_num
+
3
)
/
4
;
const
sampler_t
sampler
=
CLK_NORMALIZED_COORDS_FALSE
| CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST
;
for
(
int
batch
=
0
; batch < batch_num; ++batch) {
float4
bias_value
=
vload4
(
out_chan_blk,
bias
)
;
__private
float4
out
=
bias_value
;
for
(
int
in_chan_blk
=
0
; in_chan_blk < in_chan_blk_num; ++in_chan_blk) {
int
in_d
=
batch
*
in_chan_blk_num
+
in_chan_blk
;
float4
in
=
read_imagef
(
input,
sampler,
(
int4
)(
in_d,
h,
w,
0
))
;
const
float
*filter_base
=
filter
+
(
out_chan_blk
<<
2
)
*
in_chan_num
;
float4
weights
=
vload4
(
in_chan_blk,
filter_base
)
;
out.x
+=
dot
(
in,
weights
)
;
weights
=
vload4
(
in_chan_blk,
filter_base
+
in_chan_num
)
;
out.y
+=
dot
(
in,
weights
)
;
weights
=
vload4
(
in_chan_blk,
filter_base
+
in_chan_num
*
2
)
;
out.z
+=
dot
(
in,
weights
)
;
weights
=
vload4
(
in_chan_blk,
filter_base
+
in_chan_num
*
3
)
;
out.w
+=
dot
(
in,
weights
)
;
}
int
out_d
=
batch
*
out_chan_blk_num
+
out_chan_blk
;
int4
out_coord
=
(
int4
)(
out_d,
h,
w,
0
)
;
write_imagef
(
output,
out_coord,
out
)
;
}
}
mace/kernels/opencl/conv_2d_opencl_1x1.cc
浏览文件 @
0461beb5
...
...
@@ -2,9 +2,10 @@
// Copyright (c) 2017 XiaoMi All rights reserved.
//
#include "mace/kernels/conv_2d.h"
#include "mace/core/common.h"
#include "mace/core/runtime/opencl/cl2_header.h"
#include "mace/core/runtime/opencl/opencl_runtime.h"
#include "mace/kernels/conv_2d.h"
#include "mace/utils/utils.h"
namespace
mace
{
...
...
@@ -22,22 +23,22 @@ void Conv1x1Naive(const Tensor *input,
auto
runtime
=
OpenCLRuntime
::
Get
();
auto
program
=
runtime
->
program
();
auto
conv_2d
=
cl
::
KernelFunctor
<
cl
::
Buffer
,
cl
::
Buffer
,
cl
::
Buffer
,
cl
::
Buffer
,
int
,
int
>
(
program
,
"conv_2d_1x1_naive"
);
auto
conv_2d
=
cl
::
KernelFunctor
<
cl
::
Buffer
,
cl
::
Buffer
,
cl
::
Buffer
,
cl
::
Buffer
,
int
,
int
>
(
program
,
"conv_2d_1x1_naive"
);
const
index_t
pixels
=
height
*
width
;
cl_int
error
;
conv_2d
(
cl
::
EnqueueArgs
(
runtime
->
command_queue
(),
cl
::
NDRange
(
static_cast
<
int
>
(
batch
),
static_cast
<
int
>
(
channels
),
static_cast
<
int
>
(
pixels
)),
cl
::
NDRange
(
1
,
1
,
128
)),
conv_2d
(
cl
::
EnqueueArgs
(
runtime
->
command_queue
(
),
cl
::
NDRange
(
static_cast
<
int
>
(
batch
),
static_cast
<
int
>
(
channels
),
static_cast
<
int
>
(
pixels
)),
cl
::
NDRange
(
1
,
1
,
128
)),
*
(
static_cast
<
cl
::
Buffer
*>
(
input
->
buffer
())),
*
(
static_cast
<
cl
::
Buffer
*>
(
filter
->
buffer
())),
*
(
static_cast
<
cl
::
Buffer
*>
(
bias
->
buffer
())),
*
(
static_cast
<
cl
::
Buffer
*>
(
output
->
buffer
())),
static_cast
<
int
>
(
input_channels
),
error
);
static_cast
<
int
>
(
input_channels
),
error
);
MACE_CHECK
(
error
==
CL_SUCCESS
);
}
...
...
@@ -53,31 +54,126 @@ void Conv1x1V2(const Tensor *input,
auto
runtime
=
OpenCLRuntime
::
Get
();
auto
program
=
runtime
->
program
();
auto
conv_2d
=
cl
::
KernelFunctor
<
cl
::
Buffer
,
cl
::
Buffer
,
cl
::
Buffer
,
cl
::
Buffer
,
int
,
int
,
int
,
int
>
(
program
,
"conv_2d_1x1_v2"
);
const
index_t
pixels
=
height
*
width
;
const
index_t
channel_blocks
=
(
channels
+
3
)
/
4
;
const
index_t
pixel_blocks
=
(
pixels
+
3
)
/
4
;
// TODO KernelFunctor has an extra clReleaseCommandQueue due to a copy
// TODO check wired clReleaseCommandQueue latency
// The KernelFunctor can cause segment faults in cb_retain_event
auto
conv_2d_kernel
=
cl
::
Kernel
(
program
,
"conv_2d_1x1_v2"
);
const
uint32_t
kwg_size
=
runtime
->
GetKernelMaxWorkGroupSize
(
conv_2d_kernel
);
uint32_t
idx
=
0
;
conv_2d_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Buffer
*>
(
input
->
buffer
())));
conv_2d_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Buffer
*>
(
filter
->
buffer
())));
conv_2d_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Buffer
*>
(
bias
->
buffer
())));
conv_2d_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Buffer
*>
(
output
->
buffer
())));
conv_2d_kernel
.
setArg
(
idx
++
,
static_cast
<
int
>
(
input_channels
));
conv_2d_kernel
.
setArg
(
idx
++
,
static_cast
<
int
>
(
channels
));
conv_2d_kernel
.
setArg
(
idx
++
,
static_cast
<
int
>
(
pixels
));
auto
command_queue
=
runtime
->
command_queue
();
cl_int
error
=
command_queue
.
enqueueNDRangeKernel
(
conv_2d_kernel
,
cl
::
NullRange
,
cl
::
NDRange
(
static_cast
<
int
>
(
batch
),
static_cast
<
int
>
(
channel_blocks
),
static_cast
<
int
>
(
pixel_blocks
)),
cl
::
NDRange
(
1
,
2
,
kwg_size
/
2
));
MACE_CHECK
(
error
==
CL_SUCCESS
,
error
);
}
void
Conv1x1V3
(
const
Tensor
*
input
,
const
Tensor
*
filter
,
const
Tensor
*
bias
,
Tensor
*
output
)
{
const
index_t
batch
=
output
->
shape
()[
0
];
const
index_t
channels
=
output
->
shape
()[
1
];
const
index_t
height
=
output
->
shape
()[
2
];
const
index_t
width
=
output
->
shape
()[
3
];
const
index_t
input_channels
=
input
->
shape
()[
1
];
auto
runtime
=
OpenCLRuntime
::
Get
();
auto
program
=
runtime
->
program
();
const
index_t
pixels
=
height
*
width
;
const
index_t
pixel_blocks
=
(
pixels
+
3
)
/
4
;
const
index_t
channel_blocks
=
(
channels
+
3
)
/
4
;
const
index_t
input_channel_blocks
=
(
input_channels
+
3
)
/
4
;
// FIXME temp hacking
static
std
::
map
<
std
::
ptrdiff_t
,
cl
::
Image3D
>
input_image_map
;
static
std
::
map
<
std
::
ptrdiff_t
,
cl
::
Image3D
>
output_image_map
;
cl
::
Image3D
input_image
;
cl
::
Image3D
output_image
;
auto
input_iter
=
input_image_map
.
find
(
reinterpret_cast
<
std
::
ptrdiff_t
>
(
input
->
buffer
()));
if
(
input_iter
!=
input_image_map
.
end
())
{
input_image
=
input_iter
->
second
;
}
else
{
// The batch dimension is collapsed with channel
cl_int
error
;
cl
::
Image3D
image
=
cl
::
Image3D
(
OpenCLRuntime
::
Get
()
->
context
(),
CL_MEM_READ_ONLY
|
CL_MEM_ALLOC_HOST_PTR
,
cl
::
ImageFormat
(
CL_RGBA
,
CL_FLOAT
),
height
,
width
,
batch
*
input_channel_blocks
,
0
,
0
,
nullptr
,
&
error
);
MACE_CHECK
(
error
==
CL_SUCCESS
);
input_image
=
image
;
input_image_map
.
clear
();
input_image_map
.
emplace
(
reinterpret_cast
<
std
::
ptrdiff_t
>
(
input
->
buffer
()),
image
);
}
auto
output_iter
=
output_image_map
.
find
(
reinterpret_cast
<
std
::
ptrdiff_t
>
(
output
->
buffer
()));
if
(
output_iter
!=
output_image_map
.
end
())
{
output_image
=
output_iter
->
second
;
}
else
{
cl_int
error
;
cl
::
Image3D
image
=
cl
::
Image3D
(
OpenCLRuntime
::
Get
()
->
context
(),
CL_MEM_WRITE_ONLY
|
CL_MEM_ALLOC_HOST_PTR
,
cl
::
ImageFormat
(
CL_RGBA
,
CL_FLOAT
),
height
,
width
,
batch
*
channel_blocks
,
0
,
0
,
nullptr
,
&
error
);
MACE_CHECK
(
error
==
CL_SUCCESS
);
output_image
=
image
;
output_image_map
.
clear
();
output_image_map
.
emplace
(
reinterpret_cast
<
std
::
ptrdiff_t
>
(
output
->
buffer
()),
image
);
}
auto
conv_2d_kernel
=
cl
::
Kernel
(
program
,
"conv_2d_1x1_v3"
);
const
uint32_t
kwg_size
=
runtime
->
GetKernelMaxWorkGroupSize
(
conv_2d_kernel
);
uint32_t
idx
=
0
;
conv_2d_kernel
.
setArg
(
idx
++
,
input_image
);
conv_2d_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Buffer
*>
(
filter
->
buffer
())));
conv_2d_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Buffer
*>
(
bias
->
buffer
())));
conv_2d_kernel
.
setArg
(
idx
++
,
output_image
);
conv_2d_kernel
.
setArg
(
idx
++
,
static_cast
<
int
>
(
batch
));
conv_2d_kernel
.
setArg
(
idx
++
,
static_cast
<
int
>
(
input_channels
));
conv_2d_kernel
.
setArg
(
idx
++
,
static_cast
<
int
>
(
channels
));
conv_2d_kernel
.
setArg
(
idx
++
,
static_cast
<
int
>
(
height
));
conv_2d_kernel
.
setArg
(
idx
++
,
static_cast
<
int
>
(
width
));
auto
command_queue
=
runtime
->
command_queue
();
cl_int
error
;
conv_2d
(
cl
::
EnqueueArgs
(
runtime
->
command_queue
(),
cl
::
NDRange
(
static_cast
<
int
>
(
batch
),
static_cast
<
int
>
(
channel_blocks
),
static_cast
<
int
>
(
pixel_blocks
)),
cl
::
NDRange
(
1
,
1
,
256
)),
*
(
static_cast
<
cl
::
Buffer
*>
(
input
->
buffer
())),
*
(
static_cast
<
cl
::
Buffer
*>
(
filter
->
buffer
())),
*
(
static_cast
<
cl
::
Buffer
*>
(
bias
->
buffer
())),
*
(
static_cast
<
cl
::
Buffer
*>
(
output
->
buffer
())),
static_cast
<
int
>
(
input_channels
),
static_cast
<
int
>
(
channels
),
static_cast
<
int
>
(
pixels
),
error
);
MACE_CHECK
(
error
==
CL_SUCCESS
);
error
=
command_queue
.
enqueueNDRangeKernel
(
conv_2d_kernel
,
cl
::
NullRange
,
cl
::
NDRange
(
static_cast
<
int
>
(
channel_blocks
),
static_cast
<
int
>
(
height
),
static_cast
<
int
>
(
width
)),
cl
::
NDRange
(
1
,
2
,
kwg_size
/
2
));
MACE_CHECK
(
error
==
CL_SUCCESS
,
error
);
}
extern
void
Conv2dOpenclK1x1S1
(
const
Tensor
*
input
,
const
Tensor
*
filter
,
const
Tensor
*
bias
,
Tensor
*
output
)
{
extern
void
Conv2dOpenclK1x1S1
(
const
Tensor
*
input
,
const
Tensor
*
filter
,
const
Tensor
*
bias
,
Tensor
*
output
)
{
const
index_t
batch
=
output
->
shape
()[
0
];
const
index_t
height
=
output
->
shape
()[
2
];
const
index_t
width
=
output
->
shape
()[
3
];
...
...
@@ -89,7 +185,6 @@ extern void Conv2dOpenclK1x1S1(const Tensor *input, const Tensor *filter,
MACE_CHECK
(
input_batch
==
batch
&&
input_height
==
height
&&
input_width
==
width
);
// Conv1x1Naive(input, filter, bias, output);
Conv1x1V2
(
input
,
filter
,
bias
,
output
);
};
...
...
mace/ops/addn_benchmark.cc
浏览文件 @
0461beb5
...
...
@@ -17,7 +17,7 @@ static void AddNBenchmark(int iters, int n, int size) {
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
op_def_builder
.
Input
(
internal
::
MakeString
(
"Input"
,
i
).
c_str
());
}
op_def_builder
.
Output
(
"Output"
).
Finalize
(
net
.
operator_d
ef
());
op_def_builder
.
Output
(
"Output"
).
Finalize
(
net
.
NewOperatorD
ef
());
// Add input data
for
(
int
i
=
0
;
i
<
n
;
++
i
)
{
...
...
mace/ops/addn_test.cc
浏览文件 @
0461beb5
...
...
@@ -17,7 +17,7 @@ TEST_F(AddnOpTest, AddnOp) {
.
Input
(
"Input2"
)
.
Input
(
"Input3"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_d
ef
());
.
Finalize
(
net
.
NewOperatorD
ef
());
// Add input data
net
.
AddRandomInput
<
DeviceType
::
CPU
,
float
>
(
"Input1"
,
{
1
,
2
,
3
,
4
});
...
...
mace/ops/batch_norm_benchmark.cc
浏览文件 @
0461beb5
...
...
@@ -21,7 +21,7 @@ static void BatchNorm(
.
Input
(
"Var"
)
.
Input
(
"Epsilon"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_d
ef
());
.
Finalize
(
net
.
NewOperatorD
ef
());
// Add input data
net
.
AddRandomInput
<
D
,
T
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
...
...
mace/ops/batch_norm_test.cc
浏览文件 @
0461beb5
...
...
@@ -21,7 +21,7 @@ void Simple() {
.
Input
(
"Var"
)
.
Input
(
"Epsilon"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_d
ef
());
.
Finalize
(
net
.
NewOperatorD
ef
());
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
1
,
1
,
6
,
2
},
...
...
@@ -73,7 +73,7 @@ TEST_F(BatchNormOpTest, SimpleRandomNeon) {
.
Input
(
"Var"
)
.
Input
(
"Epsilon"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_d
ef
());
.
Finalize
(
net
.
NewOperatorD
ef
());
// Add input data
net
.
AddRandomInput
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
...
...
@@ -114,7 +114,7 @@ TEST_F(BatchNormOpTest, ComplexRandomNeon) {
.
Input
(
"Var"
)
.
Input
(
"Epsilon"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_d
ef
());
.
Finalize
(
net
.
NewOperatorD
ef
());
// Add input data
net
.
AddRandomInput
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
...
...
@@ -155,7 +155,7 @@ TEST_F(BatchNormOpTest, SimpleRandomOPENCL) {
.
Input
(
"Var"
)
.
Input
(
"Epsilon"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_d
ef
());
.
Finalize
(
net
.
NewOperatorD
ef
());
// Add input data
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
...
...
@@ -201,7 +201,7 @@ TEST_F(BatchNormOpTest, ComplexRandomOPENCL) {
.
Input
(
"Var"
)
.
Input
(
"Epsilon"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_d
ef
());
.
Finalize
(
net
.
NewOperatorD
ef
());
// Add input data
net
.
AddRandomInput
<
DeviceType
::
OPENCL
,
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
...
...
mace/ops/channel_shuffle_benchmark.cc
浏览文件 @
0461beb5
...
...
@@ -19,10 +19,10 @@ static void ChannelShuffle(
OpDefBuilder
(
"GlobalAvgPooling"
,
"GlobalAvgPoolingTest"
)
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
.
AddIntArg
(
"group"
,
group
)
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddIntArg
(
"group"
,
group
);
net
.
AddRandomInput
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
// Warm-up
...
...
mace/ops/channel_shuffle_test.cc
浏览文件 @
0461beb5
...
...
@@ -14,9 +14,9 @@ TEST_F(ChannelShuffleOpTest, C8G4) {
OpDefBuilder
(
"ChannelShuffle"
,
"ChannelShuffleTest"
)
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
.
AddIntArg
(
"group"
,
4
)
.
Finalize
(
net
.
NewOperatorDef
());
net
.
AddIntArg
(
"group"
,
4
);
// Add input data
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
float
>
(
...
...
mace/ops/concat_benchmark.cc
浏览文件 @
0461beb5
...
...
@@ -17,7 +17,7 @@ static void ConcatHelper(int iters, int concat_dim, int dim1) {
.
Input
(
"Input1"
)
.
Input
(
"Axis"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_d
ef
());
.
Finalize
(
net
.
NewOperatorD
ef
());
// Add input data
const
int
kDim0
=
100
;
...
...
mace/ops/concat_test.cc
浏览文件 @
0461beb5
...
...
@@ -18,7 +18,7 @@ TEST_F(ConcatOpTest, Simple_Horizon) {
.
Input
(
"Input1"
)
.
Input
(
"Axis"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_d
ef
());
.
Finalize
(
net
.
NewOperatorD
ef
());
std
::
vector
<
index_t
>
input_shape
=
{
4
,
4
};
std
::
vector
<
float
>
input0
;
...
...
@@ -56,7 +56,7 @@ TEST_F(ConcatOpTest, Simple_Vertical) {
.
Input
(
"Input1"
)
.
Input
(
"Axis"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_d
ef
());
.
Finalize
(
net
.
NewOperatorD
ef
());
std
::
vector
<
index_t
>
input_shape
=
{
4
,
4
};
std
::
vector
<
float
>
input0
;
...
...
@@ -99,7 +99,7 @@ TEST_F(ConcatOpTest, Random) {
for
(
int
i
=
0
;
i
<
num_inputs
;
++
i
)
{
builder
=
builder
.
Input
((
"Input"
+
ToString
(
i
)).
c_str
());
}
builder
.
Input
(
"Axis"
).
Output
(
"Output"
).
Finalize
(
net
.
operator_d
ef
());
builder
.
Input
(
"Axis"
).
Output
(
"Output"
).
Finalize
(
net
.
NewOperatorD
ef
());
std
::
vector
<
index_t
>
shape_data
;
GenerateRandomIntTypeData
<
index_t
>
({
dim
},
shape_data
,
1
,
dim
);
...
...
mace/ops/conv_2d_benchmark.cc
浏览文件 @
0461beb5
...
...
@@ -3,6 +3,7 @@
//
#include <algorithm>
#include <sstream>
#include "mace/core/operator.h"
#include "mace/core/testing/test_benchmark.h"
...
...
@@ -13,6 +14,7 @@ namespace mace {
template
<
DeviceType
D
,
typename
T
>
static
void
Conv2d
(
int
iters
,
int
iters_to_sync
,
int
batch
,
int
channels
,
int
height
,
...
...
@@ -30,17 +32,15 @@ static void Conv2d(int iters,
.
Input
(
"Filter"
)
.
Input
(
"Bias"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
// Add args
net
.
AddIntsArg
(
"strides"
,
{
stride
,
stride
});
net
.
AddIntArg
(
"padding"
,
padding
);
net
.
AddIntsArg
(
"dilations"
,
{
1
,
1
});
.
AddIntsArg
(
"strides"
,
{
stride
,
stride
})
.
AddIntArg
(
"padding"
,
padding
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
net
.
AddRandomInput
<
D
,
float
>
(
"Filter"
,
{
output_channels
,
channels
,
kernel_h
,
kernel_w
});
{
output_channels
,
channels
,
kernel_h
,
kernel_w
});
net
.
AddRandomInput
<
D
,
float
>
(
"Bias"
,
{
output_channels
});
// Warm-up
...
...
@@ -52,10 +52,17 @@ static void Conv2d(int iters,
mace
::
testing
::
StartTiming
();
while
(
iters
--
)
{
net
.
RunOp
(
D
);
net
.
Sync
();
if
(
iters
%
iters_to_sync
==
0
)
{
net
.
Sync
();
}
}
}
// In common network, there are usually more than 1 layers, this is used to
// approximate the amortized latency. The OpenCL runtime for Mali/Adreno is
// in-order.
constexpr
int
kItersToSync
=
10
;
#define BM_CONV_2D_MACRO(N, C, H, W, KH, KW, STRIDE, P, OC, TYPE, DEVICE) \
static void \
BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE( \
...
...
@@ -63,8 +70,8 @@ static void Conv2d(int iters,
const int64_t tot = static_cast<int64_t>(iters) * N * C * H * W; \
mace::testing::ItemsProcessed(tot); \
mace::testing::BytesProcessed(tot *(sizeof(TYPE))); \
Conv2d<DEVICE, TYPE>(iters,
N, C, H, W, KH, KW, STRIDE, mace::Padding::P,
\
OC);
\
Conv2d<DEVICE, TYPE>(iters,
kItersToSync, N, C, H, W, KH, KW, STRIDE,
\
mace::Padding::P, OC);
\
} \
BENCHMARK( \
BM_CONV_2D_##N##_##C##_##H##_##W##_K##KH##x##KW##S##STRIDE##_##P##_##OC##_##TYPE##_##DEVICE)
...
...
mace/ops/conv_2d_test.cc
浏览文件 @
0461beb5
...
...
@@ -18,12 +18,12 @@ TEST_F(Conv2dOpTest, Simple_VALID) {
.
Input
(
"Filter"
)
.
Input
(
"Bias"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
.
AddIntsArg
(
"strides"
,
{
1
,
1
})
.
AddIntArg
(
"padding"
,
Padding
::
VALID
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
Finalize
(
net
.
NewOperatorDef
());
// Add args
net
.
AddIntsArg
(
"strides"
,
{
1
,
1
});
net
.
AddIntArg
(
"padding"
,
Padding
::
VALID
);
net
.
AddIntsArg
(
"dilations"
,
{
1
,
1
});
// Add input data
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
float
>
(
...
...
@@ -52,12 +52,10 @@ TEST_F(Conv2dOpTest, Simple_SAME) {
.
Input
(
"Filter"
)
.
Input
(
"Bias"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
// Add args
net
.
AddIntsArg
(
"strides"
,
{
1
,
1
});
net
.
AddIntArg
(
"padding"
,
Padding
::
SAME
);
net
.
AddIntsArg
(
"dilations"
,
{
1
,
1
});
.
AddIntsArg
(
"strides"
,
{
1
,
1
})
.
AddIntArg
(
"padding"
,
Padding
::
SAME
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
float
>
(
...
...
@@ -88,12 +86,10 @@ TEST_F(Conv2dOpTest, Combined) {
.
Input
(
"Filter"
)
.
Input
(
"Bias"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
// Add args
net
.
AddIntsArg
(
"strides"
,
{
2
,
2
});
net
.
AddIntArg
(
"padding"
,
Padding
::
SAME
);
net
.
AddIntsArg
(
"dilations"
,
{
1
,
1
});
.
AddIntsArg
(
"strides"
,
{
2
,
2
})
.
AddIntArg
(
"padding"
,
Padding
::
SAME
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
float
>
(
...
...
@@ -127,12 +123,10 @@ void TestConv1x1() {
.
Input
(
"Filter"
)
.
Input
(
"Bias"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
// Add args
net
.
AddIntsArg
(
"strides"
,
{
1
,
1
});
net
.
AddIntArg
(
"padding"
,
Padding
::
VALID
);
net
.
AddIntsArg
(
"dilations"
,
{
1
,
1
});
.
AddIntsArg
(
"strides"
,
{
1
,
1
})
.
AddIntArg
(
"padding"
,
Padding
::
VALID
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
...
...
@@ -190,12 +184,10 @@ TEST_F(Conv2dOpTest, AlignedConvNxNS12) {
.
Input
(
"Filter"
)
.
Input
(
"Bias"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
// Add args
net
.
AddIntsArg
(
"strides"
,
{
stride_h
,
stride_w
});
net
.
AddIntArg
(
"padding"
,
type
);
net
.
AddIntsArg
(
"dilations"
,
{
1
,
1
});
.
AddIntsArg
(
"strides"
,
{
stride_h
,
stride_w
})
.
AddIntArg
(
"padding"
,
type
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddRandomInput
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
{
batch
,
input_channels
,
height
,
width
});
...
...
@@ -241,12 +233,10 @@ TEST_F(Conv2dOpTest, UnalignedConvNxNS12) {
.
Input
(
"Filter"
)
.
Input
(
"Bias"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
// Add args
net
.
AddIntsArg
(
"strides"
,
{
stride_h
,
stride_w
});
net
.
AddIntArg
(
"padding"
,
type
);
net
.
AddIntsArg
(
"dilations"
,
{
1
,
1
});
.
AddIntsArg
(
"strides"
,
{
stride_h
,
stride_w
})
.
AddIntArg
(
"padding"
,
type
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddRandomInput
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
{
batch
,
input_channels
,
height
,
width
});
...
...
mace/ops/depthwise_conv2d_test.cc
浏览文件 @
0461beb5
...
...
@@ -19,12 +19,10 @@ void SimpleValidTest() {
.
Input
(
"Filter"
)
.
Input
(
"Bias"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
// Add args
net
.
AddIntsArg
(
"strides"
,
{
1
,
1
});
net
.
AddIntArg
(
"padding"
,
Padding
::
VALID
);
net
.
AddIntsArg
(
"dilations"
,
{
1
,
1
});
.
AddIntsArg
(
"strides"
,
{
1
,
1
})
.
AddIntArg
(
"padding"
,
Padding
::
VALID
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
{
1
,
2
,
2
,
3
},
...
...
@@ -68,12 +66,10 @@ void TestNxNS12(const index_t height, const index_t width) {
.
Input
(
"Filter"
)
.
Input
(
"Bias"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
// Add args
net
.
AddIntsArg
(
"strides"
,
{
stride_h
,
stride_w
});
net
.
AddIntArg
(
"padding"
,
type
);
net
.
AddIntsArg
(
"dilations"
,
{
1
,
1
});
.
AddIntsArg
(
"strides"
,
{
stride_h
,
stride_w
})
.
AddIntArg
(
"padding"
,
type
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
batch
,
input_channels
,
height
,
width
});
...
...
mace/ops/depthwise_conv_2d_benchmark.cc
浏览文件 @
0461beb5
...
...
@@ -30,12 +30,10 @@ static void DepthwiseConv2d(int iters,
.
Input
(
"Filter"
)
.
Input
(
"Bias"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
// Add args
net
.
AddIntsArg
(
"strides"
,
{
stride
,
stride
});
net
.
AddIntArg
(
"padding"
,
padding
);
net
.
AddIntsArg
(
"dilations"
,
{
1
,
1
});
.
AddIntsArg
(
"strides"
,
{
stride
,
stride
})
.
AddIntArg
(
"padding"
,
padding
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
...
...
mace/ops/global_avg_pooling_benchmark.cc
浏览文件 @
0461beb5
...
...
@@ -19,7 +19,7 @@ static void GlobalAvgPooling(
OpDefBuilder
(
"GlobalAvgPooling"
,
"GlobalAvgPoolingTest"
)
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_d
ef
());
.
Finalize
(
net
.
NewOperatorD
ef
());
// Add input data
net
.
AddRandomInput
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
...
...
mace/ops/global_avg_pooling_test.cc
浏览文件 @
0461beb5
...
...
@@ -14,7 +14,7 @@ TEST_F(GlobalAvgPoolingOpTest, 3x7x7_CPU) {
OpDefBuilder
(
"GlobalAvgPooling"
,
"GlobalAvgPoolingTest"
)
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_d
ef
());
.
Finalize
(
net
.
NewOperatorD
ef
());
// Add input data
std
::
vector
<
float
>
input
(
147
);
...
...
@@ -38,7 +38,7 @@ TEST_F(GlobalAvgPoolingOpTest, 3x7x7_NEON) {
OpDefBuilder
(
"GlobalAvgPooling"
,
"GlobalAvgPoolingTest"
)
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_d
ef
());
.
Finalize
(
net
.
NewOperatorD
ef
());
// Add input data
std
::
vector
<
float
>
input
(
147
);
...
...
mace/ops/ops_test_util.h
浏览文件 @
0461beb5
...
...
@@ -17,21 +17,70 @@ namespace mace {
class
OpDefBuilder
{
public:
OpDefBuilder
(
const
char
*
type
,
const
char
*
name
)
{
OpDefBuilder
(
const
char
*
type
,
const
std
::
string
&
name
)
{
op_def_
.
set_type
(
type
);
op_def_
.
set_name
(
name
);
}
OpDefBuilder
&
Input
(
const
char
*
input_name
)
{
OpDefBuilder
&
Input
(
const
std
::
string
&
input_name
)
{
op_def_
.
add_input
(
input_name
);
return
*
this
;
}
OpDefBuilder
&
Output
(
const
char
*
output_name
)
{
OpDefBuilder
&
Output
(
const
std
::
string
&
output_name
)
{
op_def_
.
add_output
(
output_name
);
return
*
this
;
}
OpDefBuilder
AddIntArg
(
const
std
::
string
&
name
,
const
int
value
)
{
auto
arg
=
op_def_
.
add_arg
();
arg
->
set_name
(
name
);
arg
->
set_i
(
value
);
return
*
this
;
}
OpDefBuilder
AddFloatArg
(
const
std
::
string
&
name
,
const
float
value
)
{
auto
arg
=
op_def_
.
add_arg
();
arg
->
set_name
(
name
);
arg
->
set_f
(
value
);
return
*
this
;
}
OpDefBuilder
AddStringArg
(
const
std
::
string
&
name
,
const
char
*
value
)
{
auto
arg
=
op_def_
.
add_arg
();
arg
->
set_name
(
name
);
arg
->
set_s
(
value
);
return
*
this
;
}
OpDefBuilder
AddIntsArg
(
const
std
::
string
&
name
,
const
std
::
vector
<
int
>
&
values
)
{
auto
arg
=
op_def_
.
add_arg
();
arg
->
set_name
(
name
);
for
(
auto
value
:
values
)
{
arg
->
add_ints
(
value
);
}
return
*
this
;
}
OpDefBuilder
AddFloatsArg
(
const
std
::
string
&
name
,
const
std
::
vector
<
float
>
&
values
)
{
auto
arg
=
op_def_
.
add_arg
();
arg
->
set_name
(
name
);
for
(
auto
value
:
values
)
{
arg
->
add_floats
(
value
);
}
return
*
this
;
}
OpDefBuilder
AddStringsArg
(
const
std
::
string
&
name
,
const
std
::
vector
<
const
char
*>
&
values
)
{
auto
arg
=
op_def_
.
add_arg
();
arg
->
set_name
(
name
);
for
(
auto
value
:
values
)
{
arg
->
add_strings
(
value
);
}
return
*
this
;
}
void
Finalize
(
OperatorDef
*
op_def
)
const
{
MACE_CHECK
(
op_def
!=
nullptr
,
"input should not be null."
);
*
op_def
=
op_def_
;
...
...
@@ -45,7 +94,7 @@ class OpsTestNet {
OpsTestNet
()
{}
template
<
DeviceType
D
,
typename
T
>
void
AddInputFromArray
(
const
char
*
name
,
void
AddInputFromArray
(
const
std
::
string
&
name
,
const
std
::
vector
<
index_t
>
&
shape
,
const
std
::
vector
<
T
>
&
data
)
{
Tensor
*
input
=
...
...
@@ -58,7 +107,7 @@ class OpsTestNet {
}
template
<
DeviceType
D
,
typename
T
>
void
AddRepeatedInput
(
const
char
*
name
,
void
AddRepeatedInput
(
const
std
::
string
&
name
,
const
std
::
vector
<
index_t
>
&
shape
,
const
T
data
)
{
Tensor
*
input
=
...
...
@@ -70,7 +119,7 @@ class OpsTestNet {
}
template
<
DeviceType
D
,
typename
T
>
void
AddRandomInput
(
const
char
*
name
,
void
AddRandomInput
(
const
std
::
string
&
name
,
const
std
::
vector
<
index_t
>
&
shape
,
bool
positive
=
false
)
{
Tensor
*
input
=
...
...
@@ -89,56 +138,18 @@ class OpsTestNet {
});
}
void
AddIntArg
(
const
char
*
name
,
const
int
value
)
{
auto
arg
=
op_def_
.
add_arg
();
arg
->
set_name
(
name
);
arg
->
set_i
(
value
);
}
void
AddFloatArg
(
const
char
*
name
,
const
float
value
)
{
auto
arg
=
op_def_
.
add_arg
();
arg
->
set_name
(
name
);
arg
->
set_f
(
value
);
}
void
AddStringArg
(
const
char
*
name
,
const
char
*
value
)
{
auto
arg
=
op_def_
.
add_arg
();
arg
->
set_name
(
name
);
arg
->
set_s
(
value
);
}
void
AddIntsArg
(
const
char
*
name
,
const
std
::
vector
<
int
>
&
values
)
{
auto
arg
=
op_def_
.
add_arg
();
arg
->
set_name
(
name
);
for
(
auto
value
:
values
)
{
arg
->
add_ints
(
value
);
}
}
void
AddFloatsArg
(
const
char
*
name
,
const
std
::
vector
<
float
>
&
values
)
{
auto
arg
=
op_def_
.
add_arg
();
arg
->
set_name
(
name
);
for
(
auto
value
:
values
)
{
arg
->
add_floats
(
value
);
}
}
void
AddStringsArg
(
const
char
*
name
,
const
std
::
vector
<
const
char
*>
&
values
)
{
auto
arg
=
op_def_
.
add_arg
();
arg
->
set_name
(
name
);
for
(
auto
value
:
values
)
{
arg
->
add_strings
(
value
);
}
OperatorDef
*
NewOperatorDef
()
{
op_defs_
.
emplace_back
(
OperatorDef
());
return
&
op_defs_
[
op_defs_
.
size
()
-
1
];
}
OperatorDef
*
operator_def
()
{
return
&
op_def_
;
}
Workspace
*
ws
()
{
return
&
ws_
;
}
bool
RunOp
(
DeviceType
device
)
{
NetDef
net_def
;
net_def
.
add_op
()
->
CopyFrom
(
op_def_
);
for
(
auto
&
op_def_
:
op_defs_
)
{
net_def
.
add_op
()
->
CopyFrom
(
op_def_
);
}
VLOG
(
3
)
<<
net_def
.
DebugString
();
net_
=
CreateNet
(
net_def
,
&
ws_
,
device
);
device_
=
device
;
...
...
@@ -159,7 +170,7 @@ class OpsTestNet {
public:
Workspace
ws_
;
OperatorDef
op_def
_
;
std
::
vector
<
OperatorDef
>
op_defs
_
;
std
::
unique_ptr
<
NetBase
>
net_
;
DeviceType
device_
;
};
...
...
mace/ops/pooling_benchmark.cc
浏览文件 @
0461beb5
...
...
@@ -27,14 +27,12 @@ static void Pooling(int iters,
OpDefBuilder
(
"Pooling"
,
"PoolingTest"
)
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
// Add args
net
.
AddIntArg
(
"pooling_type"
,
pooling_type
);
net
.
AddIntsArg
(
"kernels"
,
{
kernel
,
kernel
});
net
.
AddIntsArg
(
"strides"
,
{
stride
,
stride
});
net
.
AddIntArg
(
"padding"
,
padding
);
net
.
AddIntsArg
(
"dilations"
,
{
1
,
1
});
.
AddIntArg
(
"pooling_type"
,
pooling_type
)
.
AddIntsArg
(
"kernels"
,
{
kernel
,
kernel
})
.
AddIntsArg
(
"strides"
,
{
stride
,
stride
})
.
AddIntArg
(
"padding"
,
padding
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddRandomInput
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
...
...
mace/ops/pooling_test.cc
浏览文件 @
0461beb5
...
...
@@ -19,14 +19,12 @@ TEST_F(PoolingOpTest, MAX_VALID) {
OpDefBuilder
(
"Pooling"
,
"PoolingTest"
)
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
// Add args
net
.
AddIntsArg
(
"kernels"
,
{
2
,
2
});
net
.
AddIntsArg
(
"strides"
,
{
2
,
2
});
net
.
AddIntArg
(
"padding"
,
Padding
::
VALID
);
net
.
AddIntsArg
(
"dilations"
,
{
1
,
1
});
net
.
AddIntArg
(
"pooling_type"
,
PoolingType
::
MAX
);
.
AddIntsArg
(
"kernels"
,
{
2
,
2
})
.
AddIntsArg
(
"strides"
,
{
2
,
2
})
.
AddIntArg
(
"padding"
,
Padding
::
VALID
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
AddIntArg
(
"pooling_type"
,
PoolingType
::
MAX
)
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
float
>
(
...
...
@@ -50,14 +48,12 @@ TEST_F(PoolingOpTest, AVG_VALID) {
OpDefBuilder
(
"Pooling"
,
"PoolingTest"
)
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
// Add args
net
.
AddIntsArg
(
"kernels"
,
{
2
,
2
});
net
.
AddIntsArg
(
"strides"
,
{
2
,
2
});
net
.
AddIntArg
(
"padding"
,
Padding
::
VALID
);
net
.
AddIntsArg
(
"dilations"
,
{
1
,
1
});
net
.
AddIntArg
(
"pooling_type"
,
PoolingType
::
AVG
);
.
AddIntsArg
(
"kernels"
,
{
2
,
2
})
.
AddIntsArg
(
"strides"
,
{
2
,
2
})
.
AddIntArg
(
"padding"
,
Padding
::
VALID
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
AddIntArg
(
"pooling_type"
,
PoolingType
::
AVG
)
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
float
>
(
...
...
@@ -81,14 +77,12 @@ TEST_F(PoolingOpTest, MAX_SAME) {
OpDefBuilder
(
"Pooling"
,
"PoolingTest"
)
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
// Add args
net
.
AddIntsArg
(
"kernels"
,
{
2
,
2
});
net
.
AddIntsArg
(
"strides"
,
{
2
,
2
});
net
.
AddIntArg
(
"padding"
,
Padding
::
SAME
);
net
.
AddIntsArg
(
"dilations"
,
{
1
,
1
});
net
.
AddIntArg
(
"pooling_type"
,
PoolingType
::
MAX
);
.
AddIntsArg
(
"kernels"
,
{
2
,
2
})
.
AddIntsArg
(
"strides"
,
{
2
,
2
})
.
AddIntArg
(
"padding"
,
Padding
::
SAME
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
AddIntArg
(
"pooling_type"
,
PoolingType
::
MAX
)
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
{
1
,
1
,
3
,
3
},
...
...
@@ -109,14 +103,12 @@ TEST_F(PoolingOpTest, MAX_VALID_DILATION) {
OpDefBuilder
(
"Pooling"
,
"PoolingTest"
)
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
// Add args
net
.
AddIntsArg
(
"kernels"
,
{
2
,
2
});
net
.
AddIntsArg
(
"strides"
,
{
1
,
1
});
net
.
AddIntArg
(
"padding"
,
Padding
::
VALID
);
net
.
AddIntsArg
(
"dilations"
,
{
2
,
2
});
net
.
AddIntArg
(
"pooling_type"
,
PoolingType
::
MAX
);
.
AddIntsArg
(
"kernels"
,
{
2
,
2
})
.
AddIntsArg
(
"strides"
,
{
1
,
1
})
.
AddIntArg
(
"padding"
,
Padding
::
VALID
)
.
AddIntsArg
(
"dilations"
,
{
2
,
2
})
.
AddIntArg
(
"pooling_type"
,
PoolingType
::
MAX
)
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
float
>
(
...
...
@@ -138,14 +130,12 @@ TEST_F(PoolingOpTest, MAX_k2x2s2x2) {
OpDefBuilder
(
"Pooling"
,
"PoolingTest"
)
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
// Add args
net
.
AddIntArg
(
"pooling_type"
,
PoolingType
::
MAX
);
net
.
AddIntsArg
(
"kernels"
,
{
2
,
2
});
net
.
AddIntsArg
(
"strides"
,
{
2
,
2
});
net
.
AddIntArg
(
"padding"
,
Padding
::
SAME
);
net
.
AddIntsArg
(
"dilations"
,
{
1
,
1
});
.
AddIntArg
(
"pooling_type"
,
PoolingType
::
MAX
)
.
AddIntsArg
(
"kernels"
,
{
2
,
2
})
.
AddIntsArg
(
"strides"
,
{
2
,
2
})
.
AddIntArg
(
"padding"
,
Padding
::
SAME
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
float
>
(
...
...
@@ -166,14 +156,12 @@ TEST_F(PoolingOpTest, MAX_k3x3s2x2) {
OpDefBuilder
(
"Pooling"
,
"PoolingTest"
)
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
// Add args
net
.
AddIntArg
(
"pooling_type"
,
PoolingType
::
MAX
);
net
.
AddIntsArg
(
"kernels"
,
{
3
,
3
});
net
.
AddIntsArg
(
"strides"
,
{
2
,
2
});
net
.
AddIntArg
(
"padding"
,
Padding
::
VALID
);
net
.
AddIntsArg
(
"dilations"
,
{
1
,
1
});
.
AddIntArg
(
"pooling_type"
,
PoolingType
::
MAX
)
.
AddIntsArg
(
"kernels"
,
{
3
,
3
})
.
AddIntsArg
(
"strides"
,
{
2
,
2
})
.
AddIntArg
(
"padding"
,
Padding
::
VALID
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
float
>
(
...
...
@@ -195,14 +183,12 @@ TEST_F(PoolingOpTest, AVG_k2x2s2x2) {
OpDefBuilder
(
"Pooling"
,
"PoolingTest"
)
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
// Add args
net
.
AddIntArg
(
"pooling_type"
,
PoolingType
::
AVG
);
net
.
AddIntsArg
(
"kernels"
,
{
2
,
2
});
net
.
AddIntsArg
(
"strides"
,
{
2
,
2
});
net
.
AddIntArg
(
"padding"
,
Padding
::
SAME
);
net
.
AddIntsArg
(
"dilations"
,
{
1
,
1
});
.
AddIntArg
(
"pooling_type"
,
PoolingType
::
AVG
)
.
AddIntsArg
(
"kernels"
,
{
2
,
2
})
.
AddIntsArg
(
"strides"
,
{
2
,
2
})
.
AddIntArg
(
"padding"
,
Padding
::
SAME
)
.
AddIntsArg
(
"dilations"
,
{
1
,
1
})
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
float
>
(
...
...
mace/ops/relu_benchmark.cc
浏览文件 @
0461beb5
...
...
@@ -16,7 +16,7 @@ static void ReluBenchmark(int iters, int size) {
OpDefBuilder
(
"Relu"
,
"ReluBM"
)
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_d
ef
());
.
Finalize
(
net
.
NewOperatorD
ef
());
// Add input data
net
.
AddRandomInput
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
{
size
});
...
...
mace/ops/relu_test.cc
浏览文件 @
0461beb5
...
...
@@ -15,7 +15,7 @@ TEST_F(ReluOpTest, ReluOp) {
OpDefBuilder
(
"Relu"
,
"ReluTest"
)
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_d
ef
());
.
Finalize
(
net
.
NewOperatorD
ef
());
// Add input data
net
.
AddRandomInput
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
{
1
,
2
,
3
,
5
});
...
...
@@ -38,11 +38,11 @@ TEST_F(ReluOpTest, ReluOpWithMax) {
OpDefBuilder
(
"Relu"
,
"ReluTestWithMax"
)
.
Input
(
"Input"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
.
AddFloatArg
(
"max_limit"
,
0.5
)
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddRandomInput
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
{
1
,
2
,
3
,
5
});
net
.
AddFloatArg
(
"max_limit"
,
0.5
);
// Run
net
.
RunOp
();
...
...
mace/ops/resize_bilinear_test.cc
浏览文件 @
0461beb5
...
...
@@ -18,7 +18,7 @@ TEST_F(ResizeBilinearTest, ResizeBilinearWOAlignCorners) {
.
Input
(
"Input"
)
.
Input
(
"OutSize"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_d
ef
());
.
Finalize
(
net
.
NewOperatorD
ef
());
// Add input data
vector
<
float
>
input
(
24
);
...
...
@@ -43,9 +43,8 @@ TEST_F(ResizeBilinearTest, ResizeBilinearWAlignCorners) {
.
Input
(
"Input"
)
.
Input
(
"OutSize"
)
.
Output
(
"Output"
)
.
Finalize
(
net
.
operator_def
());
net
.
AddIntArg
(
"align_corners"
,
1
);
.
AddIntArg
(
"align_corners"
,
1
)
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
vector
<
float
>
input
(
24
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录