Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
冰之2023
Mace
提交
99963c98
Mace
项目概览
冰之2023
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
99963c98
编写于
12月 03, 2017
作者:
Y
yejianwu
浏览文件
操作
浏览文件
下载
差异文件
merge with master
上级
511ee878
2b3d78e0
变更
20
隐藏空白更改
内联
并排
Showing
20 changed file
with
286 addition
and
157 deletion
+286
-157
mace/core/opencl_allocator.cc
mace/core/opencl_allocator.cc
+2
-1
mace/kernels/opencl/addn.cc
mace/kernels/opencl/addn.cc
+1
-1
mace/kernels/opencl/batch_norm_opencl.cc
mace/kernels/opencl/batch_norm_opencl.cc
+2
-2
mace/kernels/opencl/buffer_to_image.cc
mace/kernels/opencl/buffer_to_image.cc
+7
-2
mace/kernels/opencl/cl/resize_bilinear.cl
mace/kernels/opencl/cl/resize_bilinear.cl
+30
-19
mace/kernels/opencl/conv_2d_opencl_1x1.cc
mace/kernels/opencl/conv_2d_opencl_1x1.cc
+2
-2
mace/kernels/opencl/conv_2d_opencl_3x3.cc
mace/kernels/opencl/conv_2d_opencl_3x3.cc
+2
-2
mace/kernels/opencl/depthwise_conv_opencl_3x3.cc
mace/kernels/opencl/depthwise_conv_opencl_3x3.cc
+1
-1
mace/kernels/opencl/helper.cc
mace/kernels/opencl/helper.cc
+26
-18
mace/kernels/opencl/helper.h
mace/kernels/opencl/helper.h
+5
-2
mace/kernels/opencl/pooling_opencl.cc
mace/kernels/opencl/pooling_opencl.cc
+2
-2
mace/kernels/opencl/relu_opencl.cc
mace/kernels/opencl/relu_opencl.cc
+1
-1
mace/kernels/opencl/resize_bilinear_opencl.cc
mace/kernels/opencl/resize_bilinear_opencl.cc
+34
-17
mace/kernels/opencl/space_to_batch_opecl.cc
mace/kernels/opencl/space_to_batch_opecl.cc
+1
-1
mace/kernels/resize_bilinear.h
mace/kernels/resize_bilinear.h
+70
-57
mace/ops/buffer_to_image_test.cc
mace/ops/buffer_to_image_test.cc
+35
-1
mace/ops/resize_bilinear.cc
mace/ops/resize_bilinear.cc
+5
-0
mace/ops/resize_bilinear_benchmark.cc
mace/ops/resize_bilinear_benchmark.cc
+23
-8
mace/ops/resize_bilinear_test.cc
mace/ops/resize_bilinear_test.cc
+36
-20
mace/python/tools/tf_ops_stats.py
mace/python/tools/tf_ops_stats.py
+1
-0
未找到文件。
mace/core/opencl_allocator.cc
浏览文件 @
99963c98
...
...
@@ -54,10 +54,11 @@ void *OpenCLAllocator::NewImage(const std::vector<size_t> &image_shape,
cl_int
error
;
cl
::
Image2D
*
cl_image
=
new
cl
::
Image2D
(
OpenCLRuntime
::
Get
()
->
context
(),
CL_MEM_READ_WRITE
|
CL_MEM_ALLOC_HOST_PTR
,
CL_MEM_READ_WRITE
|
CL_MEM_ALLOC_HOST_PTR
,
img_format
,
image_shape
[
0
],
image_shape
[
1
],
0
,
nullptr
,
&
error
);
MACE_CHECK
(
error
==
CL_SUCCESS
);
return
cl_image
;
}
...
...
mace/kernels/opencl/addn.cc
浏览文件 @
99963c98
...
...
@@ -17,7 +17,7 @@ static void Add2(const Tensor *input0, const Tensor *input1, Tensor *output) {
auto
runtime
=
OpenCLRuntime
::
Get
();
std
::
set
<
std
::
string
>
built_options
;
built_options
.
emplace
(
"-DDATA_TYPE="
+
D
ataTypeToCLType
(
output
->
dtype
()));
built_options
.
emplace
(
"-DDATA_TYPE="
+
D
tToUpstreamCLDt
(
output
->
dtype
()));
auto
addn_kernel
=
runtime
->
BuildKernel
(
"addn"
,
"add2"
,
built_options
);
const
uint32_t
lws
=
runtime
->
GetKernelMaxWorkGroupSize
(
addn_kernel
);
...
...
mace/kernels/opencl/batch_norm_opencl.cc
浏览文件 @
99963c98
...
...
@@ -35,8 +35,8 @@ void BatchNormFunctor<DeviceType::OPENCL, float>::operator()(
auto
runtime
=
OpenCLRuntime
::
Get
();
std
::
set
<
std
::
string
>
built_options
;
built_options
.
emplace
(
"-DDATA_TYPE="
+
D
ataTypeToCLType
(
input
->
dtype
()));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
D
ataTypeToOPENCLCMDDataType
(
input
->
dtype
()));
built_options
.
emplace
(
"-DDATA_TYPE="
+
D
tToUpstreamCLDt
(
input
->
dtype
()));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
D
tToUpstreamCLCMDDt
(
input
->
dtype
()));
auto
bm_kernel
=
runtime
->
BuildKernel
(
"batch_norm"
,
"batch_norm"
,
built_options
);
const
uint32_t
kwg_size
=
runtime
->
GetKernelMaxWorkGroupSize
(
bm_kernel
);
...
...
mace/kernels/opencl/buffer_to_image.cc
浏览文件 @
99963c98
...
...
@@ -24,8 +24,13 @@ void BufferToImageFunctor<DeviceType::OPENCL, T>::operator()(Tensor *buffer,
}
std
::
set
<
std
::
string
>
built_options
;
built_options
.
emplace
(
"-DDATA_TYPE="
+
DataTypeToCLType
(
DataTypeToEnum
<
T
>::
value
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DataTypeToOPENCLCMDDataType
(
DataTypeToEnum
<
T
>::
value
));
if
(
buffer
->
dtype
()
==
image
->
dtype
())
{
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToCLDt
(
DataTypeToEnum
<
T
>::
value
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToCLCMDDt
(
DataTypeToEnum
<
T
>::
value
));
}
else
{
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToUpstreamCLDt
(
DataTypeToEnum
<
T
>::
value
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
DataTypeToEnum
<
T
>::
value
));
}
auto
runtime
=
OpenCLRuntime
::
Get
();
string
kernel_name
;
switch
(
type
)
{
...
...
mace/kernels/opencl/cl/resize_bilinear.cl
浏览文件 @
99963c98
#
include
<common.h>
//
Supported
data
type:
half/float
__kernel
void
resize_bilinear_nocache
(
__global
const
DATA_TYPE
*input,
/*
n
*
c,
h,
w
*/
__global
DATA_TYPE
*output
/*
n
*
c,
h,
w
*/,
__kernel
void
resize_bilinear_nocache
(
__read_only
image2d_t
input,
/*
[c%4
*
w
*
c/4,
h
*
b]
*/
__write_only
image2d_t
output,
__private
const
float
height_scale,
__private
const
float
width_scale,
__private
const
int
in_height,
__private
const
int
in_width
)
{
const
int
c
=
get_global_id
(
0
)
;
const
int
h
=
get_global_id
(
1
)
;
const
int
w
=
get_global_id
(
2
)
;
const
int
channels
=
get_global_size
(
0
)
;
const
int
height
=
get_global_size
(
1
)
;
const
int
width
=
get_global_size
(
2
)
;
__private
const
int
in_width,
__private
const
int
out_height
)
{
const
int
ch_blk
=
get_global_id
(
0
)
;
const
int
ch_blks
=
get_global_size
(
0
)
;
const
int
w
=
get_global_id
(
1
)
;
const
int
out_width
=
get_global_size
(
1
)
;
const
int
hb
=
get_global_id
(
2
)
;
const
int
b
=
hb
/
out_height
;
const
int
h
=
hb
%
out_height
;
const
float
h_in
=
h
*
height_scale
;
const
float
w_in
=
w
*
width_scale
;
...
...
@@ -24,16 +25,26 @@ __kernel void resize_bilinear_nocache(__global const DATA_TYPE *input, /* n * c,
const
float
h_lerp
=
h_in
-
h_lower
;
const
float
w_lerp
=
w_in
-
w_lower
;
const
DATA_TYPE
*input_base
=
input
+
c
*
in_height
*
in_width
;
DATA_TYPE
*output_base
=
output
+
c
*
height
*
width
;
const
sampler_t
sampler
=
CLK_NORMALIZED_COORDS_FALSE
| CLK_ADDRESS_CLAMP |
CLK_FILTER_NEAREST
;
const
int
in_w_offset
=
ch_blk
*
in_width
;
const
int
in_h_offset
=
b
*
in_height
;
DATA_TYPE
top_left
=
input_base[h_lower
*
in_width
+
w_lower]
;
DATA_TYPE
top_right
=
input_base[h_lower
*
in_width
+
w_upper]
;
DATA_TYPE
bottom_left
=
input_base[h_upper
*
in_width
+
w_lower]
;
DATA_TYPE
bottom_right
=
input_base[h_upper
*
in_width
+
w_upper]
;
DATA_TYPE4
top_left
=
READ_IMAGET
(
input,
sampler,
(
int2
)(
in_w_offset
+
w_lower,
in_h_offset
+
h_lower
))
;
DATA_TYPE4
top_right
=
READ_IMAGET
(
input,
sampler,
(
int2
)(
in_w_offset
+
w_upper,
in_h_offset
+
h_lower
))
;
DATA_TYPE4
bottom_left
=
READ_IMAGET
(
input,
sampler,
(
int2
)(
in_w_offset
+
w_lower,
in_h_offset
+
h_upper
))
;
DATA_TYPE4
bottom_right
=
READ_IMAGET
(
input,
sampler,
(
int2
)(
in_w_offset
+
w_upper,
in_h_offset
+
h_upper
))
;
const
DATA_TYPE
top
=
top_left
+
(
top_right
-
top_left
)
*
w_lerp
;
const
DATA_TYPE
bottom
=
bottom_left
+
(
bottom_right
-
bottom_left
)
*
w_lerp
;
output_base[h
*
width
+
w]
=
top
+
(
bottom
-
top
)
*
h_lerp
;
DATA_TYPE4
top
=
top_left
+
(
top_right
-
top_left
)
*
w_lerp
;
DATA_TYPE4
bottom
=
bottom_left
+
(
bottom_right
-
bottom_left
)
*
w_lerp
;
DATA_TYPE4
out
=
top
+
(
bottom
-
top
)
*
h_lerp
;
const
int
out_w_offset
=
ch_blk
*
out_width
;
const
int
out_h_offset
=
b
*
out_height
;
WRITE_IMAGET
(
output,
(
int2
)(
out_w_offset
+
w,
out_h_offset
+
h
)
,
out
)
;
}
mace/kernels/opencl/conv_2d_opencl_1x1.cc
浏览文件 @
99963c98
...
...
@@ -34,8 +34,8 @@ void Conv1x1(const Tensor *input,
MACE_CHECK
(
input_batch
==
batch
);
std
::
set
<
std
::
string
>
built_options
;
built_options
.
emplace
(
"-DDATA_TYPE="
+
D
ataTypeToCLType
(
dt
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
D
ataTypeToOPENCLCMDDataType
(
dt
));
built_options
.
emplace
(
"-DDATA_TYPE="
+
D
tToUpstreamCLDt
(
dt
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
D
tToUpstreamCLCMDDt
(
dt
));
built_options
.
emplace
(
"-DSTRIDE="
+
ToString
(
stride
));
if
(
bias
!=
nullptr
)
{
built_options
.
emplace
(
"-DBIAS"
);
...
...
mace/kernels/opencl/conv_2d_opencl_3x3.cc
浏览文件 @
99963c98
...
...
@@ -26,8 +26,8 @@ static void Conv2d3x3S12(const Tensor *input, const Tensor *filter,
const
index_t
width_blocks
=
RoundUpDiv
<
index_t
,
5
>
(
width
);
std
::
set
<
std
::
string
>
built_options
;
built_options
.
emplace
(
"-DDATA_TYPE="
+
D
ataTypeToCLType
(
dt
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
D
ataTypeToOPENCLCMDDataType
(
dt
));
built_options
.
emplace
(
"-DDATA_TYPE="
+
D
tToUpstreamCLDt
(
dt
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
D
tToUpstreamCLCMDDt
(
dt
));
built_options
.
emplace
(
bias
!=
nullptr
?
"-DBIAS"
:
""
);
built_options
.
emplace
(
"-DSTRIDE="
+
ToString
(
stride
));
if
(
fused_relu
)
{
...
...
mace/kernels/opencl/depthwise_conv_opencl_3x3.cc
浏览文件 @
99963c98
...
...
@@ -32,7 +32,7 @@ static void InnerDepthwiseConvOpenclK3x3S12(const Tensor *input,
auto
runtime
=
OpenCLRuntime
::
Get
();
std
::
set
<
std
::
string
>
built_options
;
built_options
.
emplace
(
"-DDATA_TYPE="
+
D
ataTypeToCLType
(
input
->
dtype
()));
built_options
.
emplace
(
"-DDATA_TYPE="
+
D
tToUpstreamCLDt
(
input
->
dtype
()));
built_options
.
emplace
(
stride
==
1
?
"-DSTRIDE_1"
:
""
);
built_options
.
emplace
(
bias
!=
nullptr
?
"-DBIAS"
:
""
);
auto
conv_kernel
=
runtime
->
BuildKernel
(
"depthwise_conv_3x3"
,
"depthwise_conv_3x3"
,
built_options
);
...
...
mace/kernels/opencl/helper.cc
浏览文件 @
99963c98
...
...
@@ -54,34 +54,42 @@ void CalImage2DShape(const std::vector<index_t> &shape, /* NHWC */
}
std
::
string
DataTypeToCLType
(
const
DataType
dt
)
{
std
::
string
DtToCLDt
(
const
DataType
dt
)
{
switch
(
dt
)
{
case
DT_FLOAT
:
return
"float"
;
case
DT_HALF
:
return
"half"
;
default:
LOG
(
FATAL
)
<<
"Unsupported data type"
;
return
""
;
}
}
std
::
string
DtToCLCMDDt
(
const
DataType
dt
)
{
switch
(
dt
)
{
case
DT_FLOAT
:
return
"f"
;
case
DT_HALF
:
return
"h"
;
default:
LOG
(
FATAL
)
<<
"Not supported data type for opencl cmd data type"
;
return
""
;
}
}
std
::
string
DtToUpstreamCLDt
(
const
DataType
dt
)
{
switch
(
dt
)
{
case
DT_FLOAT
:
case
DT_HALF
:
return
"float"
;
case
DT_UINT8
:
return
"uchar"
;
case
DT_INT8
:
return
"char"
;
case
DT_DOUBLE
:
return
"double"
;
case
DT_INT32
:
return
"int"
;
case
DT_UINT32
:
return
"int"
;
case
DT_UINT16
:
return
"ushort"
;
case
DT_INT16
:
return
"short"
;
case
DT_INT64
:
return
"long"
;
default:
LOG
(
FATAL
)
<<
"Unsupported data type"
;
return
""
;
}
}
std
::
string
D
ataTypeToOPENCLCMDDataType
(
const
DataType
dt
)
{
std
::
string
D
tToUpstreamCLCMDDt
(
const
DataType
dt
)
{
switch
(
dt
)
{
case
DT_FLOAT
:
case
DT_HALF
:
...
...
mace/kernels/opencl/helper.h
浏览文件 @
99963c98
...
...
@@ -19,10 +19,13 @@ void CalImage2DShape(const std::vector<index_t> &shape, /* NHWC */
const
BufferType
type
,
std
::
vector
<
size_t
>
&
image_shape
);
std
::
string
D
ataTypeToOPENCLCMDDataType
(
const
DataType
dt
);
std
::
string
D
tToCLCMDDt
(
const
DataType
dt
);
std
::
string
D
ataTypeToCLType
(
const
DataType
dt
);
std
::
string
D
tToUpstreamCLCMDDt
(
const
DataType
dt
);
std
::
string
DtToCLDt
(
const
DataType
dt
);
std
::
string
DtToUpstreamCLDt
(
const
DataType
dt
);
}
// namespace kernels
}
// namespace mace
...
...
mace/kernels/opencl/pooling_opencl.cc
浏览文件 @
99963c98
...
...
@@ -32,7 +32,7 @@ static void Pooling3(const Tensor *input,
auto
runtime
=
OpenCLRuntime
::
Get
();
std
::
set
<
std
::
string
>
built_options
;
built_options
.
emplace
(
"-DDATA_TYPE="
+
D
ataTypeToCLType
(
input
->
dtype
()));
built_options
.
emplace
(
"-DDATA_TYPE="
+
D
tToUpstreamCLDt
(
input
->
dtype
()));
built_options
.
emplace
(
stride
[
0
]
==
1
?
"-DSTRIDE_1"
:
""
);
auto
pooling_kernel
=
runtime
->
BuildKernel
(
"pooling"
,
"pooling3"
,
built_options
);
...
...
@@ -80,7 +80,7 @@ static void PoolingN(const Tensor *input,
auto
runtime
=
OpenCLRuntime
::
Get
();
std
::
set
<
std
::
string
>
built_options
;
built_options
.
emplace
(
"-DDATA_TYPE="
+
D
ataTypeToCLType
(
input
->
dtype
()));
built_options
.
emplace
(
"-DDATA_TYPE="
+
D
tToUpstreamCLDt
(
input
->
dtype
()));
auto
pooling_kernel
=
runtime
->
BuildKernel
(
"pooling"
,
"poolingn"
,
built_options
);
const
uint32_t
lws
[
3
]
=
{
1
,
8
,
128
};
...
...
mace/kernels/opencl/relu_opencl.cc
浏览文件 @
99963c98
...
...
@@ -23,7 +23,7 @@ void ReluFunctor<DeviceType::OPENCL, float>::operator()(const Tensor *input,
auto
program
=
runtime
->
program
();
std
::
set
<
std
::
string
>
built_options
;
built_options
.
emplace
(
"-DDATA_TYPE="
+
D
ataTypeToCLType
(
input
->
dtype
()));
built_options
.
emplace
(
"-DDATA_TYPE="
+
D
tToUpstreamCLDt
(
input
->
dtype
()));
if
(
max_limit_
<
0
)
{
auto
relu_kernel
=
runtime
->
BuildKernel
(
"relu"
,
"relu"
,
built_options
);
const
uint32_t
lws
=
runtime
->
GetKernelMaxWorkGroupSize
(
relu_kernel
);
...
...
mace/kernels/opencl/resize_bilinear_opencl.cc
浏览文件 @
99963c98
...
...
@@ -6,24 +6,33 @@
#include "mace/core/tensor.h"
#include "mace/kernels/resize_bilinear.h"
#include "mace/kernels/opencl/helper.h"
#include "mace/utils/utils.h"
namespace
mace
{
namespace
kernels
{
template
<
>
void
ResizeBilinearFunctor
<
DeviceType
::
OPENCL
,
float
>::
operator
()(
template
<
typename
T
>
void
ResizeBilinearFunctor
<
DeviceType
::
OPENCL
,
T
>::
operator
()(
const
Tensor
*
input
,
const
Tensor
*
resize_dims
,
Tensor
*
output
)
{
const
index_t
batch
=
input
->
dim
(
0
);
const
index_t
channels
=
input
->
dim
(
1
);
const
index_t
in_height
=
input
->
dim
(
2
);
const
index_t
in_width
=
input
->
dim
(
3
);
const
index_t
in_height
=
input
->
dim
(
1
);
const
index_t
in_width
=
input
->
dim
(
2
);
const
index_t
channels
=
input
->
dim
(
3
);
const
index_t
channel_blocks
=
RoundUpDiv4
(
channels
);
index_t
out_height
;
index_t
out_width
;
GetOutputSize
(
resize_dims
,
&
out_height
,
&
out_width
);
MACE_CHECK
(
out_height
>
0
&&
out_width
>
0
);
std
::
vector
<
index_t
>
out_shape
{
batch
,
channels
,
out_height
,
out_width
};
output
->
Resize
(
out_shape
);
std
::
vector
<
index_t
>
output_shape
{
batch
,
out_height
,
out_width
,
channels
};
if
(
input
->
is_image
())
{
std
::
vector
<
size_t
>
output_image_shape
;
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT
,
output_image_shape
);
output
->
ResizeImage
(
output_shape
,
output_image_shape
);
}
else
{
output
->
Resize
(
output_shape
);
}
float
height_scale
=
CalculateResizeScale
(
in_height
,
out_height
,
align_corners_
);
...
...
@@ -31,29 +40,37 @@ void ResizeBilinearFunctor<DeviceType::OPENCL, float>::operator()(
auto
runtime
=
OpenCLRuntime
::
Get
();
std
::
set
<
std
::
string
>
built_options
;
built_options
.
emplace
(
"-DDATA_TYPE="
+
DataTypeToCLType
(
input
->
dtype
()));
auto
dt
=
DataTypeToEnum
<
T
>::
value
;
built_options
.
emplace
(
"-DDATA_TYPE="
+
DtToUpstreamCLDt
(
dt
));
built_options
.
emplace
(
"-DCMD_DATA_TYPE="
+
DtToUpstreamCLCMDDt
(
dt
));
auto
rb_kernel
=
runtime
->
BuildKernel
(
"resize_bilinear"
,
"resize_bilinear_nocache"
,
built_options
);
const
uint32_t
kwg_size
=
runtime
->
GetKernelMaxWorkGroupSize
(
rb_kernel
);
uint32_t
idx
=
0
;
rb_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Buffer
*>
(
input
->
buffer
())));
rb_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Buffer
*>
(
output
->
buffer
())));
rb_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
const
cl
::
Image2D
*>
(
input
->
buffer
())));
rb_kernel
.
setArg
(
idx
++
,
*
(
static_cast
<
cl
::
Image2D
*>
(
output
->
buffer
())));
rb_kernel
.
setArg
(
idx
++
,
height_scale
);
rb_kernel
.
setArg
(
idx
++
,
width_scale
);
rb_kernel
.
setArg
(
idx
++
,
static_cast
<
int
>
(
in_height
));
rb_kernel
.
setArg
(
idx
++
,
static_cast
<
int
>
(
in_width
));
rb_kernel
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
in_height
));
rb_kernel
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
in_width
));
rb_kernel
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
out_height
));
auto
command_queue
=
runtime
->
command_queue
();
cl_int
error
=
command_queue
.
enqueueNDRangeKernel
(
rb_kernel
,
cl
::
NullRange
,
cl
::
NDRange
(
static_cast
<
int
>
(
batch
*
channels
),
static_cast
<
int
>
(
out_height
),
static_cast
<
int
>
(
out_width
)),
// TODO (heliangliang) tuning and fix when kwg_size < devisor
cl
::
NDRange
(
1
,
16
,
kwg_size
/
16
),
NULL
,
OpenCLRuntime
::
Get
()
->
GetDefaultEvent
());
cl
::
NDRange
(
static_cast
<
int32_t
>
(
channel_blocks
),
static_cast
<
int32_t
>
(
out_width
),
static_cast
<
int32_t
>
(
out_height
*
batch
)),
// TODO tuning
cl
::
NDRange
(
1
,
static_cast
<
int32_t
>
(
out_width
>
kwg_size
?
kwg_size
:
out_width
),
1
),
nullptr
,
OpenCLRuntime
::
Get
()
->
GetDefaultEvent
());
MACE_CHECK
(
error
==
CL_SUCCESS
,
error
);
}
template
struct
ResizeBilinearFunctor
<
DeviceType
::
OPENCL
,
float
>;
template
struct
ResizeBilinearFunctor
<
DeviceType
::
OPENCL
,
half
>;
}
// namespace kernels
}
// namespace mace
mace/kernels/opencl/space_to_batch_opecl.cc
浏览文件 @
99963c98
...
...
@@ -20,7 +20,7 @@ void SpaceToBatchFunctor<DeviceType::OPENCL, float>::operator()(Tensor *space_te
Tensor
*
batch_tensor
)
{
auto
runtime
=
OpenCLRuntime
::
Get
();
std
::
set
<
std
::
string
>
built_options
;
built_options
.
emplace
(
"-DDATA_TYPE="
+
D
ataTypeToCLType
(
space_tensor
->
dtype
()));
built_options
.
emplace
(
"-DDATA_TYPE="
+
D
tToUpstreamCLDt
(
space_tensor
->
dtype
()));
auto
s2b_kernel
=
runtime
->
BuildKernel
(
"space_to_batch"
,
"space_to_batch"
,
built_options
);
uint32_t
idx
=
0
;
...
...
mace/kernels/resize_bilinear.h
浏览文件 @
99963c98
...
...
@@ -61,63 +61,90 @@ void ResizeImage(const T *images,
const
index_t
channels
,
const
std
::
vector
<
CachedInterpolation
>
&
xs_vec
,
const
std
::
vector
<
CachedInterpolation
>
&
ys
,
float
*
output
)
{
const
index_t
in_channel_size
=
in_height
*
in_width
;
const
index_t
in_batch_num_values
=
channels
*
in_channel_size
;
const
index_t
out_channel_size
=
out_height
*
out_width
;
const
index_t
out_batch_num_values
=
channels
*
out_channel_size
;
T
*
output
)
{
const
index_t
in_batch_num_values
=
channels
*
in_height
*
in_width
;
const
index_t
out_batch_num_values
=
channels
*
out_height
*
out_width
;
const
CachedInterpolation
*
xs
=
xs_vec
.
data
();
#pragma omp parallel for
collapse(2)
#pragma omp parallel for
for
(
index_t
b
=
0
;
b
<
batch_size
;
++
b
)
{
for
(
index_t
c
=
0
;
c
<
channels
;
++
c
)
{
const
T
*
input_ptr
=
images
+
in_batch_num_values
*
b
+
in_channel_size
*
c
;
float
*
output_ptr
=
output
+
out_batch_num_values
*
b
+
out_channel_size
*
c
;
for
(
index_t
y
=
0
;
y
<
out_height
;
++
y
)
{
const
T
*
ys_input_lower_ptr
=
input_ptr
+
ys
[
y
].
lower
*
in_width
;
const
T
*
ys_input_upper_ptr
=
input_ptr
+
ys
[
y
].
upper
*
in_width
;
const
float
ys_lerp
=
ys
[
y
].
lerp
;
for
(
index_t
x
=
0
;
x
<
out_width
;
++
x
)
{
auto
xs_lower
=
xs
[
x
].
lower
;
auto
xs_upper
=
xs
[
x
].
upper
;
auto
xs_lerp
=
xs
[
x
].
lerp
;
const
float
top_left
=
ys_input_lower_ptr
[
xs_lower
];
const
float
top_right
=
ys_input_lower_ptr
[
xs_upper
];
const
float
bottom_left
=
ys_input_upper_ptr
[
xs_lower
];
const
float
bottom_right
=
ys_input_upper_ptr
[
xs_upper
];
output_ptr
[
x
]
=
ComputeLerp
(
top_left
,
top_right
,
bottom_left
,
bottom_right
,
xs_lerp
,
ys_lerp
);
const
T
*
batch_input_ptr
=
images
+
in_batch_num_values
*
b
;;
T
*
batch_output_ptr
=
output
+
out_batch_num_values
*
b
;
for
(
index_t
y
=
0
;
y
<
out_height
;
++
y
)
{
const
T
*
y_lower_input_ptr
=
batch_input_ptr
+
ys
[
y
].
lower
*
in_width
*
channels
;
const
T
*
y_upper_input_ptr
=
batch_input_ptr
+
ys
[
y
].
upper
*
in_width
*
channels
;
T
*
y_output_ptr
=
batch_output_ptr
+
y
*
out_width
*
channels
;
const
float
ys_lerp
=
ys
[
y
].
lerp
;
for
(
index_t
x
=
0
;
x
<
out_width
;
++
x
)
{
const
float
xs_lerp
=
xs
[
x
].
lerp
;
const
T
*
top_left_ptr
=
y_lower_input_ptr
+
xs
[
x
].
lower
*
channels
;
const
T
*
top_right_ptr
=
y_lower_input_ptr
+
xs
[
x
].
upper
*
channels
;
const
T
*
bottom_left_ptr
=
y_upper_input_ptr
+
xs
[
x
].
lower
*
channels
;
const
T
*
bottom_right_ptr
=
y_upper_input_ptr
+
xs
[
x
].
upper
*
channels
;
T
*
output_ptr
=
y_output_ptr
+
x
*
channels
;
for
(
index_t
c
=
0
;
c
<
channels
;
++
c
)
{
const
T
top_left
=
top_left_ptr
[
c
];
const
T
top_right
=
top_right_ptr
[
c
];
const
T
bottom_left
=
bottom_left_ptr
[
c
];
const
T
bottom_right
=
bottom_right_ptr
[
c
];
output_ptr
[
c
]
=
ComputeLerp
(
top_left
,
top_right
,
bottom_left
,
bottom_right
,
xs_lerp
,
ys_lerp
);
}
output_ptr
+=
out_width
;
}
}
}
}
}
struct
ResizeBilinearFunctorBase
{
ResizeBilinearFunctorBase
(
const
std
::
vector
<
index_t
>
&
size
,
bool
align_corners
)
:
align_corners_
(
align_corners
),
size_
(
size
)
{}
protected:
void
GetOutputSize
(
const
Tensor
*
resize_dims
,
index_t
*
out_height
,
index_t
*
out_width
)
{
if
(
size_
[
0
]
<
0
||
size_
[
1
]
<
0
)
{
MACE_CHECK
(
resize_dims
!=
nullptr
&&
resize_dims
->
dim_size
()
==
1
);
Tensor
::
MappingGuard
resize_dims_mapper
(
resize_dims
);
auto
dims_data
=
resize_dims
->
data
<
int32_t
>
();
*
out_height
=
dims_data
[
0
];
*
out_width
=
dims_data
[
1
];
}
else
{
*
out_height
=
size_
[
0
];
*
out_width
=
size_
[
1
];
}
}
bool
align_corners_
;
std
::
vector
<
index_t
>
size_
;
};
template
<
DeviceType
D
,
typename
T
>
class
ResizeBilinearFunctor
{
public:
struct
ResizeBilinearFunctor
:
ResizeBilinearFunctorBase
{
ResizeBilinearFunctor
(
const
std
::
vector
<
index_t
>
&
size
,
bool
align_corners
)
:
align_corners_
(
align_corners
),
size_
(
size
)
{}
:
ResizeBilinearFunctorBase
(
size
,
align_corners
)
{}
void
operator
()(
const
Tensor
*
input
,
const
Tensor
*
resize_dims
,
Tensor
*
output
)
{
const
index_t
batch
=
input
->
dim
(
0
);
const
index_t
channels
=
input
->
dim
(
1
);
const
index_t
in_
height
=
input
->
dim
(
2
);
const
index_t
in_width
=
input
->
dim
(
3
);
const
index_t
in_height
=
input
->
dim
(
1
);
const
index_t
in_
width
=
input
->
dim
(
2
);
const
index_t
channels
=
input
->
dim
(
3
);
index_t
out_height
;
index_t
out_width
;
GetOutputSize
(
resize_dims
,
&
out_height
,
&
out_width
);
MACE_CHECK
(
out_height
>
0
&&
out_width
>
0
);
std
::
vector
<
index_t
>
out_shape
{
batch
,
channels
,
out_height
,
out_width
};
std
::
vector
<
index_t
>
out_shape
{
batch
,
out_height
,
out_width
,
channels
};
output
->
Resize
(
out_shape
);
Tensor
::
MappingGuard
input_mapper
(
input
);
...
...
@@ -146,32 +173,18 @@ class ResizeBilinearFunctor {
ResizeImage
(
input_data
,
batch
,
in_height
,
in_width
,
out_height
,
out_width
,
channels
,
xs
,
ys
,
output_data
);
}
};
protected:
void
GetOutputSize
(
const
Tensor
*
resize_dims
,
index_t
*
out_height
,
index_t
*
out_width
)
{
if
(
size_
[
0
]
<
0
||
size_
[
1
]
<
0
)
{
MACE_CHECK
(
resize_dims
!=
nullptr
&&
resize_dims
->
dim_size
()
==
1
);
Tensor
::
MappingGuard
resize_dims_mapper
(
resize_dims
);
auto
dims_data
=
resize_dims
->
data
<
int32_t
>
();
*
out_height
=
dims_data
[
0
];
*
out_width
=
dims_data
[
1
];
}
else
{
*
out_height
=
size_
[
0
];
*
out_width
=
size_
[
1
];
}
}
template
<
typename
T
>
struct
ResizeBilinearFunctor
<
DeviceType
::
OPENCL
,
T
>
:
ResizeBilinearFunctorBase
{
ResizeBilinearFunctor
(
const
std
::
vector
<
index_t
>
&
size
,
bool
align_corners
)
:
ResizeBilinearFunctorBase
(
size
,
align_corners
)
{}
private:
bool
align_corners_
;
std
::
vector
<
index_t
>
size_
;
void
operator
()(
const
Tensor
*
input
,
const
Tensor
*
resize_dims
,
Tensor
*
output
)
;
};
template
<
>
void
ResizeBilinearFunctor
<
DeviceType
::
OPENCL
,
float
>::
operator
()(
const
Tensor
*
input
,
const
Tensor
*
resize_dims
,
Tensor
*
output
);
}
// namespace kernels
}
// namespace mace
...
...
mace/ops/buffer_to_image_test.cc
浏览文件 @
99963c98
...
...
@@ -43,7 +43,7 @@ TEST(BufferToImageTest, ArgSmall) {
}
TEST
(
BufferToImageTest
,
ArgHalfSmall
)
{
TestBidirectionTransform
<
DeviceType
::
OPENCL
,
half
>
(
kernels
::
ARGUMENT
,
{
1
});
TestBidirectionTransform
<
DeviceType
::
OPENCL
,
half
>
(
kernels
::
ARGUMENT
,
{
1
1
});
}
TEST
(
BufferToImageTest
,
ArgMedia
)
{
...
...
@@ -97,3 +97,37 @@ TEST(BufferToImageTest, Filter3x3Meida) {
TEST
(
BufferToImageTest
,
Filter3x3Large
)
{
TestBidirectionTransform
<
DeviceType
::
OPENCL
,
float
>
(
kernels
::
FILTER
,
{
3
,
3
,
128
,
256
});
}
template
<
DeviceType
D
,
typename
T
>
void
TestDiffTypeBidirectionTransform
(
const
int
type
,
const
std
::
vector
<
index_t
>
&
input_shape
)
{
OpsTestNet
net
;
OpDefBuilder
(
"BufferToImage"
,
"BufferToImageTest"
)
.
Input
(
"Input"
)
.
Output
(
"B2IOutput"
)
.
AddIntArg
(
"buffer_type"
,
type
)
.
AddIntArg
(
"T"
,
DataTypeToEnum
<
T
>::
value
)
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
input_shape
);
// Run
net
.
RunOp
(
D
);
OpDefBuilder
(
"ImageToBuffer"
,
"ImageToBufferTest"
)
.
Input
(
"B2IOutput"
)
.
Output
(
"I2BOutput"
)
.
AddIntArg
(
"buffer_type"
,
type
)
.
AddIntArg
(
"T"
,
DataTypeToEnum
<
T
>::
value
)
.
Finalize
(
net
.
NewOperatorDef
());
// Run
net
.
RunOp
(
D
);
// Check
ExpectTensorNear
<
float
,
T
>
(
*
net
.
GetOutput
(
"Input"
),
*
net
.
GetOutput
(
"I2BOutput"
),
1e-2
);
}
TEST
(
BufferToImageTest
,
ArgFloatToHalfSmall
)
{
TestDiffTypeBidirectionTransform
<
DeviceType
::
OPENCL
,
half
>
(
kernels
::
ARGUMENT
,
{
11
});
}
mace/ops/resize_bilinear.cc
浏览文件 @
99963c98
...
...
@@ -23,4 +23,9 @@ REGISTER_OPENCL_OPERATOR(OpKeyBuilder("ResizeBilinear")
.
Build
(),
ResizeBilinearOp
<
DeviceType
::
OPENCL
,
float
>
);
REGISTER_OPENCL_OPERATOR
(
OpKeyBuilder
(
"ResizeBilinear"
)
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
ResizeBilinearOp
<
DeviceType
::
OPENCL
,
half
>
);
}
// namespace mace
mace/ops/resize_bilinear_benchmark.cc
浏览文件 @
99963c98
...
...
@@ -19,18 +19,30 @@ static void ResizeBilinearBenchmark(int iters,
mace
::
testing
::
StopTiming
();
OpsTestNet
net
;
OpDefBuilder
(
"ResizeBilinear"
,
"ResizeBilinearBenchmark"
)
.
Input
(
"Input"
)
.
Input
(
"OutSize"
)
.
Output
(
"Output"
)
.
AddIntsArg
(
"size"
,
{
output_height
,
output_width
})
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
batch
,
channels
,
input_height
,
input_width
});
{
batch
,
input_height
,
input_width
,
channels
});
net
.
AddInputFromArray
<
D
,
index_t
>
(
"OutSize"
,
{
2
},
{
output_height
,
output_width
});
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
T
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
OpDefBuilder
(
"ResizeBilinear"
,
"ResizeBilinearBenchmark"
)
.
Input
(
"InputImage"
)
.
Input
(
"OutSize"
)
.
Output
(
"OutputImage"
)
.
AddIntsArg
(
"size"
,
{
output_height
,
output_width
})
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
}
else
{
OpDefBuilder
(
"ResizeBilinear"
,
"ResizeBilinearBenchmark"
)
.
Input
(
"Input"
)
.
Input
(
"OutSize"
)
.
Output
(
"Output"
)
.
AddIntsArg
(
"size"
,
{
output_height
,
output_width
})
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
}
// Warm-up
for
(
int
i
=
0
;
i
<
5
;
++
i
)
{
...
...
@@ -58,9 +70,12 @@ static void ResizeBilinearBenchmark(int iters,
#define BM_RESIZE_BILINEAR(N, C, H0, W0, H1, W1, TYPE) \
BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, TYPE, CPU); \
BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, TYPE, NEON); \
BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, TYPE, OPENCL);
// SNPE 835 GPU: 6870us
BM_RESIZE_BILINEAR
(
1
,
128
,
120
,
120
,
480
,
480
,
half
);
BM_RESIZE_BILINEAR
(
1
,
128
,
120
,
120
,
480
,
480
,
float
);
BM_RESIZE_BILINEAR
(
1
,
256
,
7
,
7
,
15
,
15
,
float
);
BM_RESIZE_BILINEAR
(
1
,
256
,
15
,
15
,
30
,
30
,
float
);
BM_RESIZE_BILINEAR
(
1
,
128
,
30
,
30
,
60
,
60
,
float
);
...
...
mace/ops/resize_bilinear_test.cc
浏览文件 @
99963c98
...
...
@@ -23,14 +23,14 @@ TEST_F(ResizeBilinearTest, CPUResizeBilinearWOAlignCorners) {
// Add input data
vector
<
float
>
input
(
24
);
std
::
iota
(
begin
(
input
),
end
(
input
),
0
);
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
{
1
,
3
,
2
,
4
},
input
);
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
{
1
,
2
,
4
,
3
},
input
);
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
int
>
(
"OutSize"
,
{
2
},
{
1
,
2
});
// Run
net
.
RunOp
();
// Check
auto
expected
=
CreateTensor
<
float
>
({
1
,
3
,
1
,
2
},
{
0
,
2
,
8
,
10
,
16
,
1
8
});
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
2
,
3
},
{
0
,
1
,
2
,
6
,
7
,
8
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
...
...
@@ -49,14 +49,14 @@ TEST_F(ResizeBilinearTest, ResizeBilinearWAlignCorners) {
// Add input data
vector
<
float
>
input
(
24
);
std
::
iota
(
begin
(
input
),
end
(
input
),
0
);
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
{
1
,
3
,
2
,
4
},
input
);
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
{
1
,
2
,
4
,
3
},
input
);
net
.
AddInputFromArray
<
DeviceType
::
CPU
,
int
>
(
"OutSize"
,
{
2
},
{
1
,
2
});
// Run
net
.
RunOp
();
// Check
auto
expected
=
CreateTensor
<
float
>
({
1
,
3
,
1
,
2
},
{
0
,
3
,
8
,
11
,
16
,
19
});
auto
expected
=
CreateTensor
<
float
>
({
1
,
1
,
2
,
3
},
{
0
,
1
,
2
,
9
,
10
,
11
});
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
),
0.001
);
}
...
...
@@ -65,6 +65,7 @@ template <DeviceType D>
void
TestRandomResizeBilinear
()
{
srand
(
time
(
nullptr
));
testing
::
internal
::
LogToStderr
();
for
(
int
round
=
0
;
round
<
10
;
++
round
)
{
int
batch
=
1
+
rand
()
%
5
;
int
channels
=
1
+
rand
()
%
100
;
...
...
@@ -72,39 +73,54 @@ void TestRandomResizeBilinear() {
int
width
=
1
+
rand
()
%
100
;
int
in_height
=
1
+
rand
()
%
100
;
int
in_width
=
1
+
rand
()
%
100
;
int
align_corners
=
rand
()
%
1
;
// Construct graph
OpsTestNet
net
;
OpDefBuilder
(
"ResizeBilinear"
,
"ResizeBilinearTest"
)
.
Input
(
"Input"
)
.
Input
(
"OutSize"
)
.
Output
(
"Output"
)
.
AddIntArg
(
"align_corners"
,
1
)
.
AddIntsArg
(
"size"
,
{
height
,
width
})
.
Finalize
(
net
.
NewOperatorDef
());
// Add input data
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
batch
,
channels
,
in_height
,
in_width
});
{
batch
,
in_height
,
in_width
,
channels
});
net
.
AddInputFromArray
<
D
,
int
>
(
"OutSize"
,
{
2
},
{
height
,
width
});
// Run
net
.
RunOp
(
D
);
Tensor
actual
;
actual
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
OpDefBuilder
(
"ResizeBilinear"
,
"ResizeBilinearTest"
)
.
Input
(
"Input"
)
.
Input
(
"OutSize"
)
.
Output
(
"Output"
)
.
AddIntArg
(
"align_corners"
,
align_corners
)
.
AddIntsArg
(
"size"
,
{
height
,
width
})
.
Finalize
(
net
.
NewOperatorDef
());
// Run on CPU
net
.
RunOp
(
DeviceType
::
CPU
);
Tensor
*
expected
=
net
.
GetOutput
(
"Output"
);
Tensor
expected
;
expected
.
Copy
(
*
net
.
GetOutput
(
"Output"
));
if
(
D
==
DeviceType
::
OPENCL
)
{
BufferToImage
<
D
,
float
>
(
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT
);
OpDefBuilder
(
"ResizeBilinear"
,
"ResizeBilinearTest"
)
.
Input
(
"InputImage"
)
.
Input
(
"OutSize"
)
.
Output
(
"OutputImage"
)
.
AddIntArg
(
"align_corners"
,
align_corners
)
.
AddIntsArg
(
"size"
,
{
height
,
width
})
.
Finalize
(
net
.
NewOperatorDef
());
// Run
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
float
>
(
net
,
"OutputImage"
,
"DeviceOutput"
,
kernels
::
BufferType
::
IN_OUT
);
}
else
{
// TODO support NEON
}
// Check
ExpectTensorNear
<
float
>
(
*
expected
,
actual
,
0.001
);
ExpectTensorNear
<
float
>
(
expected
,
*
net
.
GetOutput
(
"DeviceOutput"
)
,
0.001
);
}
}
/*
TEST_F(ResizeBilinearTest, NEONRandomResizeBilinear) {
TestRandomResizeBilinear<DeviceType::NEON>();
}
*/
TEST_F
(
ResizeBilinearTest
,
OPENCLRandomResizeBilinear
)
{
TestRandomResizeBilinear
<
DeviceType
::
OPENCL
>
();
...
...
mace/python/tools/tf_ops_stats.py
浏览文件 @
99963c98
...
...
@@ -92,6 +92,7 @@ def main(unused_args):
size
=
tensor_values
[
input_name
]
break
key
=
'%s(size=%s, align_corners=%s)'
%
(
op
.
type
,
size
,
align_corners
)
print
(
key
)
hist_inc
(
stats
,
key
)
elif
op
.
type
in
[
'AvgPool'
,
'MaxPool'
]:
padding
=
op
.
get_attr
(
'padding'
)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录