Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
项目经理老王
Mace
提交
a5c9db3a
Mace
项目概览
项目经理老王
/
Mace
与 Fork 源项目一致
Fork自
Xiaomi / Mace
通知
1
Star
0
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
a5c9db3a
编写于
9月 17, 2018
作者:
李
李寅
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Implement quantized resize_bilinear, b2s, s2b
上级
df23f428
变更
12
显示空白变更内容
内联
并排
Showing
12 changed file
with
707 addition
and
150 deletion
+707
-150
mace/kernels/batch_to_space.h
mace/kernels/batch_to_space.h
+113
-26
mace/kernels/resize_bilinear.h
mace/kernels/resize_bilinear.h
+162
-33
mace/kernels/space_to_batch.h
mace/kernels/space_to_batch.h
+129
-24
mace/ops/batch_to_space.cc
mace/ops/batch_to_space.cc
+5
-0
mace/ops/concat.cc
mace/ops/concat.cc
+5
-0
mace/ops/ops_test_util.h
mace/ops/ops_test_util.h
+11
-8
mace/ops/resize_bilinear.cc
mace/ops/resize_bilinear.cc
+6
-0
mace/ops/resize_bilinear_benchmark.cc
mace/ops/resize_bilinear_benchmark.cc
+10
-2
mace/ops/resize_bilinear_test.cc
mace/ops/resize_bilinear_test.cc
+77
-0
mace/ops/space_to_batch.cc
mace/ops/space_to_batch.cc
+5
-0
mace/ops/space_to_batch_benchmark.cc
mace/ops/space_to_batch_benchmark.cc
+10
-2
mace/ops/space_to_batch_test.cc
mace/ops/space_to_batch_test.cc
+174
-55
未找到文件。
mace/kernels/batch_to_space.h
浏览文件 @
a5c9db3a
...
@@ -107,8 +107,8 @@ struct BatchToSpaceFunctor<DeviceType::CPU, float> : BatchToSpaceFunctorBase {
...
@@ -107,8 +107,8 @@ struct BatchToSpaceFunctor<DeviceType::CPU, float> : BatchToSpaceFunctorBase {
output_shape
.
data
());
output_shape
.
data
());
MACE_RETURN_IF_ERROR
(
space_tensor
->
Resize
(
output_shape
));
MACE_RETURN_IF_ERROR
(
space_tensor
->
Resize
(
output_shape
));
Tensor
::
MappingGuard
input_guard
(
space
_tensor
);
Tensor
::
MappingGuard
input_guard
(
batch
_tensor
);
Tensor
::
MappingGuard
output_guard
(
batch
_tensor
);
Tensor
::
MappingGuard
output_guard
(
space
_tensor
);
int
pad_top
=
paddings_
[
0
];
int
pad_top
=
paddings_
[
0
];
int
pad_left
=
paddings_
[
2
];
int
pad_left
=
paddings_
[
2
];
...
@@ -184,6 +184,93 @@ struct BatchToSpaceFunctor<DeviceType::CPU, float> : BatchToSpaceFunctorBase {
...
@@ -184,6 +184,93 @@ struct BatchToSpaceFunctor<DeviceType::CPU, float> : BatchToSpaceFunctorBase {
}
}
};
};
template
<
>
struct
BatchToSpaceFunctor
<
CPU
,
uint8_t
>
:
BatchToSpaceFunctorBase
{
BatchToSpaceFunctor
(
OpKernelContext
*
context
,
const
std
::
vector
<
int
>
&
paddings
,
const
std
::
vector
<
int
>
&
block_shape
)
:
BatchToSpaceFunctorBase
(
context
,
paddings
,
block_shape
)
{}
MaceStatus
operator
()(
Tensor
*
space_tensor
,
Tensor
*
batch_tensor
,
StatsFuture
*
future
)
{
MACE_UNUSED
(
future
);
std
::
vector
<
index_t
>
output_shape
(
4
,
0
);
CalculateBatchToSpaceOutputShape
(
batch_tensor
,
DataFormat
::
NHWC
,
output_shape
.
data
());
MACE_RETURN_IF_ERROR
(
space_tensor
->
Resize
(
output_shape
));
Tensor
::
MappingGuard
input_guard
(
batch_tensor
);
Tensor
::
MappingGuard
output_guard
(
space_tensor
);
int
pad_top
=
paddings_
[
0
];
int
pad_left
=
paddings_
[
2
];
int
block_shape_h
=
block_shape_
[
0
];
int
block_shape_w
=
block_shape_
[
1
];
space_tensor
->
SetScale
(
batch_tensor
->
scale
());
space_tensor
->
SetZeroPoint
(
batch_tensor
->
zero_point
());
const
uint8_t
*
input_data
=
batch_tensor
->
data
<
uint8_t
>
();
uint8_t
*
output_data
=
space_tensor
->
mutable_data
<
uint8_t
>
();
index_t
in_batches
=
batch_tensor
->
dim
(
0
);
index_t
in_height
=
batch_tensor
->
dim
(
1
);
index_t
in_width
=
batch_tensor
->
dim
(
2
);
index_t
out_batches
=
space_tensor
->
dim
(
0
);
index_t
out_height
=
space_tensor
->
dim
(
1
);
index_t
out_width
=
space_tensor
->
dim
(
2
);
index_t
channels
=
space_tensor
->
dim
(
3
);
#pragma omp parallel for
for
(
index_t
in_b
=
0
;
in_b
<
in_batches
;
++
in_b
)
{
const
index_t
b
=
in_b
%
out_batches
;
const
index_t
tile_index
=
in_b
/
out_batches
;
const
index_t
tile_h
=
tile_index
/
block_shape_w
;
const
index_t
tile_w
=
tile_index
%
block_shape_w
;
const
index_t
valid_h_start
=
std
::
max
(
static_cast
<
index_t
>
(
0
),
(
pad_top
-
tile_h
+
block_shape_h
-
1
)
/
block_shape_h
);
const
index_t
valid_h_end
=
std
::
min
(
in_height
,
(
out_height
+
pad_top
-
tile_h
+
block_shape_h
-
1
)
/
block_shape_h
);
const
index_t
valid_w_start
=
std
::
max
(
static_cast
<
index_t
>
(
0
),
(
pad_left
-
tile_w
+
block_shape_w
-
1
)
/
block_shape_w
);
const
index_t
valid_w_end
=
std
::
min
(
in_width
,
(
out_width
+
pad_left
-
tile_w
+
block_shape_w
-
1
)
/
block_shape_w
);
const
uint8_t
*
input_base
=
input_data
+
in_b
*
in_height
*
in_width
*
channels
;
uint8_t
*
output_base
=
output_data
+
b
*
out_height
*
out_width
*
channels
;
index_t
h
=
valid_h_start
*
block_shape_h
+
tile_h
-
pad_top
;
for
(
index_t
in_h
=
valid_h_start
;
in_h
<
valid_h_end
;
++
in_h
)
{
index_t
w
=
valid_w_start
*
block_shape_w
+
tile_w
-
pad_left
;
for
(
index_t
in_w
=
valid_w_start
;
in_w
<
valid_w_end
;
++
in_w
)
{
memcpy
(
output_base
+
(
h
*
out_width
+
w
)
*
channels
,
input_base
+
(
in_h
*
in_width
+
in_w
)
*
channels
,
channels
*
sizeof
(
uint8_t
));
w
+=
block_shape_w
;
}
// w
h
+=
block_shape_h
;
}
// h
}
// b
return
MACE_SUCCESS
;
}
};
#ifdef MACE_ENABLE_OPENCL
#ifdef MACE_ENABLE_OPENCL
template
<
typename
T
>
template
<
typename
T
>
struct
BatchToSpaceFunctor
<
DeviceType
::
GPU
,
T
>
:
BatchToSpaceFunctorBase
{
struct
BatchToSpaceFunctor
<
DeviceType
::
GPU
,
T
>
:
BatchToSpaceFunctorBase
{
...
...
mace/kernels/resize_bilinear.h
浏览文件 @
a5c9db3a
...
@@ -61,7 +61,16 @@ inline void ComputeInterpolationWeights(
...
@@ -61,7 +61,16 @@ inline void ComputeInterpolationWeights(
}
}
}
}
inline
float
ComputeLerp
(
const
float
top_left
,
template
<
typename
T
>
inline
T
ComputeLerp
(
const
T
top_left
,
const
T
top_right
,
const
T
bottom_left
,
const
T
bottom_right
,
const
float
x_lerp
,
const
float
y_lerp
);
template
<
>
inline
float
ComputeLerp
<
float
>
(
const
float
top_left
,
const
float
top_right
,
const
float
top_right
,
const
float
bottom_left
,
const
float
bottom_left
,
const
float
bottom_right
,
const
float
bottom_right
,
...
@@ -72,7 +81,20 @@ inline float ComputeLerp(const float top_left,
...
@@ -72,7 +81,20 @@ inline float ComputeLerp(const float top_left,
return
top
+
(
bottom
-
top
)
*
y_lerp
;
return
top
+
(
bottom
-
top
)
*
y_lerp
;
}
}
inline
void
ResizeImage
(
const
float
*
images
,
template
<
>
inline
uint8_t
ComputeLerp
<
uint8_t
>
(
const
uint8_t
top_left
,
const
uint8_t
top_right
,
const
uint8_t
bottom_left
,
const
uint8_t
bottom_right
,
const
float
x_lerp
,
const
float
y_lerp
)
{
const
float
top
=
top_left
+
(
top_right
-
top_left
)
*
x_lerp
;
const
float
bottom
=
bottom_left
+
(
bottom_right
-
bottom_left
)
*
x_lerp
;
return
static_cast
<
uint8_t
>
(
roundf
(
top
+
(
bottom
-
top
)
*
y_lerp
));
}
template
<
typename
T
>
inline
void
ResizeImageNCHW
(
const
T
*
images
,
const
index_t
batch_size
,
const
index_t
batch_size
,
const
index_t
in_height
,
const
index_t
in_height
,
const
index_t
in_width
,
const
index_t
in_width
,
...
@@ -81,30 +103,30 @@ inline void ResizeImage(const float *images,
...
@@ -81,30 +103,30 @@ inline void ResizeImage(const float *images,
const
index_t
channels
,
const
index_t
channels
,
const
std
::
vector
<
CachedInterpolation
>
&
xs_vec
,
const
std
::
vector
<
CachedInterpolation
>
&
xs_vec
,
const
std
::
vector
<
CachedInterpolation
>
&
ys
,
const
std
::
vector
<
CachedInterpolation
>
&
ys
,
float
*
output
)
{
T
*
output
)
{
const
CachedInterpolation
*
xs
=
xs_vec
.
data
();
const
CachedInterpolation
*
xs
=
xs_vec
.
data
();
#pragma omp parallel for collapse(2)
#pragma omp parallel for collapse(2)
for
(
index_t
b
=
0
;
b
<
batch_size
;
++
b
)
{
for
(
index_t
b
=
0
;
b
<
batch_size
;
++
b
)
{
for
(
index_t
c
=
0
;
c
<
channels
;
++
c
)
{
for
(
index_t
c
=
0
;
c
<
channels
;
++
c
)
{
const
float
const
T
*
channel_input_ptr
=
*
channel_input_ptr
=
images
+
(
b
*
channels
+
c
)
*
in_height
*
in_width
;
images
+
(
b
*
channels
+
c
)
*
in_height
*
in_width
;
float
*
channel_output_ptr
=
T
*
channel_output_ptr
=
output
+
(
b
*
channels
+
c
)
*
out_height
*
out_width
;
output
+
(
b
*
channels
+
c
)
*
out_height
*
out_width
;
for
(
index_t
y
=
0
;
y
<
out_height
;
++
y
)
{
for
(
index_t
y
=
0
;
y
<
out_height
;
++
y
)
{
const
float
*
y_lower_input_ptr
=
const
T
*
y_lower_input_ptr
=
channel_input_ptr
+
ys
[
y
].
lower
*
in_width
;
channel_input_ptr
+
ys
[
y
].
lower
*
in_width
;
const
float
*
y_upper_input_ptr
=
const
T
*
y_upper_input_ptr
=
channel_input_ptr
+
ys
[
y
].
upper
*
in_width
;
channel_input_ptr
+
ys
[
y
].
upper
*
in_width
;
const
float
ys_lerp
=
ys
[
y
].
lerp
;
const
float
ys_lerp
=
ys
[
y
].
lerp
;
for
(
index_t
x
=
0
;
x
<
out_width
;
++
x
)
{
for
(
index_t
x
=
0
;
x
<
out_width
;
++
x
)
{
const
float
xs_lerp
=
xs
[
x
].
lerp
;
const
float
xs_lerp
=
xs
[
x
].
lerp
;
const
float
top_left
=
y_lower_input_ptr
[
xs
[
x
].
lower
];
const
T
top_left
=
y_lower_input_ptr
[
xs
[
x
].
lower
];
const
float
top_right
=
y_lower_input_ptr
[
xs
[
x
].
upper
];
const
T
top_right
=
y_lower_input_ptr
[
xs
[
x
].
upper
];
const
float
bottom_left
=
y_upper_input_ptr
[
xs
[
x
].
lower
];
const
T
bottom_left
=
y_upper_input_ptr
[
xs
[
x
].
lower
];
const
float
bottom_right
=
y_upper_input_ptr
[
xs
[
x
].
upper
];
const
T
bottom_right
=
y_upper_input_ptr
[
xs
[
x
].
upper
];
channel_output_ptr
[
y
*
out_width
+
x
]
=
channel_output_ptr
[
y
*
out_width
+
x
]
=
ComputeLerp
(
top_left
,
top_right
,
bottom_left
,
ComputeLerp
(
top_left
,
top_right
,
bottom_left
,
bottom_right
,
xs_lerp
,
ys_lerp
);
bottom_right
,
xs_lerp
,
ys_lerp
);
...
@@ -114,6 +136,48 @@ inline void ResizeImage(const float *images,
...
@@ -114,6 +136,48 @@ inline void ResizeImage(const float *images,
}
}
}
}
template
<
typename
T
>
inline
void
ResizeImageNHWC
(
const
T
*
images
,
const
index_t
batch_size
,
const
index_t
in_height
,
const
index_t
in_width
,
const
index_t
out_height
,
const
index_t
out_width
,
const
index_t
channels
,
const
std
::
vector
<
CachedInterpolation
>
&
xs_vec
,
const
std
::
vector
<
CachedInterpolation
>
&
ys
,
T
*
output
)
{
const
CachedInterpolation
*
xs
=
xs_vec
.
data
();
for
(
index_t
b
=
0
;
b
<
batch_size
;
++
b
)
{
const
T
*
input_base
=
images
+
b
*
channels
*
in_height
*
in_width
;
T
*
output_base
=
output
+
b
*
channels
*
out_height
*
out_width
;
#pragma omp parallel for
for
(
index_t
y
=
0
;
y
<
out_height
;
++
y
)
{
const
T
*
y_lower_input_ptr
=
input_base
+
ys
[
y
].
lower
*
in_width
*
channels
;
const
T
*
y_upper_input_ptr
=
input_base
+
ys
[
y
].
upper
*
in_width
*
channels
;
const
float
ys_lerp
=
ys
[
y
].
lerp
;
for
(
index_t
x
=
0
;
x
<
out_width
;
++
x
)
{
const
float
xs_lerp
=
xs
[
x
].
lerp
;
const
T
*
top_left
=
y_lower_input_ptr
+
xs
[
x
].
lower
*
channels
;
const
T
*
top_right
=
y_lower_input_ptr
+
xs
[
x
].
upper
*
channels
;
const
T
*
bottom_left
=
y_upper_input_ptr
+
xs
[
x
].
lower
*
channels
;
const
T
*
bottom_right
=
y_upper_input_ptr
+
xs
[
x
].
upper
*
channels
;
T
*
output_ptr
=
output_base
+
(
y
*
out_width
+
x
)
*
channels
;
for
(
index_t
c
=
0
;
c
<
channels
;
++
c
)
{
output_ptr
[
c
]
=
ComputeLerp
(
top_left
[
c
],
top_right
[
c
],
bottom_left
[
c
],
bottom_right
[
c
],
xs_lerp
,
ys_lerp
);
}
}
}
}
}
struct
ResizeBilinearFunctorBase
:
OpKernel
{
struct
ResizeBilinearFunctorBase
:
OpKernel
{
ResizeBilinearFunctorBase
(
OpKernelContext
*
context
,
ResizeBilinearFunctorBase
(
OpKernelContext
*
context
,
const
std
::
vector
<
index_t
>
&
size
,
const
std
::
vector
<
index_t
>
&
size
,
...
@@ -132,11 +196,7 @@ struct ResizeBilinearFunctorBase : OpKernel {
...
@@ -132,11 +196,7 @@ struct ResizeBilinearFunctorBase : OpKernel {
};
};
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
struct
ResizeBilinearFunctor
;
struct
ResizeBilinearFunctor
:
ResizeBilinearFunctorBase
{
template
<
>
struct
ResizeBilinearFunctor
<
DeviceType
::
CPU
,
float
>
:
ResizeBilinearFunctorBase
{
ResizeBilinearFunctor
(
OpKernelContext
*
context
,
ResizeBilinearFunctor
(
OpKernelContext
*
context
,
const
std
::
vector
<
index_t
>
&
size
,
const
std
::
vector
<
index_t
>
&
size
,
bool
align_corners
)
bool
align_corners
)
...
@@ -159,8 +219,8 @@ struct ResizeBilinearFunctor<DeviceType::CPU, float>
...
@@ -159,8 +219,8 @@ struct ResizeBilinearFunctor<DeviceType::CPU, float>
Tensor
::
MappingGuard
input_mapper
(
input
);
Tensor
::
MappingGuard
input_mapper
(
input
);
Tensor
::
MappingGuard
output_mapper
(
output
);
Tensor
::
MappingGuard
output_mapper
(
output
);
const
float
*
input_data
=
input
->
data
<
float
>
();
const
T
*
input_data
=
input
->
data
<
T
>
();
float
*
output_data
=
output
->
mutable_data
<
float
>
();
T
*
output_data
=
output
->
mutable_data
<
T
>
();
if
(
out_height
==
in_height
&&
out_width
==
in_width
)
{
if
(
out_height
==
in_height
&&
out_width
==
in_width
)
{
std
::
copy
(
input_data
,
std
::
copy
(
input_data
,
...
@@ -181,8 +241,77 @@ struct ResizeBilinearFunctor<DeviceType::CPU, float>
...
@@ -181,8 +241,77 @@ struct ResizeBilinearFunctor<DeviceType::CPU, float>
ComputeInterpolationWeights
(
out_height
,
in_height
,
height_scale
,
ys
.
data
());
ComputeInterpolationWeights
(
out_height
,
in_height
,
height_scale
,
ys
.
data
());
ComputeInterpolationWeights
(
out_width
,
in_width
,
width_scale
,
xs
.
data
());
ComputeInterpolationWeights
(
out_width
,
in_width
,
width_scale
,
xs
.
data
());
ResizeImage
(
input_data
,
batch
,
in_height
,
in_width
,
out_height
,
out_width
,
ResizeImageNCHW
(
input_data
,
channels
,
xs
,
ys
,
output_data
);
batch
,
in_height
,
in_width
,
out_height
,
out_width
,
channels
,
xs
,
ys
,
output_data
);
return
MACE_SUCCESS
;
}
};
template
<
DeviceType
D
>
struct
ResizeBilinearFunctor
<
D
,
uint8_t
>
:
ResizeBilinearFunctorBase
{
ResizeBilinearFunctor
(
OpKernelContext
*
context
,
const
std
::
vector
<
index_t
>
&
size
,
bool
align_corners
)
:
ResizeBilinearFunctorBase
(
context
,
size
,
align_corners
)
{}
MaceStatus
operator
()(
const
Tensor
*
input
,
Tensor
*
output
,
StatsFuture
*
future
)
{
MACE_UNUSED
(
future
);
const
index_t
batch
=
input
->
dim
(
0
);
const
index_t
in_height
=
input
->
dim
(
1
);
const
index_t
in_width
=
input
->
dim
(
2
);
const
index_t
channels
=
input
->
dim
(
3
);
index_t
out_height
=
out_height_
;
index_t
out_width
=
out_width_
;
MACE_CHECK
(
out_height
>
0
&&
out_width
>
0
);
std
::
vector
<
index_t
>
out_shape
{
batch
,
out_height
,
out_width
,
channels
};
MACE_RETURN_IF_ERROR
(
output
->
Resize
(
out_shape
));
Tensor
::
MappingGuard
input_mapper
(
input
);
Tensor
::
MappingGuard
output_mapper
(
output
);
const
uint8_t
*
input_data
=
input
->
data
<
uint8_t
>
();
uint8_t
*
output_data
=
output
->
mutable_data
<
uint8_t
>
();
if
(
out_height
==
in_height
&&
out_width
==
in_width
)
{
std
::
copy
(
input_data
,
input_data
+
batch
*
in_height
*
in_width
*
channels
,
output_data
);
return
MACE_SUCCESS
;
}
float
height_scale
=
CalculateResizeScale
(
in_height
,
out_height
,
align_corners_
);
float
width_scale
=
CalculateResizeScale
(
in_width
,
out_width
,
align_corners_
);
std
::
vector
<
CachedInterpolation
>
ys
(
out_height
+
1
);
std
::
vector
<
CachedInterpolation
>
xs
(
out_width
+
1
);
// Compute the cached interpolation weights on the x and y dimensions.
ComputeInterpolationWeights
(
out_height
,
in_height
,
height_scale
,
ys
.
data
());
ComputeInterpolationWeights
(
out_width
,
in_width
,
width_scale
,
xs
.
data
());
ResizeImageNHWC
(
input_data
,
batch
,
in_height
,
in_width
,
out_height
,
out_width
,
channels
,
xs
,
ys
,
output_data
);
return
MACE_SUCCESS
;
return
MACE_SUCCESS
;
}
}
...
...
mace/kernels/space_to_batch.h
浏览文件 @
a5c9db3a
...
@@ -205,6 +205,111 @@ struct SpaceToBatchFunctor<DeviceType::CPU, float> : SpaceToBatchFunctorBase {
...
@@ -205,6 +205,111 @@ struct SpaceToBatchFunctor<DeviceType::CPU, float> : SpaceToBatchFunctorBase {
}
}
};
};
template
<
>
struct
SpaceToBatchFunctor
<
DeviceType
::
CPU
,
uint8_t
>
:
SpaceToBatchFunctorBase
{
SpaceToBatchFunctor
(
OpKernelContext
*
context
,
const
std
::
vector
<
int
>
&
paddings
,
const
std
::
vector
<
int
>
&
block_shape
)
:
SpaceToBatchFunctorBase
(
context
,
paddings
,
block_shape
)
{}
MaceStatus
operator
()(
Tensor
*
space_tensor
,
Tensor
*
batch_tensor
,
StatsFuture
*
future
)
{
MACE_UNUSED
(
future
);
std
::
vector
<
index_t
>
output_shape
(
4
,
0
);
CalculateSpaceToBatchOutputShape
(
space_tensor
,
DataFormat
::
NHWC
,
output_shape
.
data
());
MACE_RETURN_IF_ERROR
(
batch_tensor
->
Resize
(
output_shape
));
int
zero_point
=
space_tensor
->
zero_point
();
Tensor
::
MappingGuard
input_guard
(
space_tensor
);
Tensor
::
MappingGuard
output_guard
(
batch_tensor
);
int
pad_top
=
paddings_
[
0
];
int
pad_left
=
paddings_
[
2
];
int
block_shape_h
=
block_shape_
[
0
];
int
block_shape_w
=
block_shape_
[
1
];
batch_tensor
->
SetScale
(
space_tensor
->
scale
());
batch_tensor
->
SetZeroPoint
(
space_tensor
->
zero_point
());
const
uint8_t
*
input_data
=
space_tensor
->
data
<
uint8_t
>
();
uint8_t
*
output_data
=
batch_tensor
->
mutable_data
<
uint8_t
>
();
index_t
in_batches
=
space_tensor
->
dim
(
0
);
index_t
in_height
=
space_tensor
->
dim
(
1
);
index_t
in_width
=
space_tensor
->
dim
(
2
);
index_t
out_batches
=
batch_tensor
->
dim
(
0
);
index_t
out_height
=
batch_tensor
->
dim
(
1
);
index_t
out_width
=
batch_tensor
->
dim
(
2
);
index_t
channels
=
batch_tensor
->
dim
(
3
);
#pragma omp parallel for
for
(
index_t
b
=
0
;
b
<
out_batches
;
++
b
)
{
const
index_t
in_b
=
b
%
in_batches
;
const
index_t
tile_index
=
b
/
in_batches
;
const
index_t
tile_h
=
tile_index
/
block_shape_w
;
const
index_t
tile_w
=
tile_index
%
block_shape_w
;
const
index_t
valid_h_start
=
std
::
max
(
static_cast
<
index_t
>
(
0
),
(
pad_top
-
tile_h
+
block_shape_h
-
1
)
/
block_shape_h
);
const
index_t
valid_h_end
=
std
::
min
(
out_height
,
(
in_height
+
pad_top
-
tile_h
+
block_shape_h
-
1
)
/
block_shape_h
);
const
index_t
valid_w_start
=
std
::
max
(
static_cast
<
index_t
>
(
0
),
(
pad_left
-
tile_w
+
block_shape_w
-
1
)
/
block_shape_w
);
const
index_t
valid_w_end
=
std
::
min
(
out_width
,
(
in_width
+
pad_left
-
tile_w
+
block_shape_w
-
1
)
/
block_shape_w
);
const
uint8_t
*
input_base
=
input_data
+
in_b
*
channels
*
in_height
*
in_width
;
uint8_t
*
output_base
=
output_data
+
b
*
channels
*
out_height
*
out_width
;
memset
(
output_base
,
zero_point
,
valid_h_start
*
out_width
*
channels
*
sizeof
(
uint8_t
));
index_t
in_h
=
valid_h_start
*
block_shape_h
+
tile_h
-
pad_top
;
for
(
index_t
h
=
valid_h_start
;
h
<
valid_h_end
;
++
h
)
{
memset
(
output_base
+
h
*
out_width
*
channels
,
zero_point
,
valid_w_start
*
channels
*
sizeof
(
uint8_t
));
index_t
in_w
=
valid_w_start
*
block_shape_w
+
tile_w
-
pad_left
;
for
(
index_t
w
=
valid_w_start
;
w
<
valid_w_end
;
++
w
)
{
memcpy
(
output_base
+
(
h
*
out_width
+
w
)
*
channels
,
input_base
+
(
in_h
*
in_width
+
in_w
)
*
channels
,
sizeof
(
uint8_t
)
*
channels
);
in_w
+=
block_shape_w
;
}
// w
in_h
+=
block_shape_h
;
memset
(
output_base
+
(
h
*
out_width
+
valid_w_end
)
*
channels
,
zero_point
,
(
out_width
-
valid_w_end
)
*
channels
*
sizeof
(
uint8_t
));
}
// h
memset
(
output_base
+
valid_h_end
*
out_width
*
channels
,
zero_point
,
(
out_height
-
valid_h_end
)
*
out_width
*
channels
*
sizeof
(
uint8_t
));
}
// b
return
MACE_SUCCESS
;
}
};
#ifdef MACE_ENABLE_OPENCL
#ifdef MACE_ENABLE_OPENCL
template
<
typename
T
>
template
<
typename
T
>
struct
SpaceToBatchFunctor
<
DeviceType
::
GPU
,
T
>
:
SpaceToBatchFunctorBase
{
struct
SpaceToBatchFunctor
<
DeviceType
::
GPU
,
T
>
:
SpaceToBatchFunctorBase
{
...
...
mace/ops/batch_to_space.cc
浏览文件 @
a5c9db3a
...
@@ -23,6 +23,11 @@ void Register_BatchToSpaceND(OperatorRegistryBase *op_registry) {
...
@@ -23,6 +23,11 @@ void Register_BatchToSpaceND(OperatorRegistryBase *op_registry) {
.
TypeConstraint
<
float
>
(
"T"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
.
Build
(),
BatchToSpaceNDOp
<
DeviceType
::
CPU
,
float
>
);
BatchToSpaceNDOp
<
DeviceType
::
CPU
,
float
>
);
MACE_REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"BatchToSpaceND"
)
.
Device
(
DeviceType
::
CPU
)
.
TypeConstraint
<
uint8_t
>
(
"T"
)
.
Build
(),
BatchToSpaceNDOp
<
DeviceType
::
CPU
,
uint8_t
>
);
#ifdef MACE_ENABLE_OPENCL
#ifdef MACE_ENABLE_OPENCL
MACE_REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"BatchToSpaceND"
)
MACE_REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"BatchToSpaceND"
)
.
Device
(
DeviceType
::
GPU
)
.
Device
(
DeviceType
::
GPU
)
...
...
mace/ops/concat.cc
浏览文件 @
a5c9db3a
...
@@ -28,6 +28,11 @@ void Register_Concat(OperatorRegistryBase *op_registry) {
...
@@ -28,6 +28,11 @@ void Register_Concat(OperatorRegistryBase *op_registry) {
.
TypeConstraint
<
int32_t
>
(
"T"
)
.
TypeConstraint
<
int32_t
>
(
"T"
)
.
Build
(),
.
Build
(),
ConcatOp
<
DeviceType
::
CPU
,
int32_t
>
);
ConcatOp
<
DeviceType
::
CPU
,
int32_t
>
);
MACE_REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Concat"
)
.
Device
(
DeviceType
::
CPU
)
.
TypeConstraint
<
uint8_t
>
(
"T"
)
.
Build
(),
ConcatOp
<
DeviceType
::
CPU
,
uint8_t
>
);
#ifdef MACE_ENABLE_OPENCL
#ifdef MACE_ENABLE_OPENCL
MACE_REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Concat"
)
MACE_REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"Concat"
)
.
Device
(
DeviceType
::
GPU
)
.
Device
(
DeviceType
::
GPU
)
...
...
mace/ops/ops_test_util.h
浏览文件 @
a5c9db3a
...
@@ -162,11 +162,13 @@ class OpsTestNet {
...
@@ -162,11 +162,13 @@ class OpsTestNet {
std
::
fill
(
input_data
,
input_data
+
input
->
size
(),
data
);
std
::
fill
(
input_data
,
input_data
+
input
->
size
(),
data
);
}
}
template
<
DeviceType
D
,
typename
T
>
template
<
DeviceType
D
,
typename
T
>
void
AddRandomInput
(
const
std
::
string
&
name
,
void
AddRandomInput
(
const
std
::
string
&
name
,
const
std
::
vector
<
index_t
>
&
shape
,
const
std
::
vector
<
index_t
>
&
shape
,
bool
positive
=
true
,
bool
positive
=
true
,
bool
truncate
=
false
)
{
bool
truncate
=
false
,
const
float
truncate_min
=
0.001
f
,
const
float
truncate_max
=
100.
f
)
{
Tensor
*
input
=
Tensor
*
input
=
ws_
.
CreateTensor
(
name
,
OpTestContext
::
Get
()
->
GetDevice
(
D
)
->
allocator
(),
ws_
.
CreateTensor
(
name
,
OpTestContext
::
Get
()
->
GetDevice
(
D
)
->
allocator
(),
DataTypeToEnum
<
T
>::
v
());
DataTypeToEnum
<
T
>::
v
());
...
@@ -180,21 +182,22 @@ class OpsTestNet {
...
@@ -180,21 +182,22 @@ class OpsTestNet {
if
(
DataTypeToEnum
<
T
>::
value
==
DT_HALF
)
{
if
(
DataTypeToEnum
<
T
>::
value
==
DT_HALF
)
{
std
::
generate
(
std
::
generate
(
input_data
,
input_data
+
input
->
size
(),
input_data
,
input_data
+
input
->
size
(),
[
&
gen
,
&
nd
,
positive
,
truncate
]
{
[
&
gen
,
&
nd
,
positive
,
truncate
,
truncate_min
,
truncate_max
]
{
float
d
=
nd
(
gen
);
float
d
=
nd
(
gen
);
if
(
truncate
)
{
if
(
truncate
)
{
if
(
std
::
abs
(
d
)
>
100.
f
)
d
=
100.
f
;
if
(
std
::
abs
(
d
)
>
truncate_max
)
d
=
truncate_max
;
if
(
std
::
abs
(
d
)
<
0.001
f
)
d
=
0.001
f
;
if
(
std
::
abs
(
d
)
<
truncate_min
)
d
=
truncate_min
;
}
}
return
half_float
::
half_cast
<
half
>
(
positive
?
std
::
abs
(
d
)
:
d
);
return
half_float
::
half_cast
<
half
>
(
positive
?
std
::
abs
(
d
)
:
d
);
});
});
}
else
{
}
else
{
std
::
generate
(
input_data
,
input_data
+
input
->
size
(),
std
::
generate
(
input_data
,
input_data
+
input
->
size
(),
[
&
gen
,
&
nd
,
positive
,
truncate
]
{
[
&
gen
,
&
nd
,
positive
,
truncate
,
truncate_min
,
truncate_max
]
{
float
d
=
nd
(
gen
);
float
d
=
nd
(
gen
);
if
(
truncate
)
{
if
(
truncate
)
{
if
(
std
::
abs
(
d
)
>
100.
f
)
d
=
100.
f
;
if
(
std
::
abs
(
d
)
>
truncate_max
)
d
=
truncate_max
;
if
(
std
::
abs
(
d
)
<
0.001
f
)
d
=
0.001
f
;
if
(
std
::
abs
(
d
)
<
truncate_min
)
d
=
truncate_min
;
}
}
return
(
positive
?
std
::
abs
(
d
)
:
d
);
return
(
positive
?
std
::
abs
(
d
)
:
d
);
});
});
...
...
mace/ops/resize_bilinear.cc
浏览文件 @
a5c9db3a
...
@@ -24,6 +24,12 @@ void Register_ResizeBilinear(OperatorRegistryBase *op_registry) {
...
@@ -24,6 +24,12 @@ void Register_ResizeBilinear(OperatorRegistryBase *op_registry) {
.
Build
(),
.
Build
(),
ResizeBilinearOp
<
DeviceType
::
CPU
,
float
>
);
ResizeBilinearOp
<
DeviceType
::
CPU
,
float
>
);
MACE_REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"ResizeBilinear"
)
.
Device
(
DeviceType
::
CPU
)
.
TypeConstraint
<
uint8_t
>
(
"T"
)
.
Build
(),
ResizeBilinearOp
<
DeviceType
::
CPU
,
uint8_t
>
);
#ifdef MACE_ENABLE_OPENCL
#ifdef MACE_ENABLE_OPENCL
MACE_REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"ResizeBilinear"
)
MACE_REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"ResizeBilinear"
)
.
Device
(
DeviceType
::
GPU
)
.
Device
(
DeviceType
::
GPU
)
...
...
mace/ops/resize_bilinear_benchmark.cc
浏览文件 @
a5c9db3a
...
@@ -36,8 +36,15 @@ void ResizeBilinearBenchmark(int iters,
...
@@ -36,8 +36,15 @@ void ResizeBilinearBenchmark(int iters,
// Add input data
// Add input data
if
(
D
==
DeviceType
::
CPU
)
{
if
(
D
==
DeviceType
::
CPU
)
{
if
(
DataTypeToEnum
<
T
>::
value
!=
DT_UINT8
)
{
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
batch
,
channels
,
input_height
,
input_width
});
{
batch
,
channels
,
input_height
,
input_width
});
}
else
{
net
.
AddRandomInput
<
D
,
uint8_t
>
(
"Input"
,
{
batch
,
input_height
,
input_width
,
channels
});
}
}
else
if
(
D
==
DeviceType
::
GPU
)
{
}
else
if
(
D
==
DeviceType
::
GPU
)
{
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
batch
,
input_height
,
input_width
,
channels
});
{
batch
,
input_height
,
input_width
,
channels
});
...
@@ -99,6 +106,7 @@ void ResizeBilinearBenchmark(int iters,
...
@@ -99,6 +106,7 @@ void ResizeBilinearBenchmark(int iters,
#define MACE_BM_RESIZE_BILINEAR(N, C, H0, W0, H1, W1) \
#define MACE_BM_RESIZE_BILINEAR(N, C, H0, W0, H1, W1) \
MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, float, CPU); \
MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, float, CPU); \
MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, uint8_t, CPU); \
MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, float, GPU); \
MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, float, GPU); \
MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, half, GPU);
MACE_BM_RESIZE_BILINEAR_MACRO(N, C, H0, W0, H1, W1, half, GPU);
...
...
mace/ops/resize_bilinear_test.cc
浏览文件 @
a5c9db3a
...
@@ -140,12 +140,89 @@ void TestRandomResizeBilinear() {
...
@@ -140,12 +140,89 @@ void TestRandomResizeBilinear() {
1e-6
);
1e-6
);
}
}
}
}
void
TestQuantizedResizeBilinear
()
{
testing
::
internal
::
LogToStderr
();
static
unsigned
int
seed
=
time
(
NULL
);
for
(
int
round
=
0
;
round
<
10
;
++
round
)
{
int
batch
=
1
+
rand_r
(
&
seed
)
%
5
;
int
channels
=
1
+
rand_r
(
&
seed
)
%
100
;
int
height
=
1
+
rand_r
(
&
seed
)
%
100
;
int
width
=
1
+
rand_r
(
&
seed
)
%
100
;
int
in_height
=
1
+
rand_r
(
&
seed
)
%
100
;
int
in_width
=
1
+
rand_r
(
&
seed
)
%
100
;
int
align_corners
=
rand_r
(
&
seed
)
%
1
;
// Construct graph
OpsTestNet
net
;
// Add input data
net
.
AddRandomInput
<
CPU
,
float
>
(
"Input"
,
{
batch
,
in_height
,
in_width
,
channels
},
false
,
true
,
-
1.
f
,
1.
f
);
net
.
TransformDataFormat
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
NHWC
,
"InputNCHW"
,
NCHW
);
OpDefBuilder
(
"ResizeBilinear"
,
"ResizeBilinearTest"
)
.
Input
(
"InputNCHW"
)
.
Output
(
"OutputNCHW"
)
.
AddIntArg
(
"align_corners"
,
align_corners
)
.
AddIntsArg
(
"size"
,
{
height
,
width
})
.
Finalize
(
net
.
NewOperatorDef
());
// Run on CPU
net
.
RunOp
(
DeviceType
::
CPU
);
net
.
TransformDataFormat
<
DeviceType
::
CPU
,
float
>
(
"OutputNCHW"
,
NCHW
,
"Output"
,
NHWC
);
// run quantize
OpDefBuilder
(
"Quantize"
,
"QuantizeInput"
)
.
Input
(
"Input"
)
.
Output
(
"QuantizedInput"
)
.
OutputType
({
DT_UINT8
})
.
AddIntArg
(
"T"
,
DT_UINT8
)
.
Finalize
(
net
.
NewOperatorDef
());
net
.
RunOp
();
OpDefBuilder
(
"ResizeBilinear"
,
"ResizeBilinearTest"
)
.
Input
(
"QuantizedInput"
)
.
Output
(
"QuantizedOutput"
)
.
AddIntArg
(
"align_corners"
,
align_corners
)
.
AddIntsArg
(
"size"
,
{
height
,
width
})
.
OutputType
({
DT_UINT8
})
.
AddIntArg
(
"T"
,
DT_UINT8
)
.
Finalize
(
net
.
NewOperatorDef
());
net
.
RunOp
();
Tensor
*
eq_output
=
net
.
GetTensor
(
"QuantizedInput"
);
Tensor
*
q_output
=
net
.
GetTensor
(
"QuantizedOutput"
);
q_output
->
SetScale
(
eq_output
->
scale
());
q_output
->
SetZeroPoint
(
eq_output
->
zero_point
());
OpDefBuilder
(
"Dequantize"
,
"DeQuantizeTest"
)
.
Input
(
"QuantizedOutput"
)
.
Output
(
"DequantizedOutput"
)
.
OutputType
({
DT_FLOAT
})
.
AddIntArg
(
"T"
,
DT_UINT8
)
.
Finalize
(
net
.
NewOperatorDef
());
net
.
RunOp
();
// Check
ExpectTensorSimilar
<
float
>
(
*
net
.
GetOutput
(
"Output"
),
*
net
.
GetTensor
(
"DequantizedOutput"
),
0.01
);
}
}
}
// namespace
}
// namespace
TEST_F
(
ResizeBilinearTest
,
OPENCLRandomResizeBilinear
)
{
TEST_F
(
ResizeBilinearTest
,
OPENCLRandomResizeBilinear
)
{
TestRandomResizeBilinear
<
DeviceType
::
GPU
>
();
TestRandomResizeBilinear
<
DeviceType
::
GPU
>
();
}
}
TEST_F
(
ResizeBilinearTest
,
QuantizedResizeBilinear
)
{
TestQuantizedResizeBilinear
();
}
}
// namespace test
}
// namespace test
}
// namespace ops
}
// namespace ops
}
// namespace mace
}
// namespace mace
mace/ops/space_to_batch.cc
浏览文件 @
a5c9db3a
...
@@ -23,6 +23,11 @@ void Register_SpaceToBatchND(OperatorRegistryBase *op_registry) {
...
@@ -23,6 +23,11 @@ void Register_SpaceToBatchND(OperatorRegistryBase *op_registry) {
.
TypeConstraint
<
float
>
(
"T"
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
.
Build
(),
SpaceToBatchNDOp
<
DeviceType
::
CPU
,
float
>
);
SpaceToBatchNDOp
<
DeviceType
::
CPU
,
float
>
);
MACE_REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"SpaceToBatchND"
)
.
Device
(
DeviceType
::
CPU
)
.
TypeConstraint
<
uint8_t
>
(
"T"
)
.
Build
(),
SpaceToBatchNDOp
<
DeviceType
::
CPU
,
uint8_t
>
);
#ifdef MACE_ENABLE_OPENCL
#ifdef MACE_ENABLE_OPENCL
MACE_REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"SpaceToBatchND"
)
MACE_REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"SpaceToBatchND"
)
.
Device
(
DeviceType
::
GPU
)
.
Device
(
DeviceType
::
GPU
)
...
...
mace/ops/space_to_batch_benchmark.cc
浏览文件 @
a5c9db3a
...
@@ -28,7 +28,13 @@ void BMSpaceToBatch(
...
@@ -28,7 +28,13 @@ void BMSpaceToBatch(
OpsTestNet
net
;
OpsTestNet
net
;
if
(
D
==
DeviceType
::
CPU
)
{
if
(
D
==
DeviceType
::
CPU
)
{
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
if
(
DataTypeToEnum
<
T
>::
value
!=
DT_UINT8
)
{
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
batch
,
channels
,
height
,
width
});
}
else
{
net
.
AddRandomInput
<
DeviceType
::
CPU
,
uint8_t
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
}
}
else
if
(
D
==
DeviceType
::
GPU
)
{
}
else
if
(
D
==
DeviceType
::
GPU
)
{
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
net
.
AddRandomInput
<
D
,
float
>
(
"Input"
,
{
batch
,
height
,
width
,
channels
});
}
}
...
@@ -39,6 +45,7 @@ void BMSpaceToBatch(
...
@@ -39,6 +45,7 @@ void BMSpaceToBatch(
.
Output
(
"Output"
)
.
Output
(
"Output"
)
.
AddIntsArg
(
"paddings"
,
{
shape
,
shape
,
shape
,
shape
})
.
AddIntsArg
(
"paddings"
,
{
shape
,
shape
,
shape
,
shape
})
.
AddIntsArg
(
"block_shape"
,
{
shape
,
shape
})
.
AddIntsArg
(
"block_shape"
,
{
shape
,
shape
})
.
AddIntArg
(
"T"
,
static_cast
<
int
>
(
DataTypeToEnum
<
T
>::
value
))
.
Finalize
(
net
.
NewOperatorDef
());
.
Finalize
(
net
.
NewOperatorDef
());
}
else
if
(
D
==
DeviceType
::
GPU
)
{
}
else
if
(
D
==
DeviceType
::
GPU
)
{
BufferToImage
<
D
,
float
>
(
&
net
,
"Input"
,
"InputImage"
,
BufferToImage
<
D
,
float
>
(
&
net
,
"Input"
,
"InputImage"
,
...
@@ -78,7 +85,8 @@ void BMSpaceToBatch(
...
@@ -78,7 +85,8 @@ void BMSpaceToBatch(
#define MACE_BM_SPACE_TO_BATCH(N, H, W, C, SHAPE) \
#define MACE_BM_SPACE_TO_BATCH(N, H, W, C, SHAPE) \
MACE_BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, float, GPU); \
MACE_BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, float, GPU); \
MACE_BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, float, CPU);
MACE_BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, float, CPU); \
MACE_BM_SPACE_TO_BATCH_MACRO(N, H, W, C, SHAPE, uint8_t, CPU);
MACE_BM_SPACE_TO_BATCH
(
128
,
16
,
16
,
128
,
2
);
MACE_BM_SPACE_TO_BATCH
(
128
,
16
,
16
,
128
,
2
);
MACE_BM_SPACE_TO_BATCH
(
1
,
256
,
256
,
32
,
2
);
MACE_BM_SPACE_TO_BATCH
(
1
,
256
,
256
,
32
,
2
);
...
...
mace/ops/space_to_batch_test.cc
浏览文件 @
a5c9db3a
...
@@ -147,62 +147,7 @@ void TestBidirectionalTransform(const std::vector<index_t> &space_shape,
...
@@ -147,62 +147,7 @@ void TestBidirectionalTransform(const std::vector<index_t> &space_shape,
RunBatchToSpace
<
DeviceType
::
CPU
>
(
batch_shape
,
batch_data
,
block_data
,
RunBatchToSpace
<
DeviceType
::
CPU
>
(
batch_shape
,
batch_data
,
block_data
,
padding_data
,
space_tensor
.
get
());
padding_data
,
space_tensor
.
get
());
}
}
}
// namespace
TEST
(
SpaceToBatchTest
,
SmallData
)
{
TestBidirectionalTransform
<
float
>
({
1
,
2
,
2
,
1
},
{
1
,
2
,
3
,
4
},
{
2
,
2
},
{
0
,
0
,
0
,
0
},
{
4
,
1
,
1
,
1
},
{
1
,
2
,
3
,
4
});
}
TEST
(
SpaceToBatchTest
,
SmallDataWithOnePadding
)
{
TestBidirectionalTransform
<
float
>
({
1
,
2
,
2
,
1
},
{
1
,
2
,
3
,
4
},
{
3
,
3
},
{
1
,
0
,
1
,
0
},
{
9
,
1
,
1
,
1
},
{
0
,
0
,
0
,
0
,
1
,
2
,
0
,
3
,
4
});
}
TEST
(
SpaceToBatchTest
,
SmallDataWithTwoPadding
)
{
TestBidirectionalTransform
<
float
>
(
{
1
,
2
,
2
,
1
},
{
1
,
2
,
3
,
4
},
{
2
,
2
},
{
1
,
1
,
1
,
1
},
{
4
,
2
,
2
,
1
},
{
0
,
0
,
0
,
4
,
0
,
0
,
3
,
0
,
0
,
2
,
0
,
0
,
1
,
0
,
0
,
0
});
}
TEST
(
SpaceToBatchTest
,
SmallDataWithLargeImage
)
{
TestBidirectionalTransform
<
float
>
(
{
1
,
2
,
10
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
},
{
2
,
2
},
{
0
,
0
,
0
,
0
},
{
4
,
1
,
5
,
1
},
{
1
,
3
,
5
,
7
,
9
,
2
,
4
,
6
,
8
,
10
,
11
,
13
,
15
,
17
,
19
,
12
,
14
,
16
,
18
,
20
});
}
TEST
(
SpaceToBatchTest
,
MultiChannelData
)
{
TestBidirectionalTransform
<
float
>
(
{
1
,
2
,
2
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
},
{
2
,
2
},
{
0
,
0
,
0
,
0
},
{
4
,
1
,
1
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
});
}
TEST
(
SpaceToBatchTest
,
LargerMultiChannelData
)
{
TestBidirectionalTransform
<
float
>
(
{
1
,
4
,
4
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
},
{
2
,
2
},
{
0
,
0
,
0
,
0
},
{
4
,
2
,
2
,
1
},
{
1
,
3
,
9
,
11
,
2
,
4
,
10
,
12
,
5
,
7
,
13
,
15
,
6
,
8
,
14
,
16
});
}
TEST
(
SpaceToBatchTest
,
MultiBatchData
)
{
TestBidirectionalTransform
<
float
>
(
{
2
,
2
,
4
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
},
{
2
,
2
},
{
0
,
0
,
0
,
0
},
{
8
,
1
,
2
,
1
},
{
1
,
3
,
9
,
11
,
2
,
4
,
10
,
12
,
5
,
7
,
13
,
15
,
6
,
8
,
14
,
16
});
}
TEST
(
SpaceToBatchTest
,
MultiBatchAndChannelData
)
{
TestBidirectionalTransform
<
float
>
(
{
2
,
2
,
4
,
2
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
,
28
,
29
,
30
,
31
,
32
},
{
2
,
2
},
{
0
,
0
,
0
,
0
},
{
8
,
1
,
2
,
2
},
{
1
,
2
,
5
,
6
,
17
,
18
,
21
,
22
,
3
,
4
,
7
,
8
,
19
,
20
,
23
,
24
,
9
,
10
,
13
,
14
,
25
,
26
,
29
,
30
,
11
,
12
,
15
,
16
,
27
,
28
,
31
,
32
});
}
void
TestSpaceToBatchLargeInput
(
const
std
::
vector
<
index_t
>
&
input_shape
,
void
TestSpaceToBatchLargeInput
(
const
std
::
vector
<
index_t
>
&
input_shape
,
const
std
::
vector
<
int
>
&
block_shape_data
,
const
std
::
vector
<
int
>
&
block_shape_data
,
...
@@ -278,6 +223,173 @@ void TestoBatchToSpaceLargeInput(const std::vector<index_t> &input_shape,
...
@@ -278,6 +223,173 @@ void TestoBatchToSpaceLargeInput(const std::vector<index_t> &input_shape,
*
net
.
GetOutput
(
"OutputGPU"
));
*
net
.
GetOutput
(
"OutputGPU"
));
}
}
void
TestSpaceToBatchQuantize
(
const
std
::
vector
<
index_t
>
&
input_shape
,
const
std
::
vector
<
int
>
&
block_shape_data
,
const
std
::
vector
<
int
>
&
padding_data
)
{
OpsTestNet
net
;
net
.
AddRandomInput
<
CPU
,
float
>
(
"Input"
,
input_shape
,
false
,
true
,
-
1.
f
,
1.
f
);
// run cpu
net
.
TransformDataFormat
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
NHWC
,
"InputNCHW"
,
NCHW
);
OpDefBuilder
(
"SpaceToBatchND"
,
"SpaceToBatchNDTest"
)
.
Input
(
"InputNCHW"
)
.
Output
(
"OutputNCHW"
)
.
AddIntsArg
(
"paddings"
,
padding_data
)
.
AddIntsArg
(
"block_shape"
,
block_shape_data
)
.
Finalize
(
net
.
NewOperatorDef
());
net
.
RunOp
(
CPU
);
net
.
TransformDataFormat
<
DeviceType
::
CPU
,
float
>
(
"OutputNCHW"
,
NCHW
,
"OutputCPU"
,
NHWC
);
// run quantize
OpDefBuilder
(
"Quantize"
,
"QuantizeInput"
)
.
Input
(
"Input"
)
.
Output
(
"QuantizedInput"
)
.
OutputType
({
DT_UINT8
})
.
AddIntArg
(
"T"
,
DT_UINT8
)
.
Finalize
(
net
.
NewOperatorDef
());
net
.
RunOp
();
OpDefBuilder
(
"SpaceToBatchND"
,
"SpaceToBatchNDTest"
)
.
Input
(
"QuantizedInput"
)
.
Output
(
"QuantizedOutput"
)
.
AddIntsArg
(
"paddings"
,
padding_data
)
.
AddIntsArg
(
"block_shape"
,
block_shape_data
)
.
OutputType
({
DT_UINT8
})
.
AddIntArg
(
"T"
,
DT_UINT8
)
.
Finalize
(
net
.
NewOperatorDef
());
net
.
RunOp
();
Tensor
*
eq_output
=
net
.
GetTensor
(
"QuantizedInput"
);
Tensor
*
q_output
=
net
.
GetTensor
(
"QuantizedOutput"
);
q_output
->
SetScale
(
eq_output
->
scale
());
q_output
->
SetZeroPoint
(
eq_output
->
zero_point
());
OpDefBuilder
(
"Dequantize"
,
"DeQuantizeTest"
)
.
Input
(
"QuantizedOutput"
)
.
Output
(
"DequantizedOutput"
)
.
OutputType
({
DT_FLOAT
})
.
AddIntArg
(
"T"
,
DT_UINT8
)
.
Finalize
(
net
.
NewOperatorDef
());
net
.
RunOp
();
// Check
ExpectTensorSimilar
<
float
>
(
*
net
.
GetOutput
(
"OutputCPU"
),
*
net
.
GetTensor
(
"DequantizedOutput"
),
0.01
);
}
void
TestoBatchToSpaceQuantize
(
const
std
::
vector
<
index_t
>
&
input_shape
,
const
std
::
vector
<
int
>
&
block_shape_data
,
const
std
::
vector
<
int
>
&
crops_data
)
{
OpsTestNet
net
;
net
.
AddRandomInput
<
CPU
,
float
>
(
"Input"
,
input_shape
,
false
,
true
,
-
1.
f
,
1.
f
);
// run cpu
net
.
TransformDataFormat
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
NHWC
,
"InputNCHW"
,
NCHW
);
OpDefBuilder
(
"BatchToSpaceND"
,
"BatchToSpaceNDTest"
)
.
Input
(
"InputNCHW"
)
.
Output
(
"OutputNCHW"
)
.
AddIntsArg
(
"crops"
,
crops_data
)
.
AddIntsArg
(
"block_shape"
,
block_shape_data
)
.
Finalize
(
net
.
NewOperatorDef
());
net
.
RunOp
(
CPU
);
net
.
TransformDataFormat
<
DeviceType
::
CPU
,
float
>
(
"OutputNCHW"
,
NCHW
,
"OutputCPU"
,
NHWC
);
// run quantize
OpDefBuilder
(
"Quantize"
,
"QuantizeInput"
)
.
Input
(
"Input"
)
.
Output
(
"QuantizedInput"
)
.
OutputType
({
DT_UINT8
})
.
AddIntArg
(
"T"
,
DT_UINT8
)
.
Finalize
(
net
.
NewOperatorDef
());
net
.
RunOp
();
OpDefBuilder
(
"BatchToSpaceND"
,
"BatchToSpaceNDTest"
)
.
Input
(
"QuantizedInput"
)
.
Output
(
"QuantizedOutput"
)
.
AddIntsArg
(
"crops"
,
crops_data
)
.
AddIntsArg
(
"block_shape"
,
block_shape_data
)
.
OutputType
({
DT_UINT8
})
.
AddIntArg
(
"T"
,
DT_UINT8
)
.
Finalize
(
net
.
NewOperatorDef
());
net
.
RunOp
();
Tensor
*
eq_output
=
net
.
GetTensor
(
"QuantizedInput"
);
Tensor
*
q_output
=
net
.
GetTensor
(
"QuantizedOutput"
);
q_output
->
SetScale
(
eq_output
->
scale
());
q_output
->
SetZeroPoint
(
eq_output
->
zero_point
());
OpDefBuilder
(
"Dequantize"
,
"DeQuantizeTest"
)
.
Input
(
"QuantizedOutput"
)
.
Output
(
"DequantizedOutput"
)
.
OutputType
({
DT_FLOAT
})
.
AddIntArg
(
"T"
,
DT_UINT8
)
.
Finalize
(
net
.
NewOperatorDef
());
net
.
RunOp
();
// Check
ExpectTensorSimilar
<
float
>
(
*
net
.
GetOutput
(
"OutputCPU"
),
*
net
.
GetTensor
(
"DequantizedOutput"
),
0.01
);
}
}
// namespace
TEST
(
SpaceToBatchTest
,
SmallData
)
{
TestBidirectionalTransform
<
float
>
({
1
,
2
,
2
,
1
},
{
1
,
2
,
3
,
4
},
{
2
,
2
},
{
0
,
0
,
0
,
0
},
{
4
,
1
,
1
,
1
},
{
1
,
2
,
3
,
4
});
}
TEST
(
SpaceToBatchTest
,
SmallDataWithOnePadding
)
{
TestBidirectionalTransform
<
float
>
({
1
,
2
,
2
,
1
},
{
1
,
2
,
3
,
4
},
{
3
,
3
},
{
1
,
0
,
1
,
0
},
{
9
,
1
,
1
,
1
},
{
0
,
0
,
0
,
0
,
1
,
2
,
0
,
3
,
4
});
}
TEST
(
SpaceToBatchTest
,
SmallDataWithTwoPadding
)
{
TestBidirectionalTransform
<
float
>
(
{
1
,
2
,
2
,
1
},
{
1
,
2
,
3
,
4
},
{
2
,
2
},
{
1
,
1
,
1
,
1
},
{
4
,
2
,
2
,
1
},
{
0
,
0
,
0
,
4
,
0
,
0
,
3
,
0
,
0
,
2
,
0
,
0
,
1
,
0
,
0
,
0
});
}
TEST
(
SpaceToBatchTest
,
SmallDataWithLargeImage
)
{
TestBidirectionalTransform
<
float
>
(
{
1
,
2
,
10
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
},
{
2
,
2
},
{
0
,
0
,
0
,
0
},
{
4
,
1
,
5
,
1
},
{
1
,
3
,
5
,
7
,
9
,
2
,
4
,
6
,
8
,
10
,
11
,
13
,
15
,
17
,
19
,
12
,
14
,
16
,
18
,
20
});
}
TEST
(
SpaceToBatchTest
,
MultiChannelData
)
{
TestBidirectionalTransform
<
float
>
(
{
1
,
2
,
2
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
},
{
2
,
2
},
{
0
,
0
,
0
,
0
},
{
4
,
1
,
1
,
3
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
});
}
TEST
(
SpaceToBatchTest
,
LargerMultiChannelData
)
{
TestBidirectionalTransform
<
float
>
(
{
1
,
4
,
4
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
},
{
2
,
2
},
{
0
,
0
,
0
,
0
},
{
4
,
2
,
2
,
1
},
{
1
,
3
,
9
,
11
,
2
,
4
,
10
,
12
,
5
,
7
,
13
,
15
,
6
,
8
,
14
,
16
});
}
TEST
(
SpaceToBatchTest
,
MultiBatchData
)
{
TestBidirectionalTransform
<
float
>
(
{
2
,
2
,
4
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
},
{
2
,
2
},
{
0
,
0
,
0
,
0
},
{
8
,
1
,
2
,
1
},
{
1
,
3
,
9
,
11
,
2
,
4
,
10
,
12
,
5
,
7
,
13
,
15
,
6
,
8
,
14
,
16
});
}
TEST
(
SpaceToBatchTest
,
MultiBatchAndChannelData
)
{
TestBidirectionalTransform
<
float
>
(
{
2
,
2
,
4
,
2
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
,
28
,
29
,
30
,
31
,
32
},
{
2
,
2
},
{
0
,
0
,
0
,
0
},
{
8
,
1
,
2
,
2
},
{
1
,
2
,
5
,
6
,
17
,
18
,
21
,
22
,
3
,
4
,
7
,
8
,
19
,
20
,
23
,
24
,
9
,
10
,
13
,
14
,
25
,
26
,
29
,
30
,
11
,
12
,
15
,
16
,
27
,
28
,
31
,
32
});
}
TEST
(
SpaceToBatchTest
,
LargeData
)
{
TEST
(
SpaceToBatchTest
,
LargeData
)
{
TestSpaceToBatchLargeInput
({
1
,
256
,
256
,
32
},
{
8
,
8
},
{
0
,
0
,
0
,
0
});
TestSpaceToBatchLargeInput
({
1
,
256
,
256
,
32
},
{
8
,
8
},
{
0
,
0
,
0
,
0
});
TestSpaceToBatchLargeInput
({
1
,
256
,
256
,
32
},
{
8
,
8
},
{
4
,
4
,
4
,
4
});
TestSpaceToBatchLargeInput
({
1
,
256
,
256
,
32
},
{
8
,
8
},
{
4
,
4
,
4
,
4
});
...
@@ -285,6 +397,13 @@ TEST(SpaceToBatchTest, LargeData) {
...
@@ -285,6 +397,13 @@ TEST(SpaceToBatchTest, LargeData) {
TestoBatchToSpaceLargeInput
({
64
,
32
,
32
,
32
},
{
8
,
8
},
{
4
,
4
,
4
,
4
});
TestoBatchToSpaceLargeInput
({
64
,
32
,
32
,
32
},
{
8
,
8
},
{
4
,
4
,
4
,
4
});
}
}
TEST
(
SpaceToBatchTest
,
Quantize
)
{
TestSpaceToBatchQuantize
({
1
,
256
,
256
,
32
},
{
8
,
8
},
{
0
,
0
,
0
,
0
});
TestSpaceToBatchQuantize
({
1
,
256
,
256
,
32
},
{
8
,
8
},
{
4
,
4
,
4
,
4
});
TestoBatchToSpaceQuantize
({
64
,
32
,
32
,
32
},
{
8
,
8
},
{
0
,
0
,
0
,
0
});
TestoBatchToSpaceQuantize
({
64
,
32
,
32
,
32
},
{
8
,
8
},
{
4
,
4
,
4
,
4
});
}
}
// namespace test
}
// namespace test
}
// namespace ops
}
// namespace ops
}
// namespace mace
}
// namespace mace
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录