Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Xiaomi
Mace
提交
4563cc60
Mace
项目概览
Xiaomi
/
Mace
通知
106
Star
40
Fork
27
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
DevOps
流水线
流水线任务
计划
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
Mace
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
DevOps
DevOps
流水线
流水线任务
计划
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
流水线任务
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
4563cc60
编写于
5月 16, 2018
作者:
李
李寅
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add batch space cpu implementation
上级
66a49116
变更
10
隐藏空白更改
内联
并排
Showing
10 changed file
with
273 addition
and
100 deletion
+273
-100
mace/core/tensor.h
mace/core/tensor.h
+2
-0
mace/kernels/opencl/cl/space_to_batch.cl
mace/kernels/opencl/cl/space_to_batch.cl
+6
-4
mace/kernels/opencl/space_to_batch.cc
mace/kernels/opencl/space_to_batch.cc
+12
-1
mace/kernels/space_to_batch.h
mace/kernels/space_to_batch.h
+184
-12
mace/ops/batch_to_space.cc
mace/ops/batch_to_space.cc
+5
-2
mace/ops/batch_to_space.h
mace/ops/batch_to_space.h
+1
-31
mace/ops/ops_test_util.h
mace/ops/ops_test_util.h
+0
-2
mace/ops/space_to_batch.cc
mace/ops/space_to_batch.cc
+5
-2
mace/ops/space_to_batch.h
mace/ops/space_to_batch.h
+1
-33
mace/ops/space_to_batch_test.cc
mace/ops/space_to_batch_test.cc
+57
-13
未找到文件。
mace/core/tensor.h
浏览文件 @
4563cc60
...
...
@@ -107,6 +107,8 @@ inline std::ostream &operator<<(std::ostream &os, unsigned char c) {
}
}
// namespace numerical_chars
enum
DataFormat
{
NHWC
=
0
,
NCHW
=
1
,
HWOI
=
2
,
OIHW
=
3
,
HWIO
=
4
};
class
Tensor
{
public:
Tensor
(
Allocator
*
alloc
,
DataType
type
)
...
...
mace/kernels/opencl/cl/space_to_batch.cl
浏览文件 @
4563cc60
...
...
@@ -8,6 +8,7 @@ __kernel void space_to_batch(KERNEL_ERROR_PARAMS
__private
const
int
block_width,
__private
const
int
padding_height,
__private
const
int
padding_width,
__private
const
int
batch_size,
__private
const
int
space_height,
__private
const
int
space_width,
__private
const
int
batch_height,
...
...
@@ -27,8 +28,8 @@ __kernel void space_to_batch(KERNEL_ERROR_PARAMS
const int batch_h_idx = batch_hb_idx % batch_height;
const int block_size = mul24(block_height, block_width);
const int space_b_idx = batch_b_idx
/ block
_size;
const int remaining_batch_idx = batch_b_idx
% block
_size;
const int space_b_idx = batch_b_idx
% batch
_size;
const int remaining_batch_idx = batch_b_idx
/ batch
_size;
const int space_h_idx = (remaining_batch_idx / block_width) +
mul24(batch_h_idx, block_height) - padding_height;
const int space_w_idx = (remaining_batch_idx % block_width) +
...
...
@@ -57,6 +58,7 @@ __kernel void batch_to_space(KERNEL_ERROR_PARAMS
__private const int block_width,
__private const int padding_height,
__private const int padding_width,
__private const int batch_size,
__private const int space_height,
__private const int space_width,
__private const int batch_height,
...
...
@@ -76,8 +78,8 @@ __kernel void batch_to_space(KERNEL_ERROR_PARAMS
const
int
batch_h_idx
=
batch_hb_idx
%
batch_height
;
const
int
block_size
=
mul24
(
block_height,
block_width
)
;
const
int
space_b_idx
=
batch_b_idx
/
block
_size
;
const
int
remaining_batch_idx
=
batch_b_idx
%
block
_size
;
const
int
space_b_idx
=
batch_b_idx
%
batch
_size
;
const
int
remaining_batch_idx
=
batch_b_idx
/
batch
_size
;
const
int
space_h_idx
=
(
remaining_batch_idx
/
block_width
)
+
mul24
(
batch_h_idx,
block_height
)
-
padding_height
;
const
int
space_w_idx
=
(
remaining_batch_idx
%
block_width
)
+
...
...
mace/kernels/opencl/space_to_batch.cc
浏览文件 @
4563cc60
...
...
@@ -27,9 +27,19 @@ namespace kernels {
template
<
typename
T
>
void
SpaceToBatchFunctor
<
DeviceType
::
GPU
,
T
>::
operator
()(
Tensor
*
space_tensor
,
const
std
::
vector
<
index_t
>
&
output_shape
,
Tensor
*
batch_tensor
,
StatsFuture
*
future
)
{
std
::
vector
<
index_t
>
output_shape
(
4
,
0
);
if
(
b2s_
)
{
CalculateBatchToSpaceOutputShape
(
batch_tensor
,
DataFormat
::
NHWC
,
output_shape
.
data
());
}
else
{
CalculateSpaceToBatchOutputShape
(
space_tensor
,
DataFormat
::
NHWC
,
output_shape
.
data
());
}
const
char
*
kernel_name
=
nullptr
;
std
::
vector
<
size_t
>
output_image_shape
;
CalImage2DShape
(
output_shape
,
BufferType
::
IN_OUT_CHANNEL
,
...
...
@@ -97,6 +107,7 @@ void SpaceToBatchFunctor<DeviceType::GPU, T>::operator()(
kernel_
.
setArg
(
idx
++
,
block_shape_
[
1
]);
kernel_
.
setArg
(
idx
++
,
paddings_
[
0
]);
kernel_
.
setArg
(
idx
++
,
paddings_
[
2
]);
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
space_tensor
->
dim
(
0
)));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
space_tensor
->
dim
(
1
)));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
space_tensor
->
dim
(
2
)));
kernel_
.
setArg
(
idx
++
,
static_cast
<
int32_t
>
(
batch_tensor
->
dim
(
1
)));
...
...
mace/kernels/space_to_batch.h
浏览文件 @
4563cc60
...
...
@@ -33,31 +33,204 @@ struct SpaceToBatchFunctorBase {
SpaceToBatchFunctorBase
(
const
std
::
vector
<
int
>
&
paddings
,
const
std
::
vector
<
int
>
&
block_shape
,
bool
b2s
)
:
paddings_
(
paddings
.
begin
(),
paddings
.
end
()),
block_shape_
(
block_shape
.
begin
(),
block_shape
.
end
()),
b2s_
(
b2s
)
{}
:
paddings_
(
paddings
.
begin
(),
paddings
.
end
()),
block_shape_
(
block_shape
.
begin
(),
block_shape
.
end
()),
b2s_
(
b2s
)
{
MACE_CHECK
(
block_shape
.
size
()
==
2
&&
block_shape
[
0
]
>
1
&&
block_shape
[
1
]
>
1
,
"Block's shape should be 1D, and greater than 1"
);
MACE_CHECK
(
paddings
.
size
()
==
4
,
"Paddings' shape should be 2D"
);
}
std
::
vector
<
int
>
paddings_
;
std
::
vector
<
int
>
block_shape_
;
bool
b2s_
;
protected:
void
CalculateSpaceToBatchOutputShape
(
const
Tensor
*
input_tensor
,
const
DataFormat
data_format
,
index_t
*
output_shape
)
{
MACE_CHECK
(
input_tensor
->
dim_size
()
==
4
,
"Input's shape should be 4D"
);
index_t
batch
=
input_tensor
->
dim
(
0
);
index_t
channels
=
0
;
index_t
height
=
0
;
index_t
width
=
0
;
if
(
data_format
==
DataFormat
::
NHWC
)
{
height
=
input_tensor
->
dim
(
1
);
width
=
input_tensor
->
dim
(
2
);
channels
=
input_tensor
->
dim
(
3
);
}
else
if
(
data_format
==
DataFormat
::
NCHW
)
{
height
=
input_tensor
->
dim
(
2
);
width
=
input_tensor
->
dim
(
3
);
channels
=
input_tensor
->
dim
(
1
);
}
else
{
MACE_NOT_IMPLEMENTED
;
}
index_t
padded_height
=
height
+
paddings_
[
0
]
+
paddings_
[
1
];
index_t
padded_width
=
width
+
paddings_
[
2
]
+
paddings_
[
3
];
MACE_CHECK
(
padded_height
%
block_shape_
[
0
]
==
0
,
"padded input height"
,
padded_height
,
" is not divisible by block height"
);
MACE_CHECK
(
padded_width
%
block_shape_
[
1
]
==
0
,
"padded input width"
,
padded_height
,
" is not divisible by block width"
);
index_t
new_batch
=
batch
*
block_shape_
[
0
]
*
block_shape_
[
1
];
index_t
new_height
=
padded_height
/
block_shape_
[
0
];
index_t
new_width
=
padded_width
/
block_shape_
[
1
];
if
(
data_format
==
DataFormat
::
NHWC
)
{
output_shape
[
0
]
=
new_batch
;
output_shape
[
1
]
=
new_height
;
output_shape
[
2
]
=
new_width
;
output_shape
[
3
]
=
channels
;
}
else
{
output_shape
[
0
]
=
new_batch
;
output_shape
[
1
]
=
channels
;
output_shape
[
2
]
=
new_height
;
output_shape
[
3
]
=
new_width
;
}
}
void
CalculateBatchToSpaceOutputShape
(
const
Tensor
*
input_tensor
,
const
DataFormat
data_format
,
index_t
*
output_shape
)
{
MACE_CHECK
(
input_tensor
->
dim_size
()
==
4
,
"Input's shape should be 4D"
);
index_t
batch
=
input_tensor
->
dim
(
0
);
index_t
channels
=
0
;
index_t
height
=
0
;
index_t
width
=
0
;
if
(
data_format
==
DataFormat
::
NHWC
)
{
height
=
input_tensor
->
dim
(
1
);
width
=
input_tensor
->
dim
(
2
);
channels
=
input_tensor
->
dim
(
3
);
}
else
if
(
data_format
==
DataFormat
::
NCHW
)
{
height
=
input_tensor
->
dim
(
2
);
width
=
input_tensor
->
dim
(
3
);
channels
=
input_tensor
->
dim
(
1
);
}
else
{
MACE_NOT_IMPLEMENTED
;
}
index_t
new_batch
=
batch
/
block_shape_
[
0
]
/
block_shape_
[
1
];
index_t
new_height
=
height
*
block_shape_
[
0
]
-
paddings_
[
0
]
-
paddings_
[
1
];
index_t
new_width
=
width
*
block_shape_
[
1
]
-
paddings_
[
2
]
-
paddings_
[
3
];
if
(
data_format
==
DataFormat
::
NHWC
)
{
output_shape
[
0
]
=
new_batch
;
output_shape
[
1
]
=
new_height
;
output_shape
[
2
]
=
new_width
;
output_shape
[
3
]
=
channels
;
}
else
{
output_shape
[
0
]
=
new_batch
;
output_shape
[
1
]
=
channels
;
output_shape
[
2
]
=
new_height
;
output_shape
[
3
]
=
new_width
;
}
}
};
template
<
DeviceType
D
,
typename
T
>
struct
SpaceToBatchFunctor
:
SpaceToBatchFunctorBase
{
template
<
DeviceType
D
,
typename
T
>
struct
SpaceToBatchFunctor
;
template
<
>
struct
SpaceToBatchFunctor
<
DeviceType
::
CPU
,
float
>
:
SpaceToBatchFunctorBase
{
SpaceToBatchFunctor
(
const
std
::
vector
<
int
>
&
paddings
,
const
std
::
vector
<
int
>
&
block_shape
,
bool
b2s
)
:
SpaceToBatchFunctorBase
(
paddings
,
block_shape
,
b2s
)
{}
:
SpaceToBatchFunctorBase
(
paddings
,
block_shape
,
b2s
)
{}
void
operator
()(
Tensor
*
space_tensor
,
const
std
::
vector
<
index_t
>
&
output_shape
,
Tensor
*
batch_tensor
,
StatsFuture
*
future
)
{
MACE_UNUSED
(
space_tensor
);
MACE_UNUSED
(
output_shape
);
MACE_UNUSED
(
batch_tensor
);
MACE_UNUSED
(
future
);
MACE_NOT_IMPLEMENTED
;
std
::
vector
<
index_t
>
output_shape
(
4
,
0
);
if
(
b2s_
)
{
CalculateBatchToSpaceOutputShape
(
batch_tensor
,
DataFormat
::
NCHW
,
output_shape
.
data
());
space_tensor
->
Resize
(
output_shape
);
}
else
{
CalculateSpaceToBatchOutputShape
(
space_tensor
,
DataFormat
::
NCHW
,
output_shape
.
data
());
batch_tensor
->
Resize
(
output_shape
);
}
Tensor
::
MappingGuard
input_guard
(
space_tensor
);
Tensor
::
MappingGuard
output_guard
(
batch_tensor
);
if
(
b2s_
)
{
const
float
*
input_data
=
batch_tensor
->
data
<
float
>
();
float
*
output_data
=
space_tensor
->
mutable_data
<
float
>
();
index_t
in_height
=
batch_tensor
->
dim
(
2
);
index_t
in_width
=
batch_tensor
->
dim
(
3
);
index_t
out_batches
=
space_tensor
->
dim
(
0
);
index_t
channels
=
space_tensor
->
dim
(
1
);
index_t
out_height
=
space_tensor
->
dim
(
2
);
index_t
out_width
=
space_tensor
->
dim
(
3
);
#pragma omp parallel for collapse(2)
for
(
index_t
b
=
0
;
b
<
out_batches
;
++
b
)
{
for
(
index_t
c
=
0
;
c
<
channels
;
++
c
)
{
for
(
index_t
h
=
0
;
h
<
out_height
;
++
h
)
{
const
index_t
in_h
=
(
h
+
paddings_
[
0
])
/
block_shape_
[
0
];
const
index_t
tile_h
=
(
h
+
paddings_
[
0
])
%
block_shape_
[
0
];
for
(
index_t
w
=
0
;
w
<
out_width
;
++
w
)
{
const
index_t
in_w
=
(
w
+
paddings_
[
2
])
/
block_shape_
[
1
];
const
index_t
tile_w
=
(
w
+
paddings_
[
2
])
%
block_shape_
[
1
];
const
index_t
in_b
=
(
tile_h
*
block_shape_
[
1
]
+
tile_w
)
*
out_batches
+
b
;
output_data
[((
b
*
channels
+
c
)
*
out_height
+
h
)
*
out_width
+
w
]
=
input_data
[
((
in_b
*
channels
+
c
)
*
in_height
+
in_h
)
*
in_width
+
in_w
];
}
}
}
}
}
else
{
const
float
*
input_data
=
space_tensor
->
data
<
float
>
();
float
*
output_data
=
batch_tensor
->
mutable_data
<
float
>
();
index_t
in_batches
=
space_tensor
->
dim
(
0
);
index_t
in_height
=
space_tensor
->
dim
(
2
);
index_t
in_width
=
space_tensor
->
dim
(
3
);
index_t
out_batches
=
batch_tensor
->
dim
(
0
);
index_t
channels
=
batch_tensor
->
dim
(
1
);
index_t
out_height
=
batch_tensor
->
dim
(
2
);
index_t
out_width
=
batch_tensor
->
dim
(
3
);
#pragma omp parallel for collapse(2)
for
(
index_t
b
=
0
;
b
<
out_batches
;
++
b
)
{
for
(
index_t
c
=
0
;
c
<
channels
;
++
c
)
{
const
index_t
in_b
=
b
%
in_batches
;
const
index_t
tile_h
=
b
/
in_batches
/
block_shape_
[
1
];
const
index_t
tile_w
=
b
/
in_batches
%
block_shape_
[
1
];
for
(
index_t
h
=
0
;
h
<
out_height
;
++
h
)
{
const
index_t
in_h
=
h
*
block_shape_
[
0
]
+
tile_h
-
paddings_
[
0
];
for
(
index_t
w
=
0
;
w
<
out_width
;
++
w
)
{
const
index_t
in_w
=
w
*
block_shape_
[
1
]
+
tile_w
-
paddings_
[
2
];
if
(
in_h
>=
0
&&
in_w
>=
0
&&
in_h
<
in_height
&&
in_w
<
in_width
)
{
output_data
[((
b
*
channels
+
c
)
*
out_height
+
h
)
*
out_width
+
w
]
=
input_data
[
((
in_b
*
channels
+
c
)
*
in_height
+
in_h
)
*
in_width
+
in_w
];
}
else
{
output_data
[((
b
*
channels
+
c
)
*
out_height
+
h
)
*
out_width
+
w
]
=
0
;
}
}
}
}
}
}
}
};
...
...
@@ -70,7 +243,6 @@ struct SpaceToBatchFunctor<DeviceType::GPU, T> : SpaceToBatchFunctorBase {
:
SpaceToBatchFunctorBase
(
paddings
,
block_shape
,
b2s
)
{}
void
operator
()(
Tensor
*
space_tensor
,
const
std
::
vector
<
index_t
>
&
output_shape
,
Tensor
*
batch_tensor
,
StatsFuture
*
future
);
...
...
mace/ops/batch_to_space.cc
浏览文件 @
4563cc60
...
...
@@ -18,6 +18,11 @@ namespace mace {
namespace
ops
{
void
Register_BatchToSpaceND
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"BatchToSpaceND"
)
.
Device
(
DeviceType
::
CPU
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
BatchToSpaceNDOp
<
DeviceType
::
CPU
,
float
>
);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"BatchToSpaceND"
)
.
Device
(
DeviceType
::
GPU
)
...
...
@@ -29,8 +34,6 @@ void Register_BatchToSpaceND(OperatorRegistry *op_registry) {
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
BatchToSpaceNDOp
<
DeviceType
::
GPU
,
half
>
);
#else
MACE_UNUSED
(
op_registry
);
#endif // MACE_ENABLE_OPENCL
}
...
...
mace/ops/batch_to_space.h
浏览文件 @
4563cc60
...
...
@@ -36,41 +36,11 @@ class BatchToSpaceNDOp : public Operator<D, T> {
bool
Run
(
StatsFuture
*
future
)
override
{
const
Tensor
*
batch_tensor
=
this
->
Input
(
INPUT
);
Tensor
*
space_tensor
=
this
->
Output
(
OUTPUT
);
std
::
vector
<
index_t
>
output_shape
(
4
,
0
);
CalculateOutputShape
(
batch_tensor
,
output_shape
.
data
());
functor_
(
space_tensor
,
output_shape
,
const_cast
<
Tensor
*>
(
batch_tensor
),
functor_
(
space_tensor
,
const_cast
<
Tensor
*>
(
batch_tensor
),
future
);
return
true
;
}
private:
inline
void
CalculateOutputShape
(
const
Tensor
*
input_tensor
,
index_t
*
output_shape
)
{
auto
crops
=
OperatorBase
::
GetRepeatedArgument
<
int
>
(
"crops"
,
{
0
,
0
,
0
,
0
});
auto
block_shape
=
OperatorBase
::
GetRepeatedArgument
<
int
>
(
"block_shape"
,
{
1
,
1
});
MACE_CHECK
(
input_tensor
->
dim_size
()
==
4
,
"Input's shape should be 4D"
);
MACE_CHECK
(
block_shape
.
size
()
==
2
,
"Block's shape should be 1D"
);
MACE_CHECK
(
crops
.
size
()
==
4
,
"Crops' shape should be 2D"
);
const
index_t
block_dims
=
block_shape
.
size
();
index_t
block_shape_product
=
1
;
for
(
uint32_t
block_dim
=
0
;
block_dim
<
block_dims
;
++
block_dim
)
{
MACE_CHECK
(
block_shape
[
block_dim
]
>
1
,
"block_shape's value should be great to 1"
);
const
index_t
block_shape_value
=
block_shape
[
block_dim
];
const
index_t
cropped_input_size
=
input_tensor
->
dim
(
block_dim
+
1
)
*
block_shape_value
-
crops
[
block_dim
*
2
]
-
crops
[
block_dim
*
2
+
1
];
MACE_CHECK
(
cropped_input_size
>=
0
,
"cropped size must be non-negative"
);
block_shape_product
*=
block_shape_value
;
output_shape
[
block_dim
+
1
]
=
cropped_input_size
;
}
output_shape
[
0
]
=
input_tensor
->
dim
(
0
)
/
block_shape_product
;
output_shape
[
3
]
=
input_tensor
->
dim
(
3
);
}
private:
kernels
::
SpaceToBatchFunctor
<
D
,
T
>
functor_
;
...
...
mace/ops/ops_test_util.h
浏览文件 @
4563cc60
...
...
@@ -35,8 +35,6 @@ namespace mace {
namespace
ops
{
namespace
test
{
enum
DataFormat
{
NHWC
=
0
,
NCHW
=
1
,
HWOI
=
2
,
OIHW
=
3
,
HWIO
=
4
};
class
OpDefBuilder
{
public:
OpDefBuilder
(
const
char
*
type
,
const
std
::
string
&
name
)
{
...
...
mace/ops/space_to_batch.cc
浏览文件 @
4563cc60
...
...
@@ -18,6 +18,11 @@ namespace mace {
namespace
ops
{
void
Register_SpaceToBatchND
(
OperatorRegistry
*
op_registry
)
{
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"SpaceToBatchND"
)
.
Device
(
DeviceType
::
CPU
)
.
TypeConstraint
<
float
>
(
"T"
)
.
Build
(),
SpaceToBatchNDOp
<
DeviceType
::
CPU
,
float
>
);
#ifdef MACE_ENABLE_OPENCL
REGISTER_OPERATOR
(
op_registry
,
OpKeyBuilder
(
"SpaceToBatchND"
)
.
Device
(
DeviceType
::
GPU
)
...
...
@@ -30,8 +35,6 @@ void Register_SpaceToBatchND(OperatorRegistry *op_registry) {
.
TypeConstraint
<
half
>
(
"T"
)
.
Build
(),
SpaceToBatchNDOp
<
DeviceType
::
GPU
,
half
>
);
#else
MACE_UNUSED
(
op_registry
);
#endif // MACE_ENABLE_OPENCL
}
...
...
mace/ops/space_to_batch.h
浏览文件 @
4563cc60
...
...
@@ -37,43 +37,11 @@ class SpaceToBatchNDOp : public Operator<D, T> {
bool
Run
(
StatsFuture
*
future
)
override
{
const
Tensor
*
space_tensor
=
this
->
Input
(
INPUT
);
Tensor
*
batch_tensor
=
this
->
Output
(
OUTPUT
);
std
::
vector
<
index_t
>
output_shape
(
4
,
0
);
CalculateOutputShape
(
space_tensor
,
output_shape
.
data
());
functor_
(
const_cast
<
Tensor
*>
(
space_tensor
),
output_shape
,
batch_tensor
,
functor_
(
const_cast
<
Tensor
*>
(
space_tensor
),
batch_tensor
,
future
);
return
true
;
}
private:
inline
void
CalculateOutputShape
(
const
Tensor
*
input_tensor
,
index_t
*
output_shape
)
{
auto
paddings
=
OperatorBase
::
GetRepeatedArgument
<
int
>
(
"paddings"
,
{
0
,
0
,
0
,
0
});
auto
block_shape
=
OperatorBase
::
GetRepeatedArgument
<
int
>
(
"block_shape"
,
{
1
,
1
});
MACE_CHECK
(
input_tensor
->
dim_size
()
==
4
,
"Input's shape should be 4D"
);
MACE_CHECK
(
block_shape
.
size
()
==
2
,
"Block's shape should be 1D"
);
MACE_CHECK
(
paddings
.
size
()
==
4
,
"Paddings' shape should be 2D"
);
const
index_t
block_dims
=
block_shape
.
size
();
index_t
block_shape_product
=
1
;
for
(
uint32_t
block_dim
=
0
;
block_dim
<
block_dims
;
++
block_dim
)
{
MACE_CHECK
(
block_shape
[
block_dim
]
>
1
,
"block_shape's value should be great to 1"
);
const
index_t
block_shape_value
=
block_shape
[
block_dim
];
const
index_t
padded_input_size
=
input_tensor
->
dim
(
block_dim
+
1
)
+
paddings
[
block_dim
*
2
]
+
paddings
[
block_dim
*
2
+
1
];
MACE_CHECK
(
padded_input_size
%
block_shape_value
==
0
,
"padded input "
,
padded_input_size
,
" is not divisible by block_shape"
);
block_shape_product
*=
block_shape_value
;
output_shape
[
block_dim
+
1
]
=
padded_input_size
/
block_shape_value
;
}
output_shape
[
0
]
=
input_tensor
->
dim
(
0
)
*
block_shape_product
;
output_shape
[
3
]
=
input_tensor
->
dim
(
3
);
}
private:
kernels
::
SpaceToBatchFunctor
<
D
,
T
>
functor_
;
...
...
mace/ops/space_to_batch_test.cc
浏览文件 @
4563cc60
...
...
@@ -31,20 +31,40 @@ void RunSpaceToBatch(const std::vector<index_t> &input_shape,
OpsTestNet
net
;
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
input_shape
,
input_data
);
BufferToImage
<
D
,
float
>
(
&
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
OpDefBuilder
(
"SpaceToBatchND"
,
"SpaceToBatchNDTest"
)
if
(
D
==
GPU
)
{
BufferToImage
<
D
,
float
>
(
&
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
OpDefBuilder
(
"SpaceToBatchND"
,
"SpaceToBatchNDTest"
)
.
Input
(
"InputImage"
)
.
Output
(
"OutputImage"
)
.
AddIntsArg
(
"paddings"
,
padding_data
)
.
AddIntsArg
(
"block_shape"
,
block_shape_data
)
.
Finalize
(
net
.
NewOperatorDef
());
}
else
if
(
D
==
CPU
)
{
net
.
TransformDataFormat
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
NHWC
,
"InputNCHW"
,
NCHW
);
OpDefBuilder
(
"SpaceToBatchND"
,
"SpaceToBatchNDTest"
)
.
Input
(
"InputNCHW"
)
.
Output
(
"OutputNCHW"
)
.
AddIntsArg
(
"paddings"
,
padding_data
)
.
AddIntsArg
(
"block_shape"
,
block_shape_data
)
.
Finalize
(
net
.
NewOperatorDef
());
}
// Run
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
float
>
(
&
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
if
(
D
==
GPU
)
{
ImageToBuffer
<
D
,
float
>
(
&
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
}
else
if
(
D
==
CPU
)
{
net
.
TransformDataFormat
<
DeviceType
::
CPU
,
float
>
(
"OutputNCHW"
,
NCHW
,
"Output"
,
NHWC
);
}
// Check
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
));
}
...
...
@@ -59,20 +79,40 @@ void RunBatchToSpace(const std::vector<index_t> &input_shape,
// Add input data
net
.
AddInputFromArray
<
D
,
float
>
(
"Input"
,
input_shape
,
input_data
);
BufferToImage
<
D
,
float
>
(
&
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
OpDefBuilder
(
"BatchToSpaceND"
,
"BatchToSpaceNDTest"
)
if
(
D
==
GPU
)
{
BufferToImage
<
D
,
float
>
(
&
net
,
"Input"
,
"InputImage"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
OpDefBuilder
(
"BatchToSpaceND"
,
"BatchToSpaceNDTest"
)
.
Input
(
"InputImage"
)
.
Output
(
"OutputImage"
)
.
AddIntsArg
(
"crops"
,
crops_data
)
.
AddIntsArg
(
"block_shape"
,
block_shape_data
)
.
Finalize
(
net
.
NewOperatorDef
());
}
else
if
(
D
==
CPU
)
{
net
.
TransformDataFormat
<
DeviceType
::
CPU
,
float
>
(
"Input"
,
NHWC
,
"InputNCHW"
,
NCHW
);
OpDefBuilder
(
"BatchToSpaceND"
,
"BatchToSpaceNDTest"
)
.
Input
(
"InputNCHW"
)
.
Output
(
"OutputNCHW"
)
.
AddIntsArg
(
"crops"
,
crops_data
)
.
AddIntsArg
(
"block_shape"
,
block_shape_data
)
.
Finalize
(
net
.
NewOperatorDef
());
}
// Run
net
.
RunOp
(
D
);
ImageToBuffer
<
D
,
float
>
(
&
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
if
(
D
==
GPU
)
{
ImageToBuffer
<
D
,
float
>
(
&
net
,
"OutputImage"
,
"Output"
,
kernels
::
BufferType
::
IN_OUT_CHANNEL
);
}
else
if
(
D
==
CPU
)
{
net
.
TransformDataFormat
<
DeviceType
::
CPU
,
float
>
(
"OutputNCHW"
,
NCHW
,
"Output"
,
NHWC
);
}
// Check
ExpectTensorNear
<
float
>
(
*
expected
,
*
net
.
GetOutput
(
"Output"
));
}
...
...
@@ -108,9 +148,13 @@ void TestBidirectionalTransform(const std::vector<index_t> &space_shape,
RunSpaceToBatch
<
DeviceType
::
GPU
>
(
space_shape
,
space_data
,
block_data
,
padding_data
,
batch_tensor
.
get
());
RunSpaceToBatch
<
DeviceType
::
CPU
>
(
space_shape
,
space_data
,
block_data
,
padding_data
,
batch_tensor
.
get
());
RunBatchToSpace
<
DeviceType
::
GPU
>
(
batch_shape
,
batch_data
,
block_data
,
padding_data
,
space_tensor
.
get
());
RunBatchToSpace
<
DeviceType
::
CPU
>
(
batch_shape
,
batch_data
,
block_data
,
padding_data
,
space_tensor
.
get
());
}
}
// namespace
...
...
@@ -156,7 +200,7 @@ TEST(SpaceToBatchTest, MultiBatchData) {
TestBidirectionalTransform
<
float
>
(
{
2
,
2
,
4
,
1
},
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
},
{
2
,
2
},
{
0
,
0
,
0
,
0
},
{
8
,
1
,
2
,
1
},
{
1
,
3
,
2
,
4
,
5
,
7
,
6
,
8
,
9
,
11
,
10
,
12
,
13
,
15
,
14
,
16
});
{
1
,
3
,
9
,
11
,
2
,
4
,
10
,
12
,
5
,
7
,
13
,
15
,
6
,
8
,
14
,
16
});
}
TEST
(
SpaceToBatchTest
,
MultiBatchAndChannelData
)
{
...
...
@@ -165,8 +209,8 @@ TEST(SpaceToBatchTest, MultiBatchAndChannelData) {
{
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
,
12
,
13
,
14
,
15
,
16
,
17
,
18
,
19
,
20
,
21
,
22
,
23
,
24
,
25
,
26
,
27
,
28
,
29
,
30
,
31
,
32
},
{
2
,
2
},
{
0
,
0
,
0
,
0
},
{
8
,
1
,
2
,
2
},
{
1
,
2
,
5
,
6
,
3
,
4
,
7
,
8
,
9
,
10
,
13
,
14
,
11
,
12
,
15
,
16
,
17
,
18
,
21
,
22
,
19
,
20
,
23
,
24
,
25
,
26
,
29
,
30
,
27
,
28
,
31
,
32
});
{
1
,
2
,
5
,
6
,
17
,
18
,
21
,
22
,
3
,
4
,
7
,
8
,
19
,
20
,
23
,
24
,
9
,
10
,
13
,
14
,
25
,
26
,
29
,
30
,
11
,
12
,
15
,
16
,
27
,
28
,
31
,
32
});
}
}
// namespace test
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录