Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
ba791f7b
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 1 年 前同步成功
通知
694
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
体验新版 GitCode,发现更多精彩内容 >>
提交
ba791f7b
编写于
9月 27, 2017
作者:
C
chengduoZH
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
Add vol2col functor and unit test
上级
cce682fe
变更
5
显示空白变更内容
内联
并排
Showing
5 changed file
with
597 addition
and
3 deletion
+597
-3
paddle/operators/math/CMakeLists.txt
paddle/operators/math/CMakeLists.txt
+4
-3
paddle/operators/math/vol2col.cc
paddle/operators/math/vol2col.cc
+155
-0
paddle/operators/math/vol2col.cu
paddle/operators/math/vol2col.cu
+204
-0
paddle/operators/math/vol2col.h
paddle/operators/math/vol2col.h
+78
-0
paddle/operators/math/vol2col_test.cc
paddle/operators/math/vol2col_test.cc
+156
-0
未找到文件。
paddle/operators/math/CMakeLists.txt
浏览文件 @
ba791f7b
if
(
WITH_GPU
)
nv_library
(
math_function SRCS math_function.cc math_function.cu im2col.cc
im2col.cu DEPS cblas device_context operator
)
im2col.cu
vol2col.cc vol2col.cu
DEPS cblas device_context operator
)
nv_library
(
softmax_function SRCS softmax.cc softmax.cu
DEPS operator
)
nv_library
(
cross_entropy_function SRCS cross_entropy.cc cross_entropy.cu
DEPS operator
)
else
()
cc_library
(
math_function SRCS math_function.cc im2col.cc
cc_library
(
math_function SRCS math_function.cc im2col.cc
vol2col.cc
DEPS cblas device_context operator
)
cc_library
(
softmax_function SRCS softmax.cc DEPS operator
)
cc_library
(
cross_entropy_function SRCS cross_entropy.cc DEPS operator
)
...
...
@@ -14,3 +14,4 @@ endif()
nv_test
(
math_function_test SRCS math_function_test.cc DEPS math_function tensor
)
cc_test
(
im2col_test SRCS im2col_test.cc DEPS math_function tensor
)
cc_test
(
vol2col_test SRCS vol2col_test.cc DEPS math_function tensor
)
paddle/operators/math/vol2col.cc
0 → 100644
浏览文件 @
ba791f7b
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/math/vol2col.h"
namespace
paddle
{
namespace
operators
{
namespace
math
{
/*
* vol = [input_channels, input_depth, input_height, input_width]
* col =
* [input_channels, filter_depth, filter_height, filter_width,
* output_depth, output_height, output_width]
*/
template
<
class
T
>
class
Vol2ColFunctor
<
platform
::
CPUPlace
,
T
>
{
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
vol
,
framework
::
Tensor
&
col
,
int
stride_depth
,
int
stride_height
,
int
stride_width
,
int
padding_depth
,
int
padding_height
,
int
padding_width
)
const
{
PADDLE_ENFORCE
(
vol
.
dims
().
size
()
==
4
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
7
);
int
input_channels
=
vol
.
dims
()[
0
];
int
input_depth
=
vol
.
dims
()[
1
];
int
input_height
=
vol
.
dims
()[
2
];
int
input_width
=
vol
.
dims
()[
3
];
int
filter_depth
=
col
.
dims
()[
1
];
int
filter_height
=
col
.
dims
()[
2
];
int
filter_width
=
col
.
dims
()[
3
];
int
output_depth
=
col
.
dims
()[
4
];
int
output_height
=
col
.
dims
()[
5
];
int
output_width
=
col
.
dims
()[
6
];
int
channels_col
=
input_channels
*
filter_depth
*
filter_height
*
filter_width
;
const
T
*
vol_data
=
vol
.
data
<
T
>
();
T
*
col_data
=
col
.
data
<
T
>
();
for
(
int
c
=
0
;
c
<
channels_col
;
++
c
)
{
int
w_offset
=
c
%
filter_width
;
int
h_offset
=
(
c
/
filter_width
)
%
filter_height
;
int
d_offset
=
(
c
/
filter_width
/
filter_height
)
%
filter_depth
;
int
c_in
=
c
/
filter_width
/
filter_height
/
filter_depth
;
for
(
int
d
=
0
;
d
<
output_depth
;
++
d
)
{
int
d_pad
=
d
*
stride_depth
-
padding_depth
+
d_offset
;
for
(
int
h
=
0
;
h
<
output_height
;
++
h
)
{
int
h_pad
=
h
*
stride_height
-
padding_height
+
h_offset
;
for
(
int
w
=
0
;
w
<
output_width
;
++
w
)
{
int
w_pad
=
w
*
stride_width
-
padding_width
+
w_offset
;
int
col_idx
=
((
c
*
output_depth
+
d
)
*
output_height
+
h
)
*
output_width
+
w
;
if
(
h_pad
<
0
||
h_pad
>=
input_height
||
w_pad
<
0
||
w_pad
>=
input_width
||
d_pad
<
0
||
d_pad
>=
input_depth
)
{
col_data
[
col_idx
]
=
T
(
0
);
}
else
{
int
vol_idx
=
((
c_in
*
input_depth
+
d_pad
)
*
input_height
+
h_pad
)
*
input_width
+
w_pad
;
col_data
[
col_idx
]
=
vol_data
[
vol_idx
];
}
}
}
}
}
}
};
/*
* vol = [input_channels,input_depth, input_height, input_width]
* col =
* [input_channels, filter_depth, filter_height, filter_width,
* output_depth, output_height, output_width]
*/
template
<
class
T
>
class
Col2VolFunctor
<
platform
::
CPUPlace
,
T
>
{
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
&
vol
,
const
framework
::
Tensor
&
col
,
int
stride_depth
,
int
stride_height
,
int
stride_width
,
int
padding_depth
,
int
padding_height
,
int
padding_width
)
const
{
PADDLE_ENFORCE
(
vol
.
dims
().
size
()
==
4
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
7
);
int
input_channels
=
vol
.
dims
()[
0
];
int
input_depth
=
vol
.
dims
()[
1
];
int
input_height
=
vol
.
dims
()[
2
];
int
input_width
=
vol
.
dims
()[
3
];
int
filter_depth
=
col
.
dims
()[
1
];
int
filter_height
=
col
.
dims
()[
2
];
int
filter_width
=
col
.
dims
()[
3
];
int
output_depth
=
col
.
dims
()[
4
];
int
output_height
=
col
.
dims
()[
5
];
int
output_width
=
col
.
dims
()[
6
];
int
channels_col
=
input_channels
*
filter_depth
*
filter_height
*
filter_width
;
T
*
vol_data
=
vol
.
data
<
T
>
();
const
T
*
col_data
=
col
.
data
<
T
>
();
for
(
int
c
=
0
;
c
<
channels_col
;
++
c
)
{
int
w_offset
=
c
%
filter_width
;
int
h_offset
=
(
c
/
filter_width
)
%
filter_height
;
int
d_offset
=
(
c
/
filter_width
/
filter_height
)
%
filter_depth
;
int
cIm
=
c
/
filter_width
/
filter_height
/
filter_depth
;
for
(
int
d
=
0
;
d
<
output_depth
;
++
d
)
{
int
d_pad
=
d
*
stride_depth
-
padding_depth
+
d_offset
;
for
(
int
h
=
0
;
h
<
output_height
;
++
h
)
{
int
h_pad
=
h
*
stride_height
-
padding_height
+
h_offset
;
for
(
int
w
=
0
;
w
<
output_width
;
++
w
)
{
int
w_pad
=
w
*
stride_width
-
padding_width
+
w_offset
;
if
(
h_pad
>=
0
&&
h_pad
<
input_height
&&
w_pad
>=
0
&&
w_pad
<
input_width
&&
d_pad
>=
0
&&
d_pad
<
input_depth
)
{
int
vol_idx
=
((
cIm
*
input_depth
+
d_pad
)
*
input_height
+
h_pad
)
*
input_width
+
w_pad
;
int
col_idx
=
((
c
*
output_depth
+
d
)
*
output_height
+
h
)
*
output_width
+
w
;
vol_data
[
vol_idx
]
+=
col_data
[
col_idx
];
}
}
}
}
}
}
};
template
class
Vol2ColFunctor
<
platform
::
CPUPlace
,
float
>;
template
class
Vol2ColFunctor
<
platform
::
CPUPlace
,
double
>;
template
class
Col2VolFunctor
<
platform
::
CPUPlace
,
float
>;
template
class
Col2VolFunctor
<
platform
::
CPUPlace
,
double
>;
}
// namespace math
}
// namespace operators
}
// namespace paddle
paddle/operators/math/vol2col.cu
0 → 100644
浏览文件 @
ba791f7b
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/math/vol2col.h"
#include "paddle/platform/cuda_helper.h"
namespace
paddle
{
namespace
operators
{
namespace
math
{
template
<
class
T
>
__global__
void
vol2col
(
int
num_kernels
,
const
T
*
data_vol
,
int
depth
,
int
height
,
int
width
,
int
filter_depth
,
int
filter_height
,
int
filter_width
,
int
stride_depth
,
int
stride_height
,
int
stride_width
,
int
padding_depth
,
int
padding_height
,
int
padding_width
,
int
output_detph
,
int
output_height
,
int
output_width
,
T
*
data_col
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
num_kernels
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
w_out
=
index
%
output_width
;
int
h_out
=
(
index
/
output_width
)
%
output_height
;
int
d_out
=
(
index
/
output_width
/
output_height
)
%
output_detph
;
int
channel_in
=
index
/
output_width
/
output_height
/
output_detph
;
int
channel_out
=
channel_in
*
filter_depth
*
filter_height
*
filter_width
;
int
w_in
=
w_out
*
stride_width
-
padding_width
;
int
h_in
=
h_out
*
stride_height
-
padding_height
;
int
d_in
=
d_out
*
stride_depth
-
padding_depth
;
data_col
+=
((
channel_out
*
output_detph
+
d_out
)
*
output_height
+
h_out
)
*
output_width
+
w_out
;
data_vol
+=
((
channel_in
*
depth
+
d_in
)
*
height
+
h_in
)
*
width
+
w_in
;
for
(
int
k
=
0
;
k
<
filter_depth
;
++
k
)
{
for
(
int
i
=
0
;
i
<
filter_height
;
++
i
)
{
for
(
int
j
=
0
;
j
<
filter_width
;
++
j
)
{
int
d
=
d_in
+
k
;
int
h
=
h_in
+
i
;
int
w
=
w_in
+
j
;
*
data_col
=
(
d
>=
0
&&
d
<
depth
&&
h
>=
0
&&
h
<
height
&&
w
>=
0
&&
w
<
width
)
?
data_vol
[(
k
*
height
+
i
)
*
width
+
j
]
:
0
;
data_col
+=
output_detph
*
output_height
*
output_width
;
}
}
}
}
}
/*
* im = [input_channels,intpu_depth, input_height, input_width]
* col =
* [input_channels, filter_depth, filter_height, filter_width,
* output_depth, output_height, output_width]
*/
template
<
class
T
>
class
Vol2ColFunctor
<
platform
::
GPUPlace
,
T
>
{
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
vol
,
framework
::
Tensor
&
col
,
int
stride_depth
,
int
stride_height
,
int
stride_width
,
int
padding_depth
,
int
padding_height
,
int
padding_width
)
const
{
PADDLE_ENFORCE
(
vol
.
dims
().
size
()
==
4
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
7
);
int
input_channels
=
vol
.
dims
()[
0
];
int
input_depth
=
vol
.
dims
()[
1
];
int
input_height
=
vol
.
dims
()[
2
];
int
input_width
=
vol
.
dims
()[
3
];
int
filter_depth
=
col
.
dims
()[
1
];
int
filter_height
=
col
.
dims
()[
2
];
int
filter_width
=
col
.
dims
()[
3
];
int
output_depth
=
col
.
dims
()[
4
];
int
output_height
=
col
.
dims
()[
5
];
int
output_width
=
col
.
dims
()[
6
];
int
num_outputs
=
input_channels
*
output_depth
*
output_height
*
output_width
;
const
int
threads
=
1024
;
const
int
blocks
=
(
num_outputs
+
1024
-
1
)
/
1024
;
vol2col
<
T
><<<
blocks
,
threads
,
0
,
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
.
stream
()
>>>
(
num_outputs
,
vol
.
data
<
T
>
(),
input_depth
,
input_height
,
input_width
,
filter_depth
,
filter_height
,
filter_width
,
stride_depth
,
stride_height
,
stride_width
,
padding_depth
,
padding_height
,
padding_width
,
output_depth
,
output_height
,
output_width
,
col
.
data
<
T
>
());
}
};
template
<
class
T
>
__global__
void
col2vol
(
int
num_kernels
,
const
T
*
data_col
,
int
depth
,
int
height
,
int
width
,
int
filter_depth
,
int
filter_height
,
int
filter_width
,
int
stride_depth
,
int
stride_height
,
int
stride_width
,
int
padding_depth
,
int
padding_height
,
int
padding_width
,
int
output_detph
,
int
output_height
,
int
output_width
,
T
*
data_vol
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
num_kernels
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
T
src_val
=
0
;
int
w
=
index
%
width
+
padding_width
;
int
h
=
(
index
/
width
)
%
height
+
padding_height
;
int
d
=
(
index
/
width
/
height
)
%
depth
+
padding_depth
;
int
c
=
index
/
width
/
height
/
depth
;
// compute the start and end of the output
int
w_col_start
=
(
w
<
filter_width
)
?
0
:
(
w
-
filter_width
)
/
stride_width
+
1
;
int
w_col_end
=
min
(
w
/
stride_width
+
1
,
output_width
);
int
h_col_start
=
(
h
<
filter_height
)
?
0
:
(
h
-
filter_height
)
/
stride_height
+
1
;
int
h_col_end
=
min
(
h
/
stride_height
+
1
,
output_height
);
int
d_col_start
=
(
d
<
filter_depth
)
?
0
:
(
d
-
filter_depth
)
/
stride_depth
+
1
;
int
d_col_end
=
min
(
d
/
stride_depth
+
1
,
output_detph
);
int
offset
=
(
c
*
filter_depth
*
filter_height
*
filter_width
+
d
*
filter_width
*
filter_height
+
h
*
filter_width
+
w
)
*
output_detph
*
output_height
*
output_width
;
int
coeff_d_col
=
(
1
-
stride_depth
*
filter_width
*
filter_height
*
output_detph
)
*
output_height
*
output_width
;
int
coeff_h_col
=
(
1
-
stride_height
*
filter_width
*
output_detph
*
output_height
)
*
output_width
;
int
coeff_w_col
=
(
1
-
stride_width
*
output_detph
*
output_height
*
output_width
);
for
(
int
d_col
=
d_col_start
;
d_col
<
d_col_end
;
++
d_col
)
{
for
(
int
h_col
=
h_col_start
;
h_col
<
h_col_end
;
++
h_col
)
{
for
(
int
w_col
=
w_col_start
;
w_col
<
w_col_end
;
++
w_col
)
{
src_val
+=
data_col
[
offset
+
d_col
*
coeff_d_col
+
h_col
*
coeff_h_col
+
w_col
*
coeff_w_col
];
}
}
}
data_vol
[
index
]
=
src_val
;
}
}
/*
* im = [input_channels, input_depth, input_height, input_width]
* col =
* [input_channels, filter_depth, filter_height, filter_width,
* output_depth, output_height, output_width]
*/
template
<
class
T
>
class
Col2VolFunctor
<
platform
::
GPUPlace
,
T
>
{
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
&
vol
,
const
framework
::
Tensor
&
col
,
int
stride_depth
,
int
stride_height
,
int
stride_width
,
int
padding_depth
,
int
padding_height
,
int
padding_width
)
const
{
PADDLE_ENFORCE
(
vol
.
dims
().
size
()
==
4
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
7
);
int
input_channels
=
vol
.
dims
()[
0
];
int
input_depth
=
vol
.
dims
()[
1
];
int
input_height
=
vol
.
dims
()[
2
];
int
input_width
=
vol
.
dims
()[
3
];
int
filter_depth
=
col
.
dims
()[
1
];
int
filter_height
=
col
.
dims
()[
2
];
int
filter_width
=
col
.
dims
()[
3
];
int
output_depth
=
col
.
dims
()[
4
];
int
output_height
=
col
.
dims
()[
5
];
int
output_width
=
col
.
dims
()[
6
];
int
num_kernels
=
input_channels
*
input_depth
*
input_height
*
input_width
;
const
int
threads
=
1024
;
const
int
blocks
=
(
num_kernels
+
1024
-
1
)
/
1024
;
col2vol
<
T
><<<
blocks
,
threads
,
0
,
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
.
stream
()
>>>
(
num_kernels
,
col
.
data
<
T
>
(),
input_depth
,
input_height
,
input_width
,
filter_depth
,
filter_height
,
filter_width
,
stride_depth
,
stride_height
,
stride_width
,
padding_depth
,
padding_height
,
padding_width
,
output_depth
,
output_height
,
output_width
,
vol
.
data
<
T
>
());
}
};
template
class
Vol2ColFunctor
<
platform
::
GPUPlace
,
float
>;
template
class
Vol2ColFunctor
<
platform
::
GPUPlace
,
double
>;
template
class
Col2VolFunctor
<
platform
::
GPUPlace
,
float
>;
template
class
Col2VolFunctor
<
platform
::
GPUPlace
,
double
>;
}
// namespace math
}
// namespace operators
}
// namespace paddle
paddle/operators/math/vol2col.h
0 → 100644
浏览文件 @
ba791f7b
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include "paddle/framework/tensor.h"
#include "paddle/platform/device_context.h"
namespace
paddle
{
namespace
operators
{
namespace
math
{
/*
* \brief Converts the feature data of four dimensions(CDHW) into a colData of
* seven dimensions in the Vol2ColFunctor calculation,
* And in the Col2VolFunctor calculation, it is reversed.
*
* \param volData Vol data.
* \param volShape The shape of volData,
* [input_channels, input_depth, input_height, input_width].
* \param colData Column data.
* \param colShape The shape of colData.
*
* The shape of colData is:
* [input_channels, filter_depth, filter_height, filter_width, output_depth,
* output_height, output_width]
* So, it is easy to reshape into a convolution matrix for convolution
* calculation based on matrix multiplication.
* The shape of convolution matrix is [height, width], where the height is equal
* input_channels * filter_depth * filter_height * filter_width, and the width
* is equal output_depth * output_height * output_width.
*
* Reshape:
* shape of colData shape of convolution matrix
* [input_channels,
* filter_depth,
* filter_height,
* filter_width, ======> [height, width]
* output_depth,
* output_height,
* output_width]
*
* \note The caller needs to ensure that volShape.inputChannels is equal to
* colShape.inputChannels.
*/
template
<
typename
Place
,
typename
T
>
class
Vol2ColFunctor
{
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
vol
,
framework
::
Tensor
&
col
,
int
stride_depth
,
int
stride_height
,
int
stride_width
,
int
padding_depth
,
int
padding_height
,
int
padding_width
)
const
;
};
template
<
typename
Place
,
typename
T
>
class
Col2VolFunctor
{
public:
void
operator
()(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
&
vol
,
const
framework
::
Tensor
&
col
,
int
stride_depth
,
int
stride_height
,
int
stride_width
,
int
padding_depth
,
int
padding_height
,
int
padding_width
)
const
;
};
}
// namespace math
}
// namespace operators
}
// namespace paddle
paddle/operators/math/vol2col_test.cc
0 → 100644
浏览文件 @
ba791f7b
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/operators/math/vol2col.h"
#include <gtest/gtest.h>
#include <iostream>
template
<
typename
Place
>
void
testVol2col
()
{
paddle
::
framework
::
Tensor
input_tmp
;
paddle
::
framework
::
Tensor
input
;
paddle
::
framework
::
Tensor
output_cfo
;
paddle
::
framework
::
Tensor
output_ocf
;
paddle
::
framework
::
Tensor
output_tmp
;
auto
*
place
=
new
Place
();
paddle
::
platform
::
DeviceContext
*
context
;
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
context
=
new
paddle
::
platform
::
CPUDeviceContext
(
paddle
::
platform
::
CPUPlace
());
}
else
{
#ifndef PADDLE_ONLY_CPU
context
=
new
paddle
::
platform
::
CUDADeviceContext
(
paddle
::
platform
::
GPUPlace
());
#else
PADDLE_THROW
(
"no GPU support"
);
#endif // PADDLE_ONLY_CPU
}
/**
* input = [[0, 1, 2,
* 3, 4, 5]
* [6, 7, 8,
* 9, 10, 11]]
*
* output_cfo = [0, 1
* 1, 2
* 3, 4
* 4, 5
* 6, 7
* 7, 8
* 9, 10
* 10, 11]
*
* col2vol = [[0, 2, 2,
* 3, 8, 5]
* [6, 14, 8,
* 9, 20, 11]]
*
*/
int
input_depth
=
2
;
int
input_height
=
2
;
int
input_width
=
3
;
int
filter_size
=
2
;
int
stride
=
1
;
int
padding
=
0
;
int
output_depth
=
(
input_depth
-
filter_size
+
2
*
padding
)
/
stride
+
1
;
int
output_height
=
(
input_height
-
filter_size
+
2
*
padding
)
/
stride
+
1
;
int
output_width
=
(
input_width
-
filter_size
+
2
*
padding
)
/
stride
+
1
;
// Vol2Col test
float
*
input_ptr
=
input_tmp
.
mutable_data
<
float
>
({
1
,
input_depth
,
input_height
,
input_width
},
paddle
::
platform
::
CPUPlace
());
float
arr
[
12
]
=
{
0
,
1
,
2
,
3
,
4
,
5
,
6
,
7
,
8
,
9
,
10
,
11
};
memcpy
(
input_ptr
,
arr
,
12
*
sizeof
(
float
));
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
input
=
input_tmp
;
}
else
{
input
.
CopyFrom
<
float
>
(
input_tmp
,
*
place
);
}
output_cfo
.
mutable_data
<
float
>
({
1
,
filter_size
,
filter_size
,
filter_size
,
output_depth
,
output_height
,
output_width
},
*
place
);
paddle
::
operators
::
math
::
Vol2ColFunctor
<
Place
,
float
>
vol2col
;
vol2col
(
*
context
,
input
,
output_cfo
,
stride
,
stride
,
stride
,
padding
,
padding
,
padding
);
float
*
out_cfo_ptr
;
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
out_cfo_ptr
=
output_cfo
.
data
<
float
>
();
}
else
{
output_tmp
.
CopyFrom
<
float
>
(
output_cfo
,
paddle
::
platform
::
CPUPlace
());
out_cfo_ptr
=
output_tmp
.
data
<
float
>
();
}
EXPECT_EQ
(
out_cfo_ptr
[
0
],
0
);
EXPECT_EQ
(
out_cfo_ptr
[
1
],
1
);
EXPECT_EQ
(
out_cfo_ptr
[
2
],
1
);
EXPECT_EQ
(
out_cfo_ptr
[
3
],
2
);
EXPECT_EQ
(
out_cfo_ptr
[
4
],
3
);
EXPECT_EQ
(
out_cfo_ptr
[
5
],
4
);
EXPECT_EQ
(
out_cfo_ptr
[
6
],
4
);
EXPECT_EQ
(
out_cfo_ptr
[
7
],
5
);
EXPECT_EQ
(
out_cfo_ptr
[
8
],
6
);
EXPECT_EQ
(
out_cfo_ptr
[
9
],
7
);
EXPECT_EQ
(
out_cfo_ptr
[
10
],
7
);
EXPECT_EQ
(
out_cfo_ptr
[
11
],
8
);
EXPECT_EQ
(
out_cfo_ptr
[
12
],
9
);
EXPECT_EQ
(
out_cfo_ptr
[
13
],
10
);
EXPECT_EQ
(
out_cfo_ptr
[
14
],
10
);
EXPECT_EQ
(
out_cfo_ptr
[
15
],
11
);
// Col2Vol test
memset
(
input_ptr
,
0
,
12
*
sizeof
(
float
));
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
input
=
input_tmp
;
}
else
{
input
.
CopyFrom
<
float
>
(
input_tmp
,
*
place
);
}
paddle
::
operators
::
math
::
Col2VolFunctor
<
Place
,
float
>
col2vol
;
col2vol
(
*
context
,
input
,
output_cfo
,
stride
,
stride
,
stride
,
padding
,
padding
,
padding
);
float
*
in_cfo_ptr
;
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
in_cfo_ptr
=
input
.
data
<
float
>
();
}
else
{
input_tmp
.
CopyFrom
<
float
>
(
input
,
paddle
::
platform
::
CPUPlace
());
in_cfo_ptr
=
input_tmp
.
data
<
float
>
();
}
EXPECT_EQ
(
in_cfo_ptr
[
0
],
0
);
EXPECT_EQ
(
in_cfo_ptr
[
1
],
2
);
EXPECT_EQ
(
in_cfo_ptr
[
2
],
2
);
EXPECT_EQ
(
in_cfo_ptr
[
3
],
3
);
EXPECT_EQ
(
in_cfo_ptr
[
4
],
8
);
EXPECT_EQ
(
in_cfo_ptr
[
5
],
5
);
EXPECT_EQ
(
in_cfo_ptr
[
6
],
6
);
EXPECT_EQ
(
in_cfo_ptr
[
7
],
14
);
EXPECT_EQ
(
in_cfo_ptr
[
8
],
8
);
EXPECT_EQ
(
in_cfo_ptr
[
9
],
9
);
EXPECT_EQ
(
in_cfo_ptr
[
10
],
20
);
EXPECT_EQ
(
in_cfo_ptr
[
11
],
11
);
}
TEST
(
math
,
vol2col
)
{
testVol2col
<
paddle
::
platform
::
CPUPlace
>
();
#ifndef PADDLE_ONLY_CPU
testVol2col
<
paddle
::
platform
::
GPUPlace
>
();
#endif
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录