Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
0ee967b5
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
0ee967b5
编写于
9月 21, 2017
作者:
H
hedaoyuan
提交者:
GitHub
9月 21, 2017
浏览文件
操作
浏览文件
下载
差异文件
Merge pull request #4288 from hedaoyuan/fix_bug
Bug fix for get device_context in conv2d op.
上级
8c3b8af3
ccbb2853
变更
6
隐藏空白更改
内联
并排
Showing 6 changed files with 70 additions and 61 deletions
+70
-61
paddle/operators/gemm_conv2d_op.h
paddle/operators/gemm_conv2d_op.h
+14
-19
paddle/operators/math/im2col.cc
paddle/operators/math/im2col.cc
+12
-10
paddle/operators/math/im2col.cu
paddle/operators/math/im2col.cu
+24
-22
paddle/operators/math/im2col.h
paddle/operators/math/im2col.h
+6
-5
paddle/operators/math/im2col_test.cc
paddle/operators/math/im2col_test.cc
+2
-2
python/paddle/v2/framework/tests/test_conv2d_op.py
python/paddle/v2/framework/tests/test_conv2d_op.py
+12
-3
未找到文件。
paddle/operators/gemm_conv2d_op.h
浏览文件 @
0ee967b5
...
...
@@ -75,9 +75,6 @@ class GemmConv2DKernel : public framework::OpKernel {
framework
::
DDim
output_matrix_shape
=
{
output_channels
,
output_height
*
output_width
};
auto
*
device_context
=
const_cast
<
platform
::
DeviceContext
*>
(
context
.
device_context_
);
// convolution operator: im2col + gemm
int
in_step
=
input_channels
/
groups
;
int
out_step
=
output_channels
/
groups
;
...
...
@@ -87,14 +84,14 @@ class GemmConv2DKernel : public framework::OpKernel {
for
(
int
g
=
0
;
g
<
groups
;
g
++
)
{
// im2col
Tensor
in_slice
=
in_batch
.
Slice
<
T
>
(
g
*
in_step
,
(
g
+
1
)
*
in_step
);
im2col
(
in_slice
,
col
,
strides
[
0
],
strides
[
1
],
paddings
[
0
],
paddings
[
1
],
device_context
);
im2col
(
context
.
device_context
(),
in_slice
,
col
,
strides
[
0
],
strides
[
1
],
paddings
[
0
],
paddings
[
1
]
);
// gemm
Tensor
out_slice
=
out_batch
.
Slice
<
T
>
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
Tensor
filter_slice
=
filter
.
Slice
<
T
>
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
math
::
matmul
<
Place
,
T
>
(
filter_slice
,
false
,
col_matrix
,
false
,
T
(
1.0
)
,
&
out_slice
,
T
(
0.0
),
device_context
);
math
::
matmul
<
Place
,
T
>
(
context
.
device_context
(),
filter_slice
,
false
,
col_matrix
,
false
,
T
(
1.0
),
&
out_slice
,
T
(
0.0
)
);
}
}
}
...
...
@@ -160,9 +157,6 @@ class GemmConvGrad2DKernel : public framework::OpKernel {
filter
.
numel
()
/
filter
.
dims
()[
0
]};
filter
.
Resize
(
filter_matrix_shape
);
auto
*
device_context
=
const_cast
<
platform
::
DeviceContext
*>
(
context
.
device_context_
);
// convolution backward input operator: gemm + col2im
// convolution backward weight operator: im2col + gemm
int
in_step
=
input_channels
/
groups
;
...
...
@@ -184,14 +178,15 @@ class GemmConvGrad2DKernel : public framework::OpKernel {
out_grad_batch
.
Slice
<
T
>
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
Tensor
filter_slice
=
filter
.
Slice
<
T
>
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
math
::
matmul
<
Place
,
T
>
(
filter_slice
,
true
,
out_grad_slice
,
false
,
T
(
1.0
),
&
col_matrix
,
T
(
0.0
),
device_context
);
math
::
matmul
<
Place
,
T
>
(
context
.
device_context
(),
filter_slice
,
true
,
out_grad_slice
,
false
,
T
(
1.0
),
&
col_matrix
,
T
(
0.0
));
// col2im
Tensor
in_grad_slice
=
in_grad_batch
.
Slice
<
T
>
(
g
*
in_step
,
(
g
+
1
)
*
in_step
);
col2im
(
in_grad_slice
,
col
,
strides
[
0
],
strides
[
1
],
paddings
[
0
],
paddings
[
1
],
device_context
);
col2im
(
context
.
device_context
(),
in_grad_slice
,
col
,
strides
[
0
],
strides
[
1
],
paddings
[
0
],
paddings
[
1
]
);
}
}
}
...
...
@@ -212,15 +207,15 @@ class GemmConvGrad2DKernel : public framework::OpKernel {
Tensor
out_grad_slice
=
out_grad_batch
.
Slice
<
T
>
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
Tensor
in_slice
=
in_batch
.
Slice
<
T
>
(
g
*
in_step
,
(
g
+
1
)
*
in_step
);
im2col
(
in_slice
,
col
,
strides
[
0
],
strides
[
1
],
paddings
[
0
],
paddings
[
1
],
device_context
);
im2col
(
context
.
device_context
(),
in_slice
,
col
,
strides
[
0
],
strides
[
1
],
paddings
[
0
],
paddings
[
1
]
);
// gemm
Tensor
filter_grad_slice
=
filter_grad_
.
Slice
<
T
>
(
g
*
out_step
,
(
g
+
1
)
*
out_step
);
math
::
matmul
<
Place
,
T
>
(
out_grad_slice
,
false
,
col_matrix
,
true
,
T
(
1.0
),
&
filter_grad_slice
,
T
(
1.0
),
device_context
);
math
::
matmul
<
Place
,
T
>
(
context
.
device_context
(),
out_grad_slice
,
false
,
col_matrix
,
true
,
T
(
1.0
),
&
filter_grad_slice
,
T
(
1.0
)
);
}
}
}
...
...
paddle/operators/math/im2col.cc
浏览文件 @
0ee967b5
...
...
@@ -27,9 +27,10 @@ template <class T>
class
Im2ColFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kCFO
,
platform
::
CPUPlace
,
T
>
{
public:
void
operator
()(
const
framework
::
Tensor
&
im
,
framework
::
Tensor
&
col
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
im
,
framework
::
Tensor
&
col
,
int
stride_height
,
int
stride_width
,
int
padding_height
,
int
padding_width
,
platform
::
DeviceContext
*
context
)
{
int
padding_width
)
{
PADDLE_ENFORCE
(
im
.
dims
().
size
()
==
3
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
5
);
...
...
@@ -79,9 +80,9 @@ template <class T>
class
Col2ImFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kCFO
,
platform
::
CPUPlace
,
T
>
{
public:
void
operator
()(
framework
::
Tensor
&
im
,
const
framework
::
Tensor
&
col
,
int
stride_height
,
int
stride_width
,
int
padding_height
,
int
padding_width
,
platform
::
DeviceContext
*
context
)
{
void
operator
()(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
&
im
,
const
framework
::
Tensor
&
col
,
int
stride_height
,
int
stride_width
,
int
padding_height
,
int
padding_width
)
{
PADDLE_ENFORCE
(
im
.
dims
().
size
()
==
3
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
5
);
int
input_channels
=
im
.
dims
()[
0
];
...
...
@@ -137,9 +138,10 @@ template <class T>
class
Im2ColFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kOCF
,
platform
::
CPUPlace
,
T
>
{
public:
void
operator
()(
const
framework
::
Tensor
&
im
,
framework
::
Tensor
&
col
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
im
,
framework
::
Tensor
&
col
,
int
stride_height
,
int
stride_width
,
int
padding_height
,
int
padding_width
,
platform
::
DeviceContext
*
context
)
{
int
padding_width
)
{
PADDLE_ENFORCE
(
im
.
dims
().
size
()
==
3
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
5
);
int
input_channels
=
im
.
dims
()[
0
];
...
...
@@ -197,9 +199,9 @@ template <class T>
class
Col2ImFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kOCF
,
platform
::
CPUPlace
,
T
>
{
public:
void
operator
()(
framework
::
Tensor
&
im
,
const
framework
::
Tensor
&
col
,
int
stride_height
,
int
stride_width
,
int
padding_height
,
int
padding_width
,
platform
::
DeviceContext
*
context
)
{
void
operator
()(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
&
im
,
const
framework
::
Tensor
&
col
,
int
stride_height
,
int
stride_width
,
int
padding_height
,
int
padding_width
)
{
PADDLE_ENFORCE
(
im
.
dims
().
size
()
==
3
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
5
);
int
input_channels
=
im
.
dims
()[
0
];
...
...
paddle/operators/math/im2col.cu
浏览文件 @
0ee967b5
...
...
@@ -64,9 +64,10 @@ template <class T>
class
Im2ColFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kCFO
,
platform
::
GPUPlace
,
T
>
{
public:
void
operator
()(
const
framework
::
Tensor
&
im
,
framework
::
Tensor
&
col
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
im
,
framework
::
Tensor
&
col
,
int
stride_height
,
int
stride_width
,
int
padding_height
,
int
padding_width
,
platform
::
DeviceContext
*
context
)
{
int
padding_width
)
{
PADDLE_ENFORCE
(
im
.
dims
().
size
()
==
3
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
5
);
...
...
@@ -84,9 +85,9 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kCFO,
int
block_y
=
(
blocks
+
512
-
1
)
/
512
;
dim3
threads
(
1024
,
1
);
dim3
grid
(
block_x
,
block_y
);
im2col
<
T
><<<
grid
,
threads
,
0
,
reinterpret_cast
<
platform
::
CUDADeviceContext
*>
(
context
)
->
stream
()
>>>
(
im2col
<
T
><<<
grid
,
threads
,
0
,
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
.
stream
()
>>>
(
im
.
data
<
T
>
(),
num_outputs
,
input_height
,
input_width
,
filter_height
,
filter_width
,
stride_height
,
stride_width
,
padding_height
,
padding_width
,
output_height
,
output_width
,
col
.
data
<
T
>
());
...
...
@@ -149,9 +150,9 @@ template <class T>
class
Col2ImFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kCFO
,
platform
::
GPUPlace
,
T
>
{
public:
void
operator
()(
framework
::
Tensor
&
im
,
const
framework
::
Tensor
&
col
,
int
stride_height
,
int
stride_width
,
int
padding_height
,
int
padding_width
,
platform
::
DeviceContext
*
context
)
{
void
operator
()(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
&
im
,
const
framework
::
Tensor
&
col
,
int
stride_height
,
int
stride_width
,
int
padding_height
,
int
padding_width
)
{
PADDLE_ENFORCE
(
im
.
dims
().
size
()
==
3
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
5
);
...
...
@@ -174,9 +175,9 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kCFO,
// To avoid involving atomic operations, we will launch one kernel per
// bottom dimension, and then in the kernel add up the top dimensions.
col2im
<
T
><<<
grid
,
threads
,
0
,
reinterpret_cast
<
platform
::
CUDADeviceContext
*>
(
context
)
->
stream
()
>>>
(
col2im
<
T
><<<
grid
,
threads
,
0
,
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
.
stream
()
>>>
(
num_kernels
,
col
.
data
<
T
>
(),
input_height
+
2
*
padding_height
,
input_width
+
2
*
padding_width
,
input_channels
,
filter_height
,
filter_width
,
stride_height
,
stride_width
,
padding_height
,
...
...
@@ -235,9 +236,10 @@ template <class T>
class
Im2ColFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kOCF
,
platform
::
GPUPlace
,
T
>
{
public:
void
operator
()(
const
framework
::
Tensor
&
im
,
framework
::
Tensor
&
col
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
im
,
framework
::
Tensor
&
col
,
int
stride_height
,
int
stride_width
,
int
padding_height
,
int
padding_width
,
platform
::
DeviceContext
*
context
)
{
int
padding_width
)
{
PADDLE_ENFORCE
(
im
.
dims
().
size
()
==
3
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
5
);
int
input_channels
=
im
.
dims
()[
0
];
...
...
@@ -268,9 +270,9 @@ class Im2ColFunctor<paddle::operators::math::ColFormat::kOCF,
dim3
threads
(
block_dim_x
,
block_dim_y
,
std
::
min
(
block_dim_z
,
input_channels
));
dim3
grid
(
output_width
,
output_height
);
im2colOCF
<
T
><<<
grid
,
threads
,
0
,
reinterpret_cast
<
platform
::
CUDADeviceContext
*>
(
context
)
->
stream
()
>>>
(
im2colOCF
<
T
><<<
grid
,
threads
,
0
,
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
.
stream
()
>>>
(
im
.
data
<
T
>
(),
col
.
data
<
T
>
(),
input_channels
,
input_height
,
input_width
,
filter_height
,
filter_width
,
stride_height
,
stride_width
,
padding_height
,
padding_width
,
output_height
,
output_width
);
...
...
@@ -318,9 +320,9 @@ template <class T>
class
Col2ImFunctor
<
paddle
::
operators
::
math
::
ColFormat
::
kOCF
,
platform
::
GPUPlace
,
T
>
{
public:
void
operator
()(
framework
::
Tensor
&
im
,
const
framework
::
Tensor
&
col
,
int
stride_height
,
int
stride_width
,
int
padding_height
,
int
padding_width
,
platform
::
DeviceContext
*
context
)
{
void
operator
()(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
&
im
,
const
framework
::
Tensor
&
col
,
int
stride_height
,
int
stride_width
,
int
padding_height
,
int
padding_width
)
{
PADDLE_ENFORCE
(
im
.
dims
().
size
()
==
3
);
PADDLE_ENFORCE
(
col
.
dims
().
size
()
==
5
);
int
input_channels
=
im
.
dims
()[
0
];
...
...
@@ -351,9 +353,9 @@ class Col2ImFunctor<paddle::operators::math::ColFormat::kOCF,
dim3
threads
(
block_dim_x
,
block_dim_y
,
std
::
min
(
block_dim_z
,
input_channels
));
dim3
grid
(
output_width
,
output_height
);
col2imOCF
<
T
><<<
grid
,
threads
,
0
,
reinterpret_cast
<
platform
::
CUDADeviceContext
*>
(
context
)
->
stream
()
>>>
(
col2imOCF
<
T
><<<
grid
,
threads
,
0
,
reinterpret_cast
<
const
platform
::
CUDADeviceContext
&>
(
context
)
.
stream
()
>>>
(
im
.
data
<
T
>
(),
col
.
data
<
T
>
(),
input_channels
,
input_height
,
input_width
,
filter_height
,
filter_width
,
stride_height
,
stride_width
,
padding_height
,
padding_width
,
output_height
,
output_width
);
...
...
paddle/operators/math/im2col.h
浏览文件 @
0ee967b5
...
...
@@ -72,17 +72,18 @@ enum class ColFormat { kCFO = 0, kOCF = 1 };
template
<
ColFormat
Format
,
typename
Place
,
typename
T
>
class
Im2ColFunctor
{
public:
void
operator
()(
const
framework
::
Tensor
&
im
,
framework
::
Tensor
&
col
,
void
operator
()(
const
platform
::
DeviceContext
&
context
,
const
framework
::
Tensor
&
im
,
framework
::
Tensor
&
col
,
int
stride_height
,
int
stride_width
,
int
padding_height
,
int
padding_width
,
platform
::
DeviceContext
*
context
);
int
padding_width
);
};
template
<
ColFormat
Format
,
typename
Place
,
typename
T
>
class
Col2ImFunctor
{
public:
void
operator
()(
framework
::
Tensor
&
im
,
const
framework
::
Tensor
&
col
,
int
stride_height
,
int
stride_width
,
int
padding_height
,
int
padding_width
,
platform
::
DeviceContext
*
context
);
void
operator
()(
const
platform
::
DeviceContext
&
context
,
framework
::
Tensor
&
im
,
const
framework
::
Tensor
&
col
,
int
stride_height
,
int
stride_width
,
int
padding_height
,
int
padding_width
);
};
}
// namespace math
...
...
paddle/operators/math/im2col_test.cc
浏览文件 @
0ee967b5
...
...
@@ -78,8 +78,8 @@ void testIm2col() {
PADDLE_THROW
(
"no GPU support"
);
#endif // PADDLE_ONLY_CPU
}
im2col
(
input
,
output_cfo
,
stride
,
stride
,
padding
,
padding
,
context
);
im2col_ocf
(
input
,
output_ocf
,
stride
,
stride
,
padding
,
padding
,
context
);
im2col
(
*
context
,
input
,
output_cfo
,
stride
,
stride
,
padding
,
padding
);
im2col_ocf
(
*
context
,
input
,
output_ocf
,
stride
,
stride
,
padding
,
padding
);
float
*
out_cfo_ptr
;
if
(
paddle
::
platform
::
is_cpu_place
(
*
place
))
{
...
...
python/paddle/v2/framework/tests/test_conv2d_op.py
浏览文件 @
0ee967b5
...
...
@@ -73,13 +73,22 @@ class TestConv2dOp(OpTest):
self
.
check_output
()
def
test_check_grad
(
self
):
self
.
check_grad
(
set
([
'Input'
,
'Filter'
]),
'Output'
)
self
.
check_grad
(
set
([
'Input'
,
'Filter'
]),
'Output'
,
max_relative_error
=
0.05
)
def
test_check_grad_no_filter
(
self
):
self
.
check_grad
([
'Input'
],
'Output'
,
no_grad_set
=
set
([
'Filter'
]))
self
.
check_grad
(
[
'Input'
],
'Output'
,
max_relative_error
=
0.05
,
no_grad_set
=
set
([
'Filter'
]))
def
test_check_grad_no_input
(
self
):
self
.
check_grad
([
'Filter'
],
'Output'
,
no_grad_set
=
set
([
'Input'
]))
self
.
check_grad
(
[
'Filter'
],
'Output'
,
max_relative_error
=
0.05
,
no_grad_set
=
set
([
'Input'
]))
def
init_groups
(
self
):
self
.
groups
=
1
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录