Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
5e0614a1
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
未验证
提交
5e0614a1
编写于
10月 10, 2022
作者:
R
Rayman
提交者:
GitHub
10月 10, 2022
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
【Hackathon No.56&38】deformable_conv_v1 算子实现 float16 数据类型支持&前向运行加速 (#46111)
support fp16 for deformable conv
上级
a7e1b9d2
变更
9
隐藏空白更改
内联
并排
Showing
9 changed file
with
275 addition
and
137 deletion
+275
-137
paddle/phi/kernels/cpu/deformable_conv_grad_kernel.cc
paddle/phi/kernels/cpu/deformable_conv_grad_kernel.cc
+17
-17
paddle/phi/kernels/funcs/deformable_conv_functor.cc
paddle/phi/kernels/funcs/deformable_conv_functor.cc
+3
-3
paddle/phi/kernels/funcs/deformable_conv_functor.cu
paddle/phi/kernels/funcs/deformable_conv_functor.cu
+30
-11
paddle/phi/kernels/funcs/deformable_conv_functor.h
paddle/phi/kernels/funcs/deformable_conv_functor.h
+29
-26
paddle/phi/kernels/gpu/deformable_conv_grad_kernel.cu
paddle/phi/kernels/gpu/deformable_conv_grad_kernel.cu
+47
-43
paddle/phi/kernels/gpu/deformable_conv_kernel.cu
paddle/phi/kernels/gpu/deformable_conv_kernel.cu
+2
-1
paddle/phi/kernels/impl/deformable_conv_grad_kernel_impl.h
paddle/phi/kernels/impl/deformable_conv_grad_kernel_impl.h
+33
-23
paddle/phi/kernels/impl/deformable_conv_kernel_impl.h
paddle/phi/kernels/impl/deformable_conv_kernel_impl.h
+41
-5
python/paddle/fluid/tests/unittests/test_deformable_conv_v1_op.py
...addle/fluid/tests/unittests/test_deformable_conv_v1_op.py
+73
-8
未找到文件。
paddle/phi/kernels/cpu/deformable_conv_grad_kernel.cc
浏览文件 @
5e0614a1
...
...
@@ -97,14 +97,14 @@ inline void ModulatedDeformableCol2imCPUKernel(
width
);
*
(
grad_im
+
cur_bottom_grad_pos
)
=
*
(
grad_im
+
cur_bottom_grad_pos
)
+
weight
*
cur_top_grad
;
*
(
grad_im
+
cur_bottom_grad_pos
)
+
(
weight
*
cur_top_grad
)
;
}
}
}
}
}
template
<
typename
T
,
typename
Context
>
template
<
typename
T
,
typename
MT
,
typename
Context
>
void
ModulatedDeformableCol2im
(
const
Context
&
dev_ctx
,
const
T
*
data_col
,
const
T
*
data_offset
,
...
...
@@ -116,7 +116,7 @@ void ModulatedDeformableCol2im(const Context& dev_ctx,
const
std
::
vector
<
int
>&
stride
,
const
std
::
vector
<
int
>&
dilation
,
const
int
deformable_group
,
T
*
grad_im
)
{
M
T
*
grad_im
)
{
int
channel_per_deformable_group
=
im_shape
[
0
]
/
deformable_group
;
int
num_kernels
=
col_shape
[
0
]
*
col_shape
[
1
]
*
col_shape
[
2
]
*
col_shape
[
3
];
...
...
@@ -222,22 +222,22 @@ void ModulatedDeformableCol2imCoordCPUKernel(
if
(
inv_h
<=
-
1
||
inv_w
<=
-
1
||
inv_h
>=
height
||
inv_w
>=
width
)
{
inv_h
=
inv_w
=
-
2
;
}
else
{
mval
+=
data_col_ptr
[
col_pos
]
*
funcs
::
DmcnIm2colBilinear
(
data_im_ptr
+
cnt
*
height
*
width
,
width
,
height
,
width
,
inv_h
,
inv_w
);
mval
+=
data_col_ptr
[
col_pos
]
*
funcs
::
DmcnIm2colBilinear
<
T
,
T
>
(
data_im_ptr
+
cnt
*
height
*
width
,
width
,
height
,
width
,
inv_h
,
inv_w
);
}
const
T
weight
=
DmcnGetCoordinateWeight
(
inv_h
,
inv_w
,
height
,
width
,
data_im_ptr
+
cnt
*
height
*
width
,
width
,
bp_dir
);
DmcnGetCoordinateWeight
<
T
,
T
>
(
inv_h
,
inv_w
,
height
,
width
,
data_im_ptr
+
cnt
*
height
*
width
,
width
,
bp_dir
);
if
(
data_mask_ptr
)
{
const
int
data_mask_hw_ptr
=
(((
i
*
kernel_w
+
j
)
*
height_col
+
h_out
)
*
width_col
+
w_out
);
...
...
paddle/phi/kernels/funcs/deformable_conv_functor.cc
浏览文件 @
5e0614a1
...
...
@@ -13,8 +13,8 @@
// limitations under the License.
#include "paddle/phi/kernels/funcs/deformable_conv_functor.h"
#include "paddle/phi/backends/cpu/cpu_context.h"
#include "paddle/phi/common/amp_type_traits.h"
namespace
phi
{
namespace
funcs
{
...
...
@@ -82,8 +82,8 @@ inline void ModulatedDeformableIm2colCPUKernel(
const
T
h_im
=
h_in
+
i
*
dilation_h
+
offset_h
;
const
T
w_im
=
w_in
+
j
*
dilation_w
+
offset_w
;
if
(
h_im
>
-
1
&&
w_im
>
-
1
&&
h_im
<
height
&&
w_im
<
width
)
{
val
=
DmcnIm2colBilinear
(
data_im_ptr
,
width
,
height
,
width
,
h_im
,
w_im
);
val
=
DmcnIm2colBilinear
<
T
,
T
>
(
data_im_ptr
,
width
,
height
,
width
,
h_im
,
w_im
);
}
*
data_col_ptr
=
val
;
if
(
data_mask_ptr
)
{
...
...
paddle/phi/kernels/funcs/deformable_conv_functor.cu
浏览文件 @
5e0614a1
...
...
@@ -12,8 +12,11 @@
// See the License for the specific language governing permissions and
// limitations under the License.
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/kernels/funcs/deformable_conv_functor.h"
#include "paddle/phi/backends/gpu/gpu_context.h"
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/device_context.h"
namespace
phi
{
namespace
funcs
{
...
...
@@ -51,6 +54,8 @@ __global__ void ModulatedDeformableIm2colGpuKernel(
T
*
data_col
)
{
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
offset
=
blockDim
.
x
*
gridDim
.
x
;
using
MT
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
for
(
size_t
i
=
index
;
i
<
nthreads
;
i
+=
offset
)
{
const
int
w_col
=
i
%
width_col
;
const
int
h_col
=
(
i
/
width_col
)
%
height_col
;
...
...
@@ -85,22 +90,22 @@ __global__ void ModulatedDeformableIm2colGpuKernel(
((
2
*
(
i
*
kernel_w
+
j
)
+
1
)
*
height_col
+
h_col
)
*
width_col
+
w_col
;
const
T
offset_h
=
data_offset_ptr
[
data_offset_h_ptr
]
;
const
T
offset_w
=
data_offset_ptr
[
data_offset_w_ptr
]
;
T
val
=
static_cast
<
T
>
(
0
);
const
T
h_im
=
h_in
+
i
*
dilation_h
+
offset_h
;
const
T
w_im
=
w_in
+
j
*
dilation_w
+
offset_w
;
const
MT
offset_h
=
static_cast
<
MT
>
(
data_offset_ptr
[
data_offset_h_ptr
])
;
const
MT
offset_w
=
static_cast
<
MT
>
(
data_offset_ptr
[
data_offset_w_ptr
])
;
MT
val
=
static_cast
<
M
T
>
(
0
);
const
M
T
h_im
=
h_in
+
i
*
dilation_h
+
offset_h
;
const
M
T
w_im
=
w_in
+
j
*
dilation_w
+
offset_w
;
if
(
h_im
>
-
1
&&
w_im
>
-
1
&&
h_im
<
height
&&
w_im
<
width
)
{
val
=
DmcnIm2colBilinear
(
data_im_ptr
,
width
,
height
,
width
,
h_im
,
w_im
);
val
=
DmcnIm2colBilinear
<
T
,
MT
>
(
data_im_ptr
,
width
,
height
,
width
,
h_im
,
w_im
);
}
*
data_col_ptr
=
val
;
if
(
data_mask_ptr
)
{
const
int
data_mask_hw_ptr
=
((
i
*
kernel_w
+
j
)
*
height_col
+
h_col
)
*
width_col
+
w_col
;
const
T
mask
=
data_mask_ptr
[
data_mask_hw_ptr
]
;
*
data_col_ptr
*=
mask
;
const
MT
mask
=
static_cast
<
MT
>
(
data_mask_ptr
[
data_mask_hw_ptr
])
;
val
*=
mask
;
}
*
data_col_ptr
=
static_cast
<
T
>
(
val
);
data_col_ptr
+=
batch_size
*
height_col
*
width_col
;
}
}
...
...
@@ -164,6 +169,20 @@ template void ModulatedDeformableIm2col(
const
int
deformable_groups
,
float
*
data_col
);
template
void
ModulatedDeformableIm2col
(
const
phi
::
GPUContext
&
dev_ctx
,
const
phi
::
dtype
::
float16
*
data_im
,
const
phi
::
dtype
::
float16
*
data_offset
,
const
phi
::
dtype
::
float16
*
data_mask
,
const
std
::
vector
<
int64_t
>
&
im_shape
,
const
std
::
vector
<
int64_t
>&
col_shape
,
const
std
::
vector
<
int64_t
>&
filter_shape
,
const
std
::
vector
<
int
>&
paddings
,
const
std
::
vector
<
int
>&
strides
,
const
std
::
vector
<
int
>&
dilations
,
const
int
deformable_groups
,
phi
::
dtype
::
float16
*
data_col
);
template
void
ModulatedDeformableIm2col
(
const
phi
::
GPUContext
&
dev_ctx
,
const
double
*
data_im
,
...
...
paddle/phi/kernels/funcs/deformable_conv_functor.h
浏览文件 @
5e0614a1
...
...
@@ -14,44 +14,47 @@
#pragma once
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/common/float16.h"
#include "paddle/phi/core/dense_tensor.h"
namespace
phi
{
namespace
funcs
{
template
<
typename
T
>
HOSTDEVICE
T
DmcnIm2colBilinear
(
const
T
*
bottom_data
,
const
int
data_width
,
const
int
height
,
const
int
width
,
T
h
,
T
w
)
{
template
<
typename
T
,
typename
MT
>
HOSTDEVICE
M
T
DmcnIm2colBilinear
(
const
T
*
bottom_data
,
const
int
data_width
,
const
int
height
,
const
int
width
,
M
T
h
,
M
T
w
)
{
int
h_low
=
floor
(
h
);
int
w_low
=
floor
(
w
);
int
h_high
=
h_low
+
1
;
int
w_high
=
w_low
+
1
;
T
lh
=
h
-
h_low
;
T
lw
=
w
-
w_low
;
T
hh
=
1
-
lh
;
T
hw
=
1
-
lw
;
M
T
lh
=
h
-
h_low
;
M
T
lw
=
w
-
w_low
;
M
T
hh
=
1
-
lh
;
M
T
hw
=
1
-
lw
;
T
v1
=
(
h_low
>=
0
&&
w_low
>=
0
)
?
bottom_data
[
h_low
*
data_width
+
w_low
]
:
0
;
T
v2
=
(
h_low
>=
0
&&
w_high
<=
width
-
1
)
?
bottom_data
[
h_low
*
data_width
+
w_high
]
:
0
;
T
v3
=
(
h_high
<=
height
-
1
&&
w_low
>=
0
)
?
bottom_data
[
h_high
*
data_width
+
w_low
]
:
0
;
T
v4
=
(
h_high
<=
height
-
1
&&
w_high
<=
width
-
1
)
?
bottom_data
[
h_high
*
data_width
+
w_high
]
:
0
;
MT
v1
=
(
h_low
>=
0
&&
w_low
>=
0
)
?
static_cast
<
MT
>
(
bottom_data
[
h_low
*
data_width
+
w_low
])
:
0
;
MT
v2
=
(
h_low
>=
0
&&
w_high
<=
width
-
1
)
?
static_cast
<
MT
>
(
bottom_data
[
h_low
*
data_width
+
w_high
])
:
0
;
MT
v3
=
(
h_high
<=
height
-
1
&&
w_low
>=
0
)
?
static_cast
<
MT
>
(
bottom_data
[
h_high
*
data_width
+
w_low
])
:
0
;
MT
v4
=
(
h_high
<=
height
-
1
&&
w_high
<=
width
-
1
)
?
static_cast
<
MT
>
(
bottom_data
[
h_high
*
data_width
+
w_high
])
:
0
;
T
w1
=
hh
*
hw
;
T
w2
=
hh
*
lw
;
T
w3
=
lh
*
hw
;
T
w4
=
lh
*
lw
;
M
T
w1
=
hh
*
hw
;
M
T
w2
=
hh
*
lw
;
M
T
w3
=
lh
*
hw
;
M
T
w4
=
lh
*
lw
;
return
w1
*
v1
+
w2
*
v2
+
w3
*
v3
+
w4
*
v4
;
}
...
...
paddle/phi/kernels/gpu/deformable_conv_grad_kernel.cu
浏览文件 @
5e0614a1
...
...
@@ -29,7 +29,7 @@ static inline int NumBlocks(const int N) {
kNumMaximumNumBlocks
);
}
template
<
typename
T
>
template
<
typename
T
,
typename
MT
>
__global__
void
ModulatedDeformableCol2imGpuKernel
(
const
int
nthreads
,
const
T
*
data_col
,
...
...
@@ -51,9 +51,10 @@ __global__ void ModulatedDeformableCol2imGpuKernel(
const
int
deformable_group
,
const
int
height_col
,
const
int
width_col
,
T
*
grad_im
)
{
M
T
*
grad_im
)
{
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
offset
=
blockDim
.
x
*
gridDim
.
x
;
// using MT = typename phi::dtype::MPTypeTrait<T>::Type;
for
(
size_t
thread
=
index
;
thread
<
nthreads
;
thread
+=
offset
)
{
const
int
j
=
(
thread
/
width_col
/
height_col
/
batch_size
)
%
kernel_w
;
const
int
i
=
...
...
@@ -78,17 +79,17 @@ __global__ void ModulatedDeformableCol2imGpuKernel(
((
2
*
(
i
*
kernel_w
+
j
)
+
1
)
*
height_col
+
h_out
)
*
width_col
+
w_out
;
const
int
data_mask_hw_ptr
=
((
i
*
kernel_w
+
j
)
*
height_col
+
h_out
)
*
width_col
+
w_out
;
const
T
offset_h
=
data_offset_ptr
[
data_offset_h_ptr
]
;
const
T
offset_w
=
data_offset_ptr
[
data_offset_w_ptr
]
;
const
T
cur_inv_h_data
=
h_in
+
i
*
dilation_h
+
offset_h
;
const
T
cur_inv_w_data
=
w_in
+
j
*
dilation_w
+
offset_w
;
const
MT
offset_h
=
static_cast
<
MT
>
(
data_offset_ptr
[
data_offset_h_ptr
])
;
const
MT
offset_w
=
static_cast
<
MT
>
(
data_offset_ptr
[
data_offset_w_ptr
])
;
const
M
T
cur_inv_h_data
=
h_in
+
i
*
dilation_h
+
offset_h
;
const
M
T
cur_inv_w_data
=
w_in
+
j
*
dilation_w
+
offset_w
;
T
cur_top_grad
=
data_col
[
thread
]
;
MT
cur_top_grad
=
static_cast
<
MT
>
(
data_col
[
thread
])
;
if
(
data_mask
)
{
const
T
*
data_mask_ptr
=
data_mask
+
(
b
*
deformable_group
+
deformable_group_index
)
*
kernel_h
*
kernel_w
*
height_col
*
width_col
;
const
T
mask
=
data_mask_ptr
[
data_mask_hw_ptr
]
;
const
MT
mask
=
static_cast
<
MT
>
(
data_mask_ptr
[
data_mask_hw_ptr
])
;
cur_top_grad
*=
mask
;
}
const
int
cur_h
=
static_cast
<
int
>
(
cur_inv_h_data
);
...
...
@@ -100,13 +101,12 @@ __global__ void ModulatedDeformableCol2imGpuKernel(
abs
(
cur_inv_w_data
-
(
cur_w
+
dx
))
<
1
)
{
int
cur_bottom_grad_pos
=
((
b
*
channels
+
c
)
*
height
+
cur_h
+
dy
)
*
width
+
cur_w
+
dx
;
T
weight
=
DmcnGetGradientWeight
(
cur_inv_h_data
,
cur_inv_w_data
,
cur_h
+
dy
,
cur_w
+
dx
,
height
,
width
);
MT
weight
=
DmcnGetGradientWeight
(
cur_inv_h_data
,
cur_inv_w_data
,
cur_h
+
dy
,
cur_w
+
dx
,
height
,
width
);
paddle
::
platform
::
CudaAtomicAdd
(
grad_im
+
cur_bottom_grad_pos
,
weight
*
cur_top_grad
);
}
...
...
@@ -115,7 +115,7 @@ __global__ void ModulatedDeformableCol2imGpuKernel(
}
}
template
<
typename
T
,
typename
Context
>
template
<
typename
T
,
typename
MT
,
typename
Context
>
void
ModulatedDeformableCol2im
(
const
Context
&
dev_ctx
,
const
T
*
data_col
,
const
T
*
data_offset
,
...
...
@@ -127,13 +127,13 @@ void ModulatedDeformableCol2im(const Context& dev_ctx,
const
std
::
vector
<
int
>&
stride
,
const
std
::
vector
<
int
>&
dilation
,
const
int
deformable_group
,
T
*
grad_im
)
{
M
T
*
grad_im
)
{
int
channel_per_deformable_group
=
im_shape
[
0
]
/
deformable_group
;
int
num_kernels
=
col_shape
[
0
]
*
col_shape
[
1
]
*
col_shape
[
2
]
*
col_shape
[
3
];
int
blocks
=
NumBlocks
(
num_kernels
);
int
threads
=
kNumCUDAThreads
;
ModulatedDeformableCol2imGpuKernel
<
T
>
ModulatedDeformableCol2imGpuKernel
<
T
,
MT
>
<<<
blocks
,
threads
,
0
,
dev_ctx
.
stream
()
>>>
(
num_kernels
,
data_col
,
data_offset
,
...
...
@@ -185,8 +185,9 @@ __global__ void ModulatedDeformableCol2imCoordGpuKernel(
T
*
grad_mask
)
{
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
offset
=
blockDim
.
x
*
gridDim
.
x
;
using
MT
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
for
(
size_t
i
=
index
;
i
<
nthreads
;
i
+=
offset
)
{
T
val
=
0
,
mval
=
0
;
M
T
val
=
0
,
mval
=
0
;
const
int
w
=
i
%
width_col
;
const
int
h
=
(
i
/
width_col
)
%
height_col
;
const
int
c
=
(
i
/
width_col
/
height_col
)
%
offset_channels
;
...
...
@@ -231,40 +232,42 @@ __global__ void ModulatedDeformableCol2imCoordGpuKernel(
const
int
data_offset_w_ptr
=
(((
2
*
(
i
*
kernel_w
+
j
)
+
1
)
*
height_col
+
h_out
)
*
width_col
+
w_out
);
const
T
offset_h
=
data_offset_ptr
[
data_offset_h_ptr
];
const
T
offset_w
=
data_offset_ptr
[
data_offset_w_ptr
];
T
inv_h
=
h_in
+
i
*
dilation_h
+
offset_h
;
T
inv_w
=
w_in
+
j
*
dilation_w
+
offset_w
;
const
MT
offset_h
=
static_cast
<
MT
>
(
data_offset_ptr
[
data_offset_h_ptr
]);
const
MT
offset_w
=
static_cast
<
MT
>
(
data_offset_ptr
[
data_offset_w_ptr
]);
MT
inv_h
=
h_in
+
i
*
dilation_h
+
offset_h
;
MT
inv_w
=
w_in
+
j
*
dilation_w
+
offset_w
;
if
(
inv_h
<=
-
1
||
inv_w
<=
-
1
||
inv_h
>=
height
||
inv_w
>=
width
)
{
inv_h
=
inv_w
=
-
2
;
}
else
{
mval
+=
data_col_ptr
[
col_pos
]
*
funcs
::
DmcnIm2colBilinear
(
data_im_ptr
+
cnt
*
height
*
width
,
width
,
height
,
width
,
inv_h
,
inv_w
);
mval
+=
static_cast
<
MT
>
(
data_col_ptr
[
col_pos
])
*
funcs
::
DmcnIm2colBilinear
<
T
,
MT
>
(
data_im_ptr
+
cnt
*
height
*
width
,
width
,
height
,
width
,
inv_h
,
inv_w
);
}
const
T
weight
=
DmcnGetCoordinateWeight
(
inv_h
,
inv_w
,
height
,
width
,
data_im_ptr
+
cnt
*
height
*
width
,
width
,
bp_dir
);
const
M
T
weight
=
DmcnGetCoordinateWeight
<
T
,
MT
>
(
inv_h
,
inv_w
,
height
,
width
,
data_im_ptr
+
cnt
*
height
*
width
,
width
,
bp_dir
);
if
(
data_mask_ptr
)
{
const
int
data_mask_hw_ptr
=
(((
i
*
kernel_w
+
j
)
*
height_col
+
h_out
)
*
width_col
+
w_out
);
const
T
mask
=
data_mask_ptr
[
data_mask_hw_ptr
]
;
val
+=
weight
*
data_col_ptr
[
col_pos
]
*
mask
;
const
MT
mask
=
static_cast
<
MT
>
(
data_mask_ptr
[
data_mask_hw_ptr
])
;
val
+=
weight
*
static_cast
<
MT
>
(
data_col_ptr
[
col_pos
])
*
mask
;
}
else
{
val
+=
weight
*
data_col_ptr
[
col_pos
]
;
val
+=
weight
*
static_cast
<
MT
>
(
data_col_ptr
[
col_pos
])
;
}
cnt
+=
1
;
}
grad_offset
[
i
]
=
val
;
grad_offset
[
i
]
=
static_cast
<
T
>
(
val
)
;
if
(
grad_mask
&&
offset_c
%
2
==
0
)
grad_mask
[(((
b
*
deformable_group
+
deformable_group_index
)
*
kernel_h
*
kernel_w
+
...
...
@@ -359,4 +362,5 @@ PD_REGISTER_KERNEL(deformable_conv_grad,
ALL_LAYOUT
,
phi
::
DeformableConvGradKernel
,
float
,
double
)
{}
double
,
paddle
::
platform
::
float16
)
{}
paddle/phi/kernels/gpu/deformable_conv_kernel.cu
浏览文件 @
5e0614a1
...
...
@@ -23,4 +23,5 @@ PD_REGISTER_KERNEL(deformable_conv,
ALL_LAYOUT
,
phi
::
DeformableConvKernel
,
float
,
double
)
{}
double
,
phi
::
dtype
::
float16
)
{}
paddle/phi/kernels/impl/deformable_conv_grad_kernel_impl.h
浏览文件 @
5e0614a1
...
...
@@ -14,8 +14,10 @@
#pragma once
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/hostdevice.h"
#include "paddle/phi/kernels/cast_kernel.h"
#include "paddle/phi/kernels/empty_kernel.h"
#include "paddle/phi/kernels/full_kernel.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"
...
...
@@ -58,14 +60,14 @@ HOSTDEVICE T DmcnGetGradientWeight(T argmax_h,
return
weight
;
}
template
<
typename
T
>
HOSTDEVICE
T
DmcnGetCoordinateWeight
(
T
argmax_h
,
T
argmax_w
,
const
int
height
,
const
int
width
,
const
T
*
im_data
,
const
int
data_width
,
const
int
bp_dir
)
{
template
<
typename
T
,
typename
MT
>
HOSTDEVICE
MT
DmcnGetCoordinateWeight
(
M
T
argmax_h
,
M
T
argmax_w
,
const
int
height
,
const
int
width
,
const
T
*
im_data
,
const
int
data_width
,
const
int
bp_dir
)
{
if
(
argmax_h
<=
-
1
||
argmax_h
>=
height
||
argmax_w
<=
-
1
||
argmax_w
>=
width
)
{
return
0
;
...
...
@@ -76,43 +78,51 @@ HOSTDEVICE T DmcnGetCoordinateWeight(T argmax_h,
int
argmax_h_high
=
argmax_h_low
+
1
;
int
argmax_w_high
=
argmax_w_low
+
1
;
T
weight
=
0
;
M
T
weight
=
0
;
if
(
bp_dir
==
0
)
{
weight
+=
(
argmax_h_low
>=
0
&&
argmax_w_low
>=
0
)
?
-
1
*
(
argmax_w_low
+
1
-
argmax_w
)
*
im_data
[
argmax_h_low
*
data_width
+
argmax_w_low
]
static_cast
<
MT
>
(
im_data
[
argmax_h_low
*
data_width
+
argmax_w_low
])
:
0
;
weight
+=
(
argmax_h_low
>=
0
&&
argmax_w_high
<=
width
-
1
)
?
-
1
*
(
argmax_w
-
argmax_w_low
)
*
im_data
[
argmax_h_low
*
data_width
+
argmax_w_high
]
static_cast
<
MT
>
(
im_data
[
argmax_h_low
*
data_width
+
argmax_w_high
])
:
0
;
weight
+=
(
argmax_h_high
<=
height
-
1
&&
argmax_w_low
>=
0
)
?
(
argmax_w_low
+
1
-
argmax_w
)
*
im_data
[
argmax_h_high
*
data_width
+
argmax_w_low
]
static_cast
<
MT
>
(
im_data
[
argmax_h_high
*
data_width
+
argmax_w_low
])
:
0
;
weight
+=
(
argmax_h_high
<=
height
-
1
&&
argmax_w_high
<=
width
-
1
)
?
(
argmax_w
-
argmax_w_low
)
*
im_data
[
argmax_h_high
*
data_width
+
argmax_w_high
]
static_cast
<
MT
>
(
im_data
[
argmax_h_high
*
data_width
+
argmax_w_high
])
:
0
;
}
else
if
(
bp_dir
==
1
)
{
weight
+=
(
argmax_h_low
>=
0
&&
argmax_w_low
>=
0
)
?
-
1
*
(
argmax_h_low
+
1
-
argmax_h
)
*
im_data
[
argmax_h_low
*
data_width
+
argmax_w_low
]
static_cast
<
MT
>
(
im_data
[
argmax_h_low
*
data_width
+
argmax_w_low
])
:
0
;
weight
+=
(
argmax_h_low
>=
0
&&
argmax_w_high
<=
width
-
1
)
?
(
argmax_h_low
+
1
-
argmax_h
)
*
im_data
[
argmax_h_low
*
data_width
+
argmax_w_high
]
static_cast
<
MT
>
(
im_data
[
argmax_h_low
*
data_width
+
argmax_w_high
])
:
0
;
weight
+=
(
argmax_h_high
<=
height
-
1
&&
argmax_w_low
>=
0
)
?
-
1
*
(
argmax_h
-
argmax_h_low
)
*
im_data
[
argmax_h_high
*
data_width
+
argmax_w_low
]
static_cast
<
MT
>
(
im_data
[
argmax_h_high
*
data_width
+
argmax_w_low
])
:
0
;
weight
+=
(
argmax_h_high
<=
height
-
1
&&
argmax_w_high
<=
width
-
1
)
?
(
argmax_h
-
argmax_h_low
)
*
im_data
[
argmax_h_high
*
data_width
+
argmax_w_high
]
static_cast
<
MT
>
(
im_data
[
argmax_h_high
*
data_width
+
argmax_w_high
])
:
0
;
}
...
...
@@ -135,7 +145,7 @@ void ModulatedDeformableCol2imCoord(const Context& dev_ctx,
T
*
grad_offset
,
T
*
grad_mask
);
template
<
typename
T
,
typename
Context
>
template
<
typename
T
,
typename
MT
,
typename
Context
>
void
ModulatedDeformableCol2im
(
const
Context
&
dev_ctx
,
const
T
*
data_col
,
const
T
*
data_offset
,
...
...
@@ -147,7 +157,7 @@ void ModulatedDeformableCol2im(const Context& dev_ctx,
const
std
::
vector
<
int
>&
stride
,
const
std
::
vector
<
int
>&
dilation
,
const
int
deformable_group
,
T
*
grad_im
);
M
T
*
grad_im
);
template
<
typename
T
,
typename
Context
>
void
FilterGradAddup
(
const
Context
&
dev_ctx
,
...
...
@@ -176,7 +186,7 @@ void DeformableConvGradKernel(const Context& dev_ctx,
DenseTensor
*
filter_grad
,
DenseTensor
*
mask_grad
)
{
const
int
batch_size
=
static_cast
<
int
>
(
x
.
dims
()[
0
]);
using
MT
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
DDim
input_shape
=
phi
::
slice_ddim
(
x
.
dims
(),
1
,
x
.
dims
().
size
());
std
::
vector
<
int64_t
>
input_shape_vec
=
phi
::
vectorize
(
input_shape
);
std
::
vector
<
int64_t
>
filter_shape_vec
(
phi
::
vectorize
(
filter
.
dims
()));
...
...
@@ -292,8 +302,8 @@ void DeformableConvGradKernel(const Context& dev_ctx,
mask_grad_data_ptr
);
}
if
(
dx
)
{
T
*
dx_ptr
=
dx
->
data
<
T
>
(
);
// get grad of input
MT
*
mt_dx_ptr
=
dev_ctx
.
template
Alloc
<
MT
>(
dx
);
ModulatedDeformableCol2im
(
dev_ctx
,
col_buffer_ptr
,
offset_ptr
+
i
*
im2col_step
*
input_offset_dim
,
...
...
@@ -305,7 +315,7 @@ void DeformableConvGradKernel(const Context& dev_ctx,
strides
,
dilations
,
deformable_groups
,
dx_ptr
+
i
*
im2col_step
*
input_dim
);
mt_
dx_ptr
+
i
*
im2col_step
*
input_dim
);
dx
->
Resize
(
x
.
dims
());
}
...
...
paddle/phi/kernels/impl/deformable_conv_kernel_impl.h
浏览文件 @
5e0614a1
...
...
@@ -14,11 +14,13 @@
#pragma once
#include "paddle/phi/common/amp_type_traits.h"
#include "paddle/phi/core/dense_tensor.h"
#include "paddle/phi/core/hostdevice.h"
#include "paddle/phi/kernels/empty_kernel.h"
#include "paddle/phi/kernels/funcs/blas/blas.h"
#include "paddle/phi/kernels/funcs/deformable_conv_functor.h"
#include "paddle/phi/kernels/transpose_kernel.h"
#include "paddle/utils/optional.h"
namespace
phi
{
...
...
@@ -38,6 +40,12 @@ void DeformableConvKernel(const Context& dev_ctx,
DenseTensor
*
out
)
{
const
int
batch_size
=
static_cast
<
int
>
(
x
.
dims
()[
0
]);
int
temp_step
=
std
::
min
(
64
,
batch_size
);
if
(
batch_size
%
temp_step
==
0
)
{
im2col_step
=
temp_step
;
}
using
MT
=
typename
phi
::
dtype
::
MPTypeTrait
<
T
>::
Type
;
std
::
vector
<
int64_t
>
filter_shape_vec
(
phi
::
vectorize
(
filter
.
dims
()));
std
::
vector
<
int64_t
>
output_shape_vec
(
phi
::
vectorize
(
out
->
dims
()));
...
...
@@ -101,8 +109,11 @@ void DeformableConvKernel(const Context& dev_ctx,
dilations
,
deformable_groups
,
col_buffer_ptr
);
DenseTensor
output_3d
=
output_4d
.
Slice
(
i
,
i
+
1
).
Resize
(
phi
::
slice_ddim
(
output_4d
.
dims
(),
1
,
output_4d
.
dims
().
size
()));
DenseTensor
output_3d
=
output_4d
.
Slice
(
i
,
i
+
1
).
Resize
(
phi
::
slice_ddim
(
output_4d
.
dims
(),
1
,
output_4d
.
dims
().
size
()));
// group * C/group * (im2step * H * W)
// get the product of pixel and weight
for
(
int
g
=
0
;
g
<
groups
;
++
g
)
{
DenseTensor
weight_3d_slice
=
weight_3d
.
Slice
(
g
,
g
+
1
).
Resize
(
...
...
@@ -110,8 +121,11 @@ void DeformableConvKernel(const Context& dev_ctx,
DenseTensor
col_buffer_3d_slice
=
col_buffer_3d
.
Slice
(
g
,
g
+
1
).
Resize
(
phi
::
slice_ddim
(
col_buffer_3d
.
dims
(),
1
,
col_buffer_3d
.
dims
().
size
()));
DenseTensor
output_3d_slice
=
output_3d
.
Slice
(
g
,
g
+
1
).
Resize
(
phi
::
slice_ddim
(
output_3d
.
dims
(),
1
,
output_3d
.
dims
().
size
()));
DenseTensor
output_3d_slice
=
output_3d
.
Slice
(
g
,
g
+
1
).
Resize
(
phi
::
slice_ddim
(
output_3d
.
dims
(),
1
,
output_3d
.
dims
().
size
()));
// C * ((im2col_step)*H*W))
blas
.
MatMul
(
weight_3d_slice
,
false
,
col_buffer_3d_slice
,
...
...
@@ -121,7 +135,29 @@ void DeformableConvKernel(const Context& dev_ctx,
T
(
0.0
));
}
}
out
->
ShareDataWith
(
output_buffer
).
Resize
(
phi
::
make_ddim
(
output_shape_vec
));
// swap axis to get the right result when im2col_step is greater than 1
if
(
im2col_step
>
1
)
{
std
::
vector
<
int
>
axis
(
4
);
axis
[
0
]
=
0
;
axis
[
1
]
=
2
;
axis
[
2
]
=
1
;
axis
[
3
]
=
3
;
DenseTensor
real_output_buffer
=
phi
::
Transpose
<
T
,
Context
>
(
dev_ctx
,
output_4d
.
Resize
(
phi
::
make_ddim
({
batch_size
/
im2col_step
,
output_shape_vec
[
1
],
im2col_step
,
output_shape_vec
[
2
]
*
output_shape_vec
[
3
]})),
axis
);
out
->
ShareDataWith
(
real_output_buffer
)
.
Resize
(
phi
::
make_ddim
(
output_shape_vec
));
}
else
{
out
->
ShareDataWith
(
output_buffer
).
Resize
(
phi
::
make_ddim
(
output_shape_vec
));
}
}
}
// namespace phi
python/paddle/fluid/tests/unittests/test_deformable_conv_v1_op.py
浏览文件 @
5e0614a1
...
...
@@ -17,9 +17,11 @@ import unittest
import
numpy
as
np
import
paddle.fluid
as
fluid
import
paddle.fluid.core
as
core
from
op_test
import
OpTest
from
op_test
import
OpTest
,
skip_check_grad_ci
from
paddle.fluid.framework
import
_test_eager_guard
paddle
.
enable_static
()
def
dmc_bilinear
(
data_im
,
height
,
width
,
h
,
w
):
h_low
=
int
(
np
.
floor
(
h
))
...
...
@@ -59,8 +61,8 @@ def dconv_im2col_gemm(input, offset, filter, group, conv_param):
assert
f_c
*
group
==
in_c
assert
np
.
mod
(
out_c
,
group
)
==
0
stride
,
pad
,
dilation
=
conv_param
[
'stride'
],
conv_param
[
'pad'
],
\
conv_param
[
'dilation'
]
stride
,
pad
,
dilation
=
conv_param
[
'stride'
],
conv_param
[
'pad'
],
\
conv_param
[
'dilation'
]
out_h
=
1
+
(
in_h
+
2
*
pad
[
0
]
-
(
dilation
[
0
]
*
(
f_h
-
1
)
+
1
))
//
stride
[
0
]
out_w
=
1
+
(
in_w
+
2
*
pad
[
1
]
-
(
dilation
[
1
]
*
(
f_w
-
1
)
+
1
))
//
stride
[
1
]
assert
out_h
==
in_h
...
...
@@ -74,18 +76,18 @@ def dconv_im2col_gemm(input, offset, filter, group, conv_param):
for
kh
in
range
(
f_h
):
for
kw
in
range
(
f_w
):
offset_h_table
=
\
offset
[
n
,
::
2
,
h
,
w
].
reshape
(
f_h
,
f_w
)
offset
[
n
,
::
2
,
h
,
w
].
reshape
(
f_h
,
f_w
)
offset_w_table
=
\
offset
[
n
,
1
::
2
,
h
,
w
].
reshape
(
f_h
,
f_w
)
offset
[
n
,
1
::
2
,
h
,
w
].
reshape
(
f_h
,
f_w
)
offset_h
=
offset_h_table
[
kh
,
kw
]
offset_w
=
offset_w_table
[
kh
,
kw
]
val
=
0
im_h
=
h
*
stride
[
0
]
+
kh
*
dilation
[
0
]
\
+
offset_h
-
pad
[
0
]
+
offset_h
-
pad
[
0
]
im_w
=
w
*
stride
[
0
]
+
kw
*
dilation
[
0
]
\
+
offset_w
-
pad
[
1
]
+
offset_w
-
pad
[
1
]
if
im_h
>
-
1
and
im_w
>
-
1
and
\
im_h
<
in_h
and
im_w
<
in_h
:
im_h
<
in_h
and
im_w
<
in_h
:
val
=
dmc_bilinear
(
input
[
n
,
c
],
in_h
,
in_w
,
im_h
,
im_w
)
val_out
=
val
...
...
@@ -284,6 +286,69 @@ class TestWithDouble(TestModulatedDeformableConvOp):
self
.
dtype
=
np
.
float64
class
TestFP16
(
unittest
.
TestCase
):
def
check_main
(
self
,
input_np
,
offset_np
,
filter_np
,
dtype
):
paddle
.
disable_static
()
input_np
=
input_np
.
astype
(
dtype
)
offset_np
=
offset_np
.
astype
(
dtype
)
filter_np
=
filter_np
.
astype
(
dtype
)
input
=
paddle
.
to_tensor
(
input_np
)
offset
=
paddle
.
to_tensor
(
offset_np
)
filter
=
paddle
.
to_tensor
(
filter_np
)
input
.
stop_gradient
=
False
offset
.
stop_gradient
=
False
filter
.
stop_gradient
=
False
y
=
paddle
.
vision
.
ops
.
deform_conv2d
(
input
,
offset
,
filter
)
input_grad
,
offset_grad
,
filter_grad
=
paddle
.
grad
(
y
,
[
input
,
offset
,
filter
])
y_np
=
y
.
numpy
().
astype
(
'float32'
)
input_grad_np
=
input_grad
.
numpy
().
astype
(
'float32'
)
offset_grad_np
=
offset_grad
.
numpy
().
astype
(
'float32'
)
filter_grad_np
=
filter_grad
.
numpy
().
astype
(
'float32'
)
paddle
.
enable_static
()
return
y_np
,
input_grad_np
,
offset_grad_np
,
filter_grad_np
def
test_main
(
self
):
if
not
paddle
.
is_compiled_with_cuda
():
return
self
.
pad
=
[
1
,
1
]
self
.
stride
=
[
1
,
1
]
self
.
dilations
=
[
1
,
1
]
self
.
groups
=
1
self
.
input_size
=
[
2
,
3
,
5
,
5
]
# NCHW
assert
np
.
mod
(
self
.
input_size
[
1
],
self
.
groups
)
==
0
f_c
=
self
.
input_size
[
1
]
//
self
.
groups
self
.
filter_size
=
[
40
,
f_c
,
1
,
1
]
self
.
im2col_step
=
1
self
.
deformable_groups
=
1
offset_c
=
2
*
self
.
deformable_groups
*
self
.
filter_size
[
2
]
*
self
.
filter_size
[
3
]
self
.
offset_size
=
[
self
.
input_size
[
0
],
offset_c
,
self
.
input_size
[
2
],
self
.
input_size
[
3
]
]
input
=
np
.
random
.
random
(
self
.
input_size
)
offset
=
10
*
np
.
random
.
random
(
self
.
offset_size
)
filter
=
np
.
random
.
random
(
self
.
filter_size
)
y_np_1
,
input_g_np_1
,
offset_g_np_1
,
filter_g_np_1
=
self
.
check_main
(
input
,
offset
,
filter
,
'float16'
)
y_np_2
,
input_g_np_2
,
offset_g_np_2
,
filter_g_np_2
=
self
.
check_main
(
input
,
offset
,
filter
,
'float32'
)
def
assert_equal
(
x
,
y
):
np
.
testing
.
assert_allclose
(
x
,
y
,
atol
=
3e-2
)
assert_equal
(
y_np_1
,
y_np_2
)
assert_equal
(
input_g_np_1
,
input_g_np_2
)
assert_equal
(
offset_g_np_1
,
offset_g_np_2
)
assert_equal
(
filter_g_np_1
,
filter_g_np_2
)
class
TestModulatedDeformableConvV1InvalidInput
(
unittest
.
TestCase
):
def
test_error
(
self
):
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录