Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
0a7516d1
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 1 年 前同步成功
通知
695
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
0a7516d1
编写于
8月 18, 2017
作者:
C
chengduoZH
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
fix col2vol vol2col kernel
上级
43f6cdc8
变更
1
隐藏空白更改
内联
并排
Showing
1 changed file
with
129 addition
and
63 deletion
+129
-63
paddle/cuda/src/hl_cuda_matrix.cu
paddle/cuda/src/hl_cuda_matrix.cu
+129
-63
未找到文件。
paddle/cuda/src/hl_cuda_matrix.cu
浏览文件 @
0a7516d1
...
...
@@ -593,21 +593,28 @@ void hl_matrix_rotate(
CHECK_SYNC
(
"hl_matrix_rotate failed"
);
}
__global__
void
keMatrixVol2Col
(
int
num_kernels
,
real
*
dataSrc
,
real
*
dataDst
,
int
depth
,
int
height
,
int
width
,
int
filterD
,
int
filterH
,
int
filterW
,
int
strideD
,
int
strideH
,
int
strideW
,
int
paddingD
,
int
paddingH
,
int
paddingW
,
int
depth_col
,
int
height_col
,
int
width_col
){
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
num_kernels
;
index
+=
blockDim
.
x
*
gridDim
.
x
){
__global__
void
keMatrixVol2Col
(
int
num_kernels
,
real
*
dataSrc
,
real
*
dataDst
,
int
depth
,
int
height
,
int
width
,
int
filterD
,
int
filterH
,
int
filterW
,
int
strideD
,
int
strideH
,
int
strideW
,
int
paddingD
,
int
paddingH
,
int
paddingW
,
int
depth_col
,
int
height_col
,
int
width_col
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
num_kernels
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
int
w_out
=
index
%
width_col
;
int
h_out
=
(
index
/
width_col
)
%
height_col
;
int
h_out
=
(
index
/
width_col
)
%
height_col
;
int
d_out
=
(
index
/
width_col
/
height_col
)
%
depth_col
;
int
channel_in
=
index
/
width_col
/
height_col
/
depth_col
;
int
channel_out
=
channel_in
*
filterD
*
filterH
*
filterW
;
...
...
@@ -615,7 +622,9 @@ __global__ void keMatrixVol2Col(
int
h_in
=
h_out
*
strideH
-
paddingH
;
int
d_in
=
d_out
*
strideD
-
paddingD
;
dataDst
+=
((
channel_out
*
depth_col
+
d_out
)
*
height_col
+
h_out
)
*
width_col
+
w_out
;
dataDst
+=
((
channel_out
*
depth_col
+
d_out
)
*
height_col
+
h_out
)
*
width_col
+
w_out
;
dataSrc
+=
((
channel_in
*
depth
+
d_in
)
*
height
+
h_in
)
*
width
+
w_in
;
for
(
int
k
=
0
;
k
<
filterD
;
++
k
)
{
for
(
int
i
=
0
;
i
<
filterH
;
++
i
)
{
...
...
@@ -623,8 +632,10 @@ __global__ void keMatrixVol2Col(
int
d
=
d_in
+
k
;
int
h
=
h_in
+
i
;
int
w
=
w_in
+
j
;
*
dataDst
=
(
d
>=
0
&&
d
<
depth
&&
h
>=
0
&&
h
<
height
&&
w
>=
0
&&
w
<
width
)
?
dataSrc
[(
k
*
height
+
i
)
*
width
+
j
]
:
0
;
*
dataDst
=
(
d
>=
0
&&
d
<
depth
&&
h
>=
0
&&
h
<
height
&&
w
>=
0
&&
w
<
width
)
?
dataSrc
[(
k
*
height
+
i
)
*
width
+
j
]
:
0
;
dataDst
+=
depth_col
*
height_col
*
width_col
;
}
}
...
...
@@ -633,11 +644,20 @@ __global__ void keMatrixVol2Col(
}
void
hl_matrix_vol2Col
(
real
*
dataSrc
,
int
channels
,
int
depth
,
int
height
,
int
width
,
int
filterD
,
int
filterH
,
int
filterW
,
int
strideD
,
int
strideH
,
int
strideW
,
int
paddingD
,
int
paddingH
,
int
paddingW
,
real
*
dataDst
){
int
channels
,
int
depth
,
int
height
,
int
width
,
int
filterD
,
int
filterH
,
int
filterW
,
int
strideD
,
int
strideH
,
int
strideW
,
int
paddingD
,
int
paddingH
,
int
paddingW
,
real
*
dataDst
)
{
int
depth_col
=
(
depth
+
2
*
paddingD
-
filterD
)
/
strideD
+
1
;
int
height_col
=
(
height
+
2
*
paddingH
-
filterH
)
/
strideH
+
1
;
int
width_col
=
(
width
+
2
*
paddingW
-
filterW
)
/
strideW
+
1
;
...
...
@@ -646,34 +666,55 @@ void hl_matrix_vol2Col(real* dataSrc,
const
int
threads
=
512
;
const
int
blocks
=
DIVUP
(
num_kernels
,
threads
);
keMatrixVol2Col
<<<
blocks
,
threads
>>>
(
num_kernels
,
dataSrc
,
dataDst
,
depth
,
height
,
width
,
filterD
,
filterH
,
filterW
,
strideD
,
strideH
,
strideW
,
paddingD
,
paddingH
,
paddingW
,
depth_col
,
height_col
,
width_col
);
keMatrixVol2Col
<<<
blocks
,
threads
>>>
(
num_kernels
,
dataSrc
,
dataDst
,
depth
,
height
,
width
,
filterD
,
filterH
,
filterW
,
strideD
,
strideH
,
strideW
,
paddingD
,
paddingH
,
paddingW
,
depth_col
,
height_col
,
width_col
);
CHECK_SYNC
(
"hl_matrix_vol2Col failed"
);
}
__global__
void
keMatrixCol2Vol
(
int
num_kernels
,
real
*
dataDst
,
real
*
dataSrc
,
int
depth
,
int
height
,
int
width
,
int
filterD
,
int
filterH
,
int
filterW
,
int
strideD
,
int
strideH
,
int
strideW
,
int
paddingD
,
int
paddingH
,
int
paddingW
,
int
depth_col
,
int
height_col
,
int
width_col
,
real
alpha
,
real
beta
){
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
num_kernels
;
__global__
void
keMatrixCol2Vol
(
int
num_kernels
,
real
*
dataDst
,
real
*
dataSrc
,
int
depth
,
int
height
,
int
width
,
int
filterD
,
int
filterH
,
int
filterW
,
int
strideD
,
int
strideH
,
int
strideW
,
int
paddingD
,
int
paddingH
,
int
paddingW
,
int
depth_col
,
int
height_col
,
int
width_col
,
real
alpha
,
real
beta
)
{
for
(
int
index
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
index
<
num_kernels
;
index
+=
blockDim
.
x
*
gridDim
.
x
)
{
real
val
=
0
;
real
srcVal
=
0
;
real
dstVal
=
dataDst
[
index
]
;
int
w
=
index
%
width
+
paddingW
;
int
h
=
(
index
/
width
)
%
height
+
paddingH
;
int
d
=
(
index
/
width
/
height
)
%
depth
+
paddingD
;
int
c
=
index
/
(
width
*
height
*
depth
)
;
int
c
=
index
/
width
/
height
/
depth
;
// compute the start and end of the output
int
w_col_start
=
(
w
<
filterW
)
?
0
:
(
w
-
filterW
)
/
strideW
+
1
;
int
w_col_end
=
min
(
w
/
strideW
+
1
,
width_col
);
...
...
@@ -682,32 +723,45 @@ __global__ void keMatrixCol2Vol(
int
d_col_start
=
(
d
<
filterD
)
?
0
:
(
d
-
filterD
)
/
strideD
+
1
;
int
d_col_end
=
min
(
d
/
strideD
+
1
,
depth_col
);
int
offset
=
(
c
*
filterD
*
filterW
*
filterH
+
\
d
*
filterW
*
filterH
+
h
*
filterW
+
w
)
*
depth_col
*
height_col
*
width_col
;
int
offset
=
(
c
*
filterD
*
filterW
*
filterH
+
d
*
filterW
*
filterH
+
h
*
filterW
+
w
)
*
depth_col
*
height_col
*
width_col
;
int
coeff_d_col
=
(
1
-
strideD
*
filterW
*
filterH
*
depth_col
)
*
height_col
*
width_col
;
int
coeff_h_col
=
(
1
-
strideH
*
filterW
*
depth_col
*
height_col
)
*
width_col
;
int
coeff_d_col
=
(
1
-
strideD
*
filterW
*
filterH
*
depth_col
)
*
height_col
*
width_col
;
int
coeff_h_col
=
(
1
-
strideH
*
filterW
*
depth_col
*
height_col
)
*
width_col
;
int
coeff_w_col
=
(
1
-
strideW
*
depth_col
*
height_col
*
width_col
);
for
(
int
d_col
=
d_col_start
;
d_col
<
d_col_end
;
++
d_col
)
{
for
(
int
h_col
=
h_col_start
;
h_col
<
h_col_end
;
++
h_col
)
{
for
(
int
w_col
=
w_col_start
;
w_col
<
w_col_end
;
++
w_col
)
{
val
+=
dataSrc
[
offset
+
d_col
*
coeff_d_col
+
h_col
*
coeff_h_col
+
w_col
*
coeff_w_col
];
srcVal
+=
dataSrc
[
offset
+
d_col
*
coeff_d_col
+
h_col
*
coeff_h_col
+
w_col
*
coeff_w_col
];
}
}
}
dataDst
[
index
]
=
v
al
;
dataDst
[
index
]
=
alpha
*
srcVal
+
beta
*
dstV
al
;
}
}
void
hl_matrix_col2Vol
(
real
*
dataDst
,
int
channels
,
int
depth
,
int
height
,
int
width
,
int
filterD
,
int
filterH
,
int
filterW
,
int
strideD
,
int
strideH
,
int
strideW
,
int
paddingD
,
int
paddingH
,
int
paddingW
,
int
channels
,
int
depth
,
int
height
,
int
width
,
int
filterD
,
int
filterH
,
int
filterW
,
int
strideD
,
int
strideH
,
int
strideW
,
int
paddingD
,
int
paddingH
,
int
paddingW
,
real
*
dataSrc
,
real
alpha
,
real
beta
){
real
alpha
,
real
beta
)
{
int
depth_col
=
(
depth
+
2
*
paddingD
-
filterD
)
/
strideD
+
1
;
int
height_col
=
(
height
+
2
*
paddingH
-
filterH
)
/
strideH
+
1
;
int
width_col
=
(
width
+
2
*
paddingW
-
filterW
)
/
strideW
+
1
;
...
...
@@ -716,14 +770,26 @@ void hl_matrix_col2Vol(real* dataDst,
const
int
threads
=
512
;
const
int
blocks
=
DIVUP
(
num_kernels
,
threads
);
keMatrixCol2Vol
<<<
blocks
,
threads
>>>
(
num_kernels
,
dataDst
,
dataSrc
,
depth
,
height
,
width
,
filterD
,
filterH
,
filterW
,
strideD
,
strideH
,
strideW
,
paddingD
,
paddingH
,
paddingW
,
depth_col
,
height_col
,
width_col
,
alpha
,
beta
);
keMatrixCol2Vol
<<<
blocks
,
threads
>>>
(
num_kernels
,
dataDst
,
dataSrc
,
depth
,
height
,
width
,
filterD
,
filterH
,
filterW
,
strideD
,
strideH
,
strideW
,
paddingD
,
paddingH
,
paddingW
,
depth_col
,
height_col
,
width_col
,
alpha
,
beta
);
CHECK_SYNC
(
"hl_matrix_col2Vol failed"
);
}
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录