Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
11588b36
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
11588b36
编写于
7月 18, 2017
作者:
X
xzl
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
support inputchannels != outputchannels of depthwiseconv
上级
02e04b44
变更
4
隐藏空白更改
内联
并排
Showing
4 changed file
with
85 addition
and
57 deletion
+85
-57
paddle/function/DepthwiseConvOp.cpp
paddle/function/DepthwiseConvOp.cpp
+11
-2
paddle/function/DepthwiseConvOp.h
paddle/function/DepthwiseConvOp.h
+8
-2
paddle/function/DepthwiseConvOpGpu.cu
paddle/function/DepthwiseConvOpGpu.cu
+65
-52
paddle/gserver/tests/test_LayerGrad.cpp
paddle/gserver/tests/test_LayerGrad.cpp
+1
-1
未找到文件。
paddle/function/DepthwiseConvOp.cpp
浏览文件 @
11588b36
...
...
@@ -30,6 +30,7 @@ public:
int
inputChannels
,
int
inputHeight
,
int
inputWidth
,
int
filterMultiplier
,
int
filterHeight
,
int
filterWidth
,
int
strideH
,
...
...
@@ -53,6 +54,7 @@ public:
int
inputChannels
,
int
inputHeight
,
int
inputWidth
,
int
filterMultiplier
,
int
filterHeight
,
int
filterWidth
,
int
strideH
,
...
...
@@ -75,6 +77,7 @@ public:
int
inputChannels
,
int
inputHeight
,
int
inputWidth
,
int
filterMultiplier
,
int
filterHeight
,
int
filterWidth
,
int
strideH
,
...
...
@@ -122,6 +125,7 @@ public:
size_t
outputChannels
=
output
[
1
];
size_t
outputHeight
=
output
[
2
];
size_t
outputWidth
=
output
[
3
];
size_t
filterMultiplier
=
outputChannels
/
groups_
;
real
*
inputData
=
inputs
[
0
].
data
<
real
>
();
real
*
filterData
=
inputs
[
1
].
data
<
real
>
();
...
...
@@ -137,6 +141,7 @@ public:
inputChannels
,
inputHeight
,
inputWidth
,
filterMultiplier
,
filterHeight
,
filterWidth
,
strideH
(),
...
...
@@ -183,6 +188,7 @@ public:
size_t
outputChannels
=
output
[
1
];
size_t
outputHeight
=
output
[
2
];
size_t
outputWidth
=
output
[
3
];
size_t
filterMultiplier
=
outputChannels
/
groups_
;
real
*
outputGrad
=
inputs
[
0
].
data
<
real
>
();
real
*
filterData
=
inputs
[
1
].
data
<
real
>
();
...
...
@@ -198,6 +204,7 @@ public:
inputChannels
,
inputHeight
,
inputWidth
,
filterMultiplier
,
filterHeight
,
filterWidth
,
strideH
(),
...
...
@@ -243,13 +250,14 @@ public:
size_t
outputChannels
=
output
[
1
];
size_t
outputHeight
=
output
[
2
];
size_t
outputWidth
=
output
[
3
];
size_t
filterMultiplier
=
outputChannels
/
groups_
;
real
*
outputGrad
=
inputs
[
0
].
data
<
real
>
();
real
*
inputData
=
inputs
[
1
].
data
<
real
>
();
real
*
filterGrad
=
outputs
[
0
].
data
<
real
>
();
int
size
=
inputChannels
*
filterHeight
*
filterWidth
*
outputHeight
*
outputWidth
;
int
size
=
outputChannels
*
filterHeight
*
filterWidth
*
outputHeight
*
outputWidth
;
resizeBuffer
<
Device
>
(
size
);
real
*
colData
=
reinterpret_cast
<
real
*>
(
memory_
->
getBuf
());
...
...
@@ -264,6 +272,7 @@ public:
inputChannels
,
inputHeight
,
inputWidth
,
filterMultiplier
,
filterHeight
,
filterWidth
,
strideH
(),
...
...
paddle/function/DepthwiseConvOp.h
浏览文件 @
11588b36
...
...
@@ -32,6 +32,7 @@ namespace paddle {
* \param[in] inputChannels channels of inputData.
* \param[in] inputHeight height of inputData.
* \param[in] inputWidth width of inputData.
* \param[in] filterMultiplier equals to outputChannels/groups_.
* \param[in] filterHeight height of filter.
* \param[in] filterWidth width of filter.
* \param[in] strideH stride size in height direction.
...
...
@@ -53,6 +54,7 @@ public:
int
inputChannels
,
int
inputHeight
,
int
inputWidth
,
int
filterMultiplier
,
int
filterHeight
,
int
filterWidth
,
int
strideH
,
...
...
@@ -74,7 +76,8 @@ public:
* \param[in] outputWidth width of outputData.
* \param[in] inputChannels channels of input data.
* \param[in] inputHeight height of inputData.
* \param[in] inputWidth width of inputData..
* \param[in] inputWidth width of inputData.
* \param[in] filterMultiplier equals to outputChannels/groups_.
* \param[in] filterHeight height of filter.
* \param[in] filterWidth width of filter.
* \param[in] strideH stride size in height direction.
...
...
@@ -96,6 +99,7 @@ public:
int
inputChannels
,
int
inputHeight
,
int
inputWidth
,
int
filterMultiplier
,
int
filterHeight
,
int
filterWidth
,
int
strideH
,
...
...
@@ -116,7 +120,8 @@ public:
* \param[in] outputWidth width of outputData.
* \param[in] inputChannels channels of input data.
* \param[in] inputHeight height of inputData.
* \param[in] inputWidth width of inputData..
* \param[in] inputWidth width of inputData.
* \param[in] filterMultiplier equals to outputChannels/groups_.
* \param[in] filterHeight height of filter.
* \param[in] filterWidth width of filter.
* \param[in] strideH stride size in height direction.
...
...
@@ -140,6 +145,7 @@ public:
int
inputChannels
,
int
inputHeight
,
int
inputWidth
,
int
filterMultiplier
,
int
filterHeight
,
int
filterWidth
,
int
strideH
,
...
...
paddle/function/DepthwiseConvOpGpu.cu
浏览文件 @
11588b36
...
...
@@ -25,7 +25,7 @@ void ConvolutionDepthwiseForward(const int nthreads,
const
T
*
const
inputData
,
const
T
*
const
filterData
,
const
int
batchSize
,
const
int
outputChannels
,
const
int
outputHeight
,
const
int
outputWidth
,
const
int
inputChannels
,
const
int
inputHeight
,
const
int
inputWidth
,
const
int
filterHeight
,
const
int
filterWidth
,
const
int
strideH
,
const
int
filter
Multiplier
,
const
int
filter
Height
,
const
int
filterWidth
,
const
int
strideH
,
const
int
strideW
,
const
int
paddingH
,
const
int
paddingW
,
T
*
const
outputData
)
{
...
...
@@ -33,23 +33,25 @@ void ConvolutionDepthwiseForward(const int nthreads,
(
blockIdx
.
x
*
gridDim
.
y
+
blockIdx
.
y
)
*
blockDim
.
x
+
threadIdx
.
x
;
if
(
index
<
nthreads
)
{
const
int
n
=
index
/
outputChannels
/
outputHeight
/
outputWidth
;
const
int
c
=
(
index
/
outputHeight
/
outputWidth
)
%
outputChannels
;
const
int
h
=
(
index
/
outputWidth
)
%
outputHeight
;
const
int
w
=
index
%
outputWidth
;
const
T
*
weight
=
filterData
+
c
*
filterHeight
*
filterWidth
;
const
int
batch
=
index
/
outputChannels
/
outputHeight
/
outputWidth
;
const
int
c_out
=
(
index
/
outputHeight
/
outputWidth
)
%
outputChannels
;
const
int
h_out
=
(
index
/
outputWidth
)
%
outputHeight
;
const
int
w_out
=
index
%
outputWidth
;
const
int
c_in
=
c_out
/
filterMultiplier
;
const
T
*
weight
=
filterData
+
c_out
*
filterHeight
*
filterWidth
;
T
value
=
0
;
const
int
h_in_start
=
-
paddingH
+
h
*
strideH
;
const
int
w_in_start
=
-
paddingW
+
w
*
strideW
;
const
int
h_in_end
=
-
paddingH
+
h
*
strideH
+
filterHeight
-
1
;
const
int
w_in_end
=
-
paddingW
+
w
*
strideW
+
filterWidth
-
1
;
const
int
h_in_start
=
-
paddingH
+
h
_out
*
strideH
;
const
int
w_in_start
=
-
paddingW
+
w
_out
*
strideW
;
const
int
h_in_end
=
-
paddingH
+
h
_out
*
strideH
+
filterHeight
-
1
;
const
int
w_in_end
=
-
paddingW
+
w
_out
*
strideW
+
filterWidth
-
1
;
if
((
h_in_start
>=
0
)
&&
(
h_in_end
<
inputHeight
)
&&
(
w_in_start
>=
0
)
&&
(
w_in_end
<
inputWidth
))
{
for
(
int
kh
=
0
;
kh
<
filterHeight
;
++
kh
)
{
for
(
int
kw
=
0
;
kw
<
filterWidth
;
++
kw
)
{
const
int
h_in
=
-
paddingH
+
h
*
strideH
+
kh
;
const
int
w_in
=
-
paddingW
+
w
*
strideW
+
kw
;
const
int
offset
=
((
n
*
inputChannels
+
c
)
*
inputHeight
+
h_in
)
const
int
h_in
=
-
paddingH
+
h
_out
*
strideH
+
kh
;
const
int
w_in
=
-
paddingW
+
w
_out
*
strideW
+
kw
;
const
int
offset
=
((
batch
*
inputChannels
+
c_in
)
*
inputHeight
+
h_in
)
*
inputWidth
+
w_in
;
value
+=
(
*
weight
)
*
inputData
[
offset
];
++
weight
;
...
...
@@ -58,11 +60,11 @@ void ConvolutionDepthwiseForward(const int nthreads,
}
else
{
for
(
int
kh
=
0
;
kh
<
filterHeight
;
++
kh
)
{
for
(
int
kw
=
0
;
kw
<
filterWidth
;
++
kw
)
{
const
int
h_in
=
-
paddingH
+
h
*
strideH
+
kh
;
const
int
w_in
=
-
paddingW
+
w
*
strideW
+
kw
;
const
int
h_in
=
-
paddingH
+
h
_out
*
strideH
+
kh
;
const
int
w_in
=
-
paddingW
+
w
_out
*
strideW
+
kw
;
if
((
h_in
>=
0
)
&&
(
h_in
<
inputHeight
)
&&
(
w_in
>=
0
)
&&
(
w_in
<
inputWidth
))
{
const
int
offset
=
((
n
*
outputChannels
+
c
)
*
inputHeight
+
h_in
)
const
int
offset
=
((
batch
*
inputChannels
+
c_in
)
*
inputHeight
+
h_in
)
*
inputWidth
+
w_in
;
value
+=
(
*
weight
)
*
inputData
[
offset
];
}
...
...
@@ -81,38 +83,42 @@ void ConvolutionDepthwiseInputBackward(const int nthreads,
const
T
*
const
top_diff
,
const
T
*
const
weight_data
,
const
int
num
,
const
int
outputChannels
,
const
int
outputHeight
,
const
int
outputWidth
,
const
int
inputChannels
,
const
int
inputHeight
,
const
int
inputWidth
,
const
int
filterHeight
,
const
int
filterWidth
,
const
int
strideH
,
const
int
filter
Multiplier
,
const
int
filter
Height
,
const
int
filterWidth
,
const
int
strideH
,
const
int
strideW
,
const
int
paddingH
,
const
int
paddingW
,
T
*
const
bottom_diff
)
{
int
index
=
(
blockIdx
.
x
*
gridDim
.
y
+
blockIdx
.
y
)
*
blockDim
.
x
+
threadIdx
.
x
;
if
(
index
<
nthreads
)
{
const
int
n
=
index
/
inputChannels
/
inputHeight
/
inputWidth
;
const
int
c
=
(
index
/
inputHeight
/
inputWidth
)
%
inputChannels
;
const
int
h
=
(
index
/
inputWidth
)
%
inputHeight
;
const
int
w
=
index
%
inputWidth
;
const
T
*
weight
=
weight_data
+
c
*
filterHeight
*
filterWidth
;
const
int
batch
=
index
/
inputChannels
/
inputHeight
/
inputWidth
;
const
int
c
_in
=
(
index
/
inputHeight
/
inputWidth
)
%
inputChannels
;
const
int
h
_in
=
(
index
/
inputWidth
)
%
inputHeight
;
const
int
w
_in
=
index
%
inputWidth
;
const
int
c_out_start
=
c_in
*
filterMultiplier
;
T
value
=
0
;
for
(
int
kh
=
0
;
kh
<
filterHeight
;
++
kh
)
{
for
(
int
kw
=
0
;
kw
<
filterWidth
;
++
kw
)
{
const
int
h_out_s
=
h
+
paddingH
-
kh
;
const
int
w_out_s
=
w
+
paddingW
-
kw
;
if
(((
h_out_s
%
strideH
)
==
0
)
&&
((
w_out_s
%
strideW
)
==
0
))
{
const
int
h_out
=
h_out_s
/
strideH
;
const
int
w_out
=
w_out_s
/
strideW
;
// TODO(zhaolong) : this 'if' affects efficiency and needs to be optimized
if
((
h_out
>=
0
)
&&
(
h_out
<
outputHeight
)
&&
(
w_out
>=
0
)
&&
(
w_out
<
outputWidth
))
{
const
int
offset
=
((
n
*
outputChannels
+
c
)
*
outputHeight
+
h_out
)
*
outputWidth
+
w_out
;
value
+=
(
*
weight
)
*
top_diff
[
offset
];
}
for
(
int
c_out
=
c_out_start
;
c_out
<
c_out_start
+
filterMultiplier
;
c_out
++
){
// weight must be indexed by c_out
const
T
*
weight
=
weight_data
+
c_out
*
filterHeight
*
filterWidth
;
for
(
int
kh
=
0
;
kh
<
filterHeight
;
++
kh
)
{
for
(
int
kw
=
0
;
kw
<
filterWidth
;
++
kw
)
{
const
int
h_out_s
=
h_in
+
paddingH
-
kh
;
const
int
w_out_s
=
w_in
+
paddingW
-
kw
;
if
(((
h_out_s
%
strideH
)
==
0
)
&&
((
w_out_s
%
strideW
)
==
0
))
{
const
int
h_out
=
h_out_s
/
strideH
;
const
int
w_out
=
w_out_s
/
strideW
;
// TODO(zhaolong) : this 'if' affects efficiency and needs to be optimized
if
((
h_out
>=
0
)
&&
(
h_out
<
outputHeight
)
&&
(
w_out
>=
0
)
&&
(
w_out
<
outputWidth
))
{
const
int
offset
=
((
batch
*
outputChannels
+
c_out
)
*
outputHeight
+
h_out
)
*
outputWidth
+
w_out
;
value
+=
(
*
weight
)
*
top_diff
[
offset
];
}
}
++
weight
;
}
}
++
weight
;
}
}
bottom_diff
[
index
]
+=
value
;
}
}
}
// CUDA kernel to compute the depthwise convolution backprop w.r.t filter.
...
...
@@ -122,26 +128,27 @@ void ConvolutionDepthwiseFilterBackward(const int num_i, const int nthreads,
const
T
*
const
top_diff
,
const
T
*
const
inputData
,
const
int
num
,
const
int
outputChannels
,
const
int
outputHeight
,
const
int
outputWidth
,
const
int
inputChannels
,
const
int
inputHeight
,
const
int
inputWidth
,
const
int
filterHeight
,
const
int
filterWidth
,
const
int
strideH
,
const
int
filter
Multiplier
,
const
int
filter
Height
,
const
int
filterWidth
,
const
int
strideH
,
const
int
strideW
,
const
int
paddingH
,
const
int
paddingW
,
T
*
const
buffer_data
)
{
int
index
=
(
blockIdx
.
x
*
gridDim
.
y
+
blockIdx
.
y
)
*
blockDim
.
x
+
threadIdx
.
x
;
if
(
index
<
nthreads
)
{
const
int
h
=
(
index
/
outputWidth
)
%
outputHeight
;
const
int
w
=
index
%
outputWidth
;
const
int
h
_out
=
(
index
/
outputWidth
)
%
outputHeight
;
const
int
w
_out
=
index
%
outputWidth
;
const
int
kh
=
(
index
/
filterWidth
/
outputHeight
/
outputWidth
)
%
filterHeight
;
const
int
kw
=
(
index
/
outputHeight
/
outputWidth
)
%
filterWidth
;
const
int
h_in
=
-
paddingH
+
h
*
strideH
+
kh
;
const
int
w_in
=
-
paddingW
+
w
*
strideW
+
kw
;
const
int
h_in
=
-
paddingH
+
h
_out
*
strideH
+
kh
;
const
int
w_in
=
-
paddingW
+
w
_out
*
strideW
+
kw
;
if
((
h_in
>=
0
)
&&
(
h_in
<
inputHeight
)
&&
(
w_in
>=
0
)
&&
(
w_in
<
inputWidth
))
{
const
int
c
=
index
/
filterHeight
/
filterWidth
/
outputHeight
/
outputWidth
;
const
int
n
=
num_i
;
const
int
top_offset
=
((
n
*
outputChannels
+
c
)
*
outputHeight
+
h
)
*
outputWidth
+
w
;
const
int
bottom_offset
=
((
n
*
inputChannels
+
c
)
*
inputHeight
+
h_in
)
const
int
c_out
=
index
/
filterHeight
/
filterWidth
/
outputHeight
/
outputWidth
;
const
int
c_in
=
c_out
/
filterMultiplier
;
const
int
batch
=
num_i
;
const
int
top_offset
=
((
batch
*
outputChannels
+
c_out
)
*
outputHeight
+
h_out
)
*
outputWidth
+
w_out
;
const
int
bottom_offset
=
((
batch
*
inputChannels
+
c_in
)
*
inputHeight
+
h_in
)
*
inputWidth
+
w_in
;
buffer_data
[
index
]
=
top_diff
[
top_offset
]
*
inputData
[
bottom_offset
];
}
else
{
...
...
@@ -162,6 +169,7 @@ public:
int
inputChannels
,
int
inputHeight
,
int
inputWidth
,
int
filterMultiplier
,
int
filterHeight
,
int
filterWidth
,
int
strideH
,
...
...
@@ -190,6 +198,7 @@ public:
inputChannels
,
inputHeight
,
inputWidth
,
filterMultiplier
,
filterHeight
,
filterWidth
,
strideH
,
...
...
@@ -212,6 +221,7 @@ public:
int
inputChannels
,
int
inputHeight
,
int
inputWidth
,
int
filterMultiplier
,
int
filterHeight
,
int
filterWidth
,
int
strideH
,
...
...
@@ -242,6 +252,7 @@ public:
inputChannels
,
inputHeight
,
inputWidth
,
filterMultiplier
,
filterHeight
,
filterWidth
,
strideH
,
...
...
@@ -264,6 +275,7 @@ public:
int
inputChannels
,
int
inputHeight
,
int
inputWidth
,
int
filterMultiplier
,
int
filterHeight
,
int
filterWidth
,
int
strideH
,
...
...
@@ -273,14 +285,14 @@ public:
T
*
colData
,
T
*
filterGrad
){
int
colDataSize
=
in
putChannels
*
filterHeight
*
filterWidth
*
outputHeight
*
outputWidth
;
int
colDataSize
=
out
putChannels
*
filterHeight
*
filterWidth
*
outputHeight
*
outputWidth
;
size_t
blocks
=
(
colDataSize
+
1024
-
1
)
/
1024
;
size_t
blockX
=
512
;
size_t
blockY
=
(
blocks
+
512
-
1
)
/
512
;
dim3
threads
(
1024
,
1
);
dim3
grid
(
blockX
,
blockY
);
BaseMatrix
filterGradMatrix
(
in
putChannels
*
filterHeight
*
filterWidth
,
1
,
filterGrad
,
false
,
true
);
BaseMatrix
filterGradMatrix
(
out
putChannels
*
filterHeight
*
filterWidth
,
1
,
filterGrad
,
false
,
true
);
for
(
int
i
=
0
;
i
<
batchSize
;
i
++
)
{
ConvolutionDepthwiseFilterBackward
<
T
>
...
...
@@ -296,6 +308,7 @@ public:
inputChannels
,
inputHeight
,
inputWidth
,
filterMultiplier
,
filterHeight
,
filterWidth
,
strideH
,
...
...
@@ -304,8 +317,8 @@ public:
paddingW
,
colData
);
int
M
=
colDataSize
/
outputHeight
/
outputWidth
;
int
K
=
outputHeight
*
outputWidth
;
int
M
=
colDataSize
/
K
;
BaseMatrix
colMatrix
(
M
,
K
,
colData
,
false
,
true
);
filterGradMatrix
.
sumRows
(
colMatrix
,
(
T
)
1.0
,
(
T
)
1.0
);
...
...
paddle/gserver/tests/test_LayerGrad.cpp
浏览文件 @
11588b36
...
...
@@ -355,7 +355,7 @@ void testDepthwiseConvLayer(const string& type, bool useGpu) {
config
.
layerConfig
.
set_partial_sum
(
1
);
config
.
layerConfig
.
set_shared_biases
(
true
);
config
.
inputDefs
.
push_back
({
INPUT_DATA
,
"layer_0"
,
2048
,
96
});
config
.
inputDefs
.
push_back
({
INPUT_DATA
,
"layer_0"
,
2048
,
192
/
2
});
LayerInputConfig
*
input
=
config
.
layerConfig
.
add_inputs
();
ConvConfig
*
conv
=
input
->
mutable_conv_conf
();
conv
->
set_filter_size
(
2
);
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录