Commit c43f6936
Authored Jul 14, 2017 by xzl
Parent: fc8aedb1

    modify the format and delete useless comment

Showing 3 changed files with 52 additions and 48 deletions (+52 -48)
paddle/function/DepthwiseConvOp.cpp      +5  -7
paddle/function/DepthwiseConvOp.h        +3  -1
paddle/function/DepthwiseConvOpGpu.cu    +44 -40
paddle/function/DepthwiseConvOp.cpp

@@ -15,7 +15,6 @@ limitations under the License. */
 #include "DepthwiseConvOp.h"
 #include "ConvOp.h"
 #include "GemmFunctor.h"
-//#include "paddle/math/MemoryHandle.h"
 namespace paddle {

@@ -28,6 +27,7 @@ public:
                 int outputChannels,
                 int outputHeight,
                 int outputWidth,
+                int inputChannels,
                 int inputHeight,
                 int inputWidth,
                 int filterHeight,

@@ -114,7 +114,7 @@ public:
     const TensorShape& output = outputs[0].shape();
     size_t batchSize = input[0];
-    // size_t inputChannels = input[1];
+    size_t inputChannels = input[1];
     size_t inputHeight = input[2];
     size_t inputWidth = input[3];
     size_t filterHeight = getFilterHeight(filter);

@@ -134,6 +134,7 @@ public:
                 outputChannels,
                 outputHeight,
                 outputWidth,
+                inputChannels,
                 inputHeight,
                 inputWidth,
                 filterHeight,

@@ -168,8 +169,6 @@ public:
     CHECK_EQ(numInputs_, inputs.size());
     CHECK_EQ(numOutputs_, outputs.size());
     check(inputs, outputs);
-    // Since the implementation of Col2ImFunctor is ADD_TO,
-    // this function only supports ADD_TO mode.
     CHECK_EQ(outputs[0].getArgType(), ADD_TO);
     const TensorShape& output = inputs[0].shape();
     const TensorShape& filter = inputs[1].shape();

@@ -228,12 +227,11 @@ public:
   }

   void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
-    // CHECK_EQ(numInputs_, inputs.size());
-    // CHECK_EQ(numOutputs_, outputs.size());
+    CHECK_EQ(numInputs_, inputs.size());
+    CHECK_EQ(numOutputs_, outputs.size());
     check(inputs, outputs);
     const TensorShape& output = inputs[0].shape();
     const TensorShape& input = inputs[1].shape();
-    // const TensorShape& multiplier = inputs[2].shape();
     const TensorShape& filter = outputs[0].shape();
     size_t batchSize = input[0];
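For context on the shape bookkeeping above (batchSize = input[0], inputChannels = input[1], and so on): the output extents follow the usual convolution arithmetic. A minimal standalone sketch of that arithmetic, with a hypothetical outputSize() helper and made-up example numbers rather than Paddle's TensorShape API:

#include <cstdio>

// Standard convolution output-size arithmetic; outputSize() is a
// hypothetical helper for illustration, not a Paddle utility.
inline int outputSize(int inputSize, int filterSize, int stride, int padding) {
  return (inputSize + 2 * padding - filterSize) / stride + 1;
}

int main() {
  // Example only: a 3x3 depthwise filter over a 32x32 input with
  // stride 1 and padding 1 keeps the spatial size at 32x32.
  int outputHeight = outputSize(32, 3, /*stride=*/1, /*padding=*/1);
  int outputWidth = outputSize(32, 3, /*stride=*/1, /*padding=*/1);
  std::printf("output: %d x %d\n", outputHeight, outputWidth);
  return 0;
}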
paddle/function/DepthwiseConvOp.h

@@ -29,6 +29,7 @@ namespace paddle {
  * \param[in]   outputChannels  channels of outputData.
  * \param[in]   outputHeight    height of outputData.
  * \param[in]   outputWidth     width of outputData.
+ * \param[in]   inputChannels   channels of inputData.
  * \param[in]   inputHeight     height of inputData.
  * \param[in]   inputWidth      width of inputData..
  * \param[in]   filterHeight    height of filter.

@@ -49,8 +50,9 @@ public:
                 int outputChannels,
                 int outputHeight,
                 int outputWidth,
+                int inputChannels,
                 int inputHeight,
-                int intputWidth,
+                int inputWidth,
                 int filterHeight,
                 int filterWidth,
                 int strideH,
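The parameter list documented above fully determines the computation. As a reference point, here is an illustrative CPU sketch of a depthwise forward pass with the same parameters, assuming NCHW layout and one filter per channel; it is not Paddle's DepthwiseConvFunctor, just a plain loop nest:

#include <cstdio>
#include <vector>

// Illustrative CPU reference for a depthwise convolution with the parameter
// list documented in DepthwiseConvOp.h (NCHW layout, one filter per channel).
template <class T>
void depthwiseConvReference(const T* inputData, const T* filterData,
                            int batchSize, int outputChannels, int outputHeight,
                            int outputWidth, int inputChannels, int inputHeight,
                            int inputWidth, int filterHeight, int filterWidth,
                            int strideH, int strideW, int paddingH,
                            int paddingW, T* outputData) {
  for (int n = 0; n < batchSize; ++n) {
    for (int c = 0; c < outputChannels; ++c) {
      for (int h = 0; h < outputHeight; ++h) {
        for (int w = 0; w < outputWidth; ++w) {
          T value = 0;
          // Each output channel convolves only its own input channel.
          for (int kh = 0; kh < filterHeight; ++kh) {
            for (int kw = 0; kw < filterWidth; ++kw) {
              const int h_in = -paddingH + h * strideH + kh;
              const int w_in = -paddingW + w * strideW + kw;
              if (h_in >= 0 && h_in < inputHeight && w_in >= 0 &&
                  w_in < inputWidth) {
                const int in = ((n * inputChannels + c) * inputHeight + h_in) *
                                   inputWidth + w_in;
                const int f = (c * filterHeight + kh) * filterWidth + kw;
                value += filterData[f] * inputData[in];
              }
            }
          }
          outputData[((n * outputChannels + c) * outputHeight + h) *
                         outputWidth + w] = value;
        }
      }
    }
  }
}

int main() {
  // Toy example: one 3x3 image, one channel, 2x2 filter, stride 1, no padding.
  std::vector<float> input = {1, 2, 3, 4, 5, 6, 7, 8, 9};
  std::vector<float> filter = {1, 0, 0, 1};
  std::vector<float> output(4, 0.0f);
  depthwiseConvReference(input.data(), filter.data(), 1, 1, 2, 2, 1, 3, 3, 2, 2,
                         1, 1, 0, 0, output.data());
  std::printf("%g %g %g %g\n", output[0], output[1], output[2], output[3]);
  return 0;
}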
paddle/function/DepthwiseConvOpGpu.cu

@@ -24,7 +24,7 @@ __global__
 void ConvolutionDepthwiseForward(const int nthreads,
     const T* const inputData, const T* const filterData,
     const int batchSize, const int outputChannels, const int outputHeight,
-    const int outputWidth, const int inputHeight, const int inputWidth,
+    const int outputWidth, const int inputChannels, const int inputHeight, const int inputWidth,
     const int filterHeight, const int filterWidth, const int strideH,
     const int strideW, const int paddingH, const int paddingW,
     T* const outputData) {

@@ -39,36 +39,36 @@ void ConvolutionDepthwiseForward(const int nthreads,
     const int w = index % outputWidth;
     const T* weight = filterData + c * filterHeight * filterWidth;
     T value = 0;
     const int h_in_start = -paddingH + h * strideH;
     const int w_in_start = -paddingW + w * strideW;
     const int h_in_end = -paddingH + h * strideH + filterHeight - 1;
     const int w_in_end = -paddingW + w * strideW + filterWidth - 1;
     if ((h_in_start >= 0) && (h_in_end < inputHeight)
         && (w_in_start >= 0) && (w_in_end < inputWidth)) {
       for (int kh = 0; kh < filterHeight; ++kh) {
         for (int kw = 0; kw < filterWidth; ++kw) {
           const int h_in = -paddingH + h * strideH + kh;
           const int w_in = -paddingW + w * strideW + kw;
-          const int offset = ((n * outputChannels + c) * inputHeight + h_in)
+          const int offset = ((n * inputChannels + c) * inputHeight + h_in)
               * inputWidth + w_in;
           value += (*weight) * inputData[offset];
           ++weight;
         }
       }
     } else {
       for (int kh = 0; kh < filterHeight; ++kh) {
         for (int kw = 0; kw < filterWidth; ++kw) {
           const int h_in = -paddingH + h * strideH + kh;
           const int w_in = -paddingW + w * strideW + kw;
           if ((h_in >= 0) && (h_in < inputHeight)
               && (w_in >= 0) && (w_in < inputWidth)) {
             const int offset = ((n * outputChannels + c) * inputHeight + h_in)
                 * inputWidth + w_in;
             value += (*weight) * inputData[offset];
           }
           ++weight;
         }
       }
     }
     outputData[index] = value;
   }
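The forward kernel above assigns one thread per output element and recovers the (n, c, h, w) coordinates from a flat thread index. A minimal, self-contained sketch of that kind of flat-index decomposition, using toy sizes and a simple 1-D launch of my own choosing rather than the launch code in this file:

#include <cstdio>
#include <cuda_runtime.h>

// Sketch of the thread-to-element mapping these kernels rely on: a flat
// index is decomposed into (n, c, h, w) in NCHW order.
__global__ void indexDecompose(const int nthreads, const int outputChannels,
                               const int outputHeight, const int outputWidth) {
  const int index =
      (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x;
  if (index < nthreads) {
    const int n = index / outputChannels / outputHeight / outputWidth;
    const int c = (index / outputHeight / outputWidth) % outputChannels;
    const int h = (index / outputWidth) % outputHeight;
    const int w = index % outputWidth;
    if (index == nthreads - 1) {
      printf("last element -> n=%d c=%d h=%d w=%d\n", n, c, h, w);
    }
  }
}

int main() {
  const int batchSize = 2, channels = 3, height = 4, width = 5;
  const int nthreads = batchSize * channels * height * width;
  // A plain 1-D launch is enough here; gridDim.y is 1, so the index formula
  // above reduces to blockIdx.x * blockDim.x + threadIdx.x.
  indexDecompose<<<(nthreads + 255) / 256, 256>>>(nthreads, channels, height,
                                                  width);
  cudaDeviceSynchronize();
  return 0;
}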
@@ -80,15 +80,15 @@ __global__
 void ConvolutionDepthwiseInputBackward(const int nthreads,
     const T* const top_diff, const T* const weight_data,
     const int num, const int outputChannels, const int outputHeight,
-    const int outputWidth, const int inputHeight, const int inputWidth,
+    const int outputWidth, const int inputChannels, const int inputHeight, const int inputWidth,
     const int filterHeight, const int filterWidth, const int strideH,
     const int strideW, const int paddingH, const int paddingW,
     T* const bottom_diff) {
   int index =
     (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x;
   if (index < nthreads) {
-    const int n = index / outputChannels / inputHeight / inputWidth;
-    const int c = (index / inputHeight / inputWidth) % outputChannels;
+    const int n = index / inputChannels / inputHeight / inputWidth;
+    const int c = (index / inputHeight / inputWidth) % inputChannels;
     const int h = (index / inputWidth) % inputHeight;
     const int w = index % inputWidth;
     const T* weight = weight_data + c * filterHeight * filterWidth;

@@ -100,7 +100,7 @@ void ConvolutionDepthwiseInputBackward(const int nthreads,
         if (((h_out_s % strideH) == 0) && ((w_out_s % strideW) == 0)) {
           const int h_out = h_out_s / strideH;
           const int w_out = w_out_s / strideW;
-          // TODO(zhaolong) : the 'if' affect the effectiveness, it needs to optimize
+          // TODO(zhaolong) : the 'if' affect the effectiveness, it needs to optimize
           if ((h_out >= 0) && (h_out < outputHeight)
               && (w_out >= 0) && (w_out < outputWidth)) {
             const int offset = ((n * outputChannels + c) * outputHeight + h_out)
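The h_out_s % strideH == 0 test above is what restricts the input-backward pass to output positions whose window actually covers the current input pixel. A small host-side sketch of that relation, with arbitrary example sizes rather than code from this commit:

#include <cstdio>

// For an input row h_in, the forward pass reads it at
// h_in = -paddingH + h_out * strideH + kh. Inverting that relation gives the
// candidate output rows the backward kernel must visit; only stride-aligned
// candidates contribute.
int main() {
  const int strideH = 2, paddingH = 1, filterHeight = 3;
  const int outputHeight = 4;
  const int h_in = 3;  // arbitrary input row for the example

  for (int kh = 0; kh < filterHeight; ++kh) {
    const int h_out_s = h_in + paddingH - kh;  // h_out * strideH, if valid
    if (h_out_s % strideH == 0) {              // same divisibility test as above
      const int h_out = h_out_s / strideH;
      if (h_out >= 0 && h_out < outputHeight) {
        std::printf("filter row %d: input row %d receives gradient from output row %d\n",
                    kh, h_in, h_out);
      }
    }
  }
  return 0;
}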
@@ -121,7 +121,7 @@ __global__
 void ConvolutionDepthwiseFilterBackward(const int num_i, const int nthreads,
     const T* const top_diff, const T* const inputData,
     const int num, const int outputChannels, const int outputHeight,
-    const int outputWidth, const int inputHeight, const int inputWidth,
+    const int outputWidth, const int inputChannels, const int inputHeight, const int inputWidth,
     const int filterHeight, const int filterWidth, const int strideH,
     const int strideW, const int paddingH, const int paddingW,
     T* const buffer_data) {

@@ -141,7 +141,7 @@ void ConvolutionDepthwiseFilterBackward(const int num_i, const int nthreads,
     const int n = num_i;
     const int top_offset = ((n * outputChannels + c) * outputHeight + h)
         * outputWidth + w;
-    const int bottom_offset = ((n * outputChannels + c) * inputHeight + h_in)
+    const int bottom_offset = ((n * inputChannels + c) * inputHeight + h_in)
         * inputWidth + w_in;
     buffer_data[index] = top_diff[top_offset] * inputData[bottom_offset];
   } else {
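Both offsets in the hunk above are ordinary NCHW flattenings: top_offset indexes the output gradient with the output extents, while bottom_offset indexes the input and so uses the input extents, which appears to be what the switch to inputChannels reflects. A small illustrative helper (hypothetical, not a Paddle utility) makes that explicit:

#include <cstdio>

// Hypothetical helper showing the NCHW flattening used by top_offset and
// bottom_offset above.
inline int nchwOffset(int n, int c, int h, int w,
                      int channels, int height, int width) {
  return ((n * channels + c) * height + h) * width + w;
}

int main() {
  // Example extents only.
  const int outputChannels = 8, outputHeight = 16, outputWidth = 16;
  const int inputChannels = 8, inputHeight = 32, inputWidth = 32;
  const int n = 1, c = 3, h_out = 5, w_out = 7, h_in = 10, w_in = 14;

  // Output-gradient element: flattened with the output extents.
  std::printf("top_offset    = %d\n",
              nchwOffset(n, c, h_out, w_out, outputChannels, outputHeight,
                         outputWidth));
  // Input element: flattened with the input extents.
  std::printf("bottom_offset = %d\n",
              nchwOffset(n, c, h_in, w_in, inputChannels, inputHeight,
                         inputWidth));
  return 0;
}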
@@ -159,6 +159,7 @@ public:
                 int outputChannels,
                 int outputHeight,
                 int outputWidth,
+                int inputChannels,
                 int inputHeight,
                 int inputWidth,
                 int filterHeight,

@@ -186,6 +187,7 @@ public:
             outputChannels,
             outputHeight,
             outputWidth,
+            inputChannels,
             inputHeight,
             inputWidth,
             filterHeight,

@@ -218,7 +220,7 @@ public:
                 int paddingW,
                 T* inputGrad){
-    int inputSize = batchSize * inputChannels * inputHeight * inputWidth;
+    int inputSize = batchSize * inputChannels * inputHeight * inputWidth;
     size_t blocks = (inputSize + 1024 - 1) / 1024;
     size_t blockX = 512;
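The hunk above sizes the launch at one thread per input element: ceil(inputSize / 1024) blocks of 1024 threads, which the code a few hunks further down folds into a (blockX = 512, blockY = ceil(blocks / 512)) grid. A standalone sketch of that arithmetic with example sizes of my own, not values from this commit:

#include <cstdio>

int main() {
  // Example tensor extents only.
  const int batchSize = 32, inputChannels = 64, inputHeight = 56,
            inputWidth = 56;
  const int inputSize = batchSize * inputChannels * inputHeight * inputWidth;

  // Same arithmetic as in the diff: ceil-divide into 1024-thread blocks,
  // then fold the block count into a (512, blockY) grid.
  const size_t blocks = (inputSize + 1024 - 1) / 1024;
  const size_t blockX = 512;
  const size_t blockY = (blocks + 512 - 1) / 512;

  // grid(blockX, blockY) with 1024 threads per block always covers
  // inputSize elements; the kernel's `if (index < nthreads)` guard drops
  // the overshoot.
  std::printf("inputSize=%d blocks=%zu grid=(%zu, %zu) threads launched=%zu\n",
              inputSize, blocks, blockX, blockY, blockX * blockY * 1024);
  return 0;
}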
@@ -237,6 +239,7 @@ public:
             outputChannels,
             outputHeight,
             outputWidth,
+            inputChannels,
             inputHeight,
             inputWidth,
             filterHeight,

@@ -277,11 +280,11 @@ public:
     size_t blockY = (blocks + 512 - 1) / 512;
     dim3 threads(1024, 1);
     dim3 grid(blockX, blockY);
-    BaseMatrix filterGradMatrix(inputChannels * filterHeight * filterWidth, 1, filterGrad, false, true);
+    BaseMatrix filterGradMatrix(inputChannels * filterHeight * filterWidth, 1, filterGrad, false, true);
     for (int i = 0; i < batchSize; i++) {
-      ConvolutionDepthwiseFilterBackward<T><<<grid, threads, 0, STREAM_DEFAULT>>>(
+      ConvolutionDepthwiseFilterBackward<T><<<grid, threads, 0, STREAM_DEFAULT>>>(
           i,
           colDataSize,
           outputGrad,

@@ -290,6 +293,7 @@ public:
             outputChannels,
             outputHeight,
             outputWidth,
+            inputChannels,
             inputHeight,
             inputWidth,
             filterHeight,

@@ -299,12 +303,12 @@ public:
           paddingH,
           paddingW,
-          colData);
-      int M = colDataSize / outputHeight / outputWidth;
-      int K = outputHeight * outputWidth;
+          colData);
+      int M = colDataSize / outputHeight / outputWidth;
+      int K = outputHeight * outputWidth;
       BaseMatrix colMatrix(M, K, colData, false, true);
-      filterGradMatrix.sumRows(colMatrix, (T)1.0, (T)1.0);
+      filterGradMatrix.sumRows(colMatrix, (T)1.0, (T)1.0);
     }
   }
 };
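In the last hunk, colData is treated as an M x K matrix (one row per filter weight, K = outputHeight * outputWidth partial products per row) and filterGradMatrix.sumRows accumulates each row into the filter gradient. A plain-C++ sketch of that reduction follows; the exact scale-argument semantics of BaseMatrix::sumRows are not shown in this diff, so the accumulate-with-scale-1.0 reading below is an assumption:

#include <cstdio>
#include <vector>

// Illustrative reduction only, not Paddle's BaseMatrix::sumRows: treat
// colData as an M x K row-major matrix and add each row sum into the
// corresponding filter-gradient entry (both scales assumed to be 1.0).
void sumRowsInto(const std::vector<float>& colData, int M, int K,
                 std::vector<float>& filterGrad) {
  for (int m = 0; m < M; ++m) {
    float rowSum = 0.0f;
    for (int k = 0; k < K; ++k) {
      rowSum += colData[m * K + k];
    }
    filterGrad[m] += rowSum;
  }
}

int main() {
  // Toy sizes: M filter weights, K output positions per weight.
  const int M = 4, K = 6;
  std::vector<float> colData(M * K, 0.5f);
  std::vector<float> filterGrad(M, 0.0f);
  sumRowsInto(colData, M, K, filterGrad);
  std::printf("filterGrad[0] = %g\n", filterGrad[0]);  // 6 * 0.5 = 3
  return 0;
}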