Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
064dc888
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 1 年 前同步成功
通知
695
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
064dc888
编写于
7月 10, 2017
作者:
X
xzl
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add the comments for .h file and code tiny modify
上级
36e7800a
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
180 addition
and
117 deletion
+180
-117
paddle/function/DepthwiseConvOp.cpp
paddle/function/DepthwiseConvOp.cpp
+32
-41
paddle/function/DepthwiseConvOp.h
paddle/function/DepthwiseConvOp.h
+76
-8
paddle/function/DepthwiseConvOpGpu.cu
paddle/function/DepthwiseConvOpGpu.cu
+67
-58
paddle/gserver/layers/DepthwiseConvLayer.cpp
paddle/gserver/layers/DepthwiseConvLayer.cpp
+3
-6
paddle/gserver/layers/DepthwiseConvLayer.h
paddle/gserver/layers/DepthwiseConvLayer.h
+2
-4
未找到文件。
paddle/function/DepthwiseConvOp.cpp
浏览文件 @
064dc888
...
...
@@ -13,16 +13,16 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "DepthwiseConvOp.h"
#include "ConvOp.h"
#include "GemmFunctor.h"
#include "paddle/math/MemoryHandle.h"
//
#include "paddle/math/MemoryHandle.h"
namespace
paddle
{
template
<
class
T
>
class
DepthwiseConvFunctor
<
DEVICE_TYPE_CPU
,
T
>
{
public:
void
operator
()(
int
outputSize
,
const
T
*
inputData
,
void
operator
()(
const
T
*
inputData
,
const
T
*
filterData
,
int
batchSize
,
int
outputChannels
,
...
...
@@ -44,13 +44,13 @@ public:
template
<
class
T
>
class
DepthwiseConvGradInputFunctor
<
DEVICE_TYPE_CPU
,
T
>
{
public:
void
operator
()(
int
inputSize
,
const
T
*
outputGrad
,
void
operator
()(
const
T
*
outputGrad
,
const
T
*
filterData
,
int
batchSize
,
int
outputChannels
,
int
outputHeight
,
int
outputWidth
,
int
inputChannels
,
int
inputHeight
,
int
inputWidth
,
int
filterHeight
,
...
...
@@ -65,14 +65,13 @@ public:
template
<
class
T
>
class
DepthwiseConvGradFilterFunctor
<
DEVICE_TYPE_CPU
,
T
>
{
public:
void
operator
()(
int
num_i
,
int
colDataSize
,
const
T
*
outputGrad
,
void
operator
()(
const
T
*
outputGrad
,
const
T
*
inputData
,
int
batchSize
,
int
outputChannels
,
int
outputHeight
,
int
outputWidth
,
int
inputChannels
,
int
inputHeight
,
int
inputWidth
,
int
filterHeight
,
...
...
@@ -87,7 +86,7 @@ public:
};
/*
* \brief Forward calculation of convolution.
* \brief Forward calculation of
depthwise
convolution.
*/
template
<
DeviceType
Device
>
class
DepthwiseConvFunction
:
public
ConvFunctionBase
{
...
...
@@ -126,11 +125,9 @@ public:
real
*
inputData
=
inputs
[
0
].
data
<
real
>
();
real
*
filterData
=
inputs
[
1
].
data
<
real
>
();
real
*
outputData
=
outputs
[
0
].
data
<
real
>
();
size_t
outputSize
=
batchSize
*
outputChannels
*
outputHeight
*
outputWidth
;
DepthwiseConvFunctor
<
Device
,
real
>
depthwiseConv
;
depthwiseConv
(
outputSize
,
inputData
,
depthwiseConv
(
inputData
,
filterData
,
batchSize
,
outputChannels
,
...
...
@@ -149,7 +146,7 @@ public:
};
/*
* \brief Backward input calculation of convolution.
* \brief Backward input calculation of
depthwise
convolution.
*/
template
<
DeviceType
Device
>
class
DepthwiseConvGradInputFunction
:
public
ConvFunctionBase
{
...
...
@@ -191,16 +188,14 @@ public:
real
*
filterData
=
inputs
[
1
].
data
<
real
>
();
real
*
inputGrad
=
outputs
[
0
].
data
<
real
>
();
size_t
inputSize
=
batchSize
*
inputChannels
*
inputHeight
*
inputWidth
;
DepthwiseConvGradInputFunctor
<
Device
,
real
>
depthwiseConvGradInput
;
depthwiseConvGradInput
(
inputSize
,
outputGrad
,
depthwiseConvGradInput
(
outputGrad
,
filterData
,
batchSize
,
outputChannels
,
outputHeight
,
outputWidth
,
inputChannels
,
inputHeight
,
inputWidth
,
filterHeight
,
...
...
@@ -214,7 +209,7 @@ public:
};
/*
* \brief Backward filter calculation of convolution.
* \brief Backward filter calculation of
depthwise
convolution.
*/
template
<
DeviceType
Device
>
class
DepthwiseConvGradFilterFunction
:
public
ConvFunctionBase
{
...
...
@@ -255,35 +250,31 @@ public:
real
*
multiplierData
=
inputs
[
2
].
data
<
real
>
();
real
*
filterGrad
=
outputs
[
0
].
data
<
real
>
();
size_
t
size
=
in
t
size
=
inputChannels
*
filterHeight
*
filterWidth
*
outputHeight
*
outputWidth
;
resizeBuffer
<
Device
>
(
size
);
real
*
colData
=
reinterpret_cast
<
real
*>
(
memory_
->
getBuf
());
DepthwiseConvGradFilterFunctor
<
Device
,
real
>
depthwiseConvGradFilter
;
for
(
size_t
i
=
0
;
i
<
batchSize
;
i
++
)
{
depthwiseConvGradFilter
(
i
,
size
,
outputGrad
,
inputData
,
batchSize
,
outputChannels
,
outputHeight
,
outputWidth
,
inputHeight
,
inputWidth
,
filterHeight
,
filterWidth
,
strideH
(),
strideW
(),
paddingH
(),
paddingW
(),
colData
,
multiplierData
,
filterGrad
);
}
depthwiseConvGradFilter
(
outputGrad
,
inputData
,
batchSize
,
outputChannels
,
outputHeight
,
outputWidth
,
inputChannels
,
inputHeight
,
inputWidth
,
filterHeight
,
filterWidth
,
strideH
(),
strideW
(),
paddingH
(),
paddingW
(),
colData
,
multiplierData
,
filterGrad
);
}
};
...
...
paddle/function/DepthwiseConvOp.h
浏览文件 @
064dc888
...
...
@@ -14,15 +14,36 @@ limitations under the License. */
#pragma once
#include "
ConvOp
.h"
#include "
TensorType
.h"
namespace
paddle
{
/**
* \brief Depthwise convolution forward. The outputData
* of depthwise convolution is the same as that of ExpandConvLayer
* when groups equals inputChannels in ExpandConvLayer.
*
* \param[in] inputData input data.
* \param[in] filterData the parameters of the depthwise conv layer.
* \param[in] batchSize batch size of input data.
* \param[in] outputChannels channels of outputData.
* \param[in] outputHeight height of outputData.
* \param[in] outputWidth width of outputData.
* \param[in] inputHeight height of inputData.
* \param[in] inputWidth width of inputData.
* \param[in] filterHeight height of filter.
* \param[in] filterWidth width of filter.
* \param[in] strideH stride size in height direction.
* \param[in] strideW stride size in width direction.
* \param[in] paddingH padding size in height direction.
* \param[in] paddingW padding size in width direction.
* \param[out] outputData outputData.
*
*/
template
<
DeviceType
Device
,
class
T
>
class
DepthwiseConvFunctor
{
public:
void
operator
()(
int
outputSize
,
const
T
*
inputData
,
void
operator
()(
const
T
*
inputData
,
const
T
*
filterData
,
int
batchSize
,
int
outputChannels
,
...
...
@@ -39,16 +60,38 @@ public:
T
*
outputData
);
};
/**
* \brief Functor to compute the depthwise convolution backprop w.r.t. input.
*
*
* \param[in] outputGrad the grad data of output.
* \param[in] filterData the parameters of the depthwise conv layer.
* \param[in] batchSize batch size of input data.
* \param[in] outputChannels channels of outputData.
* \param[in] outputHeight height of outputData.
* \param[in] outputWidth width of outputData.
* \param[in] inputChannels channels of input data.
* \param[in] inputHeight height of inputData.
* \param[in] inputWidth width of inputData.
* \param[in] filterHeight height of filter.
* \param[in] filterWidth width of filter.
* \param[in] strideH stride size in height direction.
* \param[in] strideW stride size in width direction.
* \param[in] paddingH padding size in height direction.
* \param[in] paddingW padding size in width direction.
* \param[out] inputGrad the grad data of input.
*
*/
template
<
DeviceType
Device
,
class
T
>
class
DepthwiseConvGradInputFunctor
{
public:
void
operator
()(
int
inputSize
,
const
T
*
outputGrad
,
void
operator
()(
const
T
*
outputGrad
,
const
T
*
filterData
,
int
batchSize
,
int
outputChannels
,
int
outputHeight
,
int
outputWidth
,
int
inputChannels
,
int
inputHeight
,
int
inputWidth
,
int
filterHeight
,
...
...
@@ -60,17 +103,42 @@ public:
T
*
inputGrad
);
};
/**
* \brief Functor to compute the depthwise convolution backprop w.r.t. filter.
*
* \param[in] outputGrad the grad data of output.
* \param[in] inputData inputData.
* \param[in] batchSize batch size of input data.
* \param[in] outputChannels channels of outputData.
* \param[in] outputHeight height of outputData.
* \param[in] outputWidth width of outputData.
* \param[in] inputChannels channels of input data.
* \param[in] inputHeight height of inputData.
* \param[in] inputWidth width of inputData.
* \param[in] filterHeight height of filter.
* \param[in] filterWidth width of filter.
* \param[in] strideH stride size in height direction.
* \param[in] strideW stride size in width direction.
* \param[in] paddingH padding size in height direction.
* \param[in] paddingW padding size in width direction.
* \param[in] colData Auxiliary data when calculating filterGrad.
*   size: inputChannels * filterHeight * filterWidth * outputHeight * outputWidth.
* \param[in] multiplierData Auxiliary data when calculating filterGrad.
*   size: outputHeight * outputWidth.
* \param[out] filterGrad the grad data of filter.
*
*/
template
<
DeviceType
Device
,
class
T
>
class
DepthwiseConvGradFilterFunctor
{
public:
void
operator
()(
int
num_i
,
int
colDataSize
,
const
T
*
outputGrad
,
void
operator
()(
const
T
*
outputGrad
,
const
T
*
inputData
,
int
batchSize
,
int
outputChannels
,
int
outputHeight
,
int
outputWidth
,
int
inputChannels
,
int
inputHeight
,
int
inputWidth
,
int
filterHeight
,
...
...
paddle/function/DepthwiseConvOpGpu.cu
浏览文件 @
064dc888
...
...
@@ -12,12 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "ConvOp.h"
#include "DepthwiseConvOp.h"
#include "GemmFunctor.h"
#include "paddle/math/MemoryHandle.h"
namespace
paddle
{
// CUDA kernel to compute the depthwise convolution forward pass
template
<
class
T
>
__global__
void
ConvolutionDepthwiseForward
(
const
int
nthreads
,
...
...
@@ -48,7 +47,7 @@ void ConvolutionDepthwiseForward(const int nthreads,
for
(
int
kw
=
0
;
kw
<
filterWidth
;
++
kw
)
{
const
int
h_in
=
-
paddingH
+
h
*
strideH
+
kh
;
const
int
w_in
=
-
paddingW
+
w
*
strideW
+
kw
;
const
int
offset
=
((
n
*
outputChannels
+
c
)
*
inputHeight
+
h_in
)
const
int
offset
=
((
n
*
outputChannels
+
c
)
*
inputHeight
+
h_in
)
*
inputWidth
+
w_in
;
value
+=
(
*
weight
)
*
inputData
[
offset
];
++
weight
;
...
...
@@ -73,6 +72,7 @@ void ConvolutionDepthwiseForward(const int nthreads,
}
}
// CUDA kernel to compute the depthwise convolution backprop w.r.t input.
template
<
class
T
>
__global__
void
ConvolutionDepthwiseInputBackward
(
const
int
nthreads
,
...
...
@@ -113,6 +113,7 @@ void ConvolutionDepthwiseInputBackward(const int nthreads,
}
}
// CUDA kernel to compute the depthwise convolution backprop w.r.t filter.
template
<
class
T
>
__global__
void
ConvolutionDepthwiseFilterBackward
(
const
int
num_i
,
const
int
nthreads
,
...
...
@@ -150,15 +151,14 @@ void ConvolutionDepthwiseFilterBackward(const int num_i, const int nthreads,
template
<
class
T
>
class
DepthwiseConvFunctor
<
DEVICE_TYPE_GPU
,
T
>
{
public:
void
operator
()(
int
outputSize
,
const
T
*
inputData
,
void
operator
()(
const
T
*
inputData
,
const
T
*
filterData
,
int
batchSize
,
int
outputChannels
,
int
outputHeight
,
int
outputWidth
,
int
inputHeight
,
int
inputWidth
,
int
inputHeight
,
int
inputWidth
,
int
filterHeight
,
int
filterWidth
,
int
strideH
,
...
...
@@ -167,12 +167,14 @@ public:
int
paddingW
,
T
*
outputData
){
int
outputSize
=
batchSize
*
outputChannels
*
outputHeight
*
outputWidth
;
size_t
blocks
=
(
outputSize
+
1024
-
1
)
/
1024
;
size_t
blockX
=
512
;
size_t
blockY
=
(
blocks
+
512
-
1
)
/
512
;
dim3
threads
(
1024
,
1
);
dim3
grid
(
blockX
,
blockY
);
ConvolutionDepthwiseForward
<
T
>
<<<
grid
,
threads
,
0
,
STREAM_DEFAULT
>>>
(
outputSize
,
...
...
@@ -182,8 +184,8 @@ public:
outputChannels
,
outputHeight
,
outputWidth
,
inputHeight
,
inputWidth
,
inputHeight
,
inputWidth
,
filterHeight
,
filterWidth
,
strideH
,
...
...
@@ -197,13 +199,13 @@ public:
template
<
class
T
>
class
DepthwiseConvGradInputFunctor
<
DEVICE_TYPE_GPU
,
T
>
{
public:
void
operator
()(
int
inputSize
,
const
T
*
outputGrad
,
void
operator
()(
const
T
*
outputGrad
,
const
T
*
filterData
,
int
batchSize
,
int
outputChannels
,
int
outputHeight
,
int
outputWidth
,
int
inputChannels
,
int
inputHeight
,
int
inputWidth
,
int
filterHeight
,
...
...
@@ -212,7 +214,9 @@ public:
int
strideW
,
int
paddingH
,
int
paddingW
,
T
*
inputGrad
){
T
*
inputGrad
){
int
inputSize
=
batchSize
*
inputChannels
*
inputHeight
*
inputWidth
;
size_t
blocks
=
(
inputSize
+
1024
-
1
)
/
1024
;
size_t
blockX
=
512
;
...
...
@@ -220,6 +224,7 @@ public:
dim3
threads
(
1024
,
1
);
dim3
grid
(
blockX
,
blockY
);
ConvolutionDepthwiseInputBackward
<
T
>
// NOLINT_NEXT_LINE(whitespace/operators)
<<<
grid
,
threads
,
0
,
STREAM_DEFAULT
>>>
(
...
...
@@ -245,14 +250,13 @@ public:
template
<
class
T
>
class
DepthwiseConvGradFilterFunctor
<
DEVICE_TYPE_GPU
,
T
>
{
public:
void
operator
()(
int
num_i
,
int
colDataSize
,
const
T
*
outputGrad
,
void
operator
()(
const
T
*
outputGrad
,
const
T
*
inputData
,
int
batchSize
,
int
outputChannels
,
int
outputHeight
,
int
outputWidth
,
int
inputChannels
,
int
inputHeight
,
int
inputWidth
,
int
filterHeight
,
...
...
@@ -265,60 +269,65 @@ public:
T
*
multiplierData
,
T
*
filterGrad
){
int
colDataSize
=
inputChannels
*
filterHeight
*
filterWidth
*
outputHeight
*
outputWidth
;
size_t
blocks
=
(
colDataSize
+
1024
-
1
)
/
1024
;
size_t
blockX
=
512
;
size_t
blockY
=
(
blocks
+
512
-
1
)
/
512
;
dim3
threads
(
1024
,
1
);
dim3
grid
(
blockX
,
blockY
);
ConvolutionDepthwiseFilterBackward
<
T
>
<<<
grid
,
threads
,
0
,
STREAM_DEFAULT
>>>
(
num_i
,
colDataSize
,
outputGrad
,
inputData
,
batchSize
,
outputChannels
,
outputHeight
,
outputWidth
,
inputHeight
,
inputWidth
,
filterHeight
,
filterWidth
,
strideH
,
strideW
,
paddingH
,
paddingW
,
colData
);
GemmFunctor
<
DEVICE_TYPE_GPU
,
real
>
gemm
;
int
M
=
colDataSize
/
outputHeight
/
outputWidth
;
int
N
=
1
;
int
K
=
outputHeight
*
outputWidth
;
gemm
(
CblasNoTrans
,
CblasNoTrans
,
M
,
N
,
K
,
(
T
)
1.0
,
colData
,
K
,
multiplierData
,
N
,
(
T
)
1.0
,
filterGrad
,
N
);
for
(
int
i
=
0
;
i
<
batchSize
;
i
++
)
{
ConvolutionDepthwiseFilterBackward
<
T
>
<<<
grid
,
threads
,
0
,
STREAM_DEFAULT
>>>
(
i
,
colDataSize
,
outputGrad
,
inputData
,
batchSize
,
outputChannels
,
outputHeight
,
outputWidth
,
inputHeight
,
inputWidth
,
filterHeight
,
filterWidth
,
strideH
,
strideW
,
paddingH
,
paddingW
,
colData
);
GemmFunctor
<
DEVICE_TYPE_GPU
,
real
>
gemm
;
int
M
=
colDataSize
/
outputHeight
/
outputWidth
;
int
N
=
1
;
int
K
=
outputHeight
*
outputWidth
;
gemm
(
CblasNoTrans
,
CblasNoTrans
,
M
,
N
,
K
,
(
T
)
1.0
,
colData
,
K
,
multiplierData
,
N
,
(
T
)
1.0
,
filterGrad
,
N
);
}
//gemv
}
};
#ifdef PADDLE_TYPE_DOUBLE
using
real
=
double
;
template
class
DepthwiseConvGradInputFunctor
<
DEVICE_TYPE_GPU
,
double
>;
template
class
DepthwiseConvFunctor
<
DEVICE_TYPE_GPU
,
double
>;
template
class
DepthwiseConvGradFilterFunctor
<
DEVICE_TYPE_GPU
,
double
>;
#else
using
real
=
float
;
template
class
DepthwiseConvGradInputFunctor
<
DEVICE_TYPE_GPU
,
float
>;
template
class
DepthwiseConvFunctor
<
DEVICE_TYPE_GPU
,
float
>;
template
class
DepthwiseConvGradFilterFunctor
<
DEVICE_TYPE_GPU
,
float
>;
#endif
template
class
DepthwiseConvGradInputFunctor
<
DEVICE_TYPE_GPU
,
real
>;
template
class
DepthwiseConvFunctor
<
DEVICE_TYPE_GPU
,
real
>;
template
class
DepthwiseConvGradFilterFunctor
<
DEVICE_TYPE_GPU
,
real
>;
}
// namespace paddle
paddle/gserver/layers/DepthwiseConvLayer.cpp
浏览文件 @
064dc888
...
...
@@ -15,14 +15,9 @@ limitations under the License. */
#include "DepthwiseConvLayer.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
#include <iostream>
namespace
paddle
{
/*
* The calculation of the exconvt(convolution transpose (deconv) operation)
* is a swap of forward and backward of the calculation of exconv.
* */
REGISTER_LAYER
(
depthwise_conv
,
DepthwiseConvLayer
);
bool
DepthwiseConvLayer
::
init
(
const
LayerMap
&
layerMap
,
...
...
@@ -76,11 +71,12 @@ bool DepthwiseConvLayer::init(const LayerMap &layerMap,
#define BACKWARD_FILTER(i, inputs, outputs) \
backward_[2 * i + 1]->calc(inputs, outputs)
// compute the depthwise convolution forward pass
void
DepthwiseConvLayer
::
forward
(
PassType
passType
)
{
Layer
::
forward
(
passType
);
size_t
batchSize
=
inputLayers_
[
0
]
->
getOutputValue
()
->
getHeight
();
// std::cout << "outputSize" << getOutputSize() <<std::endl;
resetOutput
(
batchSize
,
getOutputSize
());
// Calculate the shape of the input, output, and filter.
...
...
@@ -127,6 +123,7 @@ void DepthwiseConvLayer::forward(PassType passType) {
forwardActivation
();
}
// compute the depthwise convolution backprop.
void
DepthwiseConvLayer
::
backward
(
const
UpdateCallback
&
callback
)
{
backwardActivation
();
...
...
paddle/gserver/layers/DepthwiseConvLayer.h
浏览文件 @
064dc888
...
...
@@ -22,10 +22,8 @@ namespace paddle {
/**
* @brief A subclass of convolution layer.
* This layer expands input and use matrix multiplication to
* calculate convolution operation.
*
* The config file api is img_conv_layer.
* This layer does the depthwise convolution calculation in mobilenet.
* The config file api is img_depthwise_conv_layer.
*/
class
DepthwiseConvLayer
:
public
ExpandConvBaseLayer
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录