Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
Crayon鑫
Paddle
提交
064dc888
P
Paddle
项目概览
Crayon鑫
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
064dc888
编写于
7月 10, 2017
作者:
X
xzl
浏览文件
操作
浏览文件
下载
电子邮件补丁
差异文件
add the comments for .h file and code tiny modify
上级
36e7800a
变更
5
隐藏空白更改
内联
并排
Showing
5 changed file
with
180 addition
and
117 deletion
+180
-117
paddle/function/DepthwiseConvOp.cpp
paddle/function/DepthwiseConvOp.cpp
+32
-41
paddle/function/DepthwiseConvOp.h
paddle/function/DepthwiseConvOp.h
+76
-8
paddle/function/DepthwiseConvOpGpu.cu
paddle/function/DepthwiseConvOpGpu.cu
+67
-58
paddle/gserver/layers/DepthwiseConvLayer.cpp
paddle/gserver/layers/DepthwiseConvLayer.cpp
+3
-6
paddle/gserver/layers/DepthwiseConvLayer.h
paddle/gserver/layers/DepthwiseConvLayer.h
+2
-4
未找到文件。
paddle/function/DepthwiseConvOp.cpp
浏览文件 @
064dc888
...
...
@@ -13,16 +13,16 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "DepthwiseConvOp.h"
#include "ConvOp.h"
#include "GemmFunctor.h"
#include "paddle/math/MemoryHandle.h"
//
#include "paddle/math/MemoryHandle.h"
namespace
paddle
{
template
<
class
T
>
class
DepthwiseConvFunctor
<
DEVICE_TYPE_CPU
,
T
>
{
public:
void
operator
()(
int
outputSize
,
const
T
*
inputData
,
void
operator
()(
const
T
*
inputData
,
const
T
*
filterData
,
int
batchSize
,
int
outputChannels
,
...
...
@@ -44,13 +44,13 @@ public:
template
<
class
T
>
class
DepthwiseConvGradInputFunctor
<
DEVICE_TYPE_CPU
,
T
>
{
public:
void
operator
()(
int
inputSize
,
const
T
*
outputGrad
,
void
operator
()(
const
T
*
outputGrad
,
const
T
*
filterData
,
int
batchSize
,
int
outputChannels
,
int
outputHeight
,
int
outputWidth
,
int
inputChannels
,
int
inputHeight
,
int
inputWidth
,
int
filterHeight
,
...
...
@@ -65,14 +65,13 @@ public:
template
<
class
T
>
class
DepthwiseConvGradFilterFunctor
<
DEVICE_TYPE_CPU
,
T
>
{
public:
void
operator
()(
int
num_i
,
int
colDataSize
,
const
T
*
outputGrad
,
void
operator
()(
const
T
*
outputGrad
,
const
T
*
inputData
,
int
batchSize
,
int
outputChannels
,
int
outputHeight
,
int
outputWidth
,
int
inputChannels
,
int
inputHeight
,
int
inputWidth
,
int
filterHeight
,
...
...
@@ -87,7 +86,7 @@ public:
};
/*
* \brief Forward calculation of convolution.
* \brief Forward calculation of
depthwise
convolution.
*/
template
<
DeviceType
Device
>
class
DepthwiseConvFunction
:
public
ConvFunctionBase
{
...
...
@@ -126,11 +125,9 @@ public:
real
*
inputData
=
inputs
[
0
].
data
<
real
>
();
real
*
filterData
=
inputs
[
1
].
data
<
real
>
();
real
*
outputData
=
outputs
[
0
].
data
<
real
>
();
size_t
outputSize
=
batchSize
*
outputChannels
*
outputHeight
*
outputWidth
;
DepthwiseConvFunctor
<
Device
,
real
>
depthwiseConv
;
depthwiseConv
(
outputSize
,
inputData
,
depthwiseConv
(
inputData
,
filterData
,
batchSize
,
outputChannels
,
...
...
@@ -149,7 +146,7 @@ public:
};
/*
* \brief Backward input calculation of convolution.
* \brief Backward input calculation of
depthwise
convolution.
*/
template
<
DeviceType
Device
>
class
DepthwiseConvGradInputFunction
:
public
ConvFunctionBase
{
...
...
@@ -191,16 +188,14 @@ public:
real
*
filterData
=
inputs
[
1
].
data
<
real
>
();
real
*
inputGrad
=
outputs
[
0
].
data
<
real
>
();
size_t
inputSize
=
batchSize
*
inputChannels
*
inputHeight
*
inputWidth
;
DepthwiseConvGradInputFunctor
<
Device
,
real
>
depthwiseConvGradInput
;
depthwiseConvGradInput
(
inputSize
,
outputGrad
,
depthwiseConvGradInput
(
outputGrad
,
filterData
,
batchSize
,
outputChannels
,
outputHeight
,
outputWidth
,
inputChannels
,
inputHeight
,
inputWidth
,
filterHeight
,
...
...
@@ -214,7 +209,7 @@ public:
};
/*
* \brief Backward filter calculation of convolution.
* \brief Backward filter calculation of
depthwise
convolution.
*/
template
<
DeviceType
Device
>
class
DepthwiseConvGradFilterFunction
:
public
ConvFunctionBase
{
...
...
@@ -255,35 +250,31 @@ public:
real
*
multiplierData
=
inputs
[
2
].
data
<
real
>
();
real
*
filterGrad
=
outputs
[
0
].
data
<
real
>
();
size_
t
size
=
in
t
size
=
inputChannels
*
filterHeight
*
filterWidth
*
outputHeight
*
outputWidth
;
resizeBuffer
<
Device
>
(
size
);
real
*
colData
=
reinterpret_cast
<
real
*>
(
memory_
->
getBuf
());
DepthwiseConvGradFilterFunctor
<
Device
,
real
>
depthwiseConvGradFilter
;
for
(
size_t
i
=
0
;
i
<
batchSize
;
i
++
)
{
depthwiseConvGradFilter
(
i
,
size
,
outputGrad
,
inputData
,
batchSize
,
outputChannels
,
outputHeight
,
outputWidth
,
inputHeight
,
inputWidth
,
filterHeight
,
filterWidth
,
strideH
(),
strideW
(),
paddingH
(),
paddingW
(),
colData
,
multiplierData
,
filterGrad
);
}
depthwiseConvGradFilter
(
outputGrad
,
inputData
,
batchSize
,
outputChannels
,
outputHeight
,
outputWidth
,
inputChannels
,
inputHeight
,
inputWidth
,
filterHeight
,
filterWidth
,
strideH
(),
strideW
(),
paddingH
(),
paddingW
(),
colData
,
multiplierData
,
filterGrad
);
}
};
...
...
paddle/function/DepthwiseConvOp.h
浏览文件 @
064dc888
...
...
@@ -14,15 +14,36 @@ limitations under the License. */
#pragma once
#include "
ConvOp
.h"
#include "
TensorType
.h"
namespace
paddle
{
/**
* \brief Depthwise convolution forward. The outputData of the
* depthwise convolution is the same as that of ExpandConvLayer
* when groups equals inputChannels in ExpandConvLayer.
*
* \param[in] inputData input data.
* \param[in] filterData the parameters of the depthwise conv layer.
* \param[in] batchSize batch size of input data.
* \param[in] outputChannels channels of outputData.
* \param[in] outputHeight height of outputData.
* \param[in] outputWidth width of outputData.
* \param[in] inputHeight height of inputData.
* \param[in] inputWidth width of inputData.
* \param[in] filterHeight height of filter.
* \param[in] filterWidth width of filter.
* \param[in] strideH stride size in height direction.
* \param[in] strideW stride size in width direction.
* \param[in] paddingH padding size in height direction.
* \param[in] paddingW padding size in width direction.
* \param[out] outputData outputData.
*
*/
template
<
DeviceType
Device
,
class
T
>
class
DepthwiseConvFunctor
{
public:
void
operator
()(
int
outputSize
,
const
T
*
inputData
,
void
operator
()(
const
T
*
inputData
,
const
T
*
filterData
,
int
batchSize
,
int
outputChannels
,
...
...
@@ -39,16 +60,38 @@ public:
T
*
outputData
);
};
/**
* \brief Functor to compute the depthwise convolution backprop w.r.t. input.
*
*
* \param[in] outputGrad the grad data of output.
* \param[in] filterData the parameters of the depthwise conv layer.
* \param[in] batchSize batch size of input data.
* \param[in] outputChannels channels of outputData.
* \param[in] outputHeight height of outputData.
* \param[in] outputWidth width of outputData.
* \param[in] inputChannels channels of input data.
* \param[in] inputHeight height of inputData.
* \param[in] inputWidth width of inputData.
* \param[in] filterHeight height of filter.
* \param[in] filterWidth width of filter.
* \param[in] strideH stride size in height direction.
* \param[in] strideW stride size in width direction.
* \param[in] paddingH padding size in height direction.
* \param[in] paddingW padding size in width direction.
* \param[out] inputGrad the grad data of input.
*
*/
template
<
DeviceType
Device
,
class
T
>
class
DepthwiseConvGradInputFunctor
{
public:
void
operator
()(
int
inputSize
,
const
T
*
outputGrad
,
void
operator
()(
const
T
*
outputGrad
,
const
T
*
filterData
,
int
batchSize
,
int
outputChannels
,
int
outputHeight
,
int
outputWidth
,
int
inputChannels
,
int
inputHeight
,
int
inputWidth
,
int
filterHeight
,
...
...
@@ -60,17 +103,42 @@ public:
T
*
inputGrad
);
};
/**
* \brief Functor to compute the depthwise convolution backprop w.r.t. filter.
*
* \param[in] outputGrad the grad data of output.
* \param[in] inputData inputData.
* \param[in] batchSize batch size of input data.
* \param[in] outputChannels channels of outputData.
* \param[in] outputHeight height of outputData.
* \param[in] outputWidth width of outputData.
* \param[in] inputChannels channels of input data.
* \param[in] inputHeight height of inputData.
* \param[in] inputWidth width of inputData.
* \param[in] filterHeight height of filter.
* \param[in] filterWidth width of filter.
* \param[in] strideH stride size in height direction.
* \param[in] strideW stride size in width direction.
* \param[in] paddingH padding size in height direction.
* \param[in] paddingW padding size in width direction.
* \param[in] colData Auxiliary data when calculating filterGrad.
*            Size: inputChannels * filterHeight * filterWidth *
*            outputHeight * outputWidth.
* \param[in] multiplierData Auxiliary data when calculating filterGrad.
*            Size: outputHeight * outputWidth.
* \param[out] filterGrad the grad data of filter.
*
*/
template
<
DeviceType
Device
,
class
T
>
class
DepthwiseConvGradFilterFunctor
{
public:
void
operator
()(
int
num_i
,
int
colDataSize
,
const
T
*
outputGrad
,
void
operator
()(
const
T
*
outputGrad
,
const
T
*
inputData
,
int
batchSize
,
int
outputChannels
,
int
outputHeight
,
int
outputWidth
,
int
inputChannels
,
int
inputHeight
,
int
inputWidth
,
int
filterHeight
,
...
...
paddle/function/DepthwiseConvOpGpu.cu
浏览文件 @
064dc888
...
...
@@ -12,12 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "ConvOp.h"
#include "DepthwiseConvOp.h"
#include "GemmFunctor.h"
#include "paddle/math/MemoryHandle.h"
namespace
paddle
{
// CUDA kernel to compute the depthwise convolution forward pass
template
<
class
T
>
__global__
void
ConvolutionDepthwiseForward
(
const
int
nthreads
,
...
...
@@ -48,7 +47,7 @@ void ConvolutionDepthwiseForward(const int nthreads,
for
(
int
kw
=
0
;
kw
<
filterWidth
;
++
kw
)
{
const
int
h_in
=
-
paddingH
+
h
*
strideH
+
kh
;
const
int
w_in
=
-
paddingW
+
w
*
strideW
+
kw
;
const
int
offset
=
((
n
*
outputChannels
+
c
)
*
inputHeight
+
h_in
)
const
int
offset
=
((
n
*
outputChannels
+
c
)
*
inputHeight
+
h_in
)
*
inputWidth
+
w_in
;
value
+=
(
*
weight
)
*
inputData
[
offset
];
++
weight
;
...
...
@@ -73,6 +72,7 @@ void ConvolutionDepthwiseForward(const int nthreads,
}
}
// CUDA kernel to compute the depthwise convolution backprop w.r.t input.
template
<
class
T
>
__global__
void
ConvolutionDepthwiseInputBackward
(
const
int
nthreads
,
...
...
@@ -113,6 +113,7 @@ void ConvolutionDepthwiseInputBackward(const int nthreads,
}
}
// CUDA kernel to compute the depthwise convolution backprop w.r.t filter.
template
<
class
T
>
__global__
void
ConvolutionDepthwiseFilterBackward
(
const
int
num_i
,
const
int
nthreads
,
...
...
@@ -150,15 +151,14 @@ void ConvolutionDepthwiseFilterBackward(const int num_i, const int nthreads,
template
<
class
T
>
class
DepthwiseConvFunctor
<
DEVICE_TYPE_GPU
,
T
>
{
public:
void
operator
()(
int
outputSize
,
const
T
*
inputData
,
void
operator
()(
const
T
*
inputData
,
const
T
*
filterData
,
int
batchSize
,
int
outputChannels
,
int
outputHeight
,
int
outputWidth
,
int
inputHeight
,
int
inputWidth
,
int
inputHeight
,
int
inputWidth
,
int
filterHeight
,
int
filterWidth
,
int
strideH
,
...
...
@@ -167,12 +167,14 @@ public:
int
paddingW
,
T
*
outputData
){
int
outputSize
=
batchSize
*
outputChannels
*
outputHeight
*
outputWidth
;
size_t
blocks
=
(
outputSize
+
1024
-
1
)
/
1024
;
size_t
blockX
=
512
;
size_t
blockY
=
(
blocks
+
512
-
1
)
/
512
;
dim3
threads
(
1024
,
1
);
dim3
grid
(
blockX
,
blockY
);
ConvolutionDepthwiseForward
<
T
>
<<<
grid
,
threads
,
0
,
STREAM_DEFAULT
>>>
(
outputSize
,
...
...
@@ -182,8 +184,8 @@ public:
outputChannels
,
outputHeight
,
outputWidth
,
inputHeight
,
inputWidth
,
inputHeight
,
inputWidth
,
filterHeight
,
filterWidth
,
strideH
,
...
...
@@ -197,13 +199,13 @@ public:
template
<
class
T
>
class
DepthwiseConvGradInputFunctor
<
DEVICE_TYPE_GPU
,
T
>
{
public:
void
operator
()(
int
inputSize
,
const
T
*
outputGrad
,
void
operator
()(
const
T
*
outputGrad
,
const
T
*
filterData
,
int
batchSize
,
int
outputChannels
,
int
outputHeight
,
int
outputWidth
,
int
inputChannels
,
int
inputHeight
,
int
inputWidth
,
int
filterHeight
,
...
...
@@ -212,7 +214,9 @@ public:
int
strideW
,
int
paddingH
,
int
paddingW
,
T
*
inputGrad
){
T
*
inputGrad
){
int
inputSize
=
batchSize
*
inputChannels
*
inputHeight
*
inputWidth
;
size_t
blocks
=
(
inputSize
+
1024
-
1
)
/
1024
;
size_t
blockX
=
512
;
...
...
@@ -220,6 +224,7 @@ public:
dim3
threads
(
1024
,
1
);
dim3
grid
(
blockX
,
blockY
);
ConvolutionDepthwiseInputBackward
<
T
>
// NOLINT_NEXT_LINE(whitespace/operators)
<<<
grid
,
threads
,
0
,
STREAM_DEFAULT
>>>
(
...
...
@@ -245,14 +250,13 @@ public:
template
<
class
T
>
class
DepthwiseConvGradFilterFunctor
<
DEVICE_TYPE_GPU
,
T
>
{
public:
void
operator
()(
int
num_i
,
int
colDataSize
,
const
T
*
outputGrad
,
void
operator
()(
const
T
*
outputGrad
,
const
T
*
inputData
,
int
batchSize
,
int
outputChannels
,
int
outputHeight
,
int
outputWidth
,
int
inputChannels
,
int
inputHeight
,
int
inputWidth
,
int
filterHeight
,
...
...
@@ -265,60 +269,65 @@ public:
T
*
multiplierData
,
T
*
filterGrad
){
int
colDataSize
=
inputChannels
*
filterHeight
*
filterWidth
*
outputHeight
*
outputWidth
;
size_t
blocks
=
(
colDataSize
+
1024
-
1
)
/
1024
;
size_t
blockX
=
512
;
size_t
blockY
=
(
blocks
+
512
-
1
)
/
512
;
dim3
threads
(
1024
,
1
);
dim3
grid
(
blockX
,
blockY
);
ConvolutionDepthwiseFilterBackward
<
T
>
<<<
grid
,
threads
,
0
,
STREAM_DEFAULT
>>>
(
num_i
,
colDataSize
,
outputGrad
,
inputData
,
batchSize
,
outputChannels
,
outputHeight
,
outputWidth
,
inputHeight
,
inputWidth
,
filterHeight
,
filterWidth
,
strideH
,
strideW
,
paddingH
,
paddingW
,
colData
);
GemmFunctor
<
DEVICE_TYPE_GPU
,
real
>
gemm
;
int
M
=
colDataSize
/
outputHeight
/
outputWidth
;
int
N
=
1
;
int
K
=
outputHeight
*
outputWidth
;
gemm
(
CblasNoTrans
,
CblasNoTrans
,
M
,
N
,
K
,
(
T
)
1.0
,
colData
,
K
,
multiplierData
,
N
,
(
T
)
1.0
,
filterGrad
,
N
);
for
(
int
i
=
0
;
i
<
batchSize
;
i
++
)
{
ConvolutionDepthwiseFilterBackward
<
T
>
<<<
grid
,
threads
,
0
,
STREAM_DEFAULT
>>>
(
i
,
colDataSize
,
outputGrad
,
inputData
,
batchSize
,
outputChannels
,
outputHeight
,
outputWidth
,
inputHeight
,
inputWidth
,
filterHeight
,
filterWidth
,
strideH
,
strideW
,
paddingH
,
paddingW
,
colData
);
GemmFunctor
<
DEVICE_TYPE_GPU
,
real
>
gemm
;
int
M
=
colDataSize
/
outputHeight
/
outputWidth
;
int
N
=
1
;
int
K
=
outputHeight
*
outputWidth
;
gemm
(
CblasNoTrans
,
CblasNoTrans
,
M
,
N
,
K
,
(
T
)
1.0
,
colData
,
K
,
multiplierData
,
N
,
(
T
)
1.0
,
filterGrad
,
N
);
}
//gemv
}
};
#ifdef PADDLE_TYPE_DOUBLE
using
real
=
double
;
template
class
DepthwiseConvGradInputFunctor
<
DEVICE_TYPE_GPU
,
double
>;
template
class
DepthwiseConvFunctor
<
DEVICE_TYPE_GPU
,
double
>;
template
class
DepthwiseConvGradFilterFunctor
<
DEVICE_TYPE_GPU
,
double
>;
#else
using
real
=
float
;
template
class
DepthwiseConvGradInputFunctor
<
DEVICE_TYPE_GPU
,
float
>;
template
class
DepthwiseConvFunctor
<
DEVICE_TYPE_GPU
,
float
>;
template
class
DepthwiseConvGradFilterFunctor
<
DEVICE_TYPE_GPU
,
float
>;
#endif
template
class
DepthwiseConvGradInputFunctor
<
DEVICE_TYPE_GPU
,
real
>;
template
class
DepthwiseConvFunctor
<
DEVICE_TYPE_GPU
,
real
>;
template
class
DepthwiseConvGradFilterFunctor
<
DEVICE_TYPE_GPU
,
real
>;
}
// namespace paddle
paddle/gserver/layers/DepthwiseConvLayer.cpp
浏览文件 @
064dc888
...
...
@@ -15,14 +15,9 @@ limitations under the License. */
#include "DepthwiseConvLayer.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
#include <iostream>
namespace
paddle
{
/*
* The calculation of the exconvt(convolution transpose (deconv) operation)
* is a swap of forward and backward of the calculation of exconv.
* */
REGISTER_LAYER
(
depthwise_conv
,
DepthwiseConvLayer
);
bool
DepthwiseConvLayer
::
init
(
const
LayerMap
&
layerMap
,
...
...
@@ -76,11 +71,12 @@ bool DepthwiseConvLayer::init(const LayerMap &layerMap,
#define BACKWARD_FILTER(i, inputs, outputs) \
backward_[2 * i + 1]->calc(inputs, outputs)
// compute the depthwise convolution forward pass
void
DepthwiseConvLayer
::
forward
(
PassType
passType
)
{
Layer
::
forward
(
passType
);
size_t
batchSize
=
inputLayers_
[
0
]
->
getOutputValue
()
->
getHeight
();
// std::cout << "outputSize" << getOutputSize() <<std::endl;
resetOutput
(
batchSize
,
getOutputSize
());
// Calculate the shape of the input, output, and filter.
...
...
@@ -127,6 +123,7 @@ void DepthwiseConvLayer::forward(PassType passType) {
forwardActivation
();
}
// compute the depthwise convolution backprop.
void
DepthwiseConvLayer
::
backward
(
const
UpdateCallback
&
callback
)
{
backwardActivation
();
...
...
paddle/gserver/layers/DepthwiseConvLayer.h
浏览文件 @
064dc888
...
...
@@ -22,10 +22,8 @@ namespace paddle {
/**
* @brief A subclass of convolution layer.
* This layer expands input and use matrix multiplication to
* calculate convolution operation.
*
* The config file api is img_conv_layer.
* This layer does the depthwise convolution calculation used in MobileNet.
* The config file api is img_depthwise_conv_layer.
*/
class
DepthwiseConvLayer
:
public
ExpandConvBaseLayer
{
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录