提交 064dc888 编写于 作者: X xzl

add the comments for .h file and code tiny modify

上级 36e7800a
......@@ -13,16 +13,16 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "DepthwiseConvOp.h"
#include "ConvOp.h"
#include "GemmFunctor.h"
#include "paddle/math/MemoryHandle.h"
//#include "paddle/math/MemoryHandle.h"
namespace paddle {
template <class T>
class DepthwiseConvFunctor<DEVICE_TYPE_CPU, T> {
public:
void operator()(int outputSize,
const T* inputData,
void operator()(const T* inputData,
const T* filterData,
int batchSize,
int outputChannels,
......@@ -44,13 +44,13 @@ public:
template <class T>
class DepthwiseConvGradInputFunctor<DEVICE_TYPE_CPU, T> {
public:
void operator()(int inputSize,
const T* outputGrad,
void operator()(const T* outputGrad,
const T* filterData,
int batchSize,
int outputChannels,
int outputHeight,
int outputWidth,
int inputChannels,
int inputHeight,
int inputWidth,
int filterHeight,
......@@ -65,14 +65,13 @@ public:
template <class T>
class DepthwiseConvGradFilterFunctor<DEVICE_TYPE_CPU, T> {
public:
void operator()(int num_i,
int colDataSize,
const T* outputGrad,
void operator()(const T* outputGrad,
const T* inputData,
int batchSize,
int outputChannels,
int outputHeight,
int outputWidth,
int inputChannels,
int inputHeight,
int inputWidth,
int filterHeight,
......@@ -87,7 +86,7 @@ public:
};
/*
* \brief Forward calculation of convolution.
* \brief Forward calculation of depthwise convolution.
*/
template <DeviceType Device>
class DepthwiseConvFunction : public ConvFunctionBase {
......@@ -126,11 +125,9 @@ public:
real* inputData = inputs[0].data<real>();
real* filterData = inputs[1].data<real>();
real* outputData = outputs[0].data<real>();
size_t outputSize = batchSize * outputChannels * outputHeight * outputWidth;
DepthwiseConvFunctor<Device, real> depthwiseConv;
depthwiseConv(outputSize,
inputData,
depthwiseConv(inputData,
filterData,
batchSize,
outputChannels,
......@@ -149,7 +146,7 @@ public:
};
/*
* \brief Backward input calculation of convolution.
* \brief Backward input calculation of depthwise convolution.
*/
template <DeviceType Device>
class DepthwiseConvGradInputFunction : public ConvFunctionBase {
......@@ -191,16 +188,14 @@ public:
real* filterData = inputs[1].data<real>();
real* inputGrad = outputs[0].data<real>();
size_t inputSize = batchSize * inputChannels * inputHeight * inputWidth;
DepthwiseConvGradInputFunctor<Device, real> depthwiseConvGradInput;
depthwiseConvGradInput(inputSize,
outputGrad,
depthwiseConvGradInput(outputGrad,
filterData,
batchSize,
outputChannels,
outputHeight,
outputWidth,
inputChannels,
inputHeight,
inputWidth,
filterHeight,
......@@ -214,7 +209,7 @@ public:
};
/*
* \brief Backward filter calculation of convolution.
* \brief Backward filter calculation of depthwise convolution.
*/
template <DeviceType Device>
class DepthwiseConvGradFilterFunction : public ConvFunctionBase {
......@@ -255,35 +250,31 @@ public:
real* multiplierData = inputs[2].data<real>();
real* filterGrad = outputs[0].data<real>();
size_t size =
int size =
inputChannels * filterHeight * filterWidth * outputHeight * outputWidth;
resizeBuffer<Device>(size);
real* colData = reinterpret_cast<real*>(memory_->getBuf());
DepthwiseConvGradFilterFunctor<Device, real> depthwiseConvGradFilter;
for (size_t i = 0; i < batchSize; i++) {
depthwiseConvGradFilter(i,
size,
outputGrad,
inputData,
batchSize,
outputChannels,
outputHeight,
outputWidth,
inputHeight,
inputWidth,
filterHeight,
filterWidth,
strideH(),
strideW(),
paddingH(),
paddingW(),
colData,
multiplierData,
filterGrad);
}
depthwiseConvGradFilter(outputGrad,
inputData,
batchSize,
outputChannels,
outputHeight,
outputWidth,
inputChannels,
inputHeight,
inputWidth,
filterHeight,
filterWidth,
strideH(),
strideW(),
paddingH(),
paddingW(),
colData,
multiplierData,
filterGrad);
}
};
......
......@@ -14,15 +14,36 @@ limitations under the License. */
#pragma once
#include "ConvOp.h"
#include "TensorType.h"
namespace paddle {
/**
* \brief Depthwise convolution forward. The outputData
* of depthwise convolution is the same as that of ExpandConvLayer
* when groups equals inputChannels in ExpandConvLayer.
*
* \param[in] inputData input data.
* \param[in] filterData the parameters of the depthwise conv layer.
* \param[in] batchSize batch size of input data.
* \param[in] outputChannels channels of outputData.
* \param[in] outputHeight height of outputData.
* \param[in] outputWidth width of outputData.
* \param[in] inputHeight height of inputData.
* \param[in] inputWidth width of inputData.
* \param[in] filterHeight height of filter.
* \param[in] filterWidth width of filter.
* \param[in] strideH stride size in height direction.
* \param[in] strideW stride size in width direction.
* \param[in] paddingH padding size in height direction.
* \param[in] paddingW padding size in width direction.
* \param[out] outputData outputData.
*
*/
template <DeviceType Device, class T>
class DepthwiseConvFunctor {
public:
void operator()(int outputSize,
const T* inputData,
void operator()(const T* inputData,
const T* filterData,
int batchSize,
int outputChannels,
......@@ -39,16 +60,38 @@ public:
T* outputData);
};
/**
* \brief Functor to compute the depthwise convolution backprop w.r.t. input.
*
*
* \param[in] outputGrad the grad data of output.
* \param[in] filterData the parameters of the depthwise conv layer.
* \param[in] batchSize batch size of input data.
* \param[in] outputChannels channels of outputData.
* \param[in] outputHeight height of outputData.
* \param[in] outputWidth width of outputData.
* \param[in] inputChannels channels of input data.
* \param[in] inputHeight height of inputData.
* \param[in] inputWidth width of inputData.
* \param[in] filterHeight height of filter.
* \param[in] filterWidth width of filter.
* \param[in] strideH stride size in height direction.
* \param[in] strideW stride size in width direction.
* \param[in] paddingH padding size in height direction.
* \param[in] paddingW padding size in width direction.
* \param[out] inputGrad the grad data of input.
*
*/
template <DeviceType Device, class T>
class DepthwiseConvGradInputFunctor {
public:
void operator()(int inputSize,
const T* outputGrad,
void operator()(const T* outputGrad,
const T* filterData,
int batchSize,
int outputChannels,
int outputHeight,
int outputWidth,
int inputChannels,
int inputHeight,
int inputWidth,
int filterHeight,
......@@ -60,17 +103,42 @@ public:
T* inputGrad);
};
/**
* \brief Functor to compute the depthwise convolution backprop w.r.t. filter.
*
* \param[in] outputGrad the grad data of output.
* \param[in] inputData inputData.
* \param[in] batchSize batch size of input data.
* \param[in] outputChannels channels of outputData.
* \param[in] outputHeight height of outputData.
* \param[in] outputWidth width of outputData.
* \param[in] inputChannels channels of input data.
* \param[in] inputHeight height of inputData.
* \param[in] inputWidth width of inputData.
* \param[in] filterHeight height of filter.
* \param[in] filterWidth width of filter.
* \param[in] strideH stride size in height direction.
* \param[in] strideW stride size in width direction.
* \param[in] paddingH padding size in height direction.
* \param[in] paddingW padding size in width direction.
* \param[in] colData Auxiliary data when calculating filterGrad.
* size: inputChannels * filterHeight * filterWidth *
* outputHeight * outputWidth.
* \param[in] multiplierData Auxiliary data when calculating filterGrad.
* size: outputHeight * outputWidth.
* \param[out] filterGrad the grad data of filter.
*
*/
template <DeviceType Device, class T>
class DepthwiseConvGradFilterFunctor {
public:
void operator()(int num_i,
int colDataSize,
const T* outputGrad,
void operator()(const T* outputGrad,
const T* inputData,
int batchSize,
int outputChannels,
int outputHeight,
int outputWidth,
int inputChannels,
int inputHeight,
int inputWidth,
int filterHeight,
......
......@@ -12,12 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "ConvOp.h"
#include "DepthwiseConvOp.h"
#include "GemmFunctor.h"
#include "paddle/math/MemoryHandle.h"
namespace paddle {
// CUDA kernel to compute the depthwise convolution forward pass
template <class T>
__global__
void ConvolutionDepthwiseForward(const int nthreads,
......@@ -48,7 +47,7 @@ void ConvolutionDepthwiseForward(const int nthreads,
for (int kw = 0; kw < filterWidth; ++kw) {
const int h_in = -paddingH + h * strideH + kh;
const int w_in = -paddingW + w * strideW + kw;
const int offset = ((n * outputChannels + c) * inputHeight + h_in)
const int offset = ((n * outputChannels + c) * inputHeight + h_in)
* inputWidth + w_in;
value += (*weight) * inputData[offset];
++weight;
......@@ -73,6 +72,7 @@ void ConvolutionDepthwiseForward(const int nthreads,
}
}
// CUDA kernel to compute the depthwise convolution backprop w.r.t input.
template <class T>
__global__
void ConvolutionDepthwiseInputBackward(const int nthreads,
......@@ -113,6 +113,7 @@ void ConvolutionDepthwiseInputBackward(const int nthreads,
}
}
// CUDA kernel to compute the depthwise convolution backprop w.r.t filter.
template <class T>
__global__
void ConvolutionDepthwiseFilterBackward(const int num_i, const int nthreads,
......@@ -150,15 +151,14 @@ void ConvolutionDepthwiseFilterBackward(const int num_i, const int nthreads,
template <class T>
class DepthwiseConvFunctor<DEVICE_TYPE_GPU, T>{
public:
void operator()(int outputSize,
const T* inputData,
void operator()(const T* inputData,
const T* filterData,
int batchSize,
int outputChannels,
int outputHeight,
int outputWidth,
int inputHeight,
int inputWidth,
int inputHeight,
int inputWidth,
int filterHeight,
int filterWidth,
int strideH,
......@@ -167,12 +167,14 @@ public:
int paddingW,
T* outputData){
int outputSize = batchSize * outputChannels * outputHeight * outputWidth;
size_t blocks = (outputSize + 1024 -1) / 1024;
size_t blockX = 512;
size_t blockY = (blocks+512-1)/512;
dim3 threads(1024, 1);
dim3 grid(blockX, blockY);
ConvolutionDepthwiseForward<T>
<<< grid, threads, 0, STREAM_DEFAULT >>>(
outputSize,
......@@ -182,8 +184,8 @@ public:
outputChannels,
outputHeight,
outputWidth,
inputHeight,
inputWidth,
inputHeight,
inputWidth,
filterHeight,
filterWidth,
strideH,
......@@ -197,13 +199,13 @@ public:
template <class T>
class DepthwiseConvGradInputFunctor<DEVICE_TYPE_GPU, T>{
public:
void operator()(int inputSize,
const T* outputGrad,
void operator()(const T* outputGrad,
const T* filterData,
int batchSize,
int outputChannels,
int outputHeight,
int outputWidth,
int inputChannels,
int inputHeight,
int inputWidth,
int filterHeight,
......@@ -212,7 +214,9 @@ public:
int strideW,
int paddingH,
int paddingW,
T* inputGrad){
T* inputGrad){
int inputSize = batchSize * inputChannels * inputHeight * inputWidth;
size_t blocks = (inputSize + 1024 -1) / 1024;
size_t blockX = 512;
......@@ -220,6 +224,7 @@ public:
dim3 threads(1024, 1);
dim3 grid(blockX, blockY);
ConvolutionDepthwiseInputBackward<T>
// NOLINT_NEXT_LINE(whitespace/operators)
<<< grid, threads, 0, STREAM_DEFAULT >>>(
......@@ -245,14 +250,13 @@ public:
template <class T>
class DepthwiseConvGradFilterFunctor<DEVICE_TYPE_GPU, T> {
public:
void operator()(int num_i,
int colDataSize,
const T* outputGrad,
void operator()(const T* outputGrad,
const T* inputData,
int batchSize,
int outputChannels,
int outputHeight,
int outputWidth,
int inputChannels,
int inputHeight,
int inputWidth,
int filterHeight,
......@@ -265,60 +269,65 @@ public:
T* multiplierData,
T* filterGrad){
int colDataSize = inputChannels * filterHeight * filterWidth * outputHeight * outputWidth;
size_t blocks = (colDataSize + 1024 -1) / 1024;
size_t blockX = 512;
size_t blockY = (blocks+512-1)/512;
dim3 threads(1024, 1);
dim3 grid(blockX, blockY);
ConvolutionDepthwiseFilterBackward<T>
<<< grid, threads, 0, STREAM_DEFAULT >>>(
num_i,
colDataSize,
outputGrad,
inputData,
batchSize,
outputChannels,
outputHeight,
outputWidth,
inputHeight,
inputWidth,
filterHeight,
filterWidth,
strideH,
strideW,
paddingH,
paddingW,
colData
);
GemmFunctor<DEVICE_TYPE_GPU, real> gemm;
int M = colDataSize / outputHeight / outputWidth;
int N = 1;
int K = outputHeight * outputWidth;
gemm(CblasNoTrans,
CblasNoTrans,
M,
N,
K,
(T)1.0,
colData,
K,
multiplierData,
N,
(T)1.0,
filterGrad,
N);
for(int i = 0; i < batchSize; i++) {
ConvolutionDepthwiseFilterBackward<T>
<<< grid, threads, 0, STREAM_DEFAULT >>>(
i,
colDataSize,
outputGrad,
inputData,
batchSize,
outputChannels,
outputHeight,
outputWidth,
inputHeight,
inputWidth,
filterHeight,
filterWidth,
strideH,
strideW,
paddingH,
paddingW,
colData
);
GemmFunctor<DEVICE_TYPE_GPU, real> gemm;
int M = colDataSize / outputHeight / outputWidth;
int N = 1;
int K = outputHeight * outputWidth;
gemm(CblasNoTrans,
CblasNoTrans,
M,
N,
K,
(T)1.0,
colData,
K,
multiplierData,
N,
(T)1.0,
filterGrad,
N);
}
//gemv
}
};
#ifdef PADDLE_TYPE_DOUBLE
using real=double;
template class DepthwiseConvGradInputFunctor<DEVICE_TYPE_GPU, double>;
template class DepthwiseConvFunctor<DEVICE_TYPE_GPU, double>;
template class DepthwiseConvGradFilterFunctor<DEVICE_TYPE_GPU, double>;
#else
using real=float;
template class DepthwiseConvGradInputFunctor<DEVICE_TYPE_GPU, float>;
template class DepthwiseConvFunctor<DEVICE_TYPE_GPU, float>;
template class DepthwiseConvGradFilterFunctor<DEVICE_TYPE_GPU, float>;
#endif
template class DepthwiseConvGradInputFunctor<DEVICE_TYPE_GPU, real>;
template class DepthwiseConvFunctor<DEVICE_TYPE_GPU, real>;
template class DepthwiseConvGradFilterFunctor<DEVICE_TYPE_GPU, real>;
} // namespace paddle
......@@ -15,14 +15,9 @@ limitations under the License. */
#include "DepthwiseConvLayer.h"
#include "paddle/utils/Logging.h"
#include "paddle/utils/Stat.h"
#include <iostream>
namespace paddle {
/*
* The calculation of the exconvt(convolution transpose (deconv) operation)
* is a swap of forward and backward of the calculation of exconv.
* */
REGISTER_LAYER(depthwise_conv, DepthwiseConvLayer);
bool DepthwiseConvLayer::init(const LayerMap &layerMap,
......@@ -76,11 +71,12 @@ bool DepthwiseConvLayer::init(const LayerMap &layerMap,
#define BACKWARD_FILTER(i, inputs, outputs) \
backward_[2 * i + 1]->calc(inputs, outputs)
// compute the depthwise convolution forward pass
void DepthwiseConvLayer::forward(PassType passType) {
Layer::forward(passType);
size_t batchSize = inputLayers_[0]->getOutputValue()->getHeight();
// std::cout << "outputSize" << getOutputSize() <<std::endl;
resetOutput(batchSize, getOutputSize());
// Calculate the shape of the input, output, and filter.
......@@ -127,6 +123,7 @@ void DepthwiseConvLayer::forward(PassType passType) {
forwardActivation();
}
// compute the depthwise convolution backprop.
void DepthwiseConvLayer::backward(const UpdateCallback &callback) {
backwardActivation();
......
......@@ -22,10 +22,8 @@ namespace paddle {
/**
* @brief A subclass of convolution layer.
* This layer expands input and use matrix multiplication to
* calculate convolution operation.
*
* The config file api is img_conv_layer.
* This layer performs the depthwise convolution calculation used in MobileNet.
* The config file api is img_depthwise_conv_layer.
*/
class DepthwiseConvLayer : public ExpandConvBaseLayer {
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册