diff --git a/paddle/function/DepthwiseConvOp.cpp b/paddle/function/DepthwiseConvOp.cpp
index d4272c72f2404739a02d3458f00a771aa8d3d2c0..8dcd32b06764ef833e1856b68ed4e9de90262047 100644
--- a/paddle/function/DepthwiseConvOp.cpp
+++ b/paddle/function/DepthwiseConvOp.cpp
@@ -13,16 +13,16 @@ See the License for the specific language governing permissions and
 limitations under the License. */
 
 #include "DepthwiseConvOp.h"
+#include "ConvOp.h"
 #include "GemmFunctor.h"
-#include "paddle/math/MemoryHandle.h"
+//#include "paddle/math/MemoryHandle.h"
 
 namespace paddle {
 
 template <class T>
 class DepthwiseConvFunctor<DEVICE_TYPE_CPU, T> {
 public:
-  void operator()(int outputSize,
-                  const T* inputData,
+  void operator()(const T* inputData,
                   const T* filterData,
                   int batchSize,
                   int outputChannels,
@@ -44,13 +44,13 @@ public:
 template <class T>
 class DepthwiseConvGradInputFunctor<DEVICE_TYPE_CPU, T> {
 public:
-  void operator()(int inputSize,
-                  const T* outputGrad,
+  void operator()(const T* outputGrad,
                   const T* filterData,
                   int batchSize,
                   int outputChannels,
                   int outputHeight,
                   int outputWidth,
+                  int inputChannels,
                   int inputHeight,
                   int inputWidth,
                   int filterHeight,
@@ -65,14 +65,13 @@ public:
 template <class T>
 class DepthwiseConvGradFilterFunctor<DEVICE_TYPE_CPU, T> {
 public:
-  void operator()(int num_i,
-                  int colDataSize,
-                  const T* outputGrad,
+  void operator()(const T* outputGrad,
                   const T* inputData,
                   int batchSize,
                   int outputChannels,
                   int outputHeight,
                   int outputWidth,
+                  int inputChannels,
                   int inputHeight,
                   int inputWidth,
                   int filterHeight,
@@ -87,7 +86,7 @@ public:
 };
 
 /*
- * \brief Forward calculation of convolution.
+ * \brief Forward calculation of depthwise convolution.
  */
 template <DeviceType Device>
 class DepthwiseConvFunction : public ConvFunctionBase {
@@ -126,11 +125,9 @@ public:
     real* inputData = inputs[0].data<real>();
     real* filterData = inputs[1].data<real>();
     real* outputData = outputs[0].data<real>();
-    size_t outputSize = batchSize * outputChannels * outputHeight * outputWidth;
 
     DepthwiseConvFunctor<Device, real> depthwiseConv;
-    depthwiseConv(outputSize,
-                  inputData,
+    depthwiseConv(inputData,
                   filterData,
                   batchSize,
                   outputChannels,
@@ -149,7 +146,7 @@
 };
 
 /*
- * \brief Backward input calculation of convolution.
+ * \brief Backward input calculation of depthwise convolution.
  */
 template <DeviceType Device>
 class DepthwiseConvGradInputFunction : public ConvFunctionBase {
@@ -191,16 +188,14 @@ public:
     real* filterData = inputs[1].data<real>();
     real* inputGrad = outputs[0].data<real>();
 
-    size_t inputSize = batchSize * inputChannels * inputHeight * inputWidth;
-
     DepthwiseConvGradInputFunctor<Device, real> depthwiseConvGradInput;
-    depthwiseConvGradInput(inputSize,
-                           outputGrad,
+    depthwiseConvGradInput(outputGrad,
                            filterData,
                            batchSize,
                            outputChannels,
                            outputHeight,
                            outputWidth,
+                           inputChannels,
                            inputHeight,
                            inputWidth,
                            filterHeight,
@@ -214,7 +209,7 @@ public:
 };
 
 /*
- * \brief Backward filter calculation of convolution.
+ * \brief Backward filter calculation of depthwise convolution.
  */
 template <DeviceType Device>
 class DepthwiseConvGradFilterFunction : public ConvFunctionBase {
@@ -255,35 +250,31 @@ public:
     real* multiplierData = inputs[2].data<real>();
     real* filterGrad = outputs[0].data<real>();
 
-    size_t size =
+    int size =
         inputChannels * filterHeight * filterWidth * outputHeight * outputWidth;
-
     resizeBuffer<Device>(size);
     real* colData = reinterpret_cast<real*>(memory_->getBuf());
 
     DepthwiseConvGradFilterFunctor<Device, real> depthwiseConvGradFilter;
 
-    for (size_t i = 0; i < batchSize; i++) {
-      depthwiseConvGradFilter(i,
-                              size,
-                              outputGrad,
-                              inputData,
-                              batchSize,
-                              outputChannels,
-                              outputHeight,
-                              outputWidth,
-                              inputHeight,
-                              inputWidth,
-                              filterHeight,
-                              filterWidth,
-                              strideH(),
-                              strideW(),
-                              paddingH(),
-                              paddingW(),
-                              colData,
-                              multiplierData,
-                              filterGrad);
-    }
+    depthwiseConvGradFilter(outputGrad,
+                            inputData,
+                            batchSize,
+                            outputChannels,
+                            outputHeight,
+                            outputWidth,
+                            inputChannels,
+                            inputHeight,
+                            inputWidth,
+                            filterHeight,
+                            filterWidth,
+                            strideH(),
+                            strideW(),
+                            paddingH(),
+                            paddingW(),
+                            colData,
+                            multiplierData,
+                            filterGrad);
   }
 };
diff --git a/paddle/function/DepthwiseConvOp.h b/paddle/function/DepthwiseConvOp.h
index 44290682def458aa51789b3ab12e8c5ac2c6a802..da180b29b064702b78b7ba8c3d63f0114dec6d82 100644
--- a/paddle/function/DepthwiseConvOp.h
+++ b/paddle/function/DepthwiseConvOp.h
@@ -14,15 +14,36 @@ limitations under the License. */
 
 #pragma once
 
-#include "ConvOp.h"
+#include "TensorType.h"
 
 namespace paddle {
 
+/**
+ *\brief   Depthwise convolution forward. The outputData
+ *         of depthwise convolution is the same as that of ExpandConvLayer
+ *         when groups equals inputChannels in ExpandConvLayer.
+ *
+ * \param[in]   inputData         input data.
+ * \param[in]   filterData        the parameters of the depthwise conv layer.
+ * \param[in]   batchSize         batch size of input data.
+ * \param[in]   outputChannels    channels of outputData.
+ * \param[in]   outputHeight      height of outputData.
+ * \param[in]   outputWidth       width of outputData.
+ * \param[in]   inputHeight       height of inputData.
+ * \param[in]   inputWidth        width of inputData.
+ * \param[in]   filterHeight      height of filter.
+ * \param[in]   filterWidth       width of filter.
+ * \param[in]   strideH           stride size in height direction.
+ * \param[in]   strideW           stride size in width direction.
+ * \param[in]   paddingH          padding size in height direction.
+ * \param[in]   paddingW          padding size in width direction.
+ * \param[out]  outputData        output data.
+ *
+ */
 template <DeviceType Device, class T>
 class DepthwiseConvFunctor {
 public:
-  void operator()(int outputSize,
-                  const T* inputData,
+  void operator()(const T* inputData,
                   const T* filterData,
                   int batchSize,
                   int outputChannels,
@@ -39,16 +60,38 @@ public:
                   T* outputData);
 };
 
+/**
+ *\brief  Functor to compute the depthwise convolution backprop w.r.t input.
+ *
+ *
+ * \param[in]   outputGradData    the grad data of output.
+ * \param[in]   filterData        the parameters of the depthwise conv layer.
+ * \param[in]   batchSize         batch size of input data.
+ * \param[in]   outputChannels    channels of outputData.
+ * \param[in]   outputHeight      height of outputData.
+ * \param[in]   outputWidth       width of outputData.
+ * \param[in]   inputChannels     channels of input data.
+ * \param[in]   inputHeight       height of inputData.
+ * \param[in]   inputWidth        width of inputData.
+ * \param[in]   filterHeight      height of filter.
+ * \param[in]   filterWidth       width of filter.
+ * \param[in]   strideH           stride size in height direction.
+ * \param[in]   strideW           stride size in width direction.
+ * \param[in]   paddingH          padding size in height direction.
+ * \param[in]   paddingW          padding size in width direction.
+ * \param[out]  inputGrad         the grad data of input.
+ *
+ */
 template <DeviceType Device, class T>
 class DepthwiseConvGradInputFunctor {
 public:
-  void operator()(int inputSize,
-                  const T* outputGrad,
+  void operator()(const T* outputGrad,
                   const T* filterData,
                   int batchSize,
                   int outputChannels,
                   int outputHeight,
                   int outputWidth,
+                  int inputChannels,
                   int inputHeight,
                   int inputWidth,
                   int filterHeight,
@@ -60,17 +103,42 @@ public:
                   T* inputGrad);
 };
 
+/**
+ *\brief  Functor to compute the depthwise convolution backprop w.r.t filter.
+ *
+ * \param[in]   outputGradData    the grad data of output.
+ * \param[in]   inputData         input data.
+ * \param[in]   batchSize         batch size of input data.
+ * \param[in]   outputChannels    channels of outputData.
+ * \param[in]   outputHeight      height of outputData.
+ * \param[in]   outputWidth       width of outputData.
+ * \param[in]   inputChannels     channels of input data.
+ * \param[in]   inputHeight       height of inputData.
+ * \param[in]   inputWidth        width of inputData.
+ * \param[in]   filterHeight      height of filter.
+ * \param[in]   filterWidth       width of filter.
+ * \param[in]   strideH           stride size in height direction.
+ * \param[in]   strideW           stride size in width direction.
+ * \param[in]   paddingH          padding size in height direction.
+ * \param[in]   paddingW          padding size in width direction.
+ * \param[in]   colData           auxiliary data used when calculating filterGrad;
+ *                                size: inputChannels * filterHeight * filterWidth
+ *                                      * outputHeight * outputWidth.
+ * \param[in]   multiplierData    auxiliary data used when calculating filterGrad;
+ *                                size: outputHeight * outputWidth.
+ * \param[out]  filterGrad        the grad data of filter.
+ *
+ */
 template <DeviceType Device, class T>
 class DepthwiseConvGradFilterFunctor {
 public:
-  void operator()(int num_i,
-                  int colDataSize,
-                  const T* outputGrad,
+  void operator()(const T* outputGrad,
                   const T* inputData,
                   int batchSize,
                   int outputChannels,
                   int outputHeight,
                   int outputWidth,
+                  int inputChannels,
                   int inputHeight,
                   int inputWidth,
                   int filterHeight,
diff --git a/paddle/function/DepthwiseConvOpGpu.cu b/paddle/function/DepthwiseConvOpGpu.cu
index 08fe9221ac036d9eea324e6ce050d36ee0452d6e..df9be80b3fa6644048985fc33b581bd96eebf198 100644
--- a/paddle/function/DepthwiseConvOpGpu.cu
+++ b/paddle/function/DepthwiseConvOpGpu.cu
@@ -12,12 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */
 
-#include "ConvOp.h"
 #include "DepthwiseConvOp.h"
 #include "GemmFunctor.h"
-#include "paddle/math/MemoryHandle.h"
 
 namespace paddle {
+// CUDA kernel to compute the depthwise convolution forward pass
 template <typename T>
 __global__
 void ConvolutionDepthwiseForward(const int nthreads,
@@ -48,7 +47,7 @@ void ConvolutionDepthwiseForward(const int nthreads,
       for (int kw = 0; kw < filterWidth; ++kw) {
         const int h_in = -paddingH + h * strideH + kh;
         const int w_in = -paddingW + w * strideW + kw;
-        const int offset = ((n * outputChannels + c) * inputHeight + h_in) 
+        const int offset = ((n * outputChannels + c) * inputHeight + h_in)
             * inputWidth + w_in;
         value += (*weight) * inputData[offset];
         ++weight;
@@ -73,6 +72,7 @@ void ConvolutionDepthwiseForward(const int nthreads,
   }
 }
 
+// CUDA kernel to compute the depthwise convolution backprop w.r.t input.
 template <typename T>
 __global__
 void ConvolutionDepthwiseInputBackward(const int nthreads,
@@ -113,6 +113,7 @@ void ConvolutionDepthwiseInputBackward(const int nthreads,
   }
 }
 
+// CUDA kernel to compute the depthwise convolution backprop w.r.t filter.
 template <typename T>
 __global__
 void ConvolutionDepthwiseFilterBackward(const int num_i, const int nthreads,
@@ -150,15 +151,14 @@ void ConvolutionDepthwiseFilterBackward(const int num_i, const int nthreads,
 template <class T>
 class DepthwiseConvFunctor<DEVICE_TYPE_GPU, T>{
 public:
-  void operator()(int outputSize,
-                  const T* inputData,
+  void operator()(const T* inputData,
                   const T* filterData,
                   int batchSize,
                   int outputChannels,
                   int outputHeight,
                   int outputWidth,
-                  int inputHeight,
-                  int inputWidth,
+                  int inputHeight,
+                  int inputWidth,
                   int filterHeight,
                   int filterWidth,
                   int strideH,
@@ -167,12 +167,14 @@ public:
                   int paddingW,
                   T* outputData){
 
+    int outputSize = batchSize * outputChannels * outputHeight * outputWidth;
+
     size_t blocks = (outputSize + 1024 -1) / 1024;
     size_t blockX = 512;
     size_t blockY = (blocks+512-1)/512;
     dim3 threads(1024, 1);
     dim3 grid(blockX, blockY);
-    
+
     ConvolutionDepthwiseForward<T>
         <<< grid, threads, 0, STREAM_DEFAULT >>>(
             outputSize,
@@ -182,8 +184,8 @@ public:
             outputChannels,
             outputHeight,
             outputWidth,
-            inputHeight,
-            inputWidth,
+            inputHeight,
+            inputWidth,
             filterHeight,
             filterWidth,
             strideH,
@@ -197,13 +199,13 @@ public:
 template <class T>
 class DepthwiseConvGradInputFunctor<DEVICE_TYPE_GPU, T>{
 public:
-  void operator()(int inputSize,
-                  const T* outputGrad,
+  void operator()(const T* outputGrad,
                   const T* filterData,
                   int batchSize,
                   int outputChannels,
                   int outputHeight,
                   int outputWidth,
+                  int inputChannels,
                   int inputHeight,
                   int inputWidth,
                   int filterHeight,
@@ -212,7 +214,9 @@ public:
                   int strideW,
                   int paddingH,
                   int paddingW,
-                  T* inputGrad){
+                  T* inputGrad){
+
+    int inputSize = batchSize * inputChannels * inputHeight * inputWidth;
 
     size_t blocks = (inputSize + 1024 -1) / 1024;
     size_t blockX = 512;
@@ -220,6 +224,7 @@ public:
     dim3 threads(1024, 1);
     dim3 grid(blockX, blockY);
 
+
     ConvolutionDepthwiseInputBackward<T>
         // NOLINT_NEXT_LINE(whitespace/operators)
         <<< grid, threads, 0, STREAM_DEFAULT >>>(
@@ -245,14 +250,13 @@ public:
 template <class T>
 class DepthwiseConvGradFilterFunctor<DEVICE_TYPE_GPU, T> {
 public:
-  void operator()(int num_i,
-                  int colDataSize,
-                  const T* outputGrad,
+  void operator()(const T* outputGrad,
                   const T* inputData,
                   int batchSize,
                   int outputChannels,
                   int outputHeight,
                   int outputWidth,
+                  int inputChannels,
                   int inputHeight,
                   int inputWidth,
                   int filterHeight,
@@ -265,60 +269,65 @@ public:
                   T* multiplierData,
                   T* filterGrad){
 
+    int colDataSize = inputChannels * filterHeight * filterWidth * outputHeight * outputWidth;
+
     size_t blocks = (colDataSize + 1024 -1) / 1024;
     size_t blockX = 512;
     size_t blockY = (blocks+512-1)/512;
     dim3 threads(1024, 1);
     dim3 grid(blockX, blockY);
 
-    ConvolutionDepthwiseFilterBackward<T>
-        <<< grid, threads, 0, STREAM_DEFAULT >>>(
-            num_i,
-            colDataSize,
-            outputGrad,
-            inputData,
-            batchSize,
-            outputChannels,
-            outputHeight,
-            outputWidth,
-            inputHeight,
-            inputWidth,
-            filterHeight,
-            filterWidth,
-            strideH,
-            strideW,
-            paddingH,
-            paddingW,
-            colData
-        );
-    GemmFunctor<DEVICE_TYPE_GPU, T> gemm;
-    int M = colDataSize / outputHeight / outputWidth;
-    int N = 1;
-    int K = outputHeight * outputWidth;
-    gemm(CblasNoTrans,
-         CblasNoTrans,
-         M,
-         N,
-         K,
-         (T)1.0,
-         colData,
-         K,
-         multiplierData,
-         N,
-         (T)1.0,
-         filterGrad,
-         N);
+    for(int i = 0; i < batchSize; i++) {
+      ConvolutionDepthwiseFilterBackward<T>
+          <<< grid, threads, 0, STREAM_DEFAULT >>>(
+              i,
+              colDataSize,
+              outputGrad,
+              inputData,
+              batchSize,
+              outputChannels,
+              outputHeight,
+              outputWidth,
+              inputHeight,
+              inputWidth,
+              filterHeight,
+              filterWidth,
+              strideH,
+              strideW,
+              paddingH,
+              paddingW,
+              colData
+          );
+      GemmFunctor<DEVICE_TYPE_GPU, T> gemm;
+      int M = colDataSize / outputHeight / outputWidth;
+      int N = 1;
+      int K = outputHeight * outputWidth;
+      gemm(CblasNoTrans,
+           CblasNoTrans,
+           M,
+           N,
+           K,
+           (T)1.0,
+           colData,
+           K,
+           multiplierData,
+           N,
+           (T)1.0,
+           filterGrad,
+           N);
+    }  //gemv
   }
 };
 
 #ifdef PADDLE_TYPE_DOUBLE
-using real=double;
+template class DepthwiseConvGradInputFunctor<DEVICE_TYPE_GPU, double>;
+template class DepthwiseConvFunctor<DEVICE_TYPE_GPU, double>;
+template class DepthwiseConvGradFilterFunctor<DEVICE_TYPE_GPU, double>;
 #else
-using real=float;
+template class DepthwiseConvGradInputFunctor<DEVICE_TYPE_GPU, float>;
+template class DepthwiseConvFunctor<DEVICE_TYPE_GPU, float>;
+template class DepthwiseConvGradFilterFunctor<DEVICE_TYPE_GPU, float>;
 #endif
-template class DepthwiseConvGradInputFunctor<DEVICE_TYPE_GPU, real>;
-template class DepthwiseConvFunctor<DEVICE_TYPE_GPU, real>;
-template class DepthwiseConvGradFilterFunctor<DEVICE_TYPE_GPU, real>;
 
 }  // namespace paddle
diff --git a/paddle/gserver/layers/DepthwiseConvLayer.cpp b/paddle/gserver/layers/DepthwiseConvLayer.cpp
index f07100d94978959d36327ecd6c54fb3f672b8fa1..8da3a52c2434048eaa7da636bc14ec9705b42592 100644
--- a/paddle/gserver/layers/DepthwiseConvLayer.cpp
+++ b/paddle/gserver/layers/DepthwiseConvLayer.cpp
@@ -15,14 +15,9 @@ limitations under the License. */
 #include "DepthwiseConvLayer.h"
 #include "paddle/utils/Logging.h"
 #include "paddle/utils/Stat.h"
-#include <iostream>
 
 namespace paddle {
 
-/*
- * The calculation of the exconvt(convolution transpose (deconv) operation)
- * is a swap of forward and backward of the calculation of exconv.
- * */
 REGISTER_LAYER(depthwise_conv, DepthwiseConvLayer);
 
 bool DepthwiseConvLayer::init(const LayerMap &layerMap,
@@ -76,11 +71,12 @@ bool DepthwiseConvLayer::init(const LayerMap &layerMap,
 #define BACKWARD_FILTER(i, inputs, outputs) \
   backward_[2 * i + 1]->calc(inputs, outputs)
 
+// compute the depthwise convolution forward pass
 void DepthwiseConvLayer::forward(PassType passType) {
   Layer::forward(passType);
 
   size_t batchSize = inputLayers_[0]->getOutputValue()->getHeight();
-  // std::cout << "outputSize" << getOutputSize() <
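Note (illustration only, not part of the patch): the behaviour the reworked DepthwiseConvFunctor interface is expected to provide can be sketched as a plain CPU loop nest with the same argument order as operator() after this change. The helper name depthwiseConvForwardReference is invented for this sketch; it assumes the simplest case described in the new header comment (groups == inputChannels, so outputChannels == inputChannels with a channel multiplier of 1) and treats values outside the input as zero padding. The pointer arithmetic mirrors the ((n * outputChannels + c) * inputHeight + h_in) * inputWidth + w_in offset used by the ConvolutionDepthwiseForward kernel above.

// Hedged reference sketch of depthwise convolution forward.
// Assumption: outputChannels == inputChannels (channel multiplier 1).
template <class T>
void depthwiseConvForwardReference(const T* inputData,
                                   const T* filterData,
                                   int batchSize,
                                   int outputChannels,
                                   int outputHeight,
                                   int outputWidth,
                                   int inputHeight,
                                   int inputWidth,
                                   int filterHeight,
                                   int filterWidth,
                                   int strideH,
                                   int strideW,
                                   int paddingH,
                                   int paddingW,
                                   T* outputData) {
  for (int n = 0; n < batchSize; ++n) {
    for (int c = 0; c < outputChannels; ++c) {
      // Each output channel reads only input channel c and its own
      // filterHeight x filterWidth slice of filterData.
      const T* filter = filterData + c * filterHeight * filterWidth;
      const T* input =
          inputData + (n * outputChannels + c) * inputHeight * inputWidth;
      T* output =
          outputData + (n * outputChannels + c) * outputHeight * outputWidth;
      for (int h = 0; h < outputHeight; ++h) {
        for (int w = 0; w < outputWidth; ++w) {
          T value = 0;
          for (int kh = 0; kh < filterHeight; ++kh) {
            for (int kw = 0; kw < filterWidth; ++kw) {
              const int hIn = -paddingH + h * strideH + kh;
              const int wIn = -paddingW + w * strideW + kw;
              // Skip positions that fall into the zero padding.
              if (hIn >= 0 && hIn < inputHeight && wIn >= 0 && wIn < inputWidth) {
                value += filter[kh * filterWidth + kw] *
                         input[hIn * inputWidth + wIn];
              }
            }
          }
          output[h * outputWidth + w] = value;
        }
      }
    }
  }
}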