Add comments to the .h file and make minor code modifications

064dc888 · xzl · 36e7800a · 064dc888 · 064dc888 · 064dc888
5 changed files
--- a/paddle/function/DepthwiseConvOp.cpp
+++ b/paddle/function/DepthwiseConvOp.cpp
@@ -13,16 +13,16 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "DepthwiseConvOp.h"
+#include "ConvOp.h"
 #include "GemmFunctor.h"
-#include "paddle/math/MemoryHandle.h"
+//#include "paddle/math/MemoryHandle.h"

 namespace paddle {

 template <class T>
 class DepthwiseConvFunctor<DEVICE_TYPE_CPU, T> {
 public:
-  void operator()(int outputSize,
-                  const T* inputData,
+  void operator()(const T* inputData,
                  const T* filterData,
                  int batchSize,
                  int outputChannels,
@@ -44,13 +44,13 @@ public:
 template <class T>
 class DepthwiseConvGradInputFunctor<DEVICE_TYPE_CPU, T> {
 public:
-  void operator()(int inputSize,
-                  const T* outputGrad,
+  void operator()(const T* outputGrad,
                  const T* filterData,
                  int batchSize,
                  int outputChannels,
                  int outputHeight,
                  int outputWidth,
+                  int inputChannels,
                  int inputHeight,
                  int inputWidth,
                  int filterHeight,
@@ -65,14 +65,13 @@ public:
 template <class T>
 class DepthwiseConvGradFilterFunctor<DEVICE_TYPE_CPU, T> {
 public:
-  void operator()(int num_i,
-                  int colDataSize,
-                  const T* outputGrad,
+  void operator()(const T* outputGrad,
                  const T* inputData,
                  int batchSize,
                  int outputChannels,
                  int outputHeight,
                  int outputWidth,
+                  int inputChannels,
                  int inputHeight,
                  int inputWidth,
                  int filterHeight,
@@ -87,7 +86,7 @@ public:
 };

 /*
- * \brief Forward calculation of convolution.
+ * \brief Forward calculation of depthwise convolution.
 */
 template <DeviceType Device>
 class DepthwiseConvFunction : public ConvFunctionBase {
@@ -126,11 +125,9 @@ public:
    real* inputData = inputs[0].data<real>();
    real* filterData = inputs[1].data<real>();
    real* outputData = outputs[0].data<real>();
-    size_t outputSize = batchSize * outputChannels * outputHeight * outputWidth;

    DepthwiseConvFunctor<Device, real> depthwiseConv;
-    depthwiseConv(outputSize,
-                  inputData,
+    depthwiseConv(inputData,
                  filterData,
                  batchSize,
                  outputChannels,
@@ -149,7 +146,7 @@ public:
 };

 /*
- * \brief Backward input calculation of convolution.
+ * \brief Backward input calculation of depthwise convolution.
 */
 template <DeviceType Device>
 class DepthwiseConvGradInputFunction : public ConvFunctionBase {
@@ -191,16 +188,14 @@ public:
    real* filterData = inputs[1].data<real>();
    real* inputGrad = outputs[0].data<real>();

-    size_t inputSize = batchSize * inputChannels * inputHeight * inputWidth;
-
    DepthwiseConvGradInputFunctor<Device, real> depthwiseConvGradInput;
-    depthwiseConvGradInput(inputSize,
-                           outputGrad,
+    depthwiseConvGradInput(outputGrad,
                           filterData,
                           batchSize,
                           outputChannels,
                           outputHeight,
                           outputWidth,
+                           inputChannels,
                           inputHeight,
                           inputWidth,
                           filterHeight,
@@ -214,7 +209,7 @@ public:
 };

 /*
- * \brief Backward filter calculation of convolution.
+ * \brief Backward filter calculation of depthwise convolution.
 */
 template <DeviceType Device>
 class DepthwiseConvGradFilterFunction : public ConvFunctionBase {
@@ -255,35 +250,31 @@ public:
    real* multiplierData = inputs[2].data<real>();
    real* filterGrad = outputs[0].data<real>();

-    size_t size =
+    int size =
        inputChannels * filterHeight * filterWidth * outputHeight * outputWidth;
-
    resizeBuffer<Device>(size);
    real* colData = reinterpret_cast<real*>(memory_->getBuf());

    DepthwiseConvGradFilterFunctor<Device, real> depthwiseConvGradFilter;

-    for (size_t i = 0; i < batchSize; i++) {
-      depthwiseConvGradFilter(i,
-                              size,
-                              outputGrad,
-                              inputData,
-                              batchSize,
-                              outputChannels,
-                              outputHeight,
-                              outputWidth,
-                              inputHeight,
-                              inputWidth,
-                              filterHeight,
-                              filterWidth,
-                              strideH(),
-                              strideW(),
-                              paddingH(),
-                              paddingW(),
-                              colData,
-                              multiplierData,
-                              filterGrad);
-    }
+    depthwiseConvGradFilter(outputGrad,
+                            inputData,
+                            batchSize,
+                            outputChannels,
+                            outputHeight,
+                            outputWidth,
+                            inputChannels,
+                            inputHeight,
+                            inputWidth,
+                            filterHeight,
+                            filterWidth,
+                            strideH(),
+                            strideW(),
+                            paddingH(),
+                            paddingW(),
+                            colData,
+                            multiplierData,
+                            filterGrad);
  }
 };


--- a/paddle/function/DepthwiseConvOp.h
+++ b/paddle/function/DepthwiseConvOp.h
@@ -14,15 +14,36 @@ limitations under the License. */

 #pragma once

-#include "ConvOp.h"
+#include "TensorType.h"

 namespace paddle {

+/**
+ * \brief  Depthwise convolution forward. The outputData of
+ *         depthwise convolution is the same as that of ExpandConvLayer
+ *         when groups equals inputChannels in ExpandConvLayer.
+ *
+ * \param[in]   inputData         input data.
+ * \param[in]   filterData        the parameters of the depthwise conv layer.
+ * \param[in]   batchSize         batch size of input data.
+ * \param[in]   outputChannels    channels of outputData.
+ * \param[in]   outputHeight      height of outputData.
+ * \param[in]   outputWidth       width of outputData.
+ * \param[in]   inputHeight       height of inputData.
+ * \param[in]   inputWidth        width of inputData.
+ * \param[in]   filterHeight      height of filter.
+ * \param[in]   filterWidth       width of filter.
+ * \param[in]   strideH           stride size in height direction.
+ * \param[in]   strideW           stride size in width direction.
+ * \param[in]   paddingH          padding size in height direction.
+ * \param[in]   paddingW          padding size in width direction.
+ * \param[out]  outputData        outputData.
+ *
+ */
 template <DeviceType Device, class T>
 class DepthwiseConvFunctor {
 public:
-  void operator()(int outputSize,
-                  const T* inputData,
+  void operator()(const T* inputData,
                  const T* filterData,
                  int batchSize,
                  int outputChannels,
@@ -39,16 +60,38 @@ public:
                  T* outputData);
 };

+/**
+ * \brief  Functor to compute the depthwise convolution backprop w.r.t. input.
+ *
+ *
+ * \param[in]   outputGrad        the grad data of output.
+ * \param[in]   filterData        the parameters of the depthwise conv layer.
+ * \param[in]   batchSize         batch size of input data.
+ * \param[in]   outputChannels    channels of outputData.
+ * \param[in]   outputHeight      height of outputData.
+ * \param[in]   outputWidth       width of outputData.
+ * \param[in]   inputChannels     channels of input data.
+ * \param[in]   inputHeight       height of inputData.
+ * \param[in]   inputWidth        width of inputData.
+ * \param[in]   filterHeight      height of filter.
+ * \param[in]   filterWidth       width of filter.
+ * \param[in]   strideH           stride size in height direction.
+ * \param[in]   strideW           stride size in width direction.
+ * \param[in]   paddingH          padding size in height direction.
+ * \param[in]   paddingW          padding size in width direction.
+ * \param[out]  inputGrad         the grad data of input.
+ *
+ */
 template <DeviceType Device, class T>
 class DepthwiseConvGradInputFunctor {
 public:
-  void operator()(int inputSize,
-                  const T* outputGrad,
+  void operator()(const T* outputGrad,
                  const T* filterData,
                  int batchSize,
                  int outputChannels,
                  int outputHeight,
                  int outputWidth,
+                  int inputChannels,
                  int inputHeight,
                  int inputWidth,
                  int filterHeight,
@@ -60,17 +103,42 @@ public:
                  T* inputGrad);
 };

+/**
+ * \brief  Functor to compute the depthwise convolution backprop w.r.t. filter.
+ *
+ * \param[in]   outputGrad        the grad data of output.
+ * \param[in]   inputData         inputData.
+ * \param[in]   batchSize         batch size of input data.
+ * \param[in]   outputChannels    channels of outputData.
+ * \param[in]   outputHeight      height of outputData.
+ * \param[in]   outputWidth       width of outputData.
+ * \param[in]   inputChannels     channels of input data.
+ * \param[in]   inputHeight       height of inputData.
+ * \param[in]   inputWidth        width of inputData.
+ * \param[in]   filterHeight      height of filter.
+ * \param[in]   filterWidth       width of filter.
+ * \param[in]   strideH           stride size in height direction.
+ * \param[in]   strideW           stride size in width direction.
+ * \param[in]   paddingH          padding size in height direction.
+ * \param[in]   paddingW          padding size in width direction.
+ * \param[in]   colData           Auxiliary data when calculating filterGrad.
+ *                                size: inputChannels * filterHeight *
+ *                                filterWidth * outputHeight * outputWidth.
+ * \param[in]   multiplierData    Auxiliary data when calculating filterGrad.
+ *                                size: outputHeight * outputWidth.
+ * \param[out]  filterGrad        the grad data of filter.
+ *
+ */
 template <DeviceType Device, class T>
 class DepthwiseConvGradFilterFunctor {
 public:
-  void operator()(int num_i,
-                  int colDataSize,
-                  const T* outputGrad,
+  void operator()(const T* outputGrad,
                  const T* inputData,
                  int batchSize,
                  int outputChannels,
                  int outputHeight,
                  int outputWidth,
+                  int inputChannels,
                  int inputHeight,
                  int inputWidth,
                  int filterHeight,

--- a/paddle/function/DepthwiseConvOpGpu.cu
+++ b/paddle/function/DepthwiseConvOpGpu.cu
@@ -12,12 +12,11 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "ConvOp.h"
 #include "DepthwiseConvOp.h"
 #include "GemmFunctor.h"
-#include "paddle/math/MemoryHandle.h"

 namespace paddle {
+// CUDA kernel to compute the depthwise convolution forward pass
 template <class T>
 __global__ 
 void ConvolutionDepthwiseForward(const int nthreads,
@@ -48,7 +47,7 @@ void ConvolutionDepthwiseForward(const int nthreads,
 		  for (int kw = 0; kw < filterWidth; ++kw) {
 			const int h_in = -paddingH + h * strideH + kh;
 			const int w_in = -paddingW + w * strideW + kw;
-			  const int offset = ((n * outputChannels + c) * inputHeight + h_in)
+			const int offset = ((n * outputChannels + c) * inputHeight + h_in)
 					* inputWidth + w_in;
 			  value += (*weight) * inputData[offset];
 			++weight;
@@ -73,6 +72,7 @@ void ConvolutionDepthwiseForward(const int nthreads,
  }
 }

+// CUDA kernel to compute the depthwise convolution backprop w.r.t input.
 template <class T>
 __global__
 void ConvolutionDepthwiseInputBackward(const int nthreads,
@@ -113,6 +113,7 @@ void ConvolutionDepthwiseInputBackward(const int nthreads,
  }
 }

+// CUDA kernel to compute the depthwise convolution backprop w.r.t filter.
 template <class T>
 __global__
 void ConvolutionDepthwiseFilterBackward(const int num_i, const int nthreads,
@@ -150,15 +151,14 @@ void ConvolutionDepthwiseFilterBackward(const int num_i, const int nthreads,
 template <class T>
 class DepthwiseConvFunctor<DEVICE_TYPE_GPU, T>{
 public:
-  void operator()(int outputSize, 
-            const T* inputData, 
+  void operator()(const T* inputData, 
            const T* filterData,
            int batchSize,
            int outputChannels,
            int outputHeight,
            int outputWidth,
-			int inputHeight,
-			int inputWidth,
+            int inputHeight,
+            int inputWidth,
            int filterHeight,
            int filterWidth,
            int strideH,
@@ -167,12 +167,14 @@ public:
            int paddingW,
            T* outputData){

+    int outputSize = batchSize * outputChannels * outputHeight * outputWidth;
+
    size_t blocks = (outputSize + 1024 -1) / 1024;
    size_t blockX = 512;
    size_t blockY = (blocks+512-1)/512;
    dim3 threads(1024, 1);
    dim3 grid(blockX, blockY);
-    
+
    ConvolutionDepthwiseForward<T>
        <<< grid, threads, 0, STREAM_DEFAULT >>>(
            outputSize, 
@@ -182,8 +184,8 @@ public:
            outputChannels,
            outputHeight,
            outputWidth,
-			inputHeight,
-			inputWidth,
+            inputHeight,
+            inputWidth,
            filterHeight,
            filterWidth,
            strideH,
@@ -197,13 +199,13 @@ public:
 template <class T>
 class DepthwiseConvGradInputFunctor<DEVICE_TYPE_GPU, T>{
 public:
-  void operator()(int inputSize,
-            const T* outputGrad,
+  void operator()(const T* outputGrad,
            const T* filterData,
            int batchSize,
            int outputChannels,
            int outputHeight,
            int outputWidth,
+            int inputChannels,
            int inputHeight,
            int inputWidth,
            int filterHeight,
@@ -212,7 +214,9 @@ public:
            int strideW,
            int paddingH,
            int paddingW,
-                T* inputGrad){
+            T* inputGrad){
+
+	int inputSize = batchSize * inputChannels * inputHeight * inputWidth;

    size_t blocks = (inputSize + 1024 -1) / 1024;
    size_t blockX = 512;
@@ -220,6 +224,7 @@ public:
    dim3 threads(1024, 1);
    dim3 grid(blockX, blockY);

+
    ConvolutionDepthwiseInputBackward<T>
          // NOLINT_NEXT_LINE(whitespace/operators)
        <<< grid, threads, 0, STREAM_DEFAULT >>>(
@@ -245,14 +250,13 @@ public:
 template <class T>
 class DepthwiseConvGradFilterFunctor<DEVICE_TYPE_GPU, T> {
 public:
-  void operator()(int num_i,
-                int colDataSize,
-                const T* outputGrad,
+  void operator()(const T* outputGrad,
                const T* inputData,
                int batchSize,
                int outputChannels,
                int outputHeight,
                int outputWidth,
+                int inputChannels,
                int inputHeight,
                int inputWidth,
                int filterHeight,
@@ -265,60 +269,65 @@ public:
                T* multiplierData,
                T* filterGrad){

+        int colDataSize = inputChannels * filterHeight * filterWidth * outputHeight * outputWidth;
+
        size_t blocks = (colDataSize + 1024 -1) / 1024;
        size_t blockX = 512;
        size_t blockY = (blocks+512-1)/512;
        dim3 threads(1024, 1);
        dim3 grid(blockX, blockY);

-	    ConvolutionDepthwiseFilterBackward<T>
-            <<< grid, threads, 0, STREAM_DEFAULT >>>(
-                num_i,
-                colDataSize,
-                outputGrad,
-                inputData,
-                batchSize,
-                outputChannels,
-                outputHeight,
-                outputWidth,
-                inputHeight,
-                inputWidth,
-                filterHeight,
-                filterWidth,
-                strideH,
-                strideW,
-                paddingH,
-                paddingW,
-                colData
-            );
-        GemmFunctor<DEVICE_TYPE_GPU, real> gemm;
-        int M = colDataSize / outputHeight / outputWidth;
-        int N = 1;
-        int K = outputHeight * outputWidth;
-        gemm(CblasNoTrans,
-            CblasNoTrans,
-            M,
-            N,
-            K,
-            (T)1.0,
-            colData,
-            K,
-            multiplierData,
-            N,
-            (T)1.0,
-            filterGrad,
-            N);
+        for(int i = 0; i < batchSize; i++) {
+			ConvolutionDepthwiseFilterBackward<T>
+				<<< grid, threads, 0, STREAM_DEFAULT >>>(
+                    i,
+                    colDataSize,
+                    outputGrad,
+                    inputData,
+                    batchSize,
+                    outputChannels,
+                    outputHeight,
+                    outputWidth,
+                    inputHeight,
+                    inputWidth,
+                    filterHeight,
+                    filterWidth,
+                    strideH,
+                    strideW,
+                    paddingH,
+                    paddingW,
+                    colData
+				);
+			GemmFunctor<DEVICE_TYPE_GPU, real> gemm;
+			int M = colDataSize / outputHeight / outputWidth;
+			int N = 1;
+			int K = outputHeight * outputWidth;
+			gemm(CblasNoTrans,
+				CblasNoTrans,
+				M,
+				N,
+				K,
+				(T)1.0,
+				colData,
+				K,
+				multiplierData,
+				N,
+				(T)1.0,
+				filterGrad,
+				N);
+		}
        //gemv
    }
 };

 #ifdef PADDLE_TYPE_DOUBLE
-using real=double;
+template class DepthwiseConvGradInputFunctor<DEVICE_TYPE_GPU, double>;
+template class DepthwiseConvFunctor<DEVICE_TYPE_GPU, double>;
+template class DepthwiseConvGradFilterFunctor<DEVICE_TYPE_GPU, double>;
 #else 
-using real=float;
+template class DepthwiseConvGradInputFunctor<DEVICE_TYPE_GPU, float>;
+template class DepthwiseConvFunctor<DEVICE_TYPE_GPU, float>;
+template class DepthwiseConvGradFilterFunctor<DEVICE_TYPE_GPU, float>;
 #endif
-template class DepthwiseConvGradInputFunctor<DEVICE_TYPE_GPU, real>;
-template class DepthwiseConvFunctor<DEVICE_TYPE_GPU, real>;
-template class DepthwiseConvGradFilterFunctor<DEVICE_TYPE_GPU, real>;

 }  // namespace paddle
--- a/paddle/gserver/layers/DepthwiseConvLayer.cpp
+++ b/paddle/gserver/layers/DepthwiseConvLayer.cpp
@@ -15,14 +15,9 @@ limitations under the License. */
 #include "DepthwiseConvLayer.h"
 #include "paddle/utils/Logging.h"
 #include "paddle/utils/Stat.h"
-#include <iostream>

 namespace paddle {

-/*
- * The calculation of the exconvt(convolution transpose (deconv) operation)
- * is a swap of forward and backward of the calculation of exconv.
- * */
 REGISTER_LAYER(depthwise_conv, DepthwiseConvLayer);

 bool DepthwiseConvLayer::init(const LayerMap &layerMap,
@@ -76,11 +71,12 @@ bool DepthwiseConvLayer::init(const LayerMap &layerMap,
 #define BACKWARD_FILTER(i, inputs, outputs) \
  backward_[2 * i + 1]->calc(inputs, outputs)

+// compute the depthwise convolution forward pass
 void DepthwiseConvLayer::forward(PassType passType) {
  Layer::forward(passType);

  size_t batchSize = inputLayers_[0]->getOutputValue()->getHeight();
-  // std::cout << "outputSize" << getOutputSize() <<std::endl;
+
  resetOutput(batchSize, getOutputSize());

  // Calculate the shape of the input, output, and filter.
@@ -127,6 +123,7 @@ void DepthwiseConvLayer::forward(PassType passType) {
  forwardActivation();
 }

+// compute the depthwise convolution backprop.
 void DepthwiseConvLayer::backward(const UpdateCallback &callback) {
  backwardActivation();


--- a/paddle/gserver/layers/DepthwiseConvLayer.h
+++ b/paddle/gserver/layers/DepthwiseConvLayer.h
@@ -22,10 +22,8 @@ namespace paddle {

 /**
 * @brief A subclass of convolution layer.
- * This layer expands input and use matrix multiplication to
- * calculate convolution operation.
- *
- * The config file api is img_conv_layer.
+ * This layer performs the depthwise convolution calculation used in MobileNet.
+ * The config file API is img_depthwise_conv_layer.
 */

 class DepthwiseConvLayer : public ExpandConvBaseLayer {