Modify the format, delete useless comments, and pass inputChannels to the depthwise conv kernels for input indexing

c43f6936 · xzl · fc8aedb1 · c43f6936 · c43f6936 · c43f6936
3 changed files
--- a/paddle/function/DepthwiseConvOp.cpp
+++ b/paddle/function/DepthwiseConvOp.cpp
@@ -15,7 +15,6 @@ limitations under the License. */
 #include "DepthwiseConvOp.h"
 #include "ConvOp.h"
 #include "GemmFunctor.h"
-//#include "paddle/math/MemoryHandle.h"

 namespace paddle {

@@ -28,6 +27,7 @@ public:
                  int outputChannels,
                  int outputHeight,
                  int outputWidth,
+                  int inputChannels,
                  int inputHeight,
                  int inputWidth,
                  int filterHeight,
@@ -114,7 +114,7 @@ public:
    const TensorShape& output = outputs[0].shape();

    size_t batchSize = input[0];
-    // size_t inputChannels = input[1];
+    size_t inputChannels = input[1];
    size_t inputHeight = input[2];
    size_t inputWidth = input[3];
    size_t filterHeight = getFilterHeight(filter);
@@ -134,6 +134,7 @@ public:
                  outputChannels,
                  outputHeight,
                  outputWidth,
+                  inputChannels,
                  inputHeight,
                  inputWidth,
                  filterHeight,
@@ -168,8 +169,6 @@ public:
    CHECK_EQ(numInputs_, inputs.size());
    CHECK_EQ(numOutputs_, outputs.size());
    check(inputs, outputs);
-    // Since the implementation of Col2ImFunctor is ADD_TO,
-    // this function only supports ADD_TO mode.
    CHECK_EQ(outputs[0].getArgType(), ADD_TO);
    const TensorShape& output = inputs[0].shape();
    const TensorShape& filter = inputs[1].shape();
@@ -228,12 +227,11 @@ public:
  }

  void calc(const BufferArgs& inputs, const BufferArgs& outputs) override {
-    // CHECK_EQ(numInputs_, inputs.size());
-    // CHECK_EQ(numOutputs_, outputs.size());
+    CHECK_EQ(numInputs_, inputs.size());
+    CHECK_EQ(numOutputs_, outputs.size());
    check(inputs, outputs);
    const TensorShape& output = inputs[0].shape();
    const TensorShape& input = inputs[1].shape();
-    // const TensorShape& multiplier = inputs[2].shape();
    const TensorShape& filter = outputs[0].shape();

    size_t batchSize = input[0];

--- a/paddle/function/DepthwiseConvOp.h
+++ b/paddle/function/DepthwiseConvOp.h
@@ -29,6 +29,7 @@ namespace paddle {
 * \param[in]   outputChannels    channels of outputData.
 * \param[in]   outputHeight      height of outputData.
 * \param[in]   outputWidth       width of outputData.
+ * \param[in]   inputChannels     channels of inputData.
 * \param[in]   inputHeight       height of inputData.
 * \param[in]   inputWidth        width of inputData..
 * \param[in]   filterHeight      height of filter.
@@ -49,8 +50,9 @@ public:
                  int outputChannels,
                  int outputHeight,
                  int outputWidth,
+                  int inputChannels,
                  int inputHeight,
-                  int intputWidth,
+                  int inputWidth,
                  int filterHeight,
                  int filterWidth,
                  int strideH,

--- a/paddle/function/DepthwiseConvOpGpu.cu
+++ b/paddle/function/DepthwiseConvOpGpu.cu
@@ -24,7 +24,7 @@ __global__
 void ConvolutionDepthwiseForward(const int nthreads,
    const T* const inputData, const T* const filterData,
    const int batchSize, const int outputChannels, const int outputHeight,
-    const int outputWidth, const int inputHeight, const int inputWidth,
+    const int outputWidth,const int inputChannels, const int inputHeight, const int inputWidth,
    const int filterHeight, const int filterWidth, const int strideH,
    const int strideW, const int paddingH, const int paddingW,
    T* const outputData) {
@@ -49,7 +49,7 @@ void ConvolutionDepthwiseForward(const int nthreads,
            for (int kw = 0; kw < filterWidth; ++kw) {
                const int h_in = -paddingH + h * strideH + kh;
                const int w_in = -paddingW + w * strideW + kw;
-			const int offset = ((n * outputChannels + c) * inputHeight + h_in)
+                const int offset = ((n * inputChannels + c) * inputHeight + h_in)
 					* inputWidth + w_in;
                value += (*weight) * inputData[offset];
                ++weight;
@@ -80,15 +80,15 @@ __global__
 void ConvolutionDepthwiseInputBackward(const int nthreads,
    const T* const top_diff, const T* const weight_data,
    const int num, const int outputChannels, const int outputHeight,
-    const int outputWidth, const int inputHeight, const int inputWidth,
+    const int outputWidth,const int inputChannels, const int inputHeight, const int inputWidth,
    const int filterHeight, const int filterWidth, const int strideH,
    const int strideW, const int paddingH, const int paddingW,
     T* const bottom_diff) {
  int index =
    (blockIdx.x * gridDim.y + blockIdx.y) * blockDim.x + threadIdx.x;
  if(index < nthreads) {
-    const int n = index / outputChannels / inputHeight / inputWidth;
-    const int c = (index / inputHeight / inputWidth) % outputChannels;
+    const int n = index / inputChannels / inputHeight / inputWidth;
+    const int c = (index / inputHeight / inputWidth) % inputChannels;
    const int h = (index / inputWidth) % inputHeight;
    const int w = index % inputWidth;
    const T* weight = weight_data + c * filterHeight * filterWidth;
@@ -121,7 +121,7 @@ __global__
 void ConvolutionDepthwiseFilterBackward(const int num_i, const int nthreads,
    const T* const top_diff, const T* const inputData,
    const int num, const int outputChannels, const int outputHeight,
-    const int outputWidth, const int inputHeight, const int inputWidth,
+    const int outputWidth, const int inputChannels, const int inputHeight, const int inputWidth,
    const int filterHeight, const int filterWidth, const int strideH,
    const int strideW, const int paddingH, const int paddingW,
    T* const buffer_data) {
@@ -141,7 +141,7 @@ void ConvolutionDepthwiseFilterBackward(const int num_i, const int nthreads,
      const int n = num_i;
      const int top_offset = ((n * outputChannels + c) * outputHeight + h)
            * outputWidth + w;
-      const int bottom_offset = ((n * outputChannels + c) * inputHeight + h_in)
+      const int bottom_offset = ((n * inputChannels + c) * inputHeight + h_in)
            * inputWidth + w_in;
      buffer_data[index] = top_diff[top_offset] * inputData[bottom_offset];
    } else {
@@ -159,6 +159,7 @@ public:
            int outputChannels,
            int outputHeight,
            int outputWidth,
+			int inputChannels,
            int inputHeight,
            int inputWidth,
            int filterHeight,
@@ -186,6 +187,7 @@ public:
            outputChannels,
            outputHeight,
            outputWidth,
+			inputChannels,
            inputHeight,
            inputWidth,
            filterHeight,
@@ -237,6 +239,7 @@ public:
            outputChannels,
            outputHeight,
            outputWidth,
+			inputChannels,
            inputHeight,
            inputWidth,
            filterHeight,
@@ -290,6 +293,7 @@ public:
                    outputChannels,
                    outputHeight,
                    outputWidth,
+					inputChannels,
                    inputHeight,
                    inputWidth,
                    filterHeight,