diff --git a/paddle/function/DepthwiseConvOp.cpp b/paddle/function/DepthwiseConvOp.cpp index 8dcd32b06764ef833e1856b68ed4e9de90262047..358135e9a10c1579608cee79e2a7fb25d3d8fcf9 100644 --- a/paddle/function/DepthwiseConvOp.cpp +++ b/paddle/function/DepthwiseConvOp.cpp @@ -81,7 +81,6 @@ public: int paddingH, int paddingW, T* colData, - T* multiplierData, T* filterGrad) {} }; @@ -247,7 +246,6 @@ public: real* outputGrad = inputs[0].data(); real* inputData = inputs[1].data(); - real* multiplierData = inputs[2].data(); real* filterGrad = outputs[0].data(); int size = @@ -273,7 +271,6 @@ public: paddingH(), paddingW(), colData, - multiplierData, filterGrad); } }; diff --git a/paddle/function/DepthwiseConvOp.h b/paddle/function/DepthwiseConvOp.h index da180b29b064702b78b7ba8c3d63f0114dec6d82..5c5a70e5df05e0ddd2657287d9f5c909a4d17f62 100644 --- a/paddle/function/DepthwiseConvOp.h +++ b/paddle/function/DepthwiseConvOp.h @@ -148,9 +148,7 @@ public: int paddingH, int paddingW, T* colData, - T* multiplierData, T* filterGrad); - -}; // namespace paddle +}; } // namespace paddle diff --git a/paddle/function/DepthwiseConvOpGpu.cu b/paddle/function/DepthwiseConvOpGpu.cu index df9be80b3fa6644048985fc33b581bd96eebf198..5fb85df4896f582e52cf08e796fc19fd13b675ee 100644 --- a/paddle/function/DepthwiseConvOpGpu.cu +++ b/paddle/function/DepthwiseConvOpGpu.cu @@ -14,6 +14,7 @@ limitations under the License. 
*/ #include "DepthwiseConvOp.h" #include "GemmFunctor.h" +#include "paddle/math/BaseMatrix.h" namespace paddle { // CUDA kernel to compute the depthwise convolution forward pass @@ -266,7 +267,6 @@ public: int paddingH, int paddingW, T* colData, - T* multiplierData, T* filterGrad){ int colDataSize = inputChannels * filterHeight * filterWidth * outputHeight * outputWidth; @@ -276,6 +276,7 @@ public: size_t blockY = (blocks+512-1)/512; dim3 threads(1024, 1); dim3 grid(blockX, blockY); + BaseMatrix filterGradMatrix(inputChannels * filterHeight * filterWidth, 1, filterGrad, false, true); for(int i = 0; i < batchSize; i++) { ConvolutionDepthwiseFilterBackward<T><<<grid, threads, 0, STREAM_DEFAULT>>> @@ -298,25 +299,12 @@ public: paddingW, colData ); - GemmFunctor<DEVICE_TYPE_GPU, real> gemm; int M = colDataSize / outputHeight / outputWidth; - int N = 1; int K = outputHeight * outputWidth; - gemm(CblasNoTrans, - CblasNoTrans, - M, - N, - K, - (T)1.0, - colData, - K, - multiplierData, - N, - (T)1.0, - filterGrad, - N); + + BaseMatrix colMatrix(M, K, colData, false, true); + filterGradMatrix.sumRows(colMatrix, (T)1.0, (T)1.0); } - //gemv } }; diff --git a/paddle/gserver/layers/DepthwiseConvLayer.cpp b/paddle/gserver/layers/DepthwiseConvLayer.cpp index 8da3a52c2434048eaa7da636bc14ec9705b42592..4b5f16d76bd02166de766367be9937344f270d87 100644 --- a/paddle/gserver/layers/DepthwiseConvLayer.cpp +++ b/paddle/gserver/layers/DepthwiseConvLayer.cpp @@ -29,18 +29,10 @@ bool DepthwiseConvLayer::init(const LayerMap &layerMap, inputShape_.resize(numInputs); filterShape_.resize(numInputs); outputShape_.resize(numInputs); - multiplierShape_.resize(numInputs); - weightMultiplier_.resize(numInputs); for (int i = 0; i < config_.inputs_size(); i++) { std::vector<size_t> paddings = {(size_t)paddingY_[i], (size_t)padding_[i]}; std::vector<size_t> strides = {(size_t)strideY_[i], (size_t)stride_[i]}; - Matrix::resizeOrCreate(weightMultiplier_[i], - (size_t)outputH_[i] * (size_t)outputW_[i], - (size_t)1, - false, - useGpu_); - weightMultiplier_[i]->one();
createFunction(forward_, "DepthwiseConv", FuncConfig() @@ -65,100 +57,4 @@ bool DepthwiseConvLayer::init(const LayerMap &layerMap, return true; } -// i is the index of input layers -#define BACKWARD_INPUT(i, inputs, outputs) \ - backward_[2 * i]->calc(inputs, outputs) -#define BACKWARD_FILTER(i, inputs, outputs) \ - backward_[2 * i + 1]->calc(inputs, outputs) - -// compute the depthwise convolution forward pass -void DepthwiseConvLayer::forward(PassType passType) { - Layer::forward(passType); - - size_t batchSize = inputLayers_[0]->getOutputValue()->getHeight(); - - resetOutput(batchSize, getOutputSize()); - - // Calculate the shape of the input, output, and filter. - for (size_t i = 0; i < inputLayers_.size(); ++i) { - inputShape_[i] = TensorShape({(size_t)batchSize, - (size_t)channels_[i], - (size_t)imgSizeH_[i], - (size_t)imgSizeW_[i]}); - multiplierShape_[i] = - TensorShape({(size_t)outputH_[i] * (size_t)outputW_[i], (size_t)1}); - filterShape_[i] = TensorShape({(size_t)groups_[i], - (size_t)numFilters_ / groups_[i], - (size_t)channels_[i] / groups_[i], - (size_t)filterSizeY_[i], - (size_t)filterSize_[i]}); - outputShape_[i] = TensorShape({(size_t)batchSize, - (size_t)numFilters_, - (size_t)outputH_[i], - (size_t)outputW_[i]}); - } - - // Calculate the output value. - for (size_t i = 0; i < inputLayers_.size(); ++i) { - BufferArgs inputs; - BufferArgs outputs; - inputs.addArg(*getInputValue(i), inputShape_[i]); - inputs.addArg(*weights_[i]->getW(), filterShape_[i]); - outputs.addArg( - *getOutputValue(), outputShape_[i], i == 0 ? ASSIGN_TO : ADD_TO); - - forward_[i]->calc(inputs, outputs); - } - - /* add the bias-vector */ - if (biases_.get()) { - if (sharedBiases_) { - addSharedBias(); - } else { - addUnsharedBias(); - } - } - - /* activation */ - forwardActivation(); -} - -// compute the depthwise convolution backprop. 
-void DepthwiseConvLayer::backward(const UpdateCallback &callback) { - backwardActivation(); - - MatrixPtr outGrad = getOutputGrad(); - if (biases_ && biases_->getWGrad()) { - bpropBiases(outGrad); - /* Increasing the number of gradient */ - biases_->getParameterPtr()->incUpdate(callback); - } - - // Calculate the input grad and filter grad. - for (size_t i = 0; i < inputLayers_.size(); ++i) { - if (getInputGrad(i)) { - BufferArgs inputs; - BufferArgs outputs; - inputs.addArg(*getOutputGrad(), outputShape_[i]); - inputs.addArg(*weights_[i]->getW(), filterShape_[i]); - outputs.addArg(*getInputGrad(i), inputShape_[i], ADD_TO); - BACKWARD_INPUT(i, inputs, outputs); - } - - if (weights_[i]->getWGrad()) { - BufferArgs inputs; - BufferArgs outputs; - inputs.addArg(*getOutputGrad(), outputShape_[i]); - inputs.addArg(*getInputValue(i), inputShape_[i]); - inputs.addArg(*weightMultiplier_[i], multiplierShape_[i]); - // weight_multiplier - outputs.addArg(*weights_[i]->getWGrad(), filterShape_[i], ADD_TO); - BACKWARD_FILTER(i, inputs, outputs); - - /* Increasing the number of gradient */ - weights_[i]->getParameterPtr()->incUpdate(callback); - } - } -} - } // namespace paddle diff --git a/paddle/gserver/layers/DepthwiseConvLayer.h b/paddle/gserver/layers/DepthwiseConvLayer.h index c640d13b582fe0918d409ba8c7b4a46c14541a1e..ce074803ab4e1426962975a16b2e70faa884fb8b 100644 --- a/paddle/gserver/layers/DepthwiseConvLayer.h +++ b/paddle/gserver/layers/DepthwiseConvLayer.h @@ -15,7 +15,7 @@ limitations under the License. */ #pragma once #include <vector> -#include "ExpandConvBaseLayer.h" +#include "ExpandConvLayer.h" #include "paddle/math/Matrix.h" namespace paddle { @@ -26,25 +26,15 @@ namespace paddle { * The config file api is img_depthwise_conv_layer.
*/ -class DepthwiseConvLayer : public ExpandConvBaseLayer { +class DepthwiseConvLayer : public ExpandConvLayer { public: explicit DepthwiseConvLayer(const LayerConfig& config) - : ExpandConvBaseLayer(config) {} + : ExpandConvLayer(config) {} ~DepthwiseConvLayer() {} bool init(const LayerMap& layerMap, const ParameterMap& parameterMap) override; - - void forward(PassType passType) override; - void backward(const UpdateCallback& callback) override; - -protected: - std::vector<TensorShape> inputShape_; - std::vector<TensorShape> filterShape_; - std::vector<TensorShape> outputShape_; - std::vector<TensorShape> multiplierShape_; - std::vector<MatrixPtr> weightMultiplier_; }; } // namespace paddle