Modified ExpandConvBaseLayer to unify parameters between expand and cudnn

60bf1a47 · wangyang59 · fd925943 · 60bf1a47 · 60bf1a47
隐藏空白更改
内联并排

Showing 2 changed files with 41 additions and 8 deletions

paddle/gserver/layers/ExpandConvBaseLayer.cpp paddle/gserver/layers/ExpandConvBaseLayer.cpp +6 -6

paddle/gserver/tests/test_ConvUnify.cpp paddle/gserver/tests/test_ConvUnify.cpp +35 -2

未找到文件。
--- a/paddle/gserver/layers/ExpandConvBaseLayer.cpp
+++ b/paddle/gserver/layers/ExpandConvBaseLayer.cpp
@@ -145,7 +145,7 @@ void ExpandConvBaseLayer::expandFwdOnce(MatrixPtr image,
  real *expInData = expandInput_->getData();
  for (int g = 0; g < groups_[inIdx]; ++g) {
    MatrixPtr A =
-        Matrix::create(wgtData, subK, subM, true, useGpu_);  // mark transpose
+        Matrix::create(wgtData, subM, subK, false, useGpu_);  // mark transpose
    MatrixPtr B = Matrix::create(expInData, subK, subN, false, useGpu_);
    MatrixPtr C = Matrix::create(outData, subM, subN, false, useGpu_);
    C->mul(A, B, 1, 1);
@@ -182,7 +182,7 @@ void ExpandConvBaseLayer::bpropActs(MatrixPtr out,
      // create temporary matrix
      MatrixPtr C = Matrix::create(expandInData, subK, subN, false, useGpu_);
      MatrixPtr B = Matrix::create(localGradData, subM, subN, false, useGpu_);
-      MatrixPtr A = Matrix::create(wgtData, subK, subM, false, useGpu_);
+      MatrixPtr A = Matrix::create(wgtData, subM, subK, true, useGpu_);
      C->mul(A, B);  // mul
      // clear the temporary matrix
@@ -247,10 +247,10 @@ void ExpandConvBaseLayer::bpropWeights(MatrixPtr image,
    // expand-mul one-group by one
    for (int g = 0; g < groups_[inpIdx]; g++) {
-      MatrixPtr A = Matrix::create(expandInData, subK, subN, false, useGpu_);
+      MatrixPtr A = Matrix::create(expandInData, subK, subN, true, useGpu_);
-      MatrixPtr B = Matrix::create(gradData, subM, subN, true, useGpu_);
+      MatrixPtr B = Matrix::create(gradData, subM, subN, false, useGpu_);
-      MatrixPtr C = Matrix::create(wGradData, subK, subM, false, useGpu_);
+      MatrixPtr C = Matrix::create(wGradData, subM, subK, false, useGpu_);
-      C->mul(A, B, 1, 1);
+      C->mul(B, A, 1, 1);
      A->clear();
      B->clear();

--- a/paddle/gserver/tests/test_ConvUnify.cpp
+++ b/paddle/gserver/tests/test_ConvUnify.cpp
@@ -86,13 +86,14 @@ MatrixPtr doOneConvTest(size_t imgSize, size_t output_x, size_t stride,
    initTestLayer(config, &layerMap, &parameters, &convLayer);
    convLayer->getBiasParameter()->zeroMem();
    convLayer->getParameters()[0]->zeroMem();
-    convLayer->getParameters()[0]->getBuf(PARAMETER_VALUE)->copyFrom(param, 18);
+    convLayer->getParameters()[0]->getBuf(PARAMETER_VALUE)->copyFrom(param,
+        channel* filter_size * filter_size * config.layerConfig.num_filters());
    convLayer->forward(PASS_GC);
    return convLayer->getOutputValue();
 }
-TEST(Layer, convTransLayerFwd2) {
+TEST(Layer, convParaUnified) {
    MatrixPtr input, resultCpu, resultGpu;
    input = Matrix::create(1, 4 * 4, false, false);
    float inputData[] = {1, 2, 3, 4,
@@ -122,6 +123,38 @@ TEST(Layer, convTransLayerFwd2) {
                       /*numfilters*/ 2,
                       input, param, true);
    checkMatrixEqual(resultCpu, resultGpu);
+    input = Matrix::create(1, 3 * 3 * 2, false, false);
+    float inputData2[] = {1, 2, 3,
+                          4, 5, 6,
+                          7, 8, 9,
+                          10, 11, 12,
+                          13, 14, 15,
+                          16, 17, 18};
+    float param2[] = {1, 2, 3, 4, 5, 6, 7, 8,
+                      8, 7, 6, 5, 4, 3, 2, 1};
+    input->setData(inputData2);
+    resultCpu = doOneConvTest(/* imgSize */ 3,
+                   /* output_x */ 2,
+                   /* stride */ 1,
+                   /* padding */ 0,
+                   /* filter_size */ 2,
+                   /*channel*/ 2,
+                   /*numfilters*/ 2,
+                   input, param2, false);
+    resultGpu = doOneConvTest(/* imgSize */ 3,
+                       /* output_x */ 2,
+                       /* stride */ 1,
+                       /* padding */ 0,
+                       /* filter_size */ 2,
+                       /*channel*/ 2,
+                       /*numfilters*/ 2,
+                       input, param2, true);
+    checkMatrixEqual(resultCpu, resultGpu);
 }
 int main(int argc, char** argv) {