diff --git a/paddle/gserver/layers/ExpandConvBaseLayer.cpp b/paddle/gserver/layers/ExpandConvBaseLayer.cpp
index 71a69bd0d01f4f6fcd579a408008ad4e00b5fd4d..a9b5b916a1f0d22ff46dc6795053f44e3e3af09e 100644
--- a/paddle/gserver/layers/ExpandConvBaseLayer.cpp
+++ b/paddle/gserver/layers/ExpandConvBaseLayer.cpp
@@ -145,7 +145,7 @@ void ExpandConvBaseLayer::expandFwdOnce(MatrixPtr image,
   real *expInData = expandInput_->getData();
   for (int g = 0; g < groups_[inIdx]; ++g) {
     MatrixPtr A =
-        Matrix::create(wgtData, subK, subM, true, useGpu_);  // mark transpose
+        Matrix::create(wgtData, subM, subK, false, useGpu_);
     MatrixPtr B = Matrix::create(expInData, subK, subN, false, useGpu_);
     MatrixPtr C = Matrix::create(outData, subM, subN, false, useGpu_);
     C->mul(A, B, 1, 1);
@@ -182,7 +182,7 @@ void ExpandConvBaseLayer::bpropActs(MatrixPtr out,
       // create temporary matrix
       MatrixPtr C = Matrix::create(expandInData, subK, subN, false, useGpu_);
       MatrixPtr B = Matrix::create(localGradData, subM, subN, false, useGpu_);
-      MatrixPtr A = Matrix::create(wgtData, subK, subM, false, useGpu_);
+      MatrixPtr A = Matrix::create(wgtData, subM, subK, true, useGpu_);
       C->mul(A, B);  // mul
 
       // clear the temporary matrix
@@ -247,10 +247,10 @@ void ExpandConvBaseLayer::bpropWeights(MatrixPtr image,
 
     // expand-mul one-group by one
     for (int g = 0; g < groups_[inpIdx]; g++) {
-      MatrixPtr A = Matrix::create(expandInData, subK, subN, false, useGpu_);
-      MatrixPtr B = Matrix::create(gradData, subM, subN, true, useGpu_);
-      MatrixPtr C = Matrix::create(wGradData, subK, subM, false, useGpu_);
-      C->mul(A, B, 1, 1);
+      MatrixPtr A = Matrix::create(expandInData, subK, subN, true, useGpu_);
+      MatrixPtr B = Matrix::create(gradData, subM, subN, false, useGpu_);
+      MatrixPtr C = Matrix::create(wGradData, subM, subK, false, useGpu_);
+      C->mul(B, A, 1, 1);
 
       A->clear();
       B->clear();
diff --git a/paddle/gserver/tests/test_ConvUnify.cpp b/paddle/gserver/tests/test_ConvUnify.cpp
index f1442ca7b832ad449a910208d9f27257bbe7ffaf..795641143e31c51b5bd91bd0029d85ccd4c29d94 100644
--- a/paddle/gserver/tests/test_ConvUnify.cpp
+++ b/paddle/gserver/tests/test_ConvUnify.cpp
@@ -86,13 +86,14 @@ MatrixPtr doOneConvTest(size_t imgSize, size_t output_x, size_t stride,
   initTestLayer(config, &layerMap, &parameters, &convLayer);
   convLayer->getBiasParameter()->zeroMem();
   convLayer->getParameters()[0]->zeroMem();
-  convLayer->getParameters()[0]->getBuf(PARAMETER_VALUE)->copyFrom(param, 18);
+  convLayer->getParameters()[0]->getBuf(PARAMETER_VALUE)->copyFrom(param,
+      channel * filter_size * filter_size * config.layerConfig.num_filters());
   convLayer->forward(PASS_GC);
 
   return convLayer->getOutputValue();
 }
 
-TEST(Layer, convTransLayerFwd2) {
+TEST(Layer, convParaUnified) {
   MatrixPtr input, resultCpu, resultGpu;
   input = Matrix::create(1, 4 * 4, false, false);
   float inputData[] = {1, 2, 3, 4,
@@ -122,6 +123,38 @@ TEST(Layer, convTransLayerFwd2) {
                             /*numfilters*/ 2,
                             input, param, true);
   checkMatrixEqual(resultCpu, resultGpu);
+
+  input = Matrix::create(1, 3 * 3 * 2, false, false);
+  float inputData2[] = {1, 2, 3,
+                        4, 5, 6,
+                        7, 8, 9,
+
+                        10, 11, 12,
+                        13, 14, 15,
+                        16, 17, 18};
+  float param2[] = {1, 2, 3, 4, 5, 6, 7, 8,
+                    8, 7, 6, 5, 4, 3, 2, 1};
+
+  input->setData(inputData2);
+
+  resultCpu = doOneConvTest(/* imgSize */ 3,
+                            /* output_x */ 2,
+                            /* stride */ 1,
+                            /* padding */ 0,
+                            /* filter_size */ 2,
+                            /*channel*/ 2,
+                            /*numfilters*/ 2,
+                            input, param2, false);
+
+  resultGpu = doOneConvTest(/* imgSize */ 3,
+                            /* output_x */ 2,
+                            /* stride */ 1,
+                            /* padding */ 0,
+                            /* filter_size */ 2,
+                            /*channel*/ 2,
+                            /*numfilters*/ 2,
+                            input, param2, true);
+  checkMatrixEqual(resultCpu, resultGpu);
 }
 
 int main(int argc, char** argv) {
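
Note on the GEMM changes: after this patch each group's weight buffer is read as a subM x subK row-major matrix (roughly, filters-per-group rows by input-patch-size columns) instead of the transposed subK x subM shape, so the CPU expand-conv path and the GPU path can share one parameter layout, which is what the extended test exercises by feeding the same param buffer to both. The forward call C->mul(A, B, 1, 1) is then an accumulating GEMM, output += W * expandedInput. Below is a minimal plain-loop sketch of that forward product under the new layout; the function and buffer names (expandFwdOnceRef, W, X, Y) are hypothetical stand-ins for illustration, not Paddle APIs.

#include <vector>

// Reference loops for C->mul(A, B, 1, 1) with A = weights (subM x subK,
// trans = false), B = expanded input (subK x subN), C = output (subM x subN).
// Names are illustrative; this is not the Paddle implementation.
void expandFwdOnceRef(const std::vector<float> &W,  // subM * subK, row-major
                      const std::vector<float> &X,  // subK * subN, row-major
                      std::vector<float> &Y,        // subM * subN, accumulated
                      int subM, int subK, int subN) {
  for (int m = 0; m < subM; ++m) {
    for (int n = 0; n < subN; ++n) {
      float acc = 0;
      for (int k = 0; k < subK; ++k) {
        acc += W[m * subK + k] * X[k * subN + n];  // row m of W dot col n of X
      }
      Y[m * subN + n] += acc;  // accumulate, matching the beta = 1 GEMM
    }
  }
}

The weight-gradient hunk follows the same convention: with A now marked transposed, C->mul(B, A, 1, 1) accumulates dW += dY * X^T into a subM x subK buffer, i.e. wGradData[m * subK + k] += sum over n of gradData[m * subN + n] * expandInData[k * subN + n], so the gradient lands in the same unified layout as the parameter itself.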