diff --git a/paddle/gserver/layers/ConvexCombinationLayer.cpp b/paddle/gserver/layers/ConvexCombinationLayer.cpp index 3f4d77a2fe069f239db8cd099dd0d472d6ce3ccc..ed57f2af3c6455fb89fd05b37bb205e8da0bf7e1 100644 --- a/paddle/gserver/layers/ConvexCombinationLayer.cpp +++ b/paddle/gserver/layers/ConvexCombinationLayer.cpp @@ -113,7 +113,7 @@ void ConvexCombinationLayer::forward(PassType passType) { tmpRow0->setData(inV0->getData() + i * weightDim); tmpRow1->setData(outV->getData() + i * dataDim); - tmpRow1->mul(tmpRow0, tmpMtx0, 1, 0); + tmpRow1->mul(*tmpRow0, *tmpMtx0, 1, 0); } } @@ -136,7 +136,7 @@ void ConvexCombinationLayer::backward(const UpdateCallback& callback) { tmpRow1->setData(outG->getData() + i * dataDim); tmpMtx0->setData(inV1->getData() + i * weightDim * dataDim); - tmpRow0->mul(tmpRow1, tmpMtx0->getTranspose(), 1, 1); + tmpRow0->mul(*tmpRow1, *(tmpMtx0->getTranspose()), 1, 1); } } @@ -146,7 +146,7 @@ void ConvexCombinationLayer::backward(const UpdateCallback& callback) { tmpRow1->setData(outG->getData() + i * dataDim); tmpMtx0->setData(inG1->getData() + i * weightDim * dataDim); - tmpMtx0->mul(tmpRow0->getTranspose(), tmpRow1, 1, 1); + tmpMtx0->mul(*(tmpRow0->getTranspose()), *tmpRow1, 1, 1); } } } diff --git a/paddle/gserver/layers/ExpandConvBaseLayer.cpp b/paddle/gserver/layers/ExpandConvBaseLayer.cpp index 25948747fe93e65b77d8eef5ac4748c545f79e90..9ddccc202705c024076db795a9aeda0c823e9399 100644 --- a/paddle/gserver/layers/ExpandConvBaseLayer.cpp +++ b/paddle/gserver/layers/ExpandConvBaseLayer.cpp @@ -150,7 +150,7 @@ void ExpandConvBaseLayer::expandFwdOnce(MatrixPtr image, Matrix::create(wgtData, subM, subK, false, useGpu_); // mark transpose MatrixPtr B = Matrix::create(expInData, subK, subN, false, useGpu_); MatrixPtr C = Matrix::create(outData, subM, subN, false, useGpu_); - C->mul(A, B, 1, 1); + C->mul(*A, *B, 1, 1); A->clear(); B->clear(); @@ -185,7 +185,7 @@ void ExpandConvBaseLayer::bpropActs(MatrixPtr out, MatrixPtr C = Matrix::create(expandInData, subK, subN, false, useGpu_); MatrixPtr B = Matrix::create(localGradData, subM, subN, false, useGpu_); MatrixPtr A = Matrix::create(wgtData, subM, subK, true, useGpu_); - C->mul(A, B); // mul + C->mul(*A, *B); // mul // clear the temporary matrix A->clear(); @@ -252,7 +252,7 @@ void ExpandConvBaseLayer::bpropWeights(MatrixPtr image, MatrixPtr A = Matrix::create(expandInData, subK, subN, true, useGpu_); MatrixPtr B = Matrix::create(gradData, subM, subN, false, useGpu_); MatrixPtr C = Matrix::create(wGradData, subM, subK, false, useGpu_); - C->mul(B, A, 1, 1); + C->mul(*B, *A, 1, 1); A->clear(); B->clear(); diff --git a/paddle/gserver/layers/FullMatrixProjection.cpp b/paddle/gserver/layers/FullMatrixProjection.cpp index 9e72a33a3c6f443497192ff5d39b4d4ad4a02ec0..b8b6f403d6a02833305e2e10e6c3f6a178d4f0fd 100644 --- a/paddle/gserver/layers/FullMatrixProjection.cpp +++ b/paddle/gserver/layers/FullMatrixProjection.cpp @@ -28,7 +28,7 @@ FullMatrixProjection::FullMatrixProjection(const ProjectionConfig& config, void FullMatrixProjection::forward() { REGISTER_TIMER_INFO("FwMulTimer", getName().c_str()); - out_->value->mul(in_->value, weight_->getW(), 1, 1); + out_->value->mul(*(in_->value), *(weight_->getW()), 1, 1); } void FullMatrixProjection::backward(const UpdateCallback& callback) { @@ -37,7 +37,8 @@ void FullMatrixProjection::backward(const UpdateCallback& callback) { /* Calculate the W-gradient for the current layer */ if (weight_->getWGrad()) { REGISTER_TIMER_INFO("GradMulTimer", getName().c_str()); - weight_->getWGrad()->mul(in_->value->getTranspose(), out_->grad, 1, 1); + weight_->getWGrad()->mul( + *(in_->value->getTranspose()), *(out_->grad), 1, 1); } // If callback does not change value, backward propagation error @@ -47,7 +48,7 @@ void FullMatrixProjection::backward(const UpdateCallback& callback) { /* Calculate the input layers error */ if (in_->grad) { REGISTER_TIMER_INFO("BpMulTimer", getName().c_str()); - in_->grad->mul(out_->grad, weight_->getW()->getTranspose(), 1, 1); + in_->grad->mul(*(out_->grad), *(weight_->getW()->getTranspose()), 1, 1); } hl_set_sync_flag(syncFlag); diff --git a/paddle/gserver/layers/FullyConnectedLayer.cpp b/paddle/gserver/layers/FullyConnectedLayer.cpp index 89afe33c36697f8d57885043ed68cdf26576e358..d8a667ff8dc026b21308f0a43827f2e4342e1913 100644 --- a/paddle/gserver/layers/FullyConnectedLayer.cpp +++ b/paddle/gserver/layers/FullyConnectedLayer.cpp @@ -84,8 +84,8 @@ void FullyConnectedLayer::forward(PassType passType) { auto input = getInput(i); CHECK(input.value) << "The input of 'fc' layer must be matrix"; REGISTER_TIMER_INFO("FwMulTimer", getName().c_str()); - i == 0 ? outV->mul(input.value, weights_[i]->getW(), 1, 0) - : outV->mul(input.value, weights_[i]->getW(), 1, 1); + i == 0 ? outV->mul(*input.value, *weights_[i]->getW(), 1, 0) + : outV->mul(*input.value, *weights_[i]->getW(), 1, 1); } /* add the bias-vector */ @@ -123,7 +123,7 @@ void FullyConnectedLayer::backward(const UpdateCallback& callback) { MatrixPtr oGrad = getOutputGrad(); { REGISTER_TIMER_INFO("GradMulTimer", getName().c_str()); - weights_[i]->getWGrad()->mul(input_T, oGrad, 1, 1); + weights_[i]->getWGrad()->mul(*input_T, *oGrad, 1, 1); } } @@ -136,7 +136,7 @@ void FullyConnectedLayer::backward(const UpdateCallback& callback) { if (NULL != preGrad) { MatrixPtr weights_T = weights_[i]->getW()->getTranspose(); REGISTER_TIMER_INFO("BpMulTimer", getName().c_str()); - preGrad->mul(getOutputGrad(), weights_T, 1, 1); + preGrad->mul(*getOutputGrad(), *weights_T, 1, 1); } hl_set_sync_flag(syncFlag); diff --git a/paddle/gserver/layers/LinearChainCRF.cpp b/paddle/gserver/layers/LinearChainCRF.cpp index af550c7a0154802a93bacccab500695bdad36542..b7f748f3bb8a419429956724131e81dfdbd274c6 100644 --- a/paddle/gserver/layers/LinearChainCRF.cpp +++ b/paddle/gserver/layers/LinearChainCRF.cpp @@ -59,7 +59,7 @@ real LinearChainCRF::forward(real* x, int* s, int length) { matX->rowMax(*maxX_); expX_->assign(*matX); // subtract max to avoid overflow or underflow - expX_->mul(maxX_, ones_, (real)-1, (real)1); + expX_->mul(*maxX_, *ones_, (real)-1, (real)1); expX_->exp2(); real* a = a_->getData(); diff --git a/paddle/gserver/layers/LstmLayer.cpp b/paddle/gserver/layers/LstmLayer.cpp index 2543d1b49a801943819e05bc52e53eaeafae1edf..01cc5fec8b9704e48dbe9b2e67840a344ac383ca 100644 --- a/paddle/gserver/layers/LstmLayer.cpp +++ b/paddle/gserver/layers/LstmLayer.cpp @@ -316,7 +316,7 @@ void LstmLayer::forwardSequence(int batchSize, } if (prevOutput_) { frameGate->setData(lstmValue.gateValue); - frameGate->mul(prevOutput_, weight_->getW(), 1, 1); + frameGate->mul(*prevOutput_, *weight_->getW(), 1, 1); } } AsyncGpuBlock asyncGpuBlock; @@ -338,7 +338,7 @@ void LstmLayer::forwardSequence(int batchSize, frameOutput->setData(lstmValue.outputValue); nextFrame(reversed_, getSize()); frameGate->setData(lstmValue.gateValue); - frameGate->mul(frameOutput, weight_->getW(), 1, 1); + frameGate->mul(*frameOutput, *weight_->getW(), 1, 1); } } if (n != numSequences - 1) { @@ -348,7 +348,7 @@ void LstmLayer::forwardSequence(int batchSize, if (!reversed_) { if (!prevState_) lstmValue.prevStateValue = nullptr; if (prevOutput_) { - frameGate->mul(frameOutput, weight_->getW(), 1, 1); + frameGate->mul(*frameOutput, *weight_->getW(), 1, 1); } } else { lstmValue.prevStateValue = nullptr; @@ -470,7 +470,7 @@ void LstmLayer::backwardSequence(int batchSize, frameGate->setData(lstmGrad.gateGrad); nextFrame(reversed_, getSize()); frameOutput->setData(lstmGrad.outputGrad); - frameOutput->mul(frameGate, weightT, 1, 1); + frameOutput->mul(*frameGate, *weightT, 1, 1); } else { nextFrame(reversed_, getSize()); } @@ -479,14 +479,14 @@ void LstmLayer::backwardSequence(int batchSize, if (weight_->getWGrad()) { if (!reversed_) { weight_->getWGrad()->mul( - output_.value->subMatrix(start, length - 1)->getTranspose(), - gate_.grad->subMatrix(start + 1, length - 1), + *output_.value->subMatrix(start, length - 1)->getTranspose(), + *gate_.grad->subMatrix(start + 1, length - 1), 1, 1); } else { weight_->getWGrad()->mul( - output_.value->subMatrix(start + 1, length - 1)->getTranspose(), - gate_.grad->subMatrix(start, length - 1), + *output_.value->subMatrix(start + 1, length - 1)->getTranspose(), + *gate_.grad->subMatrix(start, length - 1), 1, 1); } @@ -541,7 +541,7 @@ void LstmLayer::forwardBatch(int batchSize, if (n != 0) { MatrixPtr batch1 = batchValue_->getBatchValue(n - 1, batchSize); - gateValue->mul(batch1, weight_->getW(), 1, 1); + gateValue->mul(*batch1, *weight_->getW(), 1, 1); } else if (prevOutput_) { Matrix::resizeOrCreate(prevBatchOutput2_, gateValue->getHeight(), @@ -549,7 +549,7 @@ void LstmLayer::forwardBatch(int batchSize, false, useGpu_); batchValue_->prevOutput2Batch(*prevOutput_, *prevBatchOutput2_); - gateValue->mul(prevBatchOutput2_, weight_->getW(), 1, 1); + gateValue->mul(*prevBatchOutput2_, *weight_->getW(), 1, 1); batchValue_->prevOutput2Batch(*prevState_, *totalState_->subMatrix(0, numSequences)); @@ -672,16 +672,16 @@ void LstmLayer::backwardBatch(int batchSize, if (n != 0) { MatrixPtr tmp = batchGrad_->getBatchValue(n - 1, batchSize); - tmp->mul(gateGrad, weightT, 1, 1); + tmp->mul(*gateGrad, *weightT, 1, 1); } if (n != 0 && weight_->getWGrad()) { /* backward weight */ MatrixPtr outputValue = batchValue_->getBatchValue(n - 1, batchSize); - weight_->getWGrad()->mul(outputValue->getTranspose(), gateGrad, 1, 1); + weight_->getWGrad()->mul(*outputValue->getTranspose(), *gateGrad, 1, 1); } else if (prevOutput_ && weight_->getWGrad()) { weight_->getWGrad()->mul( - prevBatchOutput2_->getTranspose(), gateGrad, 1, 1); + *prevBatchOutput2_->getTranspose(), *gateGrad, 1, 1); } } } diff --git a/paddle/gserver/layers/MDLstmLayer.cpp b/paddle/gserver/layers/MDLstmLayer.cpp index 1243c12889542103f65b427da8f549e852773c5c..fb41af563195496a57eafcc52b49eadac697fa0a 100644 --- a/paddle/gserver/layers/MDLstmLayer.cpp +++ b/paddle/gserver/layers/MDLstmLayer.cpp @@ -547,7 +547,7 @@ void MDLstmLayer::forwardOneSequence(int start, CoordIterator& coordIter) { if (coordIter.getPrePos(delays_, i, prePos)) { int preOffset = coordIter.offset(prePos); frameGate_[start + offset].value->mul( - frameOutput_[start + preOffset].value, weight_->getW(), 1.0, 1.0); + *frameOutput_[start + preOffset].value, *weight_->getW(), 1.0, 1.0); } } forwardGate2OutputSequence(start, coordIter); @@ -747,11 +747,11 @@ void MDLstmLayer::backwardOneSequence(int start, CoordIterator& coordIter) { if (coordIter.getPrePos(delays_, i, prePos)) { int preOffset = coordIter.offset(prePos); frameOutput_[start + preOffset].grad->mul( - frameGate_[start + offset].grad, weightT, 1.0, 1.0); + *frameGate_[start + offset].grad, *weightT, 1.0, 1.0); if (weight_->getWGrad()) { weight_->getWGrad()->mul( - frameOutput_[start + preOffset].value->getTranspose(), - frameGate_[start + offset].grad, + *frameOutput_[start + preOffset].value->getTranspose(), + *frameGate_[start + offset].grad, 1.0, 1.0); } diff --git a/paddle/gserver/layers/OuterProdLayer.cpp b/paddle/gserver/layers/OuterProdLayer.cpp index cf9a008318e9d8dd50d1f401576082c07680f6c4..b606e4436567eb2a8df9fd501a2af8c8aa1d2fdf 100644 --- a/paddle/gserver/layers/OuterProdLayer.cpp +++ b/paddle/gserver/layers/OuterProdLayer.cpp @@ -96,7 +96,7 @@ void OuterProdLayer::forward(PassType passType) { tmpRow0->setData(inV0->getData() + i * dim0); tmpRow1->setData(inV1->getData() + i * dim1); - tmpMtx0->mul(tmpRow0->getTranspose(), tmpRow1); + tmpMtx0->mul(*tmpRow0->getTranspose(), *tmpRow1); } } } @@ -121,7 +121,7 @@ void OuterProdLayer::backward(const UpdateCallback& callback) { tmpRow0->setData(inG0->getData() + i * dim0); tmpRow1->setData(inV1->getData() + i * dim1); - tmpRow0->mul(tmpRow1, tmpMtx0->getTranspose(), 1, 1); + tmpRow0->mul(*tmpRow1, *tmpMtx0->getTranspose(), 1, 1); } } @@ -131,7 +131,7 @@ void OuterProdLayer::backward(const UpdateCallback& callback) { tmpRow0->setData(inV0->getData() + i * dim0); tmpRow1->setData(inG1->getData() + i * dim1); - tmpRow1->mul(tmpRow0, tmpMtx0, 1, 1); + tmpRow1->mul(*tmpRow0, *tmpMtx0, 1, 1); } } } diff --git a/paddle/gserver/layers/RecurrentLayer.cpp b/paddle/gserver/layers/RecurrentLayer.cpp index 85812c9d660e07e915012a7337e621c10a6597ca..94b16996a86d2c52c8b97cbe009076fa3ade03f7 100644 --- a/paddle/gserver/layers/RecurrentLayer.cpp +++ b/paddle/gserver/layers/RecurrentLayer.cpp @@ -215,12 +215,12 @@ void RecurrentLayer::forwardSequence(int batchSize, void RecurrentLayer::forwardOneSequence(int start, int length) { if (!reversed_) { if (prevOutput_) { - frameOutput_[start].value->mul(prevOutput_, weight_->getW(), 1, 1); + frameOutput_[start].value->mul(*prevOutput_, *weight_->getW(), 1, 1); } activation_->forward(frameOutput_[start]); for (int i = 1; i < length; ++i) { frameOutput_[start + i].value->mul( - frameOutput_[start + i - 1].value, weight_->getW(), 1, 1); + *frameOutput_[start + i - 1].value, *weight_->getW(), 1, 1); activation_->forward(frameOutput_[start + i]); } if (prevOutput_) { @@ -230,7 +230,7 @@ void RecurrentLayer::forwardOneSequence(int start, int length) { activation_->forward(frameOutput_[start + length - 1]); for (int i = length - 2; i >= 0; --i) { frameOutput_[start + i].value->mul( - frameOutput_[start + i + 1].value, weight_->getW(), 1, 1); + *frameOutput_[start + i + 1].value, *weight_->getW(), 1, 1); activation_->forward(frameOutput_[start + i]); } } @@ -282,13 +282,13 @@ void RecurrentLayer::backwardOneSequence(int start, int length) { for (int i = length - 1; i > 0; --i) { activation_->backward(frameOutput_[start + i]); frameOutput_[start + i - 1].grad->mul( - frameOutput_[start + i].grad, weightT, 1, 1); + *frameOutput_[start + i].grad, *weightT, 1, 1); } activation_->backward(frameOutput_[start]); if (weight_->getWGrad()) { weight_->getWGrad()->mul( - output_.value->subMatrix(start, length - 1)->getTranspose(), - output_.grad->subMatrix(start + 1, length - 1), + *output_.value->subMatrix(start, length - 1)->getTranspose(), + *output_.grad->subMatrix(start + 1, length - 1), 1, 1); } @@ -296,13 +296,13 @@ void RecurrentLayer::backwardOneSequence(int start, int length) { for (int i = 0; i < length - 1; ++i) { activation_->backward(frameOutput_[start + i]); frameOutput_[start + i + 1].grad->mul( - frameOutput_[start + i].grad, weightT, 1, 1); + *frameOutput_[start + i].grad, *weightT, 1, 1); } activation_->backward(frameOutput_[start + length - 1]); if (weight_->getWGrad()) { weight_->getWGrad()->mul( - output_.value->subMatrix(start + 1, length - 1)->getTranspose(), - output_.grad->subMatrix(start, length - 1), + *output_.value->subMatrix(start + 1, length - 1)->getTranspose(), + *output_.grad->subMatrix(start, length - 1), 1, 1); } @@ -329,7 +329,7 @@ void RecurrentLayer::forwardBatch(int batchSize, if (n != 0) { MatrixPtr batch1 = batchValue_->getBatchValue(n - 1, batch2->getHeight()); - batch2->mul(batch1, weight_->getW(), 1, 1); + batch2->mul(*batch1, *weight_->getW(), 1, 1); } Argument arg; arg.value = batch2; @@ -367,14 +367,14 @@ void RecurrentLayer::backwardBatch(int batchSize, if (n != 0) { batch1 = batchGrad_->getBatchValue(n - 1, batch2->getHeight()); - batch1->mul(batch2, weightT, 1, 1); + batch1->mul(*batch2, *weightT, 1, 1); } if (backwardByBatch && weight_->getWGrad()) { if (n != 0) { /* backward weight */ batch1 = batchValue_->getBatchValue(n - 1, batch2->getHeight()); - weight_->getWGrad()->mul(batch1->getTranspose(), batch2, 1, 1); + weight_->getWGrad()->mul(*batch1->getTranspose(), *batch2, 1, 1); } } } @@ -389,14 +389,14 @@ void RecurrentLayer::backwardBatch(int batchSize, int len = starts[seq + 1] - starts[seq]; if (!reversed_) { weight_->getWGrad()->mul( - output_.value->subMatrix(starts[seq], len - 1)->getTranspose(), - output_.grad->subMatrix(starts[seq] + 1, len - 1), + *output_.value->subMatrix(starts[seq], len - 1)->getTranspose(), + *output_.grad->subMatrix(starts[seq] + 1, len - 1), 1, 1); } else { weight_->getWGrad()->mul( - output_.value->subMatrix(starts[seq] + 1, len - 1)->getTranspose(), - output_.grad->subMatrix(starts[seq], len - 1), + *output_.value->subMatrix(starts[seq] + 1, len - 1)->getTranspose(), + *output_.grad->subMatrix(starts[seq], len - 1), 1, 1); } diff --git a/paddle/gserver/layers/SelectiveFullyConnectedLayer.cpp b/paddle/gserver/layers/SelectiveFullyConnectedLayer.cpp index 9200a01eee3be8ab61b6181ec337b2c3c70c5966..5eacff6b7143996130bea64766ef42c66f4c7310 100644 --- a/paddle/gserver/layers/SelectiveFullyConnectedLayer.cpp +++ b/paddle/gserver/layers/SelectiveFullyConnectedLayer.cpp @@ -155,20 +155,20 @@ void SelectiveFullyConnectedLayer::forward(PassType passType) { // manully compute the multiplication of // the input vector and the selected rows. REGISTER_TIMER("selective.plain"); - interOutput_->mul(input, weight->getTranspose(), 1, scaleT); + interOutput_->mul(*input, *weight->getTranspose(), 1, scaleT); } else { // if the indecies is not sparse enough, // use full mul instead REGISTER_TIMER("selective.mul"); if (fullOutput_) { - interOutput_->mul(input, weight->getTranspose(), 1, scaleT); + interOutput_->mul(*input, *weight->getTranspose(), 1, scaleT); } else { Matrix::resizeOrCreate(mmat_, hsize, wsize, /*trans=*/false, /*useGpu=*/useGpu_); - mmat_->mul(input, weight->getTranspose()); + mmat_->mul(*input, *weight->getTranspose()); interOutput_->add3(mmat_); } } @@ -242,14 +242,14 @@ void SelectiveFullyConnectedLayer::backward(const UpdateCallback& callback) { MatrixPtr preGrad = getInputGrad(i); if (preGrad) { REGISTER_TIMER_INFO("BpMulTimer", getName().c_str()); - preGrad->mul(interOutGrad_, weights_[i]->getW(), 1, 1); + preGrad->mul(*interOutGrad_, *weights_[i]->getW(), 1, 1); } MatrixPtr wGrad = weights_[i]->getWGrad(); if (wGrad) { REGISTER_TIMER_INFO("GradMulTimer", getName().c_str()); MatrixPtr input = getInputValue(i); - wGrad->mul(interOutGrad_->getTranspose(), input, 1, 1); + wGrad->mul(*interOutGrad_->getTranspose(), *input, 1, 1); } { diff --git a/paddle/gserver/layers/TensorLayer.cpp b/paddle/gserver/layers/TensorLayer.cpp index 642eb1bdd31c0c16f251dd7afda1ae0a61c0872e..5be88d7c05dae5c43805bc62ce95dc2efcac3df6 100644 --- a/paddle/gserver/layers/TensorLayer.cpp +++ b/paddle/gserver/layers/TensorLayer.cpp @@ -77,7 +77,7 @@ void TensorLayer::forward(PassType passType) { REGISTER_TIMER_INFO("TensorFwMulTimer", getName().c_str()); for (size_t i = 0; i < getSize(); ++i) { MatrixPtr weights = weights_[i]->getW(); - tmpMat->mul(input1, weights, 1, 0); + tmpMat->mul(*input1, *weights, 1, 0); outV->rowDotMul(i, *tmpMat, *input2); } } @@ -112,7 +112,7 @@ void TensorLayer::backward(const UpdateCallback& callback) { if (weights_[i]->getWGrad()) { tmpMat->rowScale(i, *input1, *oGrad); MatrixPtr input1_T = tmpMat->getTranspose(); - weights_[i]->getWGrad()->mul(input1_T, input2, 1, 1); + weights_[i]->getWGrad()->mul(*input1_T, *input2, 1, 1); } } } @@ -130,11 +130,11 @@ void TensorLayer::backward(const UpdateCallback& callback) { if (NULL != preGrad1) { /* (grad * e2) * trans(W) */ tmpMat->rowScale(i, *input2, *oGrad); MatrixPtr weights_T = weights->getTranspose(); - preGrad1->mul(tmpMat, weights_T, 1, 1); + preGrad1->mul(*tmpMat, *weights_T, 1, 1); } if (NULL != preGrad2) { /* (grad * e1) * W */ tmpMat->rowScale(i, *input1, *oGrad); - preGrad2->mul(tmpMat, weights, 1, 1); + preGrad2->mul(*tmpMat, *weights, 1, 1); } } } diff --git a/paddle/gserver/layers/TransposedFullMatrixProjection.cpp b/paddle/gserver/layers/TransposedFullMatrixProjection.cpp index 3f7ff0488207564e3ebbd5a467f42b46af3b31ff..2a12499e5b5f10cda1a4d5b09946613eafc10ff7 100644 --- a/paddle/gserver/layers/TransposedFullMatrixProjection.cpp +++ b/paddle/gserver/layers/TransposedFullMatrixProjection.cpp @@ -46,7 +46,7 @@ TransposedFullMatrixProjection::TransposedFullMatrixProjection( void TransposedFullMatrixProjection::forward() { REGISTER_TIMER_INFO("FwMulTimer", getName().c_str()); - out_->value->mul(in_->value, weight_->getW()->getTranspose(), 1, 1); + out_->value->mul(*(in_->value), *(weight_->getW()->getTranspose()), 1, 1); } void TransposedFullMatrixProjection::backward(const UpdateCallback& callback) { @@ -55,7 +55,8 @@ void TransposedFullMatrixProjection::backward(const UpdateCallback& callback) { /* Calculate the W-gradient for the current layer */ if (weight_->getWGrad()) { REGISTER_TIMER_INFO("GradMulTimer", getName().c_str()); - weight_->getWGrad()->mul(out_->grad->getTranspose(), in_->value, 1, 1); + weight_->getWGrad()->mul( + *(out_->grad->getTranspose()), *(in_->value), 1, 1); } // If callback does not change value, backprop error asynchronously so that @@ -69,7 +70,7 @@ void TransposedFullMatrixProjection::backward(const UpdateCallback& callback) { /* Calculate the input layers error */ if (in_->grad) { REGISTER_TIMER_INFO("BpMulTimer", getName().c_str()); - in_->grad->mul(out_->grad, weight_->getW(), 1, 1); + in_->grad->mul(*(out_->grad), *(weight_->getW()), 1, 1); } hl_set_sync_flag(syncFlag); diff --git a/paddle/math/CpuSparseMatrix.cpp b/paddle/math/CpuSparseMatrix.cpp index b5d5b6ef615829fc1e24ccd417e2f0b3312f072d..82a482f701481267e564c7ad8179689deb65a75b 100644 --- a/paddle/math/CpuSparseMatrix.cpp +++ b/paddle/math/CpuSparseMatrix.cpp @@ -163,15 +163,16 @@ MatrixPtr CpuSparseMatrix::getTranspose() { SparseValueType CpuSparseMatrix::getValueType() { return valueType_; } -void CpuSparseMatrix::mul(MatrixPtr a, MatrixPtr b, real scaleAB, real scaleT) { +void CpuSparseMatrix::mul(const Matrix& a, + const Matrix& b, + real scaleAB, + real scaleT) { CHECK(!isTransposed()) << "Not supported"; + const auto a_ptr = dynamic_cast(&a); + const auto b_ptr = dynamic_cast(&b); - if (dynamic_cast(a.get()) && dynamic_cast(b.get())) { - CpuMatrix::mul(dynamic_cast(a.get()), - dynamic_cast(b.get()), - this, - scaleAB, - scaleT); + if (a_ptr && b_ptr) { + CpuMatrix::mul((CpuMatrix*)a_ptr, (CpuMatrix*)b_ptr, this, scaleAB, scaleT); } else { LOG(FATAL) << "not supported"; } diff --git a/paddle/math/CpuSparseMatrix.h b/paddle/math/CpuSparseMatrix.h index 9676f8864f845e8ab75467c8ca6b6e7e68945d96..d3e8871cb5b320ce420d601bde7f18d85398dde7 100644 --- a/paddle/math/CpuSparseMatrix.h +++ b/paddle/math/CpuSparseMatrix.h @@ -203,7 +203,7 @@ public: /// mem MUST be alloced outside (memAlloc=false) void transpose(MatrixPtr matTrans, bool memAlloc); - void mul(MatrixPtr A, MatrixPtr B, real alpha, real beta); + void mul(const Matrix& A, const Matrix& B, real alpha, real beta); /** * @brief sparseMatrix += denseMatrix diff --git a/paddle/math/Matrix.cpp b/paddle/math/Matrix.cpp index 3b3c1d7d48a286b0715b883e4a6f796e86a42665..0193f2f9973032c1971d6583d04d1d75ee76d933 100644 --- a/paddle/math/Matrix.cpp +++ b/paddle/math/Matrix.cpp @@ -582,18 +582,16 @@ void GpuMatrix::mul(const GpuMatrix& a, } /* this = a*b */ -void GpuMatrix::mul(const MatrixPtr a, const MatrixPtr b) { - mul(a, b, 1.0, 0.0); -} +void GpuMatrix::mul(const Matrix& a, const Matrix& b) { mul(a, b, 1.0, 0.0); } -void GpuMatrix::mul(const MatrixPtr a, - const MatrixPtr b, +void GpuMatrix::mul(const Matrix& a, + const Matrix& b, real scaleAB, real scaleT) { - GpuMatrixPtr a_ptr = std::dynamic_pointer_cast(a); - GpuMatrixPtr b_ptr = std::dynamic_pointer_cast(b); - GpuSparseMatrixPtr a_ptr_s = std::dynamic_pointer_cast(a); - GpuSparseMatrixPtr b_ptr_s = std::dynamic_pointer_cast(b); + const auto a_ptr = dynamic_cast(&a); + const auto b_ptr = dynamic_cast(&b); + const auto a_ptr_s = dynamic_cast(&a); + const auto b_ptr_s = dynamic_cast(&b); if (a_ptr && b_ptr) { mul(*a_ptr, *b_ptr, scaleAB, scaleT); @@ -2598,29 +2596,22 @@ void CpuMatrix::sequenceAvgForward(Matrix& a, } /* this = scaleAB*(a*b) + scaleT*this*/ -void CpuMatrix::mul(const MatrixPtr a, - const MatrixPtr b, +void CpuMatrix::mul(const Matrix& a, + const Matrix& b, real scaleAB, real scaleT) { CHECK(!isTransposed()) << "Not supported"; + const auto a_ptr = dynamic_cast(&a); + const auto b_ptr = dynamic_cast(&b); + const auto a_ptr_s = dynamic_cast(&a); + const auto b_ptr_s = dynamic_cast(&b); - if (dynamic_cast(a.get()) && dynamic_cast(b.get())) { - mul(dynamic_cast(a.get()), - dynamic_cast(b.get()), - scaleAB, - scaleT); - } else if (dynamic_cast(a.get()) && - dynamic_cast(b.get())) { - mul(dynamic_cast(a.get()), - dynamic_cast(b.get()), - scaleAB, - scaleT); - } else if (dynamic_cast(a.get()) && - dynamic_cast(b.get())) { - mul(dynamic_cast(a.get()), - dynamic_cast(b.get()), - scaleAB, - scaleT); + if (a_ptr && b_ptr) { + mul((CpuMatrix*)a_ptr, (CpuMatrix*)b_ptr, scaleAB, scaleT); + } else if (a_ptr_s && b_ptr) { + mul((CpuSparseMatrix*)a_ptr_s, (CpuMatrix*)b_ptr, scaleAB, scaleT); + } else if (a_ptr && b_ptr_s) { + mul((CpuMatrix*)a_ptr, (CpuSparseMatrix*)b_ptr_s, scaleAB, scaleT); } else { LOG(FATAL) << "Not supported"; } @@ -3289,7 +3280,7 @@ void CpuMatrix::addColumnVector(const Matrix& b) { } /* this = a*b */ -void CpuMatrix::mul(const MatrixPtr a, const MatrixPtr b) { +void CpuMatrix::mul(const Matrix& a, const Matrix& b) { return mul(a, b, 1.0, 0.0); } diff --git a/paddle/math/Matrix.h b/paddle/math/Matrix.h index b8c7adf9486be7cea7001e1c10cfa0a5df0ef610..dfcb0853df37c69151994bff5d025855ab12a807 100644 --- a/paddle/math/Matrix.h +++ b/paddle/math/Matrix.h @@ -444,8 +444,8 @@ public: * this = scaleAB*(a*b) + scaleT*this * @endcode */ - virtual void mul(const MatrixPtr a, - const MatrixPtr b, + virtual void mul(const Matrix& a, + const Matrix& b, real scaleAB, real scaleT) { LOG(FATAL) << "Not implemented"; @@ -643,7 +643,7 @@ public: * this = a*b * @endcode */ - virtual void mul(const MatrixPtr a, const MatrixPtr b) { + virtual void mul(const Matrix& a, const Matrix& b) { LOG(FATAL) << "Not implemented"; } @@ -1272,14 +1272,14 @@ public: * this = scaleAB*(a*b) + scaleT*this * @endcode */ - void mul(const MatrixPtr a, const MatrixPtr b, real scaleAB, real scaleT); + void mul(const Matrix& a, const Matrix& b, real scaleAB, real scaleT); /** * @code * this = a*b * @endcode */ - void mul(const MatrixPtr a, const MatrixPtr b); + void mul(const Matrix& a, const Matrix& b); void mul(const GpuMatrix& a, const GpuMatrix& b, real scaleAB, real scaleT); @@ -1784,7 +1784,7 @@ public: void addColumnVector(const Matrix& b); - void mul(const MatrixPtr a, const MatrixPtr b, real scaleAB, real scaleT); + void mul(const Matrix& a, const Matrix& b, real scaleAB, real scaleT); void mul(CpuMatrix* a, CpuMatrix* b, real scaleAB, real scaleT); void mul(CpuMatrix* a, CpuSparseMatrix* b, real scaleAB, real scaleT); @@ -1807,7 +1807,7 @@ public: virtual void mul(CpuSparseMatrix* a, CpuMatrix* b, real scaleAB, real scaleT); - void mul(const MatrixPtr a, const MatrixPtr b); + void mul(const Matrix& a, const Matrix& b); void rightMul(Matrix& b, real scaleAB, real scaleT); void rightMul(Matrix& b); diff --git a/paddle/math/SparseMatrix.cpp b/paddle/math/SparseMatrix.cpp index 9154503c2132a740aaa42f90eb7061156403ac00..720a035ecbd26df01fe24c991982bbf7965ccbdc 100644 --- a/paddle/math/SparseMatrix.cpp +++ b/paddle/math/SparseMatrix.cpp @@ -571,49 +571,48 @@ void GpuSparseMatrix::transpose(MatrixPtr matTrans, bool memAlloc) { hl_stream_synchronize(stream); } -void GpuSparseMatrix::mul(const GpuMatrixPtr a, - const GpuMatrixPtr b, +void GpuSparseMatrix::mul(const GpuMatrix& a, + const GpuMatrix& b, real scaleAB, real scaleT) { - CHECK(a->useGpu_ && b->useGpu_) << "type not match"; + CHECK(a.useGpu_ && b.useGpu_) << "type not match"; CHECK(!trans_) << "trans not supported"; - real* A_d = a->getData(); - real* B_d = b->getData(); + real* A_d = (real*)a.getData(); + real* B_d = (real*)b.getData(); hl_sparse_matrix_s C_d = sMatrix_.get(); - hl_trans_op_t a_trans = a->trans_ ? HPPL_OP_T : HPPL_OP_N; - hl_trans_op_t b_trans = b->trans_ ? HPPL_OP_T : HPPL_OP_N; - - if (!a->trans_ && !b->trans_) { - CHECK(height_ == a->getHeight()); - CHECK(width_ == b->getWidth()); - CHECK(a->getWidth() == b->getHeight()); - } else if (a->trans_ && !b->trans_) { - CHECK(height_ == a->getWidth()); - CHECK(width_ == b->getWidth()); - CHECK(a->getHeight() == b->getHeight()); - } else if (!a->trans_ && b->trans_) { - CHECK(height_ == a->getHeight()); - CHECK(width_ == b->getHeight()); - CHECK(a->getWidth() == b->getWidth()); + hl_trans_op_t a_trans = a.trans_ ? HPPL_OP_T : HPPL_OP_N; + hl_trans_op_t b_trans = b.trans_ ? HPPL_OP_T : HPPL_OP_N; + + if (!a.trans_ && !b.trans_) { + CHECK(height_ == a.getHeight()); + CHECK(width_ == b.getWidth()); + CHECK(a.getWidth() == b.getHeight()); + } else if (a.trans_ && !b.trans_) { + CHECK(height_ == a.getWidth()); + CHECK(width_ == b.getWidth()); + CHECK(a.getHeight() == b.getHeight()); + } else if (!a.trans_ && b.trans_) { + CHECK(height_ == a.getHeight()); + CHECK(width_ == b.getHeight()); + CHECK(a.getWidth() == b.getWidth()); } else { LOG(INFO) << "Not support"; } int dimM = height_; int dimN = width_; - int dimK = !b->trans_ ? b->getHeight() : b->getWidth(); + int dimK = !b.trans_ ? b.getHeight() : b.getWidth(); hl_sparse_matrix_mul( A_d, a_trans, B_d, b_trans, C_d, dimM, dimN, dimK, scaleAB, scaleT); } -void GpuSparseMatrix::mul(const MatrixPtr a, - const MatrixPtr b, +void GpuSparseMatrix::mul(const Matrix& a, + const Matrix& b, real scaleAB, real scaleT) { - if (std::dynamic_pointer_cast(a) && - std::dynamic_pointer_cast(b)) { - GpuMatrixPtr a_ptr = std::dynamic_pointer_cast(a); - GpuMatrixPtr b_ptr = std::dynamic_pointer_cast(b); - mul(a_ptr, b_ptr, scaleAB, scaleT); + const auto a_ptr = dynamic_cast(&a); + const auto b_ptr = dynamic_cast(&b); + if (a_ptr && b_ptr) { + mul(*a_ptr, *b_ptr, scaleAB, scaleT); } else { LOG(FATAL) << "not supported"; } diff --git a/paddle/math/SparseMatrix.h b/paddle/math/SparseMatrix.h index bd96a3301ded2fd89bd31b94f42b0cb4718cbcb7..1d3801548e03a6ae679afb15bf7f620172d61c57 100644 --- a/paddle/math/SparseMatrix.h +++ b/paddle/math/SparseMatrix.h @@ -104,10 +104,7 @@ public: size_t newNnz, SparseValueType valueType); - void mul(const GpuMatrixPtr a, - const GpuMatrixPtr b, - real scaleAB, - real scaleT); + void mul(const GpuMatrix& a, const GpuMatrix& b, real scaleAB, real scaleT); /// B = A , B.trans = !A.trans MatrixPtr getTranspose(); @@ -218,7 +215,7 @@ protected: void copyRow(int offsets, size_t colNum, const sparse_float_value_t* row); public: - void mul(const MatrixPtr a, const MatrixPtr b, real scaleAB, real scaleT); + void mul(const Matrix& a, const Matrix& b, real scaleAB, real scaleT); void copyFrom(CpuSparseMatrix& src, hl_stream_t stream); void copyFrom(GpuSparseMatrix& src, hl_stream_t stream); diff --git a/paddle/math/tests/test_SparseMatrix.cpp b/paddle/math/tests/test_SparseMatrix.cpp index 88b75b6d83612c56a598cf1b301bd38f888e1cce..0949ab7ffba423daedd47876bc055a21c5c3f016 100644 --- a/paddle/math/tests/test_SparseMatrix.cpp +++ b/paddle/math/tests/test_SparseMatrix.cpp @@ -33,8 +33,8 @@ TEST(Matrix, CopyCpuMatrixToSparseMatrix) { ret2(new CpuMatrix(HEIGHT, WIDTH_TEST)); ret1->zeroMem(); ret2->zeroMem(); - ret1->mul(testMatrix, mulCpuMatrix, 1.0, 1.0); - ret2->mul(testCpuMatrix, mulCpuMatrix, 1.0, 1.0); + ret1->mul(*testMatrix, *mulCpuMatrix, 1.0, 1.0); + ret2->mul(*testCpuMatrix, *mulCpuMatrix, 1.0, 1.0); checkMatrixEqual(ret1, ret2); } @@ -147,9 +147,9 @@ void test_sparse_matrix_mul(MatrixPara paraA, hl_stream_synchronize(stream); /*matrix mul*/ - cpuMatrixC->mul(cpuMatrixA, cpuMatrixB, 1.0, 1.0); - gpuMatrixC->mul(gpuMatrixA, gpuMatrixB, 1.0, 1.0); - cpuDenseC->mul(cpuDenseA, cpuDenseB, 1.0, 1.0); + cpuMatrixC->mul(*cpuMatrixA, *cpuMatrixB, 1.0, 1.0); + gpuMatrixC->mul(*gpuMatrixA, *gpuMatrixB, 1.0, 1.0); + cpuDenseC->mul(*cpuDenseA, *cpuDenseB, 1.0, 1.0); gpuMatrixC_d2h->copyFrom(*gpuMatrixC, stream); hl_stream_synchronize(stream); @@ -224,8 +224,8 @@ TEST(Matrix, CopySparseMatrixToGpuSparseMatrix) { MatrixPtr ret2(new GpuMatrix(HEIGHT, WIDTH_TEST)); ret1->zeroMem(); ret2->zeroMem(); - ret1->mul(testMatrix, mulCpuMatrix, 1.0, 1.0); - ret2->mul(testGpuMatrix, mulGpuMatrix, 1.0, 1.0); + ret1->mul(*testMatrix, *mulCpuMatrix, 1.0, 1.0); + ret2->mul(*testGpuMatrix, *mulGpuMatrix, 1.0, 1.0); checkMatrixEqual(ret1, ret2); } diff --git a/paddle/math/tests/test_matrixCompare.cpp b/paddle/math/tests/test_matrixCompare.cpp index 10289940a4c5f246e34808669833117826355e64..c6fc849ba0328dae62c9da0bd721d86fd8b6881e 100644 --- a/paddle/math/tests/test_matrixCompare.cpp +++ b/paddle/math/tests/test_matrixCompare.cpp @@ -318,7 +318,7 @@ void testMatrixInverse(int height) { cpu->randomizeUniform(); MatrixPtr cpuT = cpu->getTranspose(); MatrixPtr outputCheck = std::make_shared(height, height); - outputCheck->mul(cpu, cpuT); + outputCheck->mul(*cpu, *cpuT); cpu->setDiag(1.0); cpu->add(*outputCheck); @@ -328,7 +328,7 @@ void testMatrixInverse(int height) { TensorCheckErr(*cpuI, *gpuI); - outputCheck->mul(cpu, cpuI); + outputCheck->mul(*cpu, *cpuI); cpu->setDiag(1.0); TensorCheckErr(*cpu, *outputCheck); } @@ -509,8 +509,8 @@ void testMatrixMul(bool transa, bool transb, int dimM, int dimN, int dimK) { gpuB->copyFrom(*cpuB); gpuC->copyFrom(*cpuC); - cpuC->mul(cpuA, cpuB, alpha, beta); - gpuC->mul(gpuA, gpuB, alpha, beta); + cpuC->mul(*cpuA, *cpuB, alpha, beta); + gpuC->mul(*gpuA, *gpuB, alpha, beta); TensorCheckErr(*cpuC, *gpuC); } @@ -581,8 +581,8 @@ void testSubMatrixMul(bool transa, bool transb, int dimM, int dimN, int dimK) { MatrixPtr subCpuC = cpuC->subMatrix(startM, endM, startN, endN); MatrixPtr subGpuC = gpuC->subMatrix(startM, endM, startN, endN); - subCpuC->mul(subCpuA, subCpuB, alpha, beta); - subGpuC->mul(subGpuA, subGpuB, alpha, beta); + subCpuC->mul(*subCpuA, *subCpuB, alpha, beta); + subGpuC->mul(*subGpuA, *subGpuB, alpha, beta); TensorCheckErr(*cpuC, *gpuC); } diff --git a/paddle/math/tests/test_sparseMatrixCompare.cpp b/paddle/math/tests/test_sparseMatrixCompare.cpp index 6f6de238bacaade85d728b7d773145326229015a..dcdbccffc3a19faa177c9867fe7ab142612f5209 100644 --- a/paddle/math/tests/test_sparseMatrixCompare.cpp +++ b/paddle/math/tests/test_sparseMatrixCompare.cpp @@ -102,8 +102,8 @@ void testSpMatrixMul(int M, int N, int K, real rate) { gpuC->copyFrom(*cpuC, stream); hl_stream_synchronize(stream); - cpuC->mul(cpuA, cpuB->getTranspose(), 1, 1); - gpuC->mul(gpuA, gpuB->getTranspose(), 1, 1); + cpuC->mul(*cpuA, *cpuB->getTranspose(), 1, 1); + gpuC->mul(*gpuA, *gpuB->getTranspose(), 1, 1); MatrixPtr outputCheck(new CpuSparseMatrix(M, N, nnz)); outputCheck->copyFrom(*gpuC, stream);