Commit 90493691 authored by tianbingsz, committed by GitHub

Merge pull request #934 from tianbingsz/paddle_function_mat

Matrix API refactor
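Every hunk in this diff applies the same pattern: Matrix methods that used to take MatrixPtr (a shared_ptr alias) arguments now take const Matrix& / Matrix& for required operands and a raw Matrix* for optional ones, so call sites dereference the smart pointer or pass .get(). Below is a minimal, self-contained sketch of that calling convention with toy types; it is not Paddle's real Matrix class, only an illustration of the before/after call shape.

#include <memory>

// Toy stand-ins for Paddle's Matrix/MatrixPtr, only to show the call shape.
struct Matrix {
  // Old style was roughly: void mul(const std::shared_ptr<Matrix> a, const std::shared_ptr<Matrix> b, ...).
  // New style takes references for required operands.
  void mul(const Matrix& a, const Matrix& b, float scaleAB, float scaleT) {
    (void)a; (void)b; (void)scaleAB; (void)scaleT;  // the real GEMM is omitted here
  }
  // Optional operands become raw pointers that may be nullptr.
  void forwardWithOptionalWeight(const Matrix& input, Matrix* weight) {
    if (weight) { /* use the optional weight */ }
    (void)input;
  }
};
using MatrixPtr = std::shared_ptr<Matrix>;

int main() {
  MatrixPtr out = std::make_shared<Matrix>();
  MatrixPtr in = std::make_shared<Matrix>();
  MatrixPtr w = std::make_shared<Matrix>();
  out->mul(*in, *w, 1.0f, 1.0f);                 // dereference the shared_ptr at the call site
  out->forwardWithOptionalWeight(*in, w.get());  // optional argument passed via .get()
  out->forwardWithOptionalWeight(*in, nullptr);  // or omitted entirely
  return 0;
}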
@@ -78,7 +78,7 @@ public:
                                 useGpu(arguments[0].deviceId));
     errorMat->zeroMem();
     if (label != nullptr) {
-      errorMat->classificationError(output, label);
+      errorMat->classificationError(*output, *label);
     } else if (dynamic_cast<CpuSparseMatrix*>(multiBinaryLabel.get()) ||
                dynamic_cast<GpuSparseMatrix*>(multiBinaryLabel.get())) {
       errorMat->classificationErrorMulti(
...
@@ -90,8 +90,8 @@ void ContextProjection::forward() {
   REGISTER_TIMER_INFO("ContextProjectionForward", getName().c_str());
   bool isPadding = config_.trainable_padding();
   out_->value->contextProjectionForward(
-      in_->value,
-      state_ ? state_ : isPadding ? weight_->getW() : nullptr,
+      *(in_->value),
+      state_ ? state_.get() : isPadding ? weight_->getW().get() : nullptr,
       *startPositions,
       config_.context_length(),
       config_.context_start(),
@@ -128,8 +128,8 @@ void ContextProjection::backward(const UpdateCallback& callback) {
   bool isPadding = config_.trainable_padding();
   if (!out_->grad->useGpu()) {
     out_->grad->contextProjectionBackward(
-        in_->grad,
-        isPadding ? weight_->getWGrad() : nullptr,
+        in_->grad.get(),
+        isPadding ? weight_->getWGrad().get() : nullptr,
         *startPositions,
         config_.context_length(),
         config_.context_start(),
@@ -137,7 +137,7 @@ void ContextProjection::backward(const UpdateCallback& callback) {
         isPadding);
   } else {
     if (in_->grad) {
-      out_->grad->contextProjectionBackwardData(in_->grad,
+      out_->grad->contextProjectionBackwardData(*(in_->grad),
                                                 *startPositions,
                                                 config_.context_length(),
                                                 config_.context_start());
@@ -145,7 +145,7 @@ void ContextProjection::backward(const UpdateCallback& callback) {
     if (isPadding && weight_->getWGrad()) {
       out_->grad->contextProjectionBackwardWeight(
-          weight_->getWGrad(),
+          *(weight_->getWGrad()),
           *startPositions,
           config_.context_length(),
           config_.context_start(),
...
@@ -113,7 +113,7 @@ void ConvexCombinationLayer::forward(PassType passType) {
     tmpRow0->setData(inV0->getData() + i * weightDim);
     tmpRow1->setData(outV->getData() + i * dataDim);
-    tmpRow1->mul(tmpRow0, tmpMtx0, 1, 0);
+    tmpRow1->mul(*tmpRow0, *tmpMtx0, 1, 0);
   }
 }
@@ -136,7 +136,7 @@ void ConvexCombinationLayer::backward(const UpdateCallback& callback) {
       tmpRow1->setData(outG->getData() + i * dataDim);
       tmpMtx0->setData(inV1->getData() + i * weightDim * dataDim);
-      tmpRow0->mul(tmpRow1, tmpMtx0->getTranspose(), 1, 1);
+      tmpRow0->mul(*tmpRow1, *(tmpMtx0->getTranspose()), 1, 1);
     }
   }
@@ -146,7 +146,7 @@ void ConvexCombinationLayer::backward(const UpdateCallback& callback) {
       tmpRow1->setData(outG->getData() + i * dataDim);
       tmpMtx0->setData(inG1->getData() + i * weightDim * dataDim);
-      tmpMtx0->mul(tmpRow0->getTranspose(), tmpRow1, 1, 1);
+      tmpMtx0->mul(*(tmpRow0->getTranspose()), *tmpRow1, 1, 1);
     }
   }
 }
...
@@ -150,7 +150,7 @@ void ExpandConvBaseLayer::expandFwdOnce(MatrixPtr image,
         Matrix::create(wgtData, subM, subK, false, useGpu_);  // mark transpose
     MatrixPtr B = Matrix::create(expInData, subK, subN, false, useGpu_);
     MatrixPtr C = Matrix::create(outData, subM, subN, false, useGpu_);
-    C->mul(A, B, 1, 1);
+    C->mul(*A, *B, 1, 1);
     A->clear();
     B->clear();
@@ -185,7 +185,7 @@ void ExpandConvBaseLayer::bpropActs(MatrixPtr out,
     MatrixPtr C = Matrix::create(expandInData, subK, subN, false, useGpu_);
     MatrixPtr B = Matrix::create(localGradData, subM, subN, false, useGpu_);
     MatrixPtr A = Matrix::create(wgtData, subM, subK, true, useGpu_);
-    C->mul(A, B);  // mul
+    C->mul(*A, *B);  // mul
     // clear the temporary matrix
     A->clear();
@@ -252,7 +252,7 @@ void ExpandConvBaseLayer::bpropWeights(MatrixPtr image,
     MatrixPtr A = Matrix::create(expandInData, subK, subN, true, useGpu_);
     MatrixPtr B = Matrix::create(gradData, subM, subN, false, useGpu_);
     MatrixPtr C = Matrix::create(wGradData, subM, subK, false, useGpu_);
-    C->mul(B, A, 1, 1);
+    C->mul(*B, *A, 1, 1);
     A->clear();
     B->clear();
...
@@ -28,7 +28,7 @@ FullMatrixProjection::FullMatrixProjection(const ProjectionConfig& config,
 void FullMatrixProjection::forward() {
   REGISTER_TIMER_INFO("FwMulTimer", getName().c_str());
-  out_->value->mul(in_->value, weight_->getW(), 1, 1);
+  out_->value->mul(*(in_->value), *(weight_->getW()), 1, 1);
 }
 void FullMatrixProjection::backward(const UpdateCallback& callback) {
@@ -37,7 +37,8 @@ void FullMatrixProjection::backward(const UpdateCallback& callback) {
   /* Calculate the W-gradient for the current layer */
   if (weight_->getWGrad()) {
     REGISTER_TIMER_INFO("GradMulTimer", getName().c_str());
-    weight_->getWGrad()->mul(in_->value->getTranspose(), out_->grad, 1, 1);
+    weight_->getWGrad()->mul(
+        *(in_->value->getTranspose()), *(out_->grad), 1, 1);
   }
   // If callback does not change value, backward propagation error
@@ -47,7 +48,7 @@ void FullMatrixProjection::backward(const UpdateCallback& callback) {
   /* Calculate the input layers error */
   if (in_->grad) {
     REGISTER_TIMER_INFO("BpMulTimer", getName().c_str());
-    in_->grad->mul(out_->grad, weight_->getW()->getTranspose(), 1, 1);
+    in_->grad->mul(*(out_->grad), *(weight_->getW()->getTranspose()), 1, 1);
   }
   hl_set_sync_flag(syncFlag);
...
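For orientation, the mul calls in this projection are the usual dense-projection matrix calculus; the commit only changes how the operands are passed, not the math. With X = in_->value, W = weight_->getW(), G = out_->grad, and recalling that mul(a, b, scaleAB, scaleT) computes this = scaleAB*(a*b) + scaleT*this:

\text{out} = X W, \qquad \Delta W = X^{\top} G, \qquad \Delta X = G\, W^{\top},

each gradient term being accumulated into the existing gradient matrix (scaleAB = scaleT = 1).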
@@ -84,8 +84,8 @@ void FullyConnectedLayer::forward(PassType passType) {
     auto input = getInput(i);
     CHECK(input.value) << "The input of 'fc' layer must be matrix";
     REGISTER_TIMER_INFO("FwMulTimer", getName().c_str());
-    i == 0 ? outV->mul(input.value, weights_[i]->getW(), 1, 0)
-           : outV->mul(input.value, weights_[i]->getW(), 1, 1);
+    i == 0 ? outV->mul(*input.value, *weights_[i]->getW(), 1, 0)
+           : outV->mul(*input.value, *weights_[i]->getW(), 1, 1);
   }
   /* add the bias-vector */
@@ -123,7 +123,7 @@ void FullyConnectedLayer::backward(const UpdateCallback& callback) {
       MatrixPtr oGrad = getOutputGrad();
       {
         REGISTER_TIMER_INFO("GradMulTimer", getName().c_str());
-        weights_[i]->getWGrad()->mul(input_T, oGrad, 1, 1);
+        weights_[i]->getWGrad()->mul(*input_T, *oGrad, 1, 1);
       }
     }
@@ -136,7 +136,7 @@ void FullyConnectedLayer::backward(const UpdateCallback& callback) {
     if (NULL != preGrad) {
       MatrixPtr weights_T = weights_[i]->getW()->getTranspose();
       REGISTER_TIMER_INFO("BpMulTimer", getName().c_str());
-      preGrad->mul(getOutputGrad(), weights_T, 1, 1);
+      preGrad->mul(*getOutputGrad(), *weights_T, 1, 1);
     }
     hl_set_sync_flag(syncFlag);
...
@@ -59,7 +59,7 @@ real LinearChainCRF::forward(real* x, int* s, int length) {
   matX->rowMax(*maxX_);
   expX_->assign(*matX);
   // subtract max to avoid overflow or underflow
-  expX_->mul(maxX_, ones_, (real)-1, (real)1);
+  expX_->mul(*maxX_, *ones_, (real)-1, (real)1);
   expX_->exp2();
   real* a = a_->getData();
...
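A note on the "subtract max" comment: maxX_ holds each row's maximum (computed by rowMax just above) and ones_ is, by its name, presumably a vector of ones, so the rank-one mul with scale -1 subtracts the row maximum from every entry before exponentiation. This is the standard log-sum-exp stabilization,

\log\sum_{j} e^{x_{ij}} = m_i + \log\sum_{j} e^{\,x_{ij} - m_i}, \qquad m_i = \max_{j} x_{ij},

and because every exponent on the right-hand side is at most zero, the subsequent exponential cannot overflow.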
@@ -316,7 +316,7 @@ void LstmLayer::forwardSequence(int batchSize,
     }
     if (prevOutput_) {
       frameGate->setData(lstmValue.gateValue);
-      frameGate->mul(prevOutput_, weight_->getW(), 1, 1);
+      frameGate->mul(*prevOutput_, *weight_->getW(), 1, 1);
     }
   }
   AsyncGpuBlock asyncGpuBlock;
@@ -338,7 +338,7 @@ void LstmLayer::forwardSequence(int batchSize,
         frameOutput->setData(lstmValue.outputValue);
         nextFrame(reversed_, getSize());
         frameGate->setData(lstmValue.gateValue);
-        frameGate->mul(frameOutput, weight_->getW(), 1, 1);
+        frameGate->mul(*frameOutput, *weight_->getW(), 1, 1);
       }
     }
     if (n != numSequences - 1) {
@@ -348,7 +348,7 @@ void LstmLayer::forwardSequence(int batchSize,
       if (!reversed_) {
         if (!prevState_) lstmValue.prevStateValue = nullptr;
         if (prevOutput_) {
-          frameGate->mul(frameOutput, weight_->getW(), 1, 1);
+          frameGate->mul(*frameOutput, *weight_->getW(), 1, 1);
         }
       } else {
         lstmValue.prevStateValue = nullptr;
@@ -470,7 +470,7 @@ void LstmLayer::backwardSequence(int batchSize,
       frameGate->setData(lstmGrad.gateGrad);
       nextFrame(reversed_, getSize());
       frameOutput->setData(lstmGrad.outputGrad);
-      frameOutput->mul(frameGate, weightT, 1, 1);
+      frameOutput->mul(*frameGate, *weightT, 1, 1);
     } else {
       nextFrame(reversed_, getSize());
     }
@@ -479,14 +479,14 @@ void LstmLayer::backwardSequence(int batchSize,
   if (weight_->getWGrad()) {
     if (!reversed_) {
       weight_->getWGrad()->mul(
-          output_.value->subMatrix(start, length - 1)->getTranspose(),
-          gate_.grad->subMatrix(start + 1, length - 1),
+          *output_.value->subMatrix(start, length - 1)->getTranspose(),
+          *gate_.grad->subMatrix(start + 1, length - 1),
           1,
           1);
     } else {
       weight_->getWGrad()->mul(
-          output_.value->subMatrix(start + 1, length - 1)->getTranspose(),
-          gate_.grad->subMatrix(start, length - 1),
+          *output_.value->subMatrix(start + 1, length - 1)->getTranspose(),
+          *gate_.grad->subMatrix(start, length - 1),
           1,
           1);
     }
@@ -541,7 +541,7 @@ void LstmLayer::forwardBatch(int batchSize,
       if (n != 0) {
         MatrixPtr batch1 = batchValue_->getBatchValue(n - 1, batchSize);
-        gateValue->mul(batch1, weight_->getW(), 1, 1);
+        gateValue->mul(*batch1, *weight_->getW(), 1, 1);
       } else if (prevOutput_) {
         Matrix::resizeOrCreate(prevBatchOutput2_,
                                gateValue->getHeight(),
@@ -549,7 +549,7 @@ void LstmLayer::forwardBatch(int batchSize,
                                false,
                                useGpu_);
         batchValue_->prevOutput2Batch(*prevOutput_, *prevBatchOutput2_);
-        gateValue->mul(prevBatchOutput2_, weight_->getW(), 1, 1);
+        gateValue->mul(*prevBatchOutput2_, *weight_->getW(), 1, 1);
         batchValue_->prevOutput2Batch(*prevState_,
                                       *totalState_->subMatrix(0, numSequences));
@@ -672,16 +672,16 @@ void LstmLayer::backwardBatch(int batchSize,
       if (n != 0) {
         MatrixPtr tmp = batchGrad_->getBatchValue(n - 1, batchSize);
-        tmp->mul(gateGrad, weightT, 1, 1);
+        tmp->mul(*gateGrad, *weightT, 1, 1);
       }
       if (n != 0 && weight_->getWGrad()) {
         /* backward weight */
         MatrixPtr outputValue = batchValue_->getBatchValue(n - 1, batchSize);
-        weight_->getWGrad()->mul(outputValue->getTranspose(), gateGrad, 1, 1);
+        weight_->getWGrad()->mul(*outputValue->getTranspose(), *gateGrad, 1, 1);
       } else if (prevOutput_ && weight_->getWGrad()) {
         weight_->getWGrad()->mul(
-            prevBatchOutput2_->getTranspose(), gateGrad, 1, 1);
+            *prevBatchOutput2_->getTranspose(), *gateGrad, 1, 1);
       }
     }
   }
...
@@ -547,7 +547,7 @@ void MDLstmLayer::forwardOneSequence(int start, CoordIterator& coordIter) {
       if (coordIter.getPrePos(delays_, i, prePos)) {
         int preOffset = coordIter.offset(prePos);
         frameGate_[start + offset].value->mul(
-            frameOutput_[start + preOffset].value, weight_->getW(), 1.0, 1.0);
+            *frameOutput_[start + preOffset].value, *weight_->getW(), 1.0, 1.0);
       }
     }
     forwardGate2OutputSequence(start, coordIter);
@@ -747,11 +747,11 @@ void MDLstmLayer::backwardOneSequence(int start, CoordIterator& coordIter) {
       if (coordIter.getPrePos(delays_, i, prePos)) {
         int preOffset = coordIter.offset(prePos);
         frameOutput_[start + preOffset].grad->mul(
-            frameGate_[start + offset].grad, weightT, 1.0, 1.0);
+            *frameGate_[start + offset].grad, *weightT, 1.0, 1.0);
         if (weight_->getWGrad()) {
           weight_->getWGrad()->mul(
-              frameOutput_[start + preOffset].value->getTranspose(),
-              frameGate_[start + offset].grad,
+              *frameOutput_[start + preOffset].value->getTranspose(),
+              *frameGate_[start + offset].grad,
               1.0,
               1.0);
         }
...
@@ -96,7 +96,7 @@ void OuterProdLayer::forward(PassType passType) {
       tmpRow0->setData(inV0->getData() + i * dim0);
       tmpRow1->setData(inV1->getData() + i * dim1);
-      tmpMtx0->mul(tmpRow0->getTranspose(), tmpRow1);
+      tmpMtx0->mul(*tmpRow0->getTranspose(), *tmpRow1);
     }
   }
 }
@@ -121,7 +121,7 @@ void OuterProdLayer::backward(const UpdateCallback& callback) {
      tmpRow0->setData(inG0->getData() + i * dim0);
      tmpRow1->setData(inV1->getData() + i * dim1);
-     tmpRow0->mul(tmpRow1, tmpMtx0->getTranspose(), 1, 1);
+     tmpRow0->mul(*tmpRow1, *tmpMtx0->getTranspose(), 1, 1);
     }
   }
@@ -131,7 +131,7 @@ void OuterProdLayer::backward(const UpdateCallback& callback) {
      tmpRow0->setData(inV0->getData() + i * dim0);
      tmpRow1->setData(inG1->getData() + i * dim1);
-     tmpRow1->mul(tmpRow0, tmpMtx0, 1, 1);
+     tmpRow1->mul(*tmpRow0, *tmpMtx0, 1, 1);
     }
   }
 }
...
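For orientation (standard matrix calculus, unchanged by this commit): treating tmpRow0 as a row vector u of width dim0, tmpRow1 as a row vector v of width dim1, and tmpMtx0 during backward as the output gradient G, the three calls above compute an outer product and its two input gradients:

C = u^{\top} v \in \mathbb{R}^{d_0 \times d_1}, \qquad \Delta u = v\, G^{\top}, \qquad \Delta v = u\, G.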
@@ -215,12 +215,12 @@ void RecurrentLayer::forwardSequence(int batchSize,
 void RecurrentLayer::forwardOneSequence(int start, int length) {
   if (!reversed_) {
     if (prevOutput_) {
-      frameOutput_[start].value->mul(prevOutput_, weight_->getW(), 1, 1);
+      frameOutput_[start].value->mul(*prevOutput_, *weight_->getW(), 1, 1);
     }
     activation_->forward(frameOutput_[start]);
     for (int i = 1; i < length; ++i) {
       frameOutput_[start + i].value->mul(
-          frameOutput_[start + i - 1].value, weight_->getW(), 1, 1);
+          *frameOutput_[start + i - 1].value, *weight_->getW(), 1, 1);
       activation_->forward(frameOutput_[start + i]);
     }
     if (prevOutput_) {
@@ -230,7 +230,7 @@ void RecurrentLayer::forwardOneSequence(int start, int length) {
     activation_->forward(frameOutput_[start + length - 1]);
     for (int i = length - 2; i >= 0; --i) {
       frameOutput_[start + i].value->mul(
-          frameOutput_[start + i + 1].value, weight_->getW(), 1, 1);
+          *frameOutput_[start + i + 1].value, *weight_->getW(), 1, 1);
       activation_->forward(frameOutput_[start + i]);
     }
   }
@@ -282,13 +282,13 @@ void RecurrentLayer::backwardOneSequence(int start, int length) {
     for (int i = length - 1; i > 0; --i) {
       activation_->backward(frameOutput_[start + i]);
       frameOutput_[start + i - 1].grad->mul(
-          frameOutput_[start + i].grad, weightT, 1, 1);
+          *frameOutput_[start + i].grad, *weightT, 1, 1);
     }
     activation_->backward(frameOutput_[start]);
     if (weight_->getWGrad()) {
       weight_->getWGrad()->mul(
-          output_.value->subMatrix(start, length - 1)->getTranspose(),
-          output_.grad->subMatrix(start + 1, length - 1),
+          *output_.value->subMatrix(start, length - 1)->getTranspose(),
+          *output_.grad->subMatrix(start + 1, length - 1),
           1,
           1);
     }
@@ -296,13 +296,13 @@ void RecurrentLayer::backwardOneSequence(int start, int length) {
     for (int i = 0; i < length - 1; ++i) {
       activation_->backward(frameOutput_[start + i]);
       frameOutput_[start + i + 1].grad->mul(
-          frameOutput_[start + i].grad, weightT, 1, 1);
+          *frameOutput_[start + i].grad, *weightT, 1, 1);
     }
     activation_->backward(frameOutput_[start + length - 1]);
     if (weight_->getWGrad()) {
       weight_->getWGrad()->mul(
-          output_.value->subMatrix(start + 1, length - 1)->getTranspose(),
-          output_.grad->subMatrix(start, length - 1),
+          *output_.value->subMatrix(start + 1, length - 1)->getTranspose(),
+          *output_.grad->subMatrix(start, length - 1),
           1,
           1);
     }
@@ -329,7 +329,7 @@ void RecurrentLayer::forwardBatch(int batchSize,
     if (n != 0) {
       MatrixPtr batch1 =
           batchValue_->getBatchValue(n - 1, batch2->getHeight());
-      batch2->mul(batch1, weight_->getW(), 1, 1);
+      batch2->mul(*batch1, *weight_->getW(), 1, 1);
     }
     Argument arg;
     arg.value = batch2;
@@ -367,14 +367,14 @@ void RecurrentLayer::backwardBatch(int batchSize,
     if (n != 0) {
       batch1 = batchGrad_->getBatchValue(n - 1, batch2->getHeight());
-      batch1->mul(batch2, weightT, 1, 1);
+      batch1->mul(*batch2, *weightT, 1, 1);
     }
     if (backwardByBatch && weight_->getWGrad()) {
       if (n != 0) {
         /* backward weight */
         batch1 = batchValue_->getBatchValue(n - 1, batch2->getHeight());
-        weight_->getWGrad()->mul(batch1->getTranspose(), batch2, 1, 1);
+        weight_->getWGrad()->mul(*batch1->getTranspose(), *batch2, 1, 1);
       }
     }
   }
@@ -389,14 +389,14 @@ void RecurrentLayer::backwardBatch(int batchSize,
       int len = starts[seq + 1] - starts[seq];
       if (!reversed_) {
         weight_->getWGrad()->mul(
-            output_.value->subMatrix(starts[seq], len - 1)->getTranspose(),
-            output_.grad->subMatrix(starts[seq] + 1, len - 1),
+            *output_.value->subMatrix(starts[seq], len - 1)->getTranspose(),
+            *output_.grad->subMatrix(starts[seq] + 1, len - 1),
             1,
             1);
       } else {
         weight_->getWGrad()->mul(
-            output_.value->subMatrix(starts[seq] + 1, len - 1)->getTranspose(),
-            output_.grad->subMatrix(starts[seq], len - 1),
+            *output_.value->subMatrix(starts[seq] + 1, len - 1)->getTranspose(),
+            *output_.grad->subMatrix(starts[seq], len - 1),
             1,
             1);
       }
...
@@ -155,20 +155,20 @@ void SelectiveFullyConnectedLayer::forward(PassType passType) {
       // manully compute the multiplication of
       // the input vector and the selected rows.
       REGISTER_TIMER("selective.plain");
-      interOutput_->mul(input, weight->getTranspose(), 1, scaleT);
+      interOutput_->mul(*input, *weight->getTranspose(), 1, scaleT);
     } else {
       // if the indecies is not sparse enough,
       // use full mul instead
       REGISTER_TIMER("selective.mul");
       if (fullOutput_) {
-        interOutput_->mul(input, weight->getTranspose(), 1, scaleT);
+        interOutput_->mul(*input, *weight->getTranspose(), 1, scaleT);
       } else {
         Matrix::resizeOrCreate(mmat_,
                                hsize,
                                wsize,
                                /*trans=*/false,
                                /*useGpu=*/useGpu_);
-        mmat_->mul(input, weight->getTranspose());
+        mmat_->mul(*input, *weight->getTranspose());
         interOutput_->add3(mmat_);
       }
     }
@@ -242,14 +242,14 @@ void SelectiveFullyConnectedLayer::backward(const UpdateCallback& callback) {
     MatrixPtr preGrad = getInputGrad(i);
     if (preGrad) {
       REGISTER_TIMER_INFO("BpMulTimer", getName().c_str());
-      preGrad->mul(interOutGrad_, weights_[i]->getW(), 1, 1);
+      preGrad->mul(*interOutGrad_, *weights_[i]->getW(), 1, 1);
     }
     MatrixPtr wGrad = weights_[i]->getWGrad();
     if (wGrad) {
       REGISTER_TIMER_INFO("GradMulTimer", getName().c_str());
       MatrixPtr input = getInputValue(i);
-      wGrad->mul(interOutGrad_->getTranspose(), input, 1, 1);
+      wGrad->mul(*interOutGrad_->getTranspose(), *input, 1, 1);
     }
     {
...
@@ -77,7 +77,7 @@ void TensorLayer::forward(PassType passType) {
     REGISTER_TIMER_INFO("TensorFwMulTimer", getName().c_str());
     for (size_t i = 0; i < getSize(); ++i) {
       MatrixPtr weights = weights_[i]->getW();
-      tmpMat->mul(input1, weights, 1, 0);
+      tmpMat->mul(*input1, *weights, 1, 0);
       outV->rowDotMul(i, *tmpMat, *input2);
     }
   }
@@ -112,7 +112,7 @@ void TensorLayer::backward(const UpdateCallback& callback) {
       if (weights_[i]->getWGrad()) {
        tmpMat->rowScale(i, *input1, *oGrad);
        MatrixPtr input1_T = tmpMat->getTranspose();
-       weights_[i]->getWGrad()->mul(input1_T, input2, 1, 1);
+       weights_[i]->getWGrad()->mul(*input1_T, *input2, 1, 1);
      }
    }
  }
@@ -130,11 +130,11 @@ void TensorLayer::backward(const UpdateCallback& callback) {
    if (NULL != preGrad1) { /* (grad * e2) * trans(W) */
      tmpMat->rowScale(i, *input2, *oGrad);
      MatrixPtr weights_T = weights->getTranspose();
-     preGrad1->mul(tmpMat, weights_T, 1, 1);
+     preGrad1->mul(*tmpMat, *weights_T, 1, 1);
    }
    if (NULL != preGrad2) { /* (grad * e1) * W */
      tmpMat->rowScale(i, *input1, *oGrad);
-     preGrad2->mul(tmpMat, weights, 1, 1);
+     preGrad2->mul(*tmpMat, *weights, 1, 1);
    }
  }
 }
...
@@ -46,7 +46,7 @@ TransposedFullMatrixProjection::TransposedFullMatrixProjection(
 void TransposedFullMatrixProjection::forward() {
   REGISTER_TIMER_INFO("FwMulTimer", getName().c_str());
-  out_->value->mul(in_->value, weight_->getW()->getTranspose(), 1, 1);
+  out_->value->mul(*(in_->value), *(weight_->getW()->getTranspose()), 1, 1);
 }
 void TransposedFullMatrixProjection::backward(const UpdateCallback& callback) {
@@ -55,7 +55,8 @@ void TransposedFullMatrixProjection::backward(const UpdateCallback& callback) {
   /* Calculate the W-gradient for the current layer */
   if (weight_->getWGrad()) {
     REGISTER_TIMER_INFO("GradMulTimer", getName().c_str());
-    weight_->getWGrad()->mul(out_->grad->getTranspose(), in_->value, 1, 1);
+    weight_->getWGrad()->mul(
+        *(out_->grad->getTranspose()), *(in_->value), 1, 1);
   }
   // If callback does not change value, backprop error asynchronously so that
@@ -69,7 +70,7 @@ void TransposedFullMatrixProjection::backward(const UpdateCallback& callback) {
   /* Calculate the input layers error */
   if (in_->grad) {
     REGISTER_TIMER_INFO("BpMulTimer", getName().c_str());
-    in_->grad->mul(out_->grad, weight_->getW(), 1, 1);
+    in_->grad->mul(*(out_->grad), *(weight_->getW()), 1, 1);
   }
   hl_set_sync_flag(syncFlag);
...
@@ -163,15 +163,16 @@ MatrixPtr CpuSparseMatrix::getTranspose() {
 SparseValueType CpuSparseMatrix::getValueType() { return valueType_; }
-void CpuSparseMatrix::mul(MatrixPtr a, MatrixPtr b, real scaleAB, real scaleT) {
+void CpuSparseMatrix::mul(const Matrix& a,
+                          const Matrix& b,
+                          real scaleAB,
+                          real scaleT) {
   CHECK(!isTransposed()) << "Not supported";
-  if (dynamic_cast<CpuMatrix*>(a.get()) && dynamic_cast<CpuMatrix*>(b.get())) {
-    CpuMatrix::mul(dynamic_cast<CpuMatrix*>(a.get()),
-                   dynamic_cast<CpuMatrix*>(b.get()),
-                   this,
-                   scaleAB,
-                   scaleT);
+  const auto a_ptr = dynamic_cast<const CpuMatrix*>(&a);
+  const auto b_ptr = dynamic_cast<const CpuMatrix*>(&b);
+  if (a_ptr && b_ptr) {
+    CpuMatrix::mul((CpuMatrix*)a_ptr, (CpuMatrix*)b_ptr, this, scaleAB, scaleT);
   } else {
     LOG(FATAL) << "not supported";
   }
...
@@ -203,7 +203,7 @@ public:
   /// mem MUST be alloced outside (memAlloc=false)
   void transpose(MatrixPtr matTrans, bool memAlloc);
-  void mul(MatrixPtr A, MatrixPtr B, real alpha, real beta);
+  void mul(const Matrix& A, const Matrix& B, real alpha, real beta);
   /**
    * @brief sparseMatrix += denseMatrix
...
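With shared_ptr arguments gone from these signatures, the rewritten bodies pick the concrete overload by applying dynamic_cast to the address of the reference (&a) rather than to a.get(). A self-contained sketch of that dispatch pattern, using toy classes rather than Paddle's real hierarchy:

#include <iostream>

struct Matrix { virtual ~Matrix() = default; };  // polymorphic base
struct CpuMatrix : Matrix {};
struct CpuSparseMatrix : Matrix {};

// Dispatch on the dynamic type of a const Matrix& argument, the way the
// refactored mul() overloads do: dynamic_cast the address of the reference.
void mul(const Matrix& a, const Matrix& b) {
  const auto* a_dense = dynamic_cast<const CpuMatrix*>(&a);
  const auto* b_dense = dynamic_cast<const CpuMatrix*>(&b);
  const auto* a_sparse = dynamic_cast<const CpuSparseMatrix*>(&a);
  if (a_dense && b_dense) {
    std::cout << "dense * dense\n";
  } else if (a_sparse && b_dense) {
    std::cout << "sparse * dense\n";
  } else {
    std::cout << "not supported\n";  // the real code calls LOG(FATAL) here
  }
}

int main() {
  CpuMatrix d;
  CpuSparseMatrix s;
  mul(d, d);  // prints: dense * dense
  mul(s, d);  // prints: sparse * dense
  return 0;
}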
@@ -582,18 +582,16 @@ void GpuMatrix::mul(const GpuMatrix& a,
 }
 /* this = a*b */
-void GpuMatrix::mul(const MatrixPtr a, const MatrixPtr b) {
-  mul(a, b, 1.0, 0.0);
-}
+void GpuMatrix::mul(const Matrix& a, const Matrix& b) { mul(a, b, 1.0, 0.0); }
-void GpuMatrix::mul(const MatrixPtr a,
-                    const MatrixPtr b,
+void GpuMatrix::mul(const Matrix& a,
+                    const Matrix& b,
                     real scaleAB,
                     real scaleT) {
-  GpuMatrixPtr a_ptr = std::dynamic_pointer_cast<GpuMatrix>(a);
-  GpuMatrixPtr b_ptr = std::dynamic_pointer_cast<GpuMatrix>(b);
-  GpuSparseMatrixPtr a_ptr_s = std::dynamic_pointer_cast<GpuSparseMatrix>(a);
-  GpuSparseMatrixPtr b_ptr_s = std::dynamic_pointer_cast<GpuSparseMatrix>(b);
+  const auto a_ptr = dynamic_cast<const GpuMatrix*>(&a);
+  const auto b_ptr = dynamic_cast<const GpuMatrix*>(&b);
+  const auto a_ptr_s = dynamic_cast<const GpuSparseMatrix*>(&a);
+  const auto b_ptr_s = dynamic_cast<const GpuSparseMatrix*>(&b);
   if (a_ptr && b_ptr) {
     mul(*a_ptr, *b_ptr, scaleAB, scaleT);
@@ -766,20 +764,19 @@ void GpuMatrix::maxoutBackward(Matrix& a,
 }
 /*calulate the error of classification */
-void GpuMatrix::classificationError(MatrixPtr output, IVectorPtr label) {
-  GpuMatrixPtr output_ptr = std::dynamic_pointer_cast<GpuMatrix>(output);
-  GpuIVectorPtr label_ptr = std::dynamic_pointer_cast<GpuIVector>(label);
+void GpuMatrix::classificationError(Matrix& output, IVector& label) {
+  auto output_ptr = dynamic_cast<const GpuMatrix*>(&output);
+  auto label_ptr = dynamic_cast<const GpuIVector*>(&label);
   CHECK(output_ptr && label_ptr) << "Invalid argument pointer";
   CHECK(height_ == output_ptr->height_ && width_ == 1)
       << "Matrix dimensions are not equal";
-  real* output_d = output_ptr->data_;
-  real* recResult_d = data_;
-  int* label_d = label_ptr->getData();
-  hl_matrix_classification_error(
-      output_d, label_d, recResult_d, height_, output_ptr->width_);
+  hl_matrix_classification_error((real*)output_ptr->data_,
+                                 (int*)label_ptr->getData(),
+                                 data_,
+                                 height_,
+                                 output_ptr->width_);
 }
 /* copy -log(output[i * width + label]) to this->data[i] */
@@ -1370,86 +1367,62 @@ void GpuMatrix::maxSequenceBackward(Matrix& outputGrad,
   hl_max_sequence_backward(outGrad, maxIndex, inputGrad, numSequences, dim);
 }
-void GpuMatrix::contextProjectionForward(MatrixPtr input,
-                                         MatrixPtr weight,
+void GpuMatrix::contextProjectionForward(Matrix& input,
+                                         Matrix* weight,
                                          const IVector& sequence,
                                          int contextLength,
                                          int contextStart,
                                          size_t beginPad,
                                          bool isPadding) {
-  CHECK(dynamic_cast<GpuMatrix*>(input.get()));
+  CHECK(dynamic_cast<GpuMatrix*>(&input));
   CHECK(dynamic_cast<const GpuIVector*>(&sequence));
-  if (weight) CHECK(dynamic_cast<GpuMatrix*>(weight.get()));
-  size_t numSequences = sequence.getSize() - 1;
-  int64_t inputDim = input->getWidth();
-  int64_t dim = getWidth();
-  CHECK_EQ(dim, inputDim * contextLength);
-  real* outData = getData();
-  real* inputData = input->getData();
-  const int* starts = sequence.getData();
-  hl_context_projection_forward(inputData,
-                                starts,
+  if (weight) CHECK(dynamic_cast<GpuMatrix*>(weight));
+  CHECK_EQ(getWidth(), input.getWidth() * contextLength);
+  hl_context_projection_forward(input.getData(),
+                                sequence.getData(),
                                 isPadding ? weight->getData() : NULL,
-                                outData,
-                                numSequences,
-                                inputDim,
+                                getData(),
+                                sequence.getSize() - 1,
+                                input.getWidth(),
                                 contextLength,
                                 contextStart,
                                 beginPad,
                                 isPadding);
 }
-void GpuMatrix::contextProjectionBackwardData(MatrixPtr inputGrad,
+void GpuMatrix::contextProjectionBackwardData(Matrix& inputGrad,
                                               const IVector& sequence,
                                               int contextLength,
                                               int contextStart) {
-  CHECK(dynamic_cast<GpuMatrix*>(inputGrad.get()));
+  CHECK(dynamic_cast<GpuMatrix*>(&inputGrad));
   CHECK(dynamic_cast<const GpuIVector*>(&sequence));
-  size_t numSequences = sequence.getSize() - 1;
-  int64_t inputDim = inputGrad->getWidth();
-  int64_t dim = getWidth();
-  CHECK_EQ(dim, inputDim * contextLength);
-  real* outGrad = getData();
-  real* inGrad = inputGrad->getData();
-  const int* starts = sequence.getData();
-  hl_context_projection_backward_data(outGrad,
-                                      starts,
-                                      inGrad,
-                                      numSequences,
-                                      inputDim,
+  CHECK_EQ(getWidth(), inputGrad.getWidth() * contextLength);
+  hl_context_projection_backward_data(getData(),
+                                      sequence.getData(),
+                                      inputGrad.getData(),
+                                      sequence.getSize() - 1,
+                                      inputGrad.getWidth(),
                                       contextLength,
                                       contextStart);
 }
-void GpuMatrix::contextProjectionBackwardWeight(MatrixPtr weightGrad,
+void GpuMatrix::contextProjectionBackwardWeight(Matrix& weightGrad,
                                                 const IVector& sequence,
                                                 int contextLength,
                                                 int contextStart,
                                                 int totalPad,
                                                 size_t beginPad) {
-  CHECK(dynamic_cast<GpuMatrix*>(weightGrad.get()));
+  CHECK(dynamic_cast<GpuMatrix*>(&weightGrad));
   CHECK(dynamic_cast<const GpuIVector*>(&sequence));
-  size_t numSequences = sequence.getSize() - 1;
-  int64_t weightDim = weightGrad->getWidth();
-  int64_t dim = getWidth();
-  CHECK_EQ(dim, weightDim * contextLength);
-  real* outGrad = getData();
-  real* wtGrad = weightGrad->getData();
-  const int* starts = sequence.getData();
-  hl_context_projection_backward_weight(outGrad,
-                                        starts,
-                                        wtGrad,
-                                        numSequences,
-                                        weightDim,
+  CHECK_EQ(getWidth(), weightGrad.getWidth() * contextLength);
+  hl_context_projection_backward_weight(getData(),
+                                        sequence.getData(),
+                                        weightGrad.getData(),
+                                        sequence.getSize() - 1,
+                                        weightGrad.getWidth(),
                                         totalPad,
                                         contextLength,
                                         contextStart,
@@ -2371,23 +2344,21 @@ void CpuMatrix::maxSequenceBackward(Matrix& outputGrad,
   }
 }
-void CpuMatrix::contextProjectionForward(MatrixPtr input,
-                                         MatrixPtr weight,
+void CpuMatrix::contextProjectionForward(Matrix& input,
+                                         Matrix* weight,
                                          const IVector& sequence,
                                          int contextLength,
                                          int contextStart,
                                          size_t beginPad,
                                          bool isPadding) {
-  CHECK(dynamic_cast<CpuMatrix*>(input.get()));
-  CHECK(dynamic_cast<const CpuIVector*>(&sequence));
-  if (weight) CHECK(dynamic_cast<CpuMatrix*>(weight.get()));
-  size_t numSequences = sequence.getSize() - 1;
-  int64_t inputDim = input->getWidth();
-  int64_t dim = getWidth();
-  CHECK_EQ(dim, inputDim * contextLength);
-  const int* starts = sequence.getData();
+  auto input_ptr = dynamic_cast<CpuMatrix*>(&input);
+  auto seq_ptr = dynamic_cast<const CpuIVector*>(&sequence);
+  CHECK(input_ptr && seq_ptr);
+  if (weight) CHECK(dynamic_cast<CpuMatrix*>(weight));
+  CHECK_EQ(getWidth(), input_ptr->getWidth() * contextLength);
+  const int* starts = seq_ptr->getData();
+  size_t numSequences = seq_ptr->getSize() - 1;
   for (size_t i = 0; i < numSequences; ++i) {
     for (int j = 0; j < contextLength; ++j) {
       int begin = starts[i] + contextStart + j;
@@ -2400,7 +2371,7 @@ void CpuMatrix::contextProjectionForward(MatrixPtr input,
         MatrixPtr mat = this->subMatrix(starts[i], padSize);
         if (isPadding) {
           MatrixPtr sub = weight->subMatrix(j, padSize);
-          mat->addAtOffset(*sub, j * inputDim);
+          mat->addAtOffset(*sub, j * input_ptr->getWidth());
         }
         dstBegin = starts[i] + padSize;
         begin = starts[i];
@@ -2412,41 +2383,36 @@ void CpuMatrix::contextProjectionForward(MatrixPtr input,
         if (isPadding) {
           MatrixPtr sub =
               weight->subMatrix(beginPad + contextStart + j - padSize, padSize);
-          mat->addAtOffset(*sub, j * inputDim);
+          mat->addAtOffset(*sub, j * input_ptr->getWidth());
         }
         dstEnd = starts[i + 1] - padSize;
         end = starts[i + 1];
       }
       if (end <= begin) continue;
-      MatrixPtr src = input->subMatrix(begin, end - begin);
+      MatrixPtr src = input_ptr->subMatrix(begin, end - begin);
       MatrixPtr dst = this->subMatrix(dstBegin, dstEnd - dstBegin);
-      dst->addAtOffset(*src, j * inputDim);
+      dst->addAtOffset(*src, j * input_ptr->getWidth());
     }
   }
 }
-void CpuMatrix::contextProjectionBackward(MatrixPtr inputGrad,
-                                          MatrixPtr weightGrad,
+void CpuMatrix::contextProjectionBackward(Matrix* inputGrad,
+                                          Matrix* weightGrad,
                                           const IVector& sequence,
                                           int contextLength,
                                           int contextStart,
                                           size_t beginPad,
                                           bool isPadding) {
-  if (inputGrad) CHECK(dynamic_cast<CpuMatrix*>(inputGrad.get()));
-  if (weightGrad) CHECK(dynamic_cast<CpuMatrix*>(weightGrad.get()));
+  if (inputGrad) CHECK(dynamic_cast<CpuMatrix*>(inputGrad));
+  if (weightGrad) CHECK(dynamic_cast<CpuMatrix*>(weightGrad));
   CHECK(dynamic_cast<const CpuIVector*>(&sequence));
-  int64_t inputDim = 0;
-  int64_t dim = getWidth();
-  size_t numSequences = sequence.getSize() - 1;
-  const int* starts = sequence.getData();
-  if (inputGrad) {
-    inputDim = inputGrad->getWidth();
-  } else {
-    inputDim = weightGrad->getWidth();
-  }
-  CHECK_EQ(dim, inputDim * contextLength);
+  int64_t inputDim = inputGrad ? inputGrad->getWidth()
                                : weightGrad ? weightGrad->getWidth() : 0;
+  CHECK_EQ(getWidth(), inputDim * contextLength);
+  const int* starts = sequence.getData();
+  size_t numSequences = sequence.getSize() - 1;
   for (size_t i = 0; i < numSequences; ++i) {
     for (int j = 0; j < contextLength; ++j) {
       int begin = starts[i] + contextStart + j;
@@ -2630,29 +2596,22 @@ void CpuMatrix::sequenceAvgForward(Matrix& a,
 }
 /* this = scaleAB*(a*b) + scaleT*this*/
-void CpuMatrix::mul(const MatrixPtr a,
-                    const MatrixPtr b,
+void CpuMatrix::mul(const Matrix& a,
+                    const Matrix& b,
                     real scaleAB,
                     real scaleT) {
   CHECK(!isTransposed()) << "Not supported";
-  if (dynamic_cast<CpuMatrix*>(a.get()) && dynamic_cast<CpuMatrix*>(b.get())) {
-    mul(dynamic_cast<CpuMatrix*>(a.get()),
-        dynamic_cast<CpuMatrix*>(b.get()),
-        scaleAB,
-        scaleT);
-  } else if (dynamic_cast<CpuSparseMatrix*>(a.get()) &&
-             dynamic_cast<CpuMatrix*>(b.get())) {
-    mul(dynamic_cast<CpuSparseMatrix*>(a.get()),
-        dynamic_cast<CpuMatrix*>(b.get()),
-        scaleAB,
-        scaleT);
-  } else if (dynamic_cast<CpuMatrix*>(a.get()) &&
-             dynamic_cast<CpuSparseMatrix*>(b.get())) {
-    mul(dynamic_cast<CpuMatrix*>(a.get()),
-        dynamic_cast<CpuSparseMatrix*>(b.get()),
-        scaleAB,
-        scaleT);
+  const auto a_ptr = dynamic_cast<const CpuMatrix*>(&a);
+  const auto b_ptr = dynamic_cast<const CpuMatrix*>(&b);
+  const auto a_ptr_s = dynamic_cast<const CpuSparseMatrix*>(&a);
+  const auto b_ptr_s = dynamic_cast<const CpuSparseMatrix*>(&b);
+  if (a_ptr && b_ptr) {
+    mul((CpuMatrix*)a_ptr, (CpuMatrix*)b_ptr, scaleAB, scaleT);
+  } else if (a_ptr_s && b_ptr) {
+    mul((CpuSparseMatrix*)a_ptr_s, (CpuMatrix*)b_ptr, scaleAB, scaleT);
+  } else if (a_ptr && b_ptr_s) {
+    mul((CpuMatrix*)a_ptr, (CpuSparseMatrix*)b_ptr_s, scaleAB, scaleT);
   } else {
     LOG(FATAL) << "Not supported";
   }
@@ -3321,7 +3280,7 @@ void CpuMatrix::addColumnVector(const Matrix& b) {
 }
 /* this = a*b */
-void CpuMatrix::mul(const MatrixPtr a, const MatrixPtr b) {
+void CpuMatrix::mul(const Matrix& a, const Matrix& b) {
   return mul(a, b, 1.0, 0.0);
 }
@@ -3544,21 +3503,20 @@ void CpuMatrix::rowNormalizeL1(Matrix& out) {
 }
 /* calulate classification error */
-void CpuMatrix::classificationError(MatrixPtr output, IVectorPtr label) {
-  CHECK(dynamic_cast<CpuMatrix*>(output.get()));
-  CHECK(dynamic_cast<CpuIVector*>(label.get()));
-  size_t numSamples = getHeight();
-  size_t dim = output->getWidth();
-  CHECK_EQ(label->getSize(), numSamples);
-  CHECK_EQ(output->getHeight(), numSamples);
+void CpuMatrix::classificationError(Matrix& output, IVector& label) {
+  CHECK(dynamic_cast<const CpuMatrix*>(&output));
+  CHECK(dynamic_cast<const CpuIVector*>(&label));
   CHECK_EQ(getWidth(), (size_t)1);
-  real* out = output->getData();
-  real* result = getData();
-  int* lbl = label->getData();
-  real maxData;
-  int maxIndex;
+  size_t numSamples = getHeight();
+  CHECK_EQ(label.getSize(), numSamples);
+  CHECK_EQ(output.getHeight(), numSamples);
+  size_t dim = output.getWidth();
+  real* out = output.getData();
+  int* lbl = label.getData();
+  real maxData = 0.0;
+  int maxIndex = -1;
   for (size_t i = 0; i < numSamples; ++i) {
     CHECK_GE(lbl[i], 0);
     CHECK_LT((size_t)lbl[i], dim);
@@ -3570,7 +3528,7 @@ void CpuMatrix::classificationError(MatrixPtr output, IVectorPtr label) {
         maxData = out[i * dim + j];
       }
     }
-    result[i] = (maxIndex != lbl[i]);
+    getData()[i] = (maxIndex != lbl[i]);
   }
 }
...
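In plain terms, the rewritten CpuMatrix::classificationError above writes a per-sample 0/1 indicator into this single-column matrix,

\text{this}[i] = \mathbf{1}\!\left[\, \arg\max_{0 \le j < \text{dim}} \text{output}[i, j] \neq \text{label}[i] \,\right],

so the mean of the column is the error rate; the Matrix.h comment "output[i] = 0 if row i is correct" further down describes the same contract.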
@@ -444,8 +444,8 @@ public:
    * this = scaleAB*(a*b) + scaleT*this
    * @endcode
    */
-  virtual void mul(const MatrixPtr a,
-                   const MatrixPtr b,
+  virtual void mul(const Matrix& a,
+                   const Matrix& b,
                    real scaleAB,
                    real scaleT) {
     LOG(FATAL) << "Not implemented";
@@ -643,7 +643,7 @@ public:
    * this = a*b
    * @endcode
    */
-  virtual void mul(const MatrixPtr a, const MatrixPtr b) {
+  virtual void mul(const Matrix& a, const Matrix& b) {
     LOG(FATAL) << "Not implemented";
   }
@@ -835,7 +835,7 @@ public:
    *
    * output[i] = 0 if row i is correct.
    */
-  virtual void classificationError(MatrixPtr output, IVectorPtr label) {
+  virtual void classificationError(Matrix& output, IVector& label) {
     LOG(FATAL) << "Not implemented";
   }
@@ -997,8 +997,8 @@ public:
     LOG(FATAL) << "Not implemeted";
   }
-  virtual void contextProjectionForward(MatrixPtr input,
-                                        MatrixPtr weight,
+  virtual void contextProjectionForward(Matrix& input,
+                                        Matrix* weight,
                                         const IVector& sequence,
                                         int contextLength,
                                         int contextStart,
@@ -1007,8 +1007,8 @@ public:
     LOG(FATAL) << "Not implemeted";
   }
-  virtual void contextProjectionBackward(MatrixPtr inputGrad,
-                                         MatrixPtr weightGrad,
+  virtual void contextProjectionBackward(Matrix* inputGrad,
+                                         Matrix* weightGrad,
                                          const IVector& sequence,
                                          int contextLength,
                                          int contextStart,
@@ -1017,14 +1017,14 @@ public:
     LOG(FATAL) << "Not implemeted";
   }
-  virtual void contextProjectionBackwardData(MatrixPtr inputGrad,
+  virtual void contextProjectionBackwardData(Matrix& inputGrad,
                                              const IVector& sequence,
                                              int contextLength,
                                              int contextStart) {
     LOG(FATAL) << "Not implemeted";
   }
-  virtual void contextProjectionBackwardWeight(MatrixPtr weightGrad,
+  virtual void contextProjectionBackwardWeight(Matrix& weightGrad,
                                                const IVector& sequence,
                                                int contextLength,
                                                int contextStart,
@@ -1272,14 +1272,14 @@ public:
    * this = scaleAB*(a*b) + scaleT*this
    * @endcode
    */
-  void mul(const MatrixPtr a, const MatrixPtr b, real scaleAB, real scaleT);
+  void mul(const Matrix& a, const Matrix& b, real scaleAB, real scaleT);
   /**
    * @code
    * this = a*b
    * @endcode
    */
-  void mul(const MatrixPtr a, const MatrixPtr b);
+  void mul(const Matrix& a, const Matrix& b);
   void mul(const GpuMatrix& a, const GpuMatrix& b, real scaleAB, real scaleT);
@@ -1373,7 +1373,7 @@ public:
   void check(std::ostream& os, Matrix& refMat, bool printDiff = true);
   void randomizeUniform();
-  void classificationError(MatrixPtr output, IVectorPtr label);
+  void classificationError(Matrix& output, IVector& label);
   void convExpand(Matrix& feature,
                   int feaImgHeight,
@@ -1487,20 +1487,20 @@ public:
                         const IVector& sequence,
                         IVector& index);
-  void contextProjectionForward(MatrixPtr input,
-                                MatrixPtr weight,
+  void contextProjectionForward(Matrix& input,
+                                Matrix* weight,
                                 const IVector& sequence,
                                 int contextLength,
                                 int contextStart,
                                 size_t beginPad,
                                 bool isPadding);
-  void contextProjectionBackwardData(MatrixPtr inputGrad,
+  void contextProjectionBackwardData(Matrix& inputGrad,
                                      const IVector& sequence,
                                      int contextLength,
                                      int contextStart);
-  void contextProjectionBackwardWeight(MatrixPtr weightGrad,
+  void contextProjectionBackwardWeight(Matrix& weightGrad,
                                        const IVector& sequence,
                                        int contextLength,
                                        int contextStart,
@@ -1713,16 +1713,16 @@ public:
                         const IVector& sequence,
                         IVector& index);
-  void contextProjectionForward(MatrixPtr input,
-                                MatrixPtr weight,
+  void contextProjectionForward(Matrix& input,
+                                Matrix* weight,
                                 const IVector& sequence,
                                 int contextLength,
                                 int contextStart,
                                 size_t beginPad,
                                 bool isPadding);
-  void contextProjectionBackward(MatrixPtr inputGrad,
-                                 MatrixPtr weightGrad,
+  void contextProjectionBackward(Matrix* inputGrad,
+                                 Matrix* weightGrad,
                                  const IVector& sequence,
                                  int contextLength,
                                  int contextStart,
@@ -1784,7 +1784,7 @@ public:
   void addColumnVector(const Matrix& b);
-  void mul(const MatrixPtr a, const MatrixPtr b, real scaleAB, real scaleT);
+  void mul(const Matrix& a, const Matrix& b, real scaleAB, real scaleT);
   void mul(CpuMatrix* a, CpuMatrix* b, real scaleAB, real scaleT);
   void mul(CpuMatrix* a, CpuSparseMatrix* b, real scaleAB, real scaleT);
@@ -1807,7 +1807,7 @@ public:
   virtual void mul(CpuSparseMatrix* a, CpuMatrix* b, real scaleAB, real scaleT);
-  void mul(const MatrixPtr a, const MatrixPtr b);
+  void mul(const Matrix& a, const Matrix& b);
   void rightMul(Matrix& b, real scaleAB, real scaleT);
   void rightMul(Matrix& b);
@@ -1881,7 +1881,7 @@ public:
   void randomizeUniform();
-  void classificationError(MatrixPtr output, IVectorPtr label);
+  void classificationError(Matrix& output, IVector& label);
   void addByBitCode(size_t numClasses, const IVector& codes, const Matrix& vec);
...
...@@ -571,49 +571,48 @@ void GpuSparseMatrix::transpose(MatrixPtr matTrans, bool memAlloc) { ...@@ -571,49 +571,48 @@ void GpuSparseMatrix::transpose(MatrixPtr matTrans, bool memAlloc) {
hl_stream_synchronize(stream); hl_stream_synchronize(stream);
} }
void GpuSparseMatrix::mul(const GpuMatrixPtr a, void GpuSparseMatrix::mul(const GpuMatrix& a,
const GpuMatrixPtr b, const GpuMatrix& b,
real scaleAB, real scaleAB,
real scaleT) { real scaleT) {
CHECK(a->useGpu_ && b->useGpu_) << "type not match"; CHECK(a.useGpu_ && b.useGpu_) << "type not match";
CHECK(!trans_) << "trans not supported"; CHECK(!trans_) << "trans not supported";
real* A_d = a->getData(); real* A_d = (real*)a.getData();
real* B_d = b->getData(); real* B_d = (real*)b.getData();
hl_sparse_matrix_s C_d = sMatrix_.get(); hl_sparse_matrix_s C_d = sMatrix_.get();
hl_trans_op_t a_trans = a->trans_ ? HPPL_OP_T : HPPL_OP_N; hl_trans_op_t a_trans = a.trans_ ? HPPL_OP_T : HPPL_OP_N;
hl_trans_op_t b_trans = b->trans_ ? HPPL_OP_T : HPPL_OP_N; hl_trans_op_t b_trans = b.trans_ ? HPPL_OP_T : HPPL_OP_N;
if (!a->trans_ && !b->trans_) { if (!a.trans_ && !b.trans_) {
CHECK(height_ == a->getHeight()); CHECK(height_ == a.getHeight());
CHECK(width_ == b->getWidth()); CHECK(width_ == b.getWidth());
CHECK(a->getWidth() == b->getHeight()); CHECK(a.getWidth() == b.getHeight());
} else if (a->trans_ && !b->trans_) { } else if (a.trans_ && !b.trans_) {
CHECK(height_ == a->getWidth()); CHECK(height_ == a.getWidth());
CHECK(width_ == b->getWidth()); CHECK(width_ == b.getWidth());
CHECK(a->getHeight() == b->getHeight()); CHECK(a.getHeight() == b.getHeight());
} else if (!a->trans_ && b->trans_) { } else if (!a.trans_ && b.trans_) {
CHECK(height_ == a->getHeight()); CHECK(height_ == a.getHeight());
CHECK(width_ == b->getHeight()); CHECK(width_ == b.getHeight());
CHECK(a->getWidth() == b->getWidth()); CHECK(a.getWidth() == b.getWidth());
} else { } else {
LOG(INFO) << "Not support"; LOG(INFO) << "Not support";
} }
int dimM = height_; int dimM = height_;
int dimN = width_; int dimN = width_;
int dimK = !b->trans_ ? b->getHeight() : b->getWidth(); int dimK = !b.trans_ ? b.getHeight() : b.getWidth();
hl_sparse_matrix_mul( hl_sparse_matrix_mul(
A_d, a_trans, B_d, b_trans, C_d, dimM, dimN, dimK, scaleAB, scaleT); A_d, a_trans, B_d, b_trans, C_d, dimM, dimN, dimK, scaleAB, scaleT);
} }
void GpuSparseMatrix::mul(const MatrixPtr a, void GpuSparseMatrix::mul(const Matrix& a,
const MatrixPtr b, const Matrix& b,
real scaleAB, real scaleAB,
real scaleT) { real scaleT) {
if (std::dynamic_pointer_cast<GpuMatrix>(a) && const auto a_ptr = dynamic_cast<const GpuMatrix*>(&a);
std::dynamic_pointer_cast<GpuMatrix>(b)) { const auto b_ptr = dynamic_cast<const GpuMatrix*>(&b);
GpuMatrixPtr a_ptr = std::dynamic_pointer_cast<GpuMatrix>(a); if (a_ptr && b_ptr) {
GpuMatrixPtr b_ptr = std::dynamic_pointer_cast<GpuMatrix>(b); mul(*a_ptr, *b_ptr, scaleAB, scaleT);
mul(a_ptr, b_ptr, scaleAB, scaleT);
} else { } else {
LOG(FATAL) << "not supported"; LOG(FATAL) << "not supported";
} }
......
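The Matrix& overload of GpuSparseMatrix::mul above replaces shared_ptr downcasts with a dynamic_cast on the address of the reference, then forwards to the typed overload. The sketch below shows only that dispatch pattern under simplified stand-in types; the real GPU kernels and LOG macros are omitted.

```cpp
// Sketch of reference-based overload dispatch via dynamic_cast (stand-in types).
#include <iostream>

struct Matrix {
  virtual ~Matrix() = default;  // polymorphic base so dynamic_cast works
};
struct GpuMatrix : Matrix {};

struct GpuSparseMatrix {
  void mul(const GpuMatrix& a, const GpuMatrix& b, float scaleAB, float scaleT) {
    // Typed overload: would do the actual sparse * dense GPU multiply.
    (void)a; (void)b; (void)scaleAB; (void)scaleT;
  }
  void mul(const Matrix& a, const Matrix& b, float scaleAB, float scaleT) {
    const auto* a_ptr = dynamic_cast<const GpuMatrix*>(&a);
    const auto* b_ptr = dynamic_cast<const GpuMatrix*>(&b);
    if (a_ptr && b_ptr) {
      mul(*a_ptr, *b_ptr, scaleAB, scaleT);  // forward to the typed overload
    } else {
      std::cerr << "not supported\n";        // stands in for LOG(FATAL)
    }
  }
};

int main() {
  GpuSparseMatrix c;
  GpuMatrix a, b;
  // Force the Matrix& overload to demonstrate the dispatch path.
  c.mul(static_cast<const Matrix&>(a), static_cast<const Matrix&>(b), 1.0f, 1.0f);
  return 0;
}
```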
...@@ -104,10 +104,7 @@ public: ...@@ -104,10 +104,7 @@ public:
size_t newNnz, size_t newNnz,
SparseValueType valueType); SparseValueType valueType);
void mul(const GpuMatrixPtr a, void mul(const GpuMatrix& a, const GpuMatrix& b, real scaleAB, real scaleT);
const GpuMatrixPtr b,
real scaleAB,
real scaleT);
/// B = A , B.trans = !A.trans /// B = A , B.trans = !A.trans
MatrixPtr getTranspose(); MatrixPtr getTranspose();
...@@ -218,7 +215,7 @@ protected: ...@@ -218,7 +215,7 @@ protected:
void copyRow(int offsets, size_t colNum, const sparse_float_value_t* row); void copyRow(int offsets, size_t colNum, const sparse_float_value_t* row);
public: public:
void mul(const MatrixPtr a, const MatrixPtr b, real scaleAB, real scaleT); void mul(const Matrix& a, const Matrix& b, real scaleAB, real scaleT);
void copyFrom(CpuSparseMatrix& src, hl_stream_t stream); void copyFrom(CpuSparseMatrix& src, hl_stream_t stream);
void copyFrom(GpuSparseMatrix& src, hl_stream_t stream); void copyFrom(GpuSparseMatrix& src, hl_stream_t stream);
......
...@@ -33,8 +33,8 @@ TEST(Matrix, CopyCpuMatrixToSparseMatrix) { ...@@ -33,8 +33,8 @@ TEST(Matrix, CopyCpuMatrixToSparseMatrix) {
ret2(new CpuMatrix(HEIGHT, WIDTH_TEST)); ret2(new CpuMatrix(HEIGHT, WIDTH_TEST));
ret1->zeroMem(); ret1->zeroMem();
ret2->zeroMem(); ret2->zeroMem();
ret1->mul(testMatrix, mulCpuMatrix, 1.0, 1.0); ret1->mul(*testMatrix, *mulCpuMatrix, 1.0, 1.0);
ret2->mul(testCpuMatrix, mulCpuMatrix, 1.0, 1.0); ret2->mul(*testCpuMatrix, *mulCpuMatrix, 1.0, 1.0);
checkMatrixEqual(ret1, ret2); checkMatrixEqual(ret1, ret2);
} }
...@@ -147,9 +147,9 @@ void test_sparse_matrix_mul(MatrixPara paraA, ...@@ -147,9 +147,9 @@ void test_sparse_matrix_mul(MatrixPara paraA,
hl_stream_synchronize(stream); hl_stream_synchronize(stream);
/*matrix mul*/ /*matrix mul*/
cpuMatrixC->mul(cpuMatrixA, cpuMatrixB, 1.0, 1.0); cpuMatrixC->mul(*cpuMatrixA, *cpuMatrixB, 1.0, 1.0);
gpuMatrixC->mul(gpuMatrixA, gpuMatrixB, 1.0, 1.0); gpuMatrixC->mul(*gpuMatrixA, *gpuMatrixB, 1.0, 1.0);
cpuDenseC->mul(cpuDenseA, cpuDenseB, 1.0, 1.0); cpuDenseC->mul(*cpuDenseA, *cpuDenseB, 1.0, 1.0);
gpuMatrixC_d2h->copyFrom(*gpuMatrixC, stream); gpuMatrixC_d2h->copyFrom(*gpuMatrixC, stream);
hl_stream_synchronize(stream); hl_stream_synchronize(stream);
...@@ -224,8 +224,8 @@ TEST(Matrix, CopySparseMatrixToGpuSparseMatrix) { ...@@ -224,8 +224,8 @@ TEST(Matrix, CopySparseMatrixToGpuSparseMatrix) {
MatrixPtr ret2(new GpuMatrix(HEIGHT, WIDTH_TEST)); MatrixPtr ret2(new GpuMatrix(HEIGHT, WIDTH_TEST));
ret1->zeroMem(); ret1->zeroMem();
ret2->zeroMem(); ret2->zeroMem();
ret1->mul(testMatrix, mulCpuMatrix, 1.0, 1.0); ret1->mul(*testMatrix, *mulCpuMatrix, 1.0, 1.0);
ret2->mul(testGpuMatrix, mulGpuMatrix, 1.0, 1.0); ret2->mul(*testGpuMatrix, *mulGpuMatrix, 1.0, 1.0);
checkMatrixEqual(ret1, ret2); checkMatrixEqual(ret1, ret2);
} }
......
...@@ -65,16 +65,16 @@ void testMatrixProjectionForward(int contextStart, ...@@ -65,16 +65,16 @@ void testMatrixProjectionForward(int contextStart,
// calculate // calculate
int beginPad = std::max(0, -contextStart); int beginPad = std::max(0, -contextStart);
cpuOutput->contextProjectionForward(cpuInput, cpuOutput->contextProjectionForward(*cpuInput,
cpuWeight, cpuWeight.get(),
*cpuSequence, *cpuSequence,
contextLength, contextLength,
contextStart, contextStart,
beginPad, beginPad,
padding); padding);
gpuOutput->contextProjectionForward(gpuInput, gpuOutput->contextProjectionForward(*gpuInput,
gpuWeight, gpuWeight.get(),
*gpuSequence, *gpuSequence,
contextLength, contextLength,
contextStart, contextStart,
...@@ -120,17 +120,17 @@ void testMatrixProjectionBackward(int contextStart, ...@@ -120,17 +120,17 @@ void testMatrixProjectionBackward(int contextStart,
// calculate // calculate
int beginPad = std::max(0, -contextStart); int beginPad = std::max(0, -contextStart);
cpuOutputGrad->contextProjectionBackward(cpuInputGrad, cpuOutputGrad->contextProjectionBackward(cpuInputGrad.get(),
cpuWeightGrad, cpuWeightGrad.get(),
*cpuSequence, *cpuSequence,
contextLength, contextLength,
contextStart, contextStart,
beginPad, beginPad,
padding); padding);
gpuOutputGrad->contextProjectionBackwardData( gpuOutputGrad->contextProjectionBackwardData(
gpuInputGrad, *gpuSequence, contextLength, contextStart); *gpuInputGrad, *gpuSequence, contextLength, contextStart);
if (padding) { if (padding) {
gpuOutputGrad->contextProjectionBackwardWeight(gpuWeightGrad, gpuOutputGrad->contextProjectionBackwardWeight(*gpuWeightGrad,
*gpuSequence, *gpuSequence,
contextLength, contextLength,
contextStart, contextStart,
...@@ -318,7 +318,7 @@ void testMatrixInverse(int height) { ...@@ -318,7 +318,7 @@ void testMatrixInverse(int height) {
cpu->randomizeUniform(); cpu->randomizeUniform();
MatrixPtr cpuT = cpu->getTranspose(); MatrixPtr cpuT = cpu->getTranspose();
MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, height); MatrixPtr outputCheck = std::make_shared<CpuMatrix>(height, height);
outputCheck->mul(cpu, cpuT); outputCheck->mul(*cpu, *cpuT);
cpu->setDiag(1.0); cpu->setDiag(1.0);
cpu->add(*outputCheck); cpu->add(*outputCheck);
...@@ -328,7 +328,7 @@ void testMatrixInverse(int height) { ...@@ -328,7 +328,7 @@ void testMatrixInverse(int height) {
TensorCheckErr(*cpuI, *gpuI); TensorCheckErr(*cpuI, *gpuI);
outputCheck->mul(cpu, cpuI); outputCheck->mul(*cpu, *cpuI);
cpu->setDiag(1.0); cpu->setDiag(1.0);
TensorCheckErr(*cpu, *outputCheck); TensorCheckErr(*cpu, *outputCheck);
} }
...@@ -509,8 +509,8 @@ void testMatrixMul(bool transa, bool transb, int dimM, int dimN, int dimK) { ...@@ -509,8 +509,8 @@ void testMatrixMul(bool transa, bool transb, int dimM, int dimN, int dimK) {
gpuB->copyFrom(*cpuB); gpuB->copyFrom(*cpuB);
gpuC->copyFrom(*cpuC); gpuC->copyFrom(*cpuC);
cpuC->mul(cpuA, cpuB, alpha, beta); cpuC->mul(*cpuA, *cpuB, alpha, beta);
gpuC->mul(gpuA, gpuB, alpha, beta); gpuC->mul(*gpuA, *gpuB, alpha, beta);
TensorCheckErr(*cpuC, *gpuC); TensorCheckErr(*cpuC, *gpuC);
} }
...@@ -581,8 +581,8 @@ void testSubMatrixMul(bool transa, bool transb, int dimM, int dimN, int dimK) { ...@@ -581,8 +581,8 @@ void testSubMatrixMul(bool transa, bool transb, int dimM, int dimN, int dimK) {
MatrixPtr subCpuC = cpuC->subMatrix(startM, endM, startN, endN); MatrixPtr subCpuC = cpuC->subMatrix(startM, endM, startN, endN);
MatrixPtr subGpuC = gpuC->subMatrix(startM, endM, startN, endN); MatrixPtr subGpuC = gpuC->subMatrix(startM, endM, startN, endN);
subCpuC->mul(subCpuA, subCpuB, alpha, beta); subCpuC->mul(*subCpuA, *subCpuB, alpha, beta);
subGpuC->mul(subGpuA, subGpuB, alpha, beta); subGpuC->mul(*subGpuA, *subGpuB, alpha, beta);
TensorCheckErr(*cpuC, *gpuC); TensorCheckErr(*cpuC, *gpuC);
} }
...@@ -939,8 +939,8 @@ void testClassificationError(int numSamples, int dim) { ...@@ -939,8 +939,8 @@ void testClassificationError(int numSamples, int dim) {
gpuOutput->copyFrom(*cpuOutput); gpuOutput->copyFrom(*cpuOutput);
gpuLabel->copyFrom(*cpuLabel); gpuLabel->copyFrom(*cpuLabel);
cpuError->classificationError(cpuOutput, cpuLabel); cpuError->classificationError(*cpuOutput, *cpuLabel);
gpuError->classificationError(gpuOutput, gpuLabel); gpuError->classificationError(*gpuOutput, *gpuLabel);
TensorCheckEqual(*cpuError, *gpuError); TensorCheckEqual(*cpuError, *gpuError);
} }
......
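The contextProjection hunks above also show the mixed convention this refactor adopts: required arguments become references (callers write `*ptr`), while genuinely optional arguments stay raw pointers obtained with `.get()`. A minimal sketch of that convention follows; the types, parameter list, and body are simplified stand-ins rather than the real contextProjectionForward.

```cpp
// Sketch: required inputs by reference, optional inputs by raw pointer (stand-in types).
#include <memory>

struct IVector {};
struct Matrix {
  void contextProjectionForward(Matrix& input,         // must exist
                                Matrix* weight,        // may be nullptr
                                const IVector& sequence,
                                int contextLength,
                                int contextStart,
                                int beginPad,
                                bool padding) {
    (void)input; (void)sequence; (void)contextLength;
    (void)contextStart; (void)beginPad; (void)padding;
    if (weight) {
      // use trainable padding weights only when they were provided
    }
  }
};
using MatrixPtr = std::shared_ptr<Matrix>;

int main() {
  MatrixPtr output = std::make_shared<Matrix>();
  MatrixPtr input = std::make_shared<Matrix>();
  MatrixPtr weight;  // left empty: padding weights are optional
  IVector sequence;
  output->contextProjectionForward(*input, weight.get(), sequence,
                                   /*contextLength=*/3, /*contextStart=*/-1,
                                   /*beginPad=*/1, /*padding=*/false);
  return 0;
}
```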
...@@ -102,8 +102,8 @@ void testSpMatrixMul(int M, int N, int K, real rate) { ...@@ -102,8 +102,8 @@ void testSpMatrixMul(int M, int N, int K, real rate) {
gpuC->copyFrom(*cpuC, stream); gpuC->copyFrom(*cpuC, stream);
hl_stream_synchronize(stream); hl_stream_synchronize(stream);
cpuC->mul(cpuA, cpuB->getTranspose(), 1, 1); cpuC->mul(*cpuA, *cpuB->getTranspose(), 1, 1);
gpuC->mul(gpuA, gpuB->getTranspose(), 1, 1); gpuC->mul(*gpuA, *gpuB->getTranspose(), 1, 1);
MatrixPtr outputCheck(new CpuSparseMatrix(M, N, nnz)); MatrixPtr outputCheck(new CpuSparseMatrix(M, N, nnz));
outputCheck->copyFrom(*gpuC, stream); outputCheck->copyFrom(*gpuC, stream);
......
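One detail worth noting in the test hunk above: `*cpuB->getTranspose()` dereferences a temporary shared_ptr. That is safe because the temporary owner is destroyed only at the end of the full expression, so the reference stays valid for the duration of the mul call. A small sketch with stand-in types:

```cpp
// Sketch: passing a temporary transpose view by reference (stand-in types).
#include <memory>

struct Matrix {
  virtual ~Matrix() = default;
  std::shared_ptr<Matrix> getTranspose() { return std::make_shared<Matrix>(); }
  void mul(const Matrix& a, const Matrix& b, float scaleAB, float scaleT) {
    (void)a; (void)b; (void)scaleAB; (void)scaleT;
  }
};

int main() {
  auto a = std::make_shared<Matrix>();
  auto b = std::make_shared<Matrix>();
  auto c = std::make_shared<Matrix>();
  // The shared_ptr returned by getTranspose() lives until the end of this
  // statement, so the reference passed to mul() remains valid for the call.
  c->mul(*a, *b->getTranspose(), 1.0f, 1.0f);
  return 0;
}
```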