From cda3a7747a657e630164c6802b9f1382e29c855b Mon Sep 17 00:00:00 2001
From: peterzhang2029
Date: Mon, 27 Nov 2017 12:55:52 +0800
Subject: [PATCH] bug fix when using hsigmoid with gpu

---
 .../layers/HierarchicalSigmoidLayer.cpp       | 140 ++++++++++++++++--
 .../gserver/layers/HierarchicalSigmoidLayer.h |  10 ++
 2 files changed, 134 insertions(+), 16 deletions(-)

diff --git a/paddle/gserver/layers/HierarchicalSigmoidLayer.cpp b/paddle/gserver/layers/HierarchicalSigmoidLayer.cpp
index d62a8d846..f93a9937d 100644
--- a/paddle/gserver/layers/HierarchicalSigmoidLayer.cpp
+++ b/paddle/gserver/layers/HierarchicalSigmoidLayer.cpp
@@ -64,49 +64,113 @@ void HierarchicalSigmoidLayer::forward(PassType passType) {
                          batchSize,
                          codeLength_,
                          /* trans */ false,
-                         useGpu(deviceId_));
+                         false);
   Matrix::resizeOrCreate(preOutput_.grad,
                          batchSize,
                          codeLength_,
                          /* trans */ false,
-                         useGpu(deviceId_));
-
+                         false);
   IVectorPtr label = getInput(*getLabelLayer()).ids;
-
   preOutput_.value->zeroMem();
+  if (useGpu_) {
+    Matrix::resizeOrCreate(cpuOutput_,
+                           output_.value->getHeight(),
+                           output_.value->getWidth(),
+                           /* trans */ false,
+                           false);
+    IVector::resizeOrCreate(cpuLabel_, label->getSize(), false);
+    cpuLabel_->copyFrom(*label);
+    cpuOutput_->copyFrom(*output_.value);
+  } else {
+    cpuOutput_ = output_.value;
+    cpuLabel_ = label;
+  }

   /* add the bias-vector */
   if (biases_.get() != NULL) {
-    preOutput_.value->addByBitCode(numClasses_, *label, *biases_->getW());
+    if (useGpu_) {
+      Matrix::resizeOrCreate(cpuBias_,
+                             1,
+                             numClasses_ - 1,
+                             /* trans */ false,
+                             false);
+      cpuBias_->copyFrom(*biases_->getW());
+    } else {
+      cpuBias_ = biases_->getW();
+    }
+    preOutput_.value->addByBitCode(numClasses_, *cpuLabel_, *cpuBias_);
   }
   for (size_t i = 0; i < inputLayers_.size() - 1; ++i) {
     MatrixPtr input = getInputValue(i);
+    if (useGpu_) {
+      Matrix::resizeOrCreate(cpuInput_,
+                             input->getHeight(),
+                             input->getWidth(),
+                             /* trans */ false,
+                             false);
+      Matrix::resizeOrCreate(cpuWeight_,
+                             weights_[i]->getW()->getHeight(),
+                             weights_[i]->getW()->getWidth(),
+                             /* trans */ false,
+                             false);
+      cpuInput_->copyFrom(*input);
+      cpuWeight_->copyFrom(*weights_[i]->getW());
+    } else {
+      cpuInput_ = input;
+      cpuWeight_ = weights_[i]->getW();
+    }
     preOutput_.value->mulByBitCode(
-        numClasses_, *label, *weights_[i]->getW(), *input);
+        numClasses_, *cpuLabel_, *cpuWeight_, *cpuInput_);
   }

   // keep consistent with the clipping in the following softrelu
   preOutput_.value->clip(-40.0, 40.0);
   preOutput_.value->sumByBitCode(numClasses_,
-                                 *label,
-                                 *output_.value,
+                                 *cpuLabel_,
+                                 *cpuOutput_,
                                  -1);  // scaleSum
   preOutput_.value->softrelu(*preOutput_.value);
   MatrixPtr sum =
-      Matrix::create(batchSize, 1, /* trans= */ false, useGpu(deviceId_));
+      Matrix::create(batchSize, 1, /* trans= */ false, false);
   preOutput_.value->rowSum(*sum);
-  output_.value->add(*sum);
+  cpuOutput_->add(*sum);
+  if (useGpu_) {
+    output_.value->copyFrom(*cpuOutput_);
+  } else {
+    output_.value = cpuOutput_;
+  }
 }

 void HierarchicalSigmoidLayer::backward(const UpdateCallback& callback) {
   IVectorPtr label = getInput(*getLabelLayer()).ids;
+  if (useGpu_) {
+    IVector::resizeOrCreate(cpuLabel_, label->getSize(), false);
+    cpuLabel_->copyFrom(*label);
+  } else {
+    cpuLabel_ = label;
+  }
   preOutput_.grad->one();
   preOutput_.grad->softreluDerivative(*preOutput_.value);
-  preOutput_.grad->subByBitCode(numClasses_, *label);
+  preOutput_.grad->subByBitCode(numClasses_, *cpuLabel_);

   if (biases_ && biases_->getWGrad()) {
+    MatrixPtr biases_grad = biases_->getWGrad();
+    if (useGpu_) {
+      Matrix::resizeOrCreate(cpuBias_,
+                             1,
+                             numClasses_ - 1,
+                             /* trans */ false,
+                             false);
+      cpuBias_->copyFrom(*biases_grad);
+    } else {
+      cpuBias_ = biases_grad;
+    }
     preOutput_.grad->addByBitCodeBackward(
-        numClasses_, *label, *biases_->getWGrad());
-
+        numClasses_, *cpuLabel_, *cpuBias_);
+    if (useGpu_) {
+      biases_grad->copyFrom(*cpuBias_);
+    } else {
+      biases_grad = cpuBias_;
+    }
     /* Increasing the number of gradient */
     biases_->getParameterPtr()->incUpdate(callback);
   }
@@ -115,9 +179,31 @@ void HierarchicalSigmoidLayer::backward(const UpdateCallback& callback) {
     /* Calculate the W-gradient for the current layer */
     MatrixPtr input = getInputValue(i);
     if (weights_[i]->getWGrad()) {
+      MatrixPtr weights_grad = weights_[i]->getWGrad();
+      if (useGpu_) {
+        Matrix::resizeOrCreate(cpuInput_,
+                               input->getHeight(),
+                               input->getWidth(),
+                               /* trans */ false,
+                               false);
+        Matrix::resizeOrCreate(cpuWeightGrad_,
+                               weights_grad->getHeight(),
+                               weights_grad->getWidth(),
+                               /* trans */ false,
+                               false);
+        cpuInput_->copyFrom(*input);
+        cpuWeightGrad_->copyFrom(*weights_grad);
+      } else {
+        cpuInput_ = input;
+        cpuWeightGrad_ = weights_grad;
+      }
       preOutput_.grad->mulByBitCodeBackwardWeight(
-          numClasses_, *label, *weights_[i]->getWGrad(), *input);
-
+          numClasses_, *cpuLabel_, *cpuWeightGrad_, *cpuInput_);
+      if (useGpu_) {
+        weights_grad->copyFrom(*cpuWeightGrad_);
+      } else {
+        weights_grad = cpuWeightGrad_;
+      }
       /* Increasing the number of gradient */
       weights_[i]->getParameterPtr()->incUpdate(callback);
     }
@@ -125,8 +211,30 @@ void HierarchicalSigmoidLayer::backward(const UpdateCallback& callback) {
     /* Calculate the input layers error */
     MatrixPtr inputGrad = getInputGrad(i);
     if (inputGrad) {
+      if (useGpu_) {
+        Matrix::resizeOrCreate(cpuInputGrad_,
+                               inputGrad->getHeight(),
+                               inputGrad->getWidth(),
+                               /* trans */ false,
+                               false);
+        Matrix::resizeOrCreate(cpuWeight_,
+                               weights_[i]->getW()->getHeight(),
+                               weights_[i]->getW()->getWidth(),
+                               /* trans */ false,
+                               false);
+        cpuInputGrad_->copyFrom(*inputGrad);
+        cpuWeight_->copyFrom(*weights_[i]->getW());
+      } else {
+        cpuInputGrad_ = inputGrad;
+        cpuWeight_ = weights_[i]->getW();
+      }
       preOutput_.grad->mulByBitCodeBackwardError(
-          numClasses_, *label, *weights_[i]->getW(), *inputGrad);
+          numClasses_, *cpuLabel_, *cpuWeight_, *cpuInputGrad_);
+      if (useGpu_) {
+        inputGrad->copyFrom(*cpuInputGrad_);
+      } else {
+        inputGrad = cpuInputGrad_;
+      }
     }
   }
 }
diff --git a/paddle/gserver/layers/HierarchicalSigmoidLayer.h b/paddle/gserver/layers/HierarchicalSigmoidLayer.h
index 9afd40b16..2483572de 100644
--- a/paddle/gserver/layers/HierarchicalSigmoidLayer.h
+++ b/paddle/gserver/layers/HierarchicalSigmoidLayer.h
@@ -80,6 +80,16 @@ protected:
   int codeLength_;
   /// temporary result of output_
   Argument preOutput_;
+
+  /// The temporary variables in CPU memory.
+  MatrixPtr cpuWeight_;
+  MatrixPtr cpuWeightGrad_;
+  MatrixPtr cpuInput_;
+  MatrixPtr cpuInputGrad_;
+  MatrixPtr cpuBias_;
+  MatrixPtr cpuOutput_;
+  IVectorPtr cpuLabel_;
 };

 }  // namespace paddle
--
GitLab
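
Why the patch takes this shape: the bit-code kernels this layer relies on (addByBitCode, mulByBitCode, sumByBitCode, and their backward counterparts) operate on CPU matrices only, so on GPU every operand is staged through a host-side buffer: copy the device data into a CPU matrix, run the bit-code kernel on the copy, then push any in-place result back to the device. The sketch below distills that round-trip; it assumes the Paddle Matrix API already used in the diff (paddle/math/Matrix.h), and bitCodeOpOnCpu is a hypothetical stand-in for one of the CPU-only kernels, not a real Paddle function.

  // Host-staging round-trip (illustrative sketch, not the literal layer code).
  // bitCodeOpOnCpu is a hypothetical placeholder for a CPU-only kernel.
  void runBitCodeOpStaged(MatrixPtr& deviceMat,  // device- or host-resident operand
                          MatrixPtr& cpuBuf,     // reusable host staging buffer
                          bool useGpu) {
    if (useGpu) {
      // Allocate (or reuse) a host buffer of the same shape, then pull the
      // device contents into it.
      Matrix::resizeOrCreate(cpuBuf,
                             deviceMat->getHeight(),
                             deviceMat->getWidth(),
                             /* trans */ false,
                             /* useGpu */ false);
      cpuBuf->copyFrom(*deviceMat);
    } else {
      // Already on the host: alias the pointer, no copy needed.
      cpuBuf = deviceMat;
    }
    bitCodeOpOnCpu(*cpuBuf);  // hypothetical CPU-only bit-code kernel
    if (useGpu) {
      // The host buffer is a separate allocation, so results modified in
      // place (e.g. gradients) must be copied back to the device.
      deviceMat->copyFrom(*cpuBuf);
    }
  }

Note the asymmetry the else-branches buy: on CPU the staging pointer merely aliases the original matrix, so the fallback adds no copies there; only the GPU path pays the two transfers per operand per call.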