提交 cda3a774 编写于 作者: P peterzhang2029

bug fix when using hsigmoid with gpu

上级 54b39949
...@@ -64,49 +64,113 @@ void HierarchicalSigmoidLayer::forward(PassType passType) { ...@@ -64,49 +64,113 @@ void HierarchicalSigmoidLayer::forward(PassType passType) {
batchSize, batchSize,
codeLength_, codeLength_,
/* trans */ false, /* trans */ false,
useGpu(deviceId_)); false);
Matrix::resizeOrCreate(preOutput_.grad, Matrix::resizeOrCreate(preOutput_.grad,
batchSize, batchSize,
codeLength_, codeLength_,
/* trans */ false, /* trans */ false,
useGpu(deviceId_)); false);
IVectorPtr label = getInput(*getLabelLayer()).ids; IVectorPtr label = getInput(*getLabelLayer()).ids;
preOutput_.value->zeroMem(); preOutput_.value->zeroMem();
if (useGpu_) {
Matrix::resizeOrCreate(cpuOutput_,
output_.value->getHeight(),
output_.value->getWidth(),
/* trans */ false,
false);
IVector::resizeOrCreate(cpuLabel_, label->getSize(), false);
cpuLabel_->copyFrom(*label);
cpuOutput_->copyFrom(*output_.value);
} else {
cpuOutput_ = output_.value;
cpuLabel_ = label;
}
/* add the bias-vector */ /* add the bias-vector */
if (biases_.get() != NULL) { if (biases_.get() != NULL) {
preOutput_.value->addByBitCode(numClasses_, *label, *biases_->getW()); if (useGpu_) {
Matrix::resizeOrCreate(cpuBias_,
1,
numClasses_ - 1,
/* trans */ false,
false);
cpuBias_->copyFrom(*biases_->getW());
} else {
cpuBias_ = biases_->getW();
}
preOutput_.value->addByBitCode(numClasses_, *cpuLabel_, *cpuBias_);
} }
for (size_t i = 0; i < inputLayers_.size() - 1; ++i) { for (size_t i = 0; i < inputLayers_.size() - 1; ++i) {
MatrixPtr input = getInputValue(i); MatrixPtr input = getInputValue(i);
if (useGpu_) {
Matrix::resizeOrCreate(cpuInput_,
input->getHeight(),
input->getWidth(),
/* trans */ false,
false);
Matrix::resizeOrCreate(cpuWeight_,
weights_[i]->getW()->getHeight(),
weights_[i]->getW()->getWidth(),
/* trans */ false,
false);
cpuInput_->copyFrom(*input);
cpuWeight_->copyFrom(*weights_[i]->getW());
} else {
cpuInput_ = input;
cpuWeight_ = weights_[i]->getW();
}
preOutput_.value->mulByBitCode( preOutput_.value->mulByBitCode(
numClasses_, *label, *weights_[i]->getW(), *input); numClasses_, *cpuLabel_, *cpuWeight_, *cpuInput_);
} }
// keep consistent with the clipping in the following softrelu // keep consistent with the clipping in the following softrelu
preOutput_.value->clip(-40.0, 40.0); preOutput_.value->clip(-40.0, 40.0);
preOutput_.value->sumByBitCode(numClasses_, preOutput_.value->sumByBitCode(numClasses_,
*label, *cpuLabel_,
*output_.value, *cpuOutput_,
-1); // scaleSum -1); // scaleSum
preOutput_.value->softrelu(*preOutput_.value); preOutput_.value->softrelu(*preOutput_.value);
MatrixPtr sum = MatrixPtr sum =
Matrix::create(batchSize, 1, /* trans= */ false, useGpu(deviceId_)); Matrix::create(batchSize, 1, /* trans= */ false, false);
preOutput_.value->rowSum(*sum); preOutput_.value->rowSum(*sum);
output_.value->add(*sum); cpuOutput_->add(*sum);
if (useGpu_) {
output_.value->copyFrom(*cpuOutput_);
} else {
output_.value = cpuOutput_;
}
} }
void HierarchicalSigmoidLayer::backward(const UpdateCallback& callback) { void HierarchicalSigmoidLayer::backward(const UpdateCallback& callback) {
IVectorPtr label = getInput(*getLabelLayer()).ids; IVectorPtr label = getInput(*getLabelLayer()).ids;
if (useGpu_) {
IVector::resizeOrCreate(cpuLabel_, label->getSize(), false);
cpuLabel_->copyFrom(*label);
} else {
cpuLabel_ = label;
}
preOutput_.grad->one(); preOutput_.grad->one();
preOutput_.grad->softreluDerivative(*preOutput_.value); preOutput_.grad->softreluDerivative(*preOutput_.value);
preOutput_.grad->subByBitCode(numClasses_, *label); preOutput_.grad->subByBitCode(numClasses_, *cpuLabel_);
if (biases_ && biases_->getWGrad()) { if (biases_ && biases_->getWGrad()) {
MatrixPtr biases_grad = biases_->getWGrad();
if (useGpu_) {
Matrix::resizeOrCreate(cpuBias_,
1,
numClasses_ - 1,
/* trans */ false,
false);
cpuBias_->copyFrom(*biases_grad);
} else {
cpuBias_ = biases_grad;
}
preOutput_.grad->addByBitCodeBackward( preOutput_.grad->addByBitCodeBackward(
numClasses_, *label, *biases_->getWGrad()); numClasses_, *cpuLabel_, *cpuBias_);
if (useGpu_) {
biases_grad->copyFrom(*cpuBias_);
} else {
biases_grad = cpuBias_;
}
/* Increasing the number of gradient */ /* Increasing the number of gradient */
biases_->getParameterPtr()->incUpdate(callback); biases_->getParameterPtr()->incUpdate(callback);
} }
...@@ -115,9 +179,31 @@ void HierarchicalSigmoidLayer::backward(const UpdateCallback& callback) { ...@@ -115,9 +179,31 @@ void HierarchicalSigmoidLayer::backward(const UpdateCallback& callback) {
/* Calculate the W-gradient for the current layer */ /* Calculate the W-gradient for the current layer */
MatrixPtr input = getInputValue(i); MatrixPtr input = getInputValue(i);
if (weights_[i]->getWGrad()) { if (weights_[i]->getWGrad()) {
MatrixPtr weights_grad = weights_[i]->getWGrad();
if (useGpu_) {
Matrix::resizeOrCreate(cpuInput_,
input->getHeight(),
input->getWidth(),
/* trans */ false,
false);
Matrix::resizeOrCreate(cpuWeightGrad_,
weights_grad->getHeight(),
weights_grad->getWidth(),
/* trans */ false,
false);
cpuInput_->copyFrom(*input);
cpuWeightGrad_->copyFrom(*weights_grad);
} else {
cpuInput_ = input;
cpuWeightGrad_ = weights_grad;
}
preOutput_.grad->mulByBitCodeBackwardWeight( preOutput_.grad->mulByBitCodeBackwardWeight(
numClasses_, *label, *weights_[i]->getWGrad(), *input); numClasses_, *cpuLabel_, *cpuWeightGrad_, *cpuInput_);
if (useGpu_) {
weights_grad->copyFrom(*cpuWeightGrad_);
} else {
weights_grad = cpuWeightGrad_;
}
/* Increasing the number of gradient */ /* Increasing the number of gradient */
weights_[i]->getParameterPtr()->incUpdate(callback); weights_[i]->getParameterPtr()->incUpdate(callback);
} }
...@@ -125,8 +211,30 @@ void HierarchicalSigmoidLayer::backward(const UpdateCallback& callback) { ...@@ -125,8 +211,30 @@ void HierarchicalSigmoidLayer::backward(const UpdateCallback& callback) {
/* Calculate the input layers error */ /* Calculate the input layers error */
MatrixPtr inputGrad = getInputGrad(i); MatrixPtr inputGrad = getInputGrad(i);
if (inputGrad) { if (inputGrad) {
if (useGpu_) {
Matrix::resizeOrCreate(cpuInputGrad_,
inputGrad->getHeight(),
inputGrad->getWidth(),
/* trans */ false,
false);
Matrix::resizeOrCreate(cpuWeight_,
weights_[i]->getW()->getHeight(),
weights_[i]->getW()->getWidth(),
/* trans */ false,
false);
cpuInputGrad_->copyFrom(*inputGrad);
cpuWeight_->copyFrom(*weights_[i]->getW());
} else {
cpuInputGrad_ = inputGrad;
cpuWeight_ = weights_[i]->getW();
}
preOutput_.grad->mulByBitCodeBackwardError( preOutput_.grad->mulByBitCodeBackwardError(
numClasses_, *label, *weights_[i]->getW(), *inputGrad); numClasses_, *cpuLabel_, *cpuWeight_, *cpuInputGrad_);
if (useGpu_) {
inputGrad->copyFrom(*cpuInputGrad_);
} else {
inputGrad = cpuInputGrad_;
}
} }
} }
} }
......
...@@ -80,6 +80,16 @@ protected: ...@@ -80,6 +80,16 @@ protected:
int codeLength_; int codeLength_;
/// temporary result of output_ /// temporary result of output_
Argument preOutput_; Argument preOutput_;
/// The temporary variables in CPU memory.
MatrixPtr cpuWeight_;
MatrixPtr cpuWeightGrad_;
MatrixPtr cpuInput_;
MatrixPtr cpuInputGrad_;
MatrixPtr cpuBias_;
MatrixPtr cpuOutput_;
IVectorPtr cpuLabel_;
}; };
} // namespace paddle } // namespace paddle
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册