提交 7f0ad620 编写于 作者: Y Yu Yang 提交者: GitHub

Merge pull request #1149 from reyoung/feature/ErrorHandlingInPaddle

Feature/error handling in paddle
...@@ -69,8 +69,14 @@ static ClassRegistrar<ActivationFunction> gActivationRegistrar; ...@@ -69,8 +69,14 @@ static ClassRegistrar<ActivationFunction> gActivationRegistrar;
class IdentityActivation : public ActivationFunction { class IdentityActivation : public ActivationFunction {
public: public:
static const std::string name; static const std::string name;
void forward(Argument& act) { (void)act; } Error __must_check forward(Argument& act) {
void backward(Argument& act) { (void)act; } (void)act;
return Error();
}
Error __must_check backward(Argument& act) {
(void)act;
return Error();
}
const std::string& getName() const { return name; } const std::string& getName() const { return name; }
}; };
const std::string IdentityActivation::name = ""; const std::string IdentityActivation::name = "";
...@@ -86,8 +92,14 @@ static InitFunction __reg_activation__identity([] { ...@@ -86,8 +92,14 @@ static InitFunction __reg_activation__identity([] {
* \f] * \f]
*/ */
BEGIN_DEFINE_ACTIVATION(sigmoid) BEGIN_DEFINE_ACTIVATION(sigmoid)
void forward(Argument& act) { act.value->sigmoid(*act.value); } Error __must_check forward(Argument& act) {
void backward(Argument& act) { act.grad->sigmoidDerivative(*act.value); } act.value->sigmoid(*act.value);
return Error();
}
Error __must_check backward(Argument& act) {
act.grad->sigmoidDerivative(*act.value);
return Error();
}
END_DEFINE_ACTIVATION(sigmoid) END_DEFINE_ACTIVATION(sigmoid)
/** /**
...@@ -103,9 +115,12 @@ MatrixPtr sftMaxDot_; ...@@ -103,9 +115,12 @@ MatrixPtr sftMaxDot_;
MatrixPtr one_; MatrixPtr one_;
public: public:
void forward(Argument& act) { act.value->softmax(*act.value); } Error __must_check forward(Argument& act) {
act.value->softmax(*act.value);
return Error();
}
void backward(Argument& act) { Error __must_check backward(Argument& act) {
MatrixPtr outputV = act.value; MatrixPtr outputV = act.value;
MatrixPtr outputG = act.grad; MatrixPtr outputG = act.grad;
...@@ -137,6 +152,7 @@ void backward(Argument& act) { ...@@ -137,6 +152,7 @@ void backward(Argument& act) {
act.grad->softmaxDerivative(*act.value, *sftMaxSum_); act.grad->softmaxDerivative(*act.value, *sftMaxSum_);
} }
return Error();
} }
END_DEFINE_ACTIVATION(softmax) END_DEFINE_ACTIVATION(softmax)
...@@ -151,8 +167,11 @@ ACTIVATION_CLASS_NAME(softmax) softmax_; ...@@ -151,8 +167,11 @@ ACTIVATION_CLASS_NAME(softmax) softmax_;
Argument argument_; Argument argument_;
public: public:
void forward(Argument& act) { Error __must_check forward(Argument& act) {
CHECK_EQ(act.value->getWidth(), 1UL); if (act.value->getWidth() != 1UL) {
return Error(
"Input width for each timestep of sequence softmax should be 1");
}
if (!argument_.value) { if (!argument_.value) {
argument_.value = Matrix::create(nullptr, argument_.value = Matrix::create(nullptr,
...@@ -169,10 +188,14 @@ void forward(Argument& act) { ...@@ -169,10 +188,14 @@ void forward(Argument& act) {
auto starts = act.sequenceStartPositions->getVector(useGpu(act.deviceId)); auto starts = act.sequenceStartPositions->getVector(useGpu(act.deviceId));
act.value->sequenceSoftmax(*act.value, *starts); act.value->sequenceSoftmax(*act.value, *starts);
return Error();
} }
void backward(Argument& act) { Error __must_check backward(Argument& act) {
CHECK_EQ(act.grad->getWidth(), 1UL); if (act.value->getWidth() != 1UL) {
return Error(
"Input width for each timestep of sequence softmax should be 1");
}
size_t numSequences = act.getNumSequences(); size_t numSequences = act.getNumSequences();
const int* starts = act.sequenceStartPositions->getData(false); const int* starts = act.sequenceStartPositions->getData(false);
...@@ -184,8 +207,10 @@ void backward(Argument& act) { ...@@ -184,8 +207,10 @@ void backward(Argument& act) {
argument_.value->setData(act.value->getData() + offset, 1UL, size); argument_.value->setData(act.value->getData() + offset, 1UL, size);
argument_.grad->setData(act.grad->getData() + offset, 1UL, size); argument_.grad->setData(act.grad->getData() + offset, 1UL, size);
softmax_.backward(argument_); Error status = softmax_.backward(argument_);
if (!status) return status;
} }
return Error();
} }
END_DEFINE_ACTIVATION(sequence_softmax) END_DEFINE_ACTIVATION(sequence_softmax)
...@@ -200,9 +225,15 @@ END_DEFINE_ACTIVATION(sequence_softmax) ...@@ -200,9 +225,15 @@ END_DEFINE_ACTIVATION(sequence_softmax)
* 0 otherwise. * 0 otherwise.
*/ */
BEGIN_DEFINE_ACTIVATION(relu) BEGIN_DEFINE_ACTIVATION(relu)
void forward(Argument& act) { act.value->relu(*act.value); } Error __must_check forward(Argument& act) {
act.value->relu(*act.value);
return Error();
}
void backward(Argument& act) { act.grad->reluDerivative(*act.value); } Error __must_check backward(Argument& act) {
act.grad->reluDerivative(*act.value);
return Error();
}
END_DEFINE_ACTIVATION(relu) END_DEFINE_ACTIVATION(relu)
/** /**
...@@ -219,9 +250,15 @@ END_DEFINE_ACTIVATION(relu) ...@@ -219,9 +250,15 @@ END_DEFINE_ACTIVATION(relu)
* TODO(yuyang18): Remove magic number 24 or make it configuable. * TODO(yuyang18): Remove magic number 24 or make it configuable.
*/ */
BEGIN_DEFINE_ACTIVATION(brelu) BEGIN_DEFINE_ACTIVATION(brelu)
void forward(Argument& act) { act.value->brelu(*act.value); } Error __must_check forward(Argument& act) {
act.value->brelu(*act.value);
return Error();
}
void backward(Argument& act) { act.grad->breluDerivative(*act.value); } Error __must_check backward(Argument& act) {
act.grad->breluDerivative(*act.value);
return Error();
}
END_DEFINE_ACTIVATION(brelu) END_DEFINE_ACTIVATION(brelu)
/** /**
...@@ -231,9 +268,15 @@ END_DEFINE_ACTIVATION(brelu) ...@@ -231,9 +268,15 @@ END_DEFINE_ACTIVATION(brelu)
* \f] * \f]
*/ */
BEGIN_DEFINE_ACTIVATION(tanh) BEGIN_DEFINE_ACTIVATION(tanh)
void forward(Argument& act) { act.value->tanh(*act.value); } Error __must_check forward(Argument& act) {
act.value->tanh(*act.value);
return Error();
}
void backward(Argument& act) { act.grad->tanhDerivative(*act.value); } Error __must_check backward(Argument& act) {
act.grad->tanhDerivative(*act.value);
return Error();
}
END_DEFINE_ACTIVATION(tanh) END_DEFINE_ACTIVATION(tanh)
/** /**
...@@ -248,10 +291,14 @@ real a, b; ...@@ -248,10 +291,14 @@ real a, b;
public: public:
ACTIVATION_CLASS_NAME(stanh)() : a(1.7159), b(2. / 3.) {} ACTIVATION_CLASS_NAME(stanh)() : a(1.7159), b(2. / 3.) {}
void forward(Argument& act) { act.value->scaledTanh(*act.value, a, b); } Error __must_check forward(Argument& act) {
act.value->scaledTanh(*act.value, a, b);
return Error();
}
void backward(Argument& act) { Error __must_check backward(Argument& act) {
act.grad->scaledTanhDerivative(*act.value, a, b); act.grad->scaledTanhDerivative(*act.value, a, b);
return Error();
} }
END_DEFINE_ACTIVATION(stanh) END_DEFINE_ACTIVATION(stanh)
...@@ -262,9 +309,15 @@ END_DEFINE_ACTIVATION(stanh) ...@@ -262,9 +309,15 @@ END_DEFINE_ACTIVATION(stanh)
* \f] * \f]
*/ */
BEGIN_DEFINE_ACTIVATION(softrelu) BEGIN_DEFINE_ACTIVATION(softrelu)
void forward(Argument& act) { act.value->softrelu(*act.value); } Error __must_check forward(Argument& act) {
act.value->softrelu(*act.value);
return Error();
}
void backward(Argument& act) { act.grad->softreluDerivative(*act.value); } Error __must_check backward(Argument& act) {
act.grad->softreluDerivative(*act.value);
return Error();
}
END_DEFINE_ACTIVATION(softrelu) END_DEFINE_ACTIVATION(softrelu)
/** /**
...@@ -280,7 +333,7 @@ END_DEFINE_ACTIVATION(softrelu) ...@@ -280,7 +333,7 @@ END_DEFINE_ACTIVATION(softrelu)
* 0 if z=0 * 0 if z=0
*/ */
BEGIN_DEFINE_ACTIVATION(abs) BEGIN_DEFINE_ACTIVATION(abs)
void forward(Argument& act) { Error __must_check forward(Argument& act) {
SetDevice device(act.deviceId); SetDevice device(act.deviceId);
Matrix::resizeOrCreate(act.in, Matrix::resizeOrCreate(act.in,
act.value->getHeight(), act.value->getHeight(),
...@@ -290,9 +343,13 @@ void forward(Argument& act) { ...@@ -290,9 +343,13 @@ void forward(Argument& act) {
act.in->copyFrom(*act.value); act.in->copyFrom(*act.value);
act.value->abs2(*act.value); act.value->abs2(*act.value);
return Error();
} }
void backward(Argument& act) { act.grad->absDerivative(*act.in); } Error __must_check backward(Argument& act) {
act.grad->absDerivative(*act.in);
return Error();
}
END_DEFINE_ACTIVATION(abs) END_DEFINE_ACTIVATION(abs)
/** /**
...@@ -302,7 +359,7 @@ END_DEFINE_ACTIVATION(abs) ...@@ -302,7 +359,7 @@ END_DEFINE_ACTIVATION(abs)
* \f] * \f]
*/ */
BEGIN_DEFINE_ACTIVATION(square) BEGIN_DEFINE_ACTIVATION(square)
void forward(Argument& act) { Error __must_check forward(Argument& act) {
SetDevice device(act.deviceId); SetDevice device(act.deviceId);
Matrix::resizeOrCreate(act.in, Matrix::resizeOrCreate(act.in,
act.value->getHeight(), act.value->getHeight(),
...@@ -312,9 +369,13 @@ void forward(Argument& act) { ...@@ -312,9 +369,13 @@ void forward(Argument& act) {
act.in->copyFrom(*act.value); act.in->copyFrom(*act.value);
act.value->square2(*act.value); act.value->square2(*act.value);
return Error();
} }
void backward(Argument& act) { act.grad->squareDerivative(*act.in); } Error __must_check backward(Argument& act) {
act.grad->squareDerivative(*act.in);
return Error();
}
END_DEFINE_ACTIVATION(square) END_DEFINE_ACTIVATION(square)
/** /**
...@@ -324,9 +385,15 @@ END_DEFINE_ACTIVATION(square) ...@@ -324,9 +385,15 @@ END_DEFINE_ACTIVATION(square)
* \f] * \f]
*/ */
BEGIN_DEFINE_ACTIVATION(exponential) BEGIN_DEFINE_ACTIVATION(exponential)
void forward(Argument& act) { act.value->exp2(*act.value); } Error __must_check forward(Argument& act) {
act.value->exp2(*act.value);
return Error();
}
void backward(Argument& act) { act.grad->expDerivative(*act.value); } Error __must_check backward(Argument& act) {
act.grad->expDerivative(*act.value);
return Error();
}
END_DEFINE_ACTIVATION(exponential) END_DEFINE_ACTIVATION(exponential)
/** /**
...@@ -336,7 +403,7 @@ END_DEFINE_ACTIVATION(exponential) ...@@ -336,7 +403,7 @@ END_DEFINE_ACTIVATION(exponential)
* \f] * \f]
*/ */
BEGIN_DEFINE_ACTIVATION(log) BEGIN_DEFINE_ACTIVATION(log)
void forward(Argument& act) { Error __must_check forward(Argument& act) {
SetDevice device(act.deviceId); SetDevice device(act.deviceId);
Matrix::resizeOrCreate(act.in, Matrix::resizeOrCreate(act.in,
act.value->getHeight(), act.value->getHeight(),
...@@ -346,9 +413,13 @@ void forward(Argument& act) { ...@@ -346,9 +413,13 @@ void forward(Argument& act) {
act.in->copyFrom(*act.value); act.in->copyFrom(*act.value);
act.value->log2(*act.value); act.value->log2(*act.value);
return Error();
} }
void backward(Argument& act) { act.grad->dotDiv(*act.grad, *act.in); } Error __must_check backward(Argument& act) {
act.grad->dotDiv(*act.grad, *act.in);
return Error();
}
END_DEFINE_ACTIVATION(log) END_DEFINE_ACTIVATION(log)
ActivationFunction* ActivationFunction::create(const std::string& type) { ActivationFunction* ActivationFunction::create(const std::string& type) {
......
...@@ -15,6 +15,7 @@ limitations under the License. */ ...@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once #pragma once
#include <string> #include <string>
#include <vector> #include <vector>
#include "paddle/utils/Error.h"
namespace paddle { namespace paddle {
...@@ -48,7 +49,7 @@ public: ...@@ -48,7 +49,7 @@ public:
* *
* Usually, act is Layer::output_ * Usually, act is Layer::output_
*/ */
virtual void forward(Argument& act) = 0; virtual Error __must_check forward(Argument& act) = 0;
/** /**
* @brief Backward propagaion * @brief Backward propagaion
...@@ -57,7 +58,7 @@ public: ...@@ -57,7 +58,7 @@ public:
* - Before calling backward(), act.grad = dE / dy, where E is the error/cost * - Before calling backward(), act.grad = dE / dy, where E is the error/cost
* - After backward() returns, act.grad = dE / dx = (dE/dy) * (dy/dx) * - After backward() returns, act.grad = dE / dx = (dE/dy) * (dy/dx)
*/ */
virtual void backward(Argument& act) = 0; virtual Error __must_check backward(Argument& act) = 0;
virtual const std::string& getName() const = 0; virtual const std::string& getName() const = 0;
}; };
......
...@@ -15,6 +15,7 @@ limitations under the License. */ ...@@ -15,6 +15,7 @@ limitations under the License. */
#include "paddle/utils/Util.h" #include "paddle/utils/Util.h"
#include "paddle/math/SparseMatrix.h" #include "paddle/math/SparseMatrix.h"
#include "paddle/utils/Error.h"
#include "paddle/utils/Logging.h" #include "paddle/utils/Logging.h"
#include "AddtoLayer.h" #include "AddtoLayer.h"
...@@ -334,7 +335,8 @@ void Layer::showOutputStats() { ...@@ -334,7 +335,8 @@ void Layer::showOutputStats() {
void Layer::forwardActivation() { void Layer::forwardActivation() {
/* activation */ /* activation */
activation_->forward(output_); auto status = activation_->forward(output_);
status.check();
/* dropout */ /* dropout */
if (config_.drop_rate() > 0) { if (config_.drop_rate() > 0) {
...@@ -372,7 +374,8 @@ void Layer::backwardActivation() { ...@@ -372,7 +374,8 @@ void Layer::backwardActivation() {
oGrad->dotMul(*oGrad, *dropOutMask_); oGrad->dotMul(*oGrad, *dropOutMask_);
} }
activation_->backward(output_); auto status = activation_->backward(output_);
status.check();
} }
void Layer::forwardDropOut() { void Layer::forwardDropOut() {
......
...@@ -506,9 +506,12 @@ void MDLstmLayer::forwardGate2OutputSequence(int start, ...@@ -506,9 +506,12 @@ void MDLstmLayer::forwardGate2OutputSequence(int start,
*frameState_[start + preOffsetV[i]].value, *checkFgOneDim, 1.0, 1.0); *frameState_[start + preOffsetV[i]].value, *checkFgOneDim, 1.0, 1.0);
} }
} }
activationGate_->forward(frameInputGate_[idxCurr]); auto status = activationGate_->forward(frameInputGate_[idxCurr]);
activationGate_->forward(frameForgetGate_[idxCurr]); status.check();
activation_->forward(frameInputNode_[idxCurr]); status = activationGate_->forward(frameForgetGate_[idxCurr]);
status.check();
status = activation_->forward(frameInputNode_[idxCurr]);
status.check();
frameState_[idxCurr].value->zeroMem(); frameState_[idxCurr].value->zeroMem();
for (int i = 0; i < numDims_; i++) { for (int i = 0; i < numDims_; i++) {
...@@ -530,10 +533,12 @@ void MDLstmLayer::forwardGate2OutputSequence(int start, ...@@ -530,10 +533,12 @@ void MDLstmLayer::forwardGate2OutputSequence(int start,
frameOutputGate_[idxCurr].value->addDotMul( frameOutputGate_[idxCurr].value->addDotMul(
*frameState_[idxCurr].value, *checkOg_, 1.0, 1.0); *frameState_[idxCurr].value, *checkOg_, 1.0, 1.0);
activationGate_->forward(frameOutputGate_[idxCurr]); status = activationGate_->forward(frameOutputGate_[idxCurr]);
status.check();
framePreOutput_[idxCurr].value->copyFrom(*(frameState_[idxCurr].value)); framePreOutput_[idxCurr].value->copyFrom(*(frameState_[idxCurr].value));
activationState_->forward(framePreOutput_[idxCurr]); status = activationState_->forward(framePreOutput_[idxCurr]);
status.check();
frameOutput_[idxCurr].value->dotMul(*framePreOutput_[idxCurr].value, frameOutput_[idxCurr].value->dotMul(*framePreOutput_[idxCurr].value,
*frameOutputGate_[idxCurr].value); *frameOutputGate_[idxCurr].value);
...@@ -640,12 +645,12 @@ void MDLstmLayer::backwardGate2OutputSequence(int start, ...@@ -640,12 +645,12 @@ void MDLstmLayer::backwardGate2OutputSequence(int start,
framePreOutput_[idxCurr].grad->dotMul(*frameOutput_[idxCurr].grad, framePreOutput_[idxCurr].grad->dotMul(*frameOutput_[idxCurr].grad,
*frameOutputGate_[idxCurr].value); *frameOutputGate_[idxCurr].value);
activationState_->backward(framePreOutput_[idxCurr]); activationState_->backward(framePreOutput_[idxCurr]).check();
frameState_[idxCurr].grad->copyFrom(*(framePreOutput_[idxCurr].grad)); frameState_[idxCurr].grad->copyFrom(*(framePreOutput_[idxCurr].grad));
frameOutputGate_[idxCurr].grad->dotMul(*frameOutput_[idxCurr].grad, frameOutputGate_[idxCurr].grad->dotMul(*frameOutput_[idxCurr].grad,
*framePreOutput_[idxCurr].value); *framePreOutput_[idxCurr].value);
activationGate_->backward(frameOutputGate_[idxCurr]); activationGate_->backward(frameOutputGate_[idxCurr]).check();
frameState_[idxCurr].grad->addDotMul( frameState_[idxCurr].grad->addDotMul(
*frameOutputGate_[idxCurr].grad, *checkOg_, 1.0, 1.0); *frameOutputGate_[idxCurr].grad, *checkOg_, 1.0, 1.0);
...@@ -702,9 +707,9 @@ void MDLstmLayer::backwardGate2OutputSequence(int start, ...@@ -702,9 +707,9 @@ void MDLstmLayer::backwardGate2OutputSequence(int start,
} }
} }
activationGate_->backward(frameInputGate_[idxCurr]); activationGate_->backward(frameInputGate_[idxCurr]).check();
activationGate_->backward(frameForgetGate_[idxCurr]); activationGate_->backward(frameForgetGate_[idxCurr]).check();
activation_->backward(frameInputNode_[idxCurr]); activation_->backward(frameInputNode_[idxCurr]).check();
if (bias_->getWGrad()) { if (bias_->getWGrad()) {
for (int i = 0; i < numDims_; i++) { for (int i = 0; i < numDims_; i++) {
......
...@@ -193,7 +193,8 @@ public: ...@@ -193,7 +193,8 @@ public:
forwardOneInput(l); forwardOneInput(l);
} }
activation_->forward(sampleOut_); auto status = activation_->forward(sampleOut_);
status.check();
forwardCost(); forwardCost();
} }
...@@ -207,7 +208,8 @@ public: ...@@ -207,7 +208,8 @@ public:
backwardCost(); backwardCost();
activation_->backward(sampleOut_); auto status = activation_->backward(sampleOut_);
status.check();
if (biases_->getWGrad()) { if (biases_->getWGrad()) {
backwardBias(callback); backwardBias(callback);
......
...@@ -217,21 +217,22 @@ void RecurrentLayer::forwardOneSequence(int start, int length) { ...@@ -217,21 +217,22 @@ void RecurrentLayer::forwardOneSequence(int start, int length) {
if (prevOutput_) { if (prevOutput_) {
frameOutput_[start].value->mul(*prevOutput_, *weight_->getW(), 1, 1); frameOutput_[start].value->mul(*prevOutput_, *weight_->getW(), 1, 1);
} }
activation_->forward(frameOutput_[start]); activation_->forward(frameOutput_[start]).check();
for (int i = 1; i < length; ++i) { for (int i = 1; i < length; ++i) {
frameOutput_[start + i].value->mul( frameOutput_[start + i].value->mul(
*frameOutput_[start + i - 1].value, *weight_->getW(), 1, 1); *frameOutput_[start + i - 1].value, *weight_->getW(), 1, 1);
activation_->forward(frameOutput_[start + i]); activation_->forward(frameOutput_[start + i]).check();
} }
if (prevOutput_) { if (prevOutput_) {
prevOutput_->assign(*frameOutput_[start + length - 1].value); prevOutput_->assign(*frameOutput_[start + length - 1].value);
} }
} else { } else {
activation_->forward(frameOutput_[start + length - 1]); activation_->forward(frameOutput_[start + length - 1]).check();
for (int i = length - 2; i >= 0; --i) { for (int i = length - 2; i >= 0; --i) {
frameOutput_[start + i].value->mul( frameOutput_[start + i].value->mul(
*frameOutput_[start + i + 1].value, *weight_->getW(), 1, 1); *frameOutput_[start + i + 1].value, *weight_->getW(), 1, 1);
activation_->forward(frameOutput_[start + i]); activation_->forward(frameOutput_[start + i]).check();
} }
} }
} }
...@@ -280,11 +281,11 @@ void RecurrentLayer::backwardOneSequence(int start, int length) { ...@@ -280,11 +281,11 @@ void RecurrentLayer::backwardOneSequence(int start, int length) {
MatrixPtr weightT = weight_->getW()->getTranspose(); MatrixPtr weightT = weight_->getW()->getTranspose();
if (!reversed_) { if (!reversed_) {
for (int i = length - 1; i > 0; --i) { for (int i = length - 1; i > 0; --i) {
activation_->backward(frameOutput_[start + i]); activation_->backward(frameOutput_[start + i]).check();
frameOutput_[start + i - 1].grad->mul( frameOutput_[start + i - 1].grad->mul(
*frameOutput_[start + i].grad, *weightT, 1, 1); *frameOutput_[start + i].grad, *weightT, 1, 1);
} }
activation_->backward(frameOutput_[start]); activation_->backward(frameOutput_[start]).check();
if (weight_->getWGrad()) { if (weight_->getWGrad()) {
weight_->getWGrad()->mul( weight_->getWGrad()->mul(
*output_.value->subMatrix(start, length - 1)->getTranspose(), *output_.value->subMatrix(start, length - 1)->getTranspose(),
...@@ -294,11 +295,11 @@ void RecurrentLayer::backwardOneSequence(int start, int length) { ...@@ -294,11 +295,11 @@ void RecurrentLayer::backwardOneSequence(int start, int length) {
} }
} else { } else {
for (int i = 0; i < length - 1; ++i) { for (int i = 0; i < length - 1; ++i) {
activation_->backward(frameOutput_[start + i]); activation_->backward(frameOutput_[start + i]).check();
frameOutput_[start + i + 1].grad->mul( frameOutput_[start + i + 1].grad->mul(
*frameOutput_[start + i].grad, *weightT, 1, 1); *frameOutput_[start + i].grad, *weightT, 1, 1);
} }
activation_->backward(frameOutput_[start + length - 1]); activation_->backward(frameOutput_[start + length - 1]).check();
if (weight_->getWGrad()) { if (weight_->getWGrad()) {
weight_->getWGrad()->mul( weight_->getWGrad()->mul(
*output_.value->subMatrix(start + 1, length - 1)->getTranspose(), *output_.value->subMatrix(start + 1, length - 1)->getTranspose(),
...@@ -333,7 +334,7 @@ void RecurrentLayer::forwardBatch(int batchSize, ...@@ -333,7 +334,7 @@ void RecurrentLayer::forwardBatch(int batchSize,
} }
Argument arg; Argument arg;
arg.value = batch2; arg.value = batch2;
activation_->forward(arg); activation_->forward(arg).check();
} }
} }
batchValue_->copyBackSeq(*output_.value); batchValue_->copyBackSeq(*output_.value);
...@@ -363,7 +364,7 @@ void RecurrentLayer::backwardBatch(int batchSize, ...@@ -363,7 +364,7 @@ void RecurrentLayer::backwardBatch(int batchSize,
Argument arg; Argument arg;
arg.value = batch1; arg.value = batch1;
arg.grad = batch2; arg.grad = batch2;
activation_->backward(arg); activation_->backward(arg).check();
if (n != 0) { if (n != 0) {
batch1 = batchGrad_->getBatchValue(n - 1, batch2->getHeight()); batch1 = batchGrad_->getBatchValue(n - 1, batch2->getHeight());
......
...@@ -192,7 +192,8 @@ void SelectiveFullyConnectedLayer::forward(PassType passType) { ...@@ -192,7 +192,8 @@ void SelectiveFullyConnectedLayer::forward(PassType passType) {
nnz, nnz,
/*trans=*/false, /*trans=*/false,
/*useGpu=*/useGpu_); /*useGpu=*/useGpu_);
activation_->forward(arg); //! TODO(yuyang18): Why we cannot invoke forwardActivation here?
activation_->forward(arg).check();
} else /* train and test in train, not generating */ { } else /* train and test in train, not generating */ {
// during training, this layer output value is *Matrix*, which is input of // during training, this layer output value is *Matrix*, which is input of
// eg. multi-class-cross-entropy // eg. multi-class-cross-entropy
......
...@@ -148,11 +148,11 @@ LayerPtr createCTCLayer(string name, ...@@ -148,11 +148,11 @@ LayerPtr createCTCLayer(string name,
ActivationFunction* softmaxActivation = ActivationFunction::create("softmax"); ActivationFunction* softmaxActivation = ActivationFunction::create("softmax");
softmaxActivation->forward(dataLayer->getOutput()); softmaxActivation->forward(dataLayer->getOutput()).check();
layer->forward(PASS_GC); layer->forward(PASS_GC);
layer->backward(); layer->backward();
softmaxActivation->backward(dataLayer->getOutput()); softmaxActivation->backward(dataLayer->getOutput()).check();
return layer; return layer;
} }
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
/**
 * This header defines some useful attributes by each compiler. It is the
 * abstraction layer of compilers.
 */
#ifdef __GNUC__
#define GCC_VERSION \
  (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
#else
// Must expand to 0 (not to nothing): an empty expansion would turn the
// `#if GCC_VERSION >= 30400` test below into the ill-formed directive
// `#if >= 30400` on non-GCC compilers.
#define GCC_VERSION 0
#endif

/**
 * __must_check macro. It makes a function's return value required to be used,
 * otherwise it raises a compile warning. And also Paddle treats all compile
 * warnings as errors.
 */
#if GCC_VERSION >= 30400
#define __must_check __attribute__((warn_unused_result))
#else
#define __must_check
#endif
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <glog/logging.h>
#include <stdarg.h>
#include <stdio.h>
#include <memory>
#include <string>
#include "Compiler.h"
namespace paddle {
/**
* Error is Paddle error code. It only contain a std::string as error message.
*
*
* There are two styles to return error in Paddle.
*
* 1. Return Error
* When method return a status, the return must use `__must_check` attribute.
* Example as below.
* @code{cpp}
* Error __must_check foo();
*
 * Error __must_check bar() {
 * // do something.
 * Error s = foo(); // invoke another method that returns an Error.
 * if (!s) return s;
 * // do something else.
 * return Error();
 * }
* @endcode{cpp}
*
* 2. Return by parameter.
* It is another way to return an error, by using a pointer parameter.
* Example as below.
*
* @code{cpp}
* Error bar();
*
* int foo(Error* error) {
* // Do something.
* Error s = bar();
* if (!s) {
* *error = s;
* return 0;
* }
* // Do something else.
 * if (someInternalErrorHappened) {
* *error = Error("Some dimension is too large, %d", dimension);
* return 0;
* }
* // End of method.
* return someValue;
* }
*
* Error foobar() {
* Error s;
* // do something.
* foo(&s);
* if (!s) return s;
* }
* @endcode{cpp}
*
*
* Currently there is a helper method 'check' in status, because Paddle always
* use log(FATAL) or CHECK to make program exit before. When we clean all
* log(FATAL) and CHECK in Paddle, 'check' method will be removed.
*/
class Error {
public:
  /**
   * @brief Default-construct a success (no-error) value.
   */
  Error() {}

  /**
   * @brief Build an Error whose message is produced with printf-style
   *        formatting.
   * @note Messages longer than the internal 1024-byte buffer are truncated
   *       by vsnprintf.
   */
  explicit Error(const char* fmt, ...) {
    constexpr size_t kBufferSize = 1024;
    char buffer[kBufferSize];
    va_list args;
    va_start(args, fmt);
    vsnprintf(buffer, kBufferSize, fmt, args);
    va_end(args);
    msg_ = std::make_shared<std::string>(buffer);
  }

  /**
   * @brief The error message, or nullptr when this value represents success.
   */
  const char* msg() const { return msg_ ? msg_->c_str() : nullptr; }

  /**
   * @brief operator bool, true when there is no error.
   */
  operator bool() const { return !msg_; }

  /**
   * @brief Abort via glog when this holds an error.
   * @note Temporary helper used while LOG(FATAL)/CHECK call sites are being
   *       cleaned up in Paddle; it will be removed later.
   */
  void check() const { CHECK(*this) << msg(); }

private:
  // Empty (null) pointer means success; a set pointer carries the message.
  std::shared_ptr<std::string> msg_;
};
} // namespace paddle
...@@ -4,6 +4,7 @@ add_simple_unittest(test_CustomStackTrace) ...@@ -4,6 +4,7 @@ add_simple_unittest(test_CustomStackTrace)
add_simple_unittest(test_ThreadBarrier) add_simple_unittest(test_ThreadBarrier)
add_simple_unittest(test_SpinLock) add_simple_unittest(test_SpinLock)
add_simple_unittest(test_SIMDFlags) add_simple_unittest(test_SIMDFlags)
add_simple_unittest(test_Error)
add_executable( add_executable(
test_CustomStackTracePrint test_CustomStackTracePrint
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/utils/Error.h"
#include <gtest/gtest.h>
// Exercises paddle::Error end to end: a default-constructed Error means
// success (converts to true, msg() is nullptr), while an Error built from a
// printf-style format converts to false and carries the formatted message.
TEST(Error, testAll) {
  paddle::Error error;
  ASSERT_TRUE(error);  // default construction == no error
  error = paddle::Error("I'm the error");
  ASSERT_FALSE(error);  // a message makes the value falsy
  ASSERT_STREQ("I'm the error", error.msg());
  error = paddle::Error("error2");  // re-assignment replaces the message
  ASSERT_FALSE(error);
  ASSERT_STREQ("error2", error.msg());
  int i = 3;
  auto error3 = paddle::Error("error%d", i);  // printf-style formatting
  ASSERT_FALSE(error3);
  ASSERT_STREQ("error3", error3.msg());
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册