提交 7f0ad620 编写于 作者: Y Yu Yang 提交者: GitHub

Merge pull request #1149 from reyoung/feature/ErrorHandlingInPaddle

Feature/error handling in paddle
......@@ -69,8 +69,14 @@ static ClassRegistrar<ActivationFunction> gActivationRegistrar;
class IdentityActivation : public ActivationFunction {
public:
static const std::string name;
void forward(Argument& act) { (void)act; }
void backward(Argument& act) { (void)act; }
Error __must_check forward(Argument& act) {
(void)act;
return Error();
}
Error __must_check backward(Argument& act) {
(void)act;
return Error();
}
const std::string& getName() const { return name; }
};
const std::string IdentityActivation::name = "";
......@@ -86,8 +92,14 @@ static InitFunction __reg_activation__identity([] {
* \f]
*/
BEGIN_DEFINE_ACTIVATION(sigmoid)
void forward(Argument& act) { act.value->sigmoid(*act.value); }
void backward(Argument& act) { act.grad->sigmoidDerivative(*act.value); }
Error __must_check forward(Argument& act) {
act.value->sigmoid(*act.value);
return Error();
}
Error __must_check backward(Argument& act) {
act.grad->sigmoidDerivative(*act.value);
return Error();
}
END_DEFINE_ACTIVATION(sigmoid)
/**
......@@ -103,9 +115,12 @@ MatrixPtr sftMaxDot_;
MatrixPtr one_;
public:
void forward(Argument& act) { act.value->softmax(*act.value); }
Error __must_check forward(Argument& act) {
act.value->softmax(*act.value);
return Error();
}
void backward(Argument& act) {
Error __must_check backward(Argument& act) {
MatrixPtr outputV = act.value;
MatrixPtr outputG = act.grad;
......@@ -137,6 +152,7 @@ void backward(Argument& act) {
act.grad->softmaxDerivative(*act.value, *sftMaxSum_);
}
return Error();
}
END_DEFINE_ACTIVATION(softmax)
......@@ -151,8 +167,11 @@ ACTIVATION_CLASS_NAME(softmax) softmax_;
Argument argument_;
public:
void forward(Argument& act) {
CHECK_EQ(act.value->getWidth(), 1UL);
Error __must_check forward(Argument& act) {
if (act.value->getWidth() != 1UL) {
return Error(
"Input width for each timestep of sequence softmax should be 1");
}
if (!argument_.value) {
argument_.value = Matrix::create(nullptr,
......@@ -169,10 +188,14 @@ void forward(Argument& act) {
auto starts = act.sequenceStartPositions->getVector(useGpu(act.deviceId));
act.value->sequenceSoftmax(*act.value, *starts);
return Error();
}
void backward(Argument& act) {
CHECK_EQ(act.grad->getWidth(), 1UL);
Error __must_check backward(Argument& act) {
if (act.value->getWidth() != 1UL) {
return Error(
"Input width for each timestep of sequence softmax should be 1");
}
size_t numSequences = act.getNumSequences();
const int* starts = act.sequenceStartPositions->getData(false);
......@@ -184,8 +207,10 @@ void backward(Argument& act) {
argument_.value->setData(act.value->getData() + offset, 1UL, size);
argument_.grad->setData(act.grad->getData() + offset, 1UL, size);
softmax_.backward(argument_);
Error status = softmax_.backward(argument_);
if (!status) return status;
}
return Error();
}
END_DEFINE_ACTIVATION(sequence_softmax)
......@@ -200,9 +225,15 @@ END_DEFINE_ACTIVATION(sequence_softmax)
* 0 otherwise.
*/
BEGIN_DEFINE_ACTIVATION(relu)
void forward(Argument& act) { act.value->relu(*act.value); }
Error __must_check forward(Argument& act) {
act.value->relu(*act.value);
return Error();
}
void backward(Argument& act) { act.grad->reluDerivative(*act.value); }
Error __must_check backward(Argument& act) {
act.grad->reluDerivative(*act.value);
return Error();
}
END_DEFINE_ACTIVATION(relu)
/**
......@@ -219,9 +250,15 @@ END_DEFINE_ACTIVATION(relu)
* TODO(yuyang18): Remove magic number 24 or make it configurable.
*/
BEGIN_DEFINE_ACTIVATION(brelu)
void forward(Argument& act) { act.value->brelu(*act.value); }
Error __must_check forward(Argument& act) {
act.value->brelu(*act.value);
return Error();
}
void backward(Argument& act) { act.grad->breluDerivative(*act.value); }
Error __must_check backward(Argument& act) {
act.grad->breluDerivative(*act.value);
return Error();
}
END_DEFINE_ACTIVATION(brelu)
/**
......@@ -231,9 +268,15 @@ END_DEFINE_ACTIVATION(brelu)
* \f]
*/
BEGIN_DEFINE_ACTIVATION(tanh)
void forward(Argument& act) { act.value->tanh(*act.value); }
Error __must_check forward(Argument& act) {
act.value->tanh(*act.value);
return Error();
}
void backward(Argument& act) { act.grad->tanhDerivative(*act.value); }
Error __must_check backward(Argument& act) {
act.grad->tanhDerivative(*act.value);
return Error();
}
END_DEFINE_ACTIVATION(tanh)
/**
......@@ -248,10 +291,14 @@ real a, b;
public:
ACTIVATION_CLASS_NAME(stanh)() : a(1.7159), b(2. / 3.) {}
void forward(Argument& act) { act.value->scaledTanh(*act.value, a, b); }
Error __must_check forward(Argument& act) {
act.value->scaledTanh(*act.value, a, b);
return Error();
}
void backward(Argument& act) {
Error __must_check backward(Argument& act) {
act.grad->scaledTanhDerivative(*act.value, a, b);
return Error();
}
END_DEFINE_ACTIVATION(stanh)
......@@ -262,9 +309,15 @@ END_DEFINE_ACTIVATION(stanh)
* \f]
*/
BEGIN_DEFINE_ACTIVATION(softrelu)
void forward(Argument& act) { act.value->softrelu(*act.value); }
Error __must_check forward(Argument& act) {
act.value->softrelu(*act.value);
return Error();
}
void backward(Argument& act) { act.grad->softreluDerivative(*act.value); }
Error __must_check backward(Argument& act) {
act.grad->softreluDerivative(*act.value);
return Error();
}
END_DEFINE_ACTIVATION(softrelu)
/**
......@@ -280,7 +333,7 @@ END_DEFINE_ACTIVATION(softrelu)
* 0 if z=0
*/
BEGIN_DEFINE_ACTIVATION(abs)
void forward(Argument& act) {
Error __must_check forward(Argument& act) {
SetDevice device(act.deviceId);
Matrix::resizeOrCreate(act.in,
act.value->getHeight(),
......@@ -290,9 +343,13 @@ void forward(Argument& act) {
act.in->copyFrom(*act.value);
act.value->abs2(*act.value);
return Error();
}
void backward(Argument& act) { act.grad->absDerivative(*act.in); }
Error __must_check backward(Argument& act) {
act.grad->absDerivative(*act.in);
return Error();
}
END_DEFINE_ACTIVATION(abs)
/**
......@@ -302,7 +359,7 @@ END_DEFINE_ACTIVATION(abs)
* \f]
*/
BEGIN_DEFINE_ACTIVATION(square)
void forward(Argument& act) {
Error __must_check forward(Argument& act) {
SetDevice device(act.deviceId);
Matrix::resizeOrCreate(act.in,
act.value->getHeight(),
......@@ -312,9 +369,13 @@ void forward(Argument& act) {
act.in->copyFrom(*act.value);
act.value->square2(*act.value);
return Error();
}
void backward(Argument& act) { act.grad->squareDerivative(*act.in); }
Error __must_check backward(Argument& act) {
act.grad->squareDerivative(*act.in);
return Error();
}
END_DEFINE_ACTIVATION(square)
/**
......@@ -324,9 +385,15 @@ END_DEFINE_ACTIVATION(square)
* \f]
*/
BEGIN_DEFINE_ACTIVATION(exponential)
void forward(Argument& act) { act.value->exp2(*act.value); }
Error __must_check forward(Argument& act) {
act.value->exp2(*act.value);
return Error();
}
void backward(Argument& act) { act.grad->expDerivative(*act.value); }
Error __must_check backward(Argument& act) {
act.grad->expDerivative(*act.value);
return Error();
}
END_DEFINE_ACTIVATION(exponential)
/**
......@@ -336,7 +403,7 @@ END_DEFINE_ACTIVATION(exponential)
* \f]
*/
BEGIN_DEFINE_ACTIVATION(log)
void forward(Argument& act) {
Error __must_check forward(Argument& act) {
SetDevice device(act.deviceId);
Matrix::resizeOrCreate(act.in,
act.value->getHeight(),
......@@ -346,9 +413,13 @@ void forward(Argument& act) {
act.in->copyFrom(*act.value);
act.value->log2(*act.value);
return Error();
}
void backward(Argument& act) { act.grad->dotDiv(*act.grad, *act.in); }
Error __must_check backward(Argument& act) {
act.grad->dotDiv(*act.grad, *act.in);
return Error();
}
END_DEFINE_ACTIVATION(log)
ActivationFunction* ActivationFunction::create(const std::string& type) {
......
......@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once
#include <string>
#include <vector>
#include "paddle/utils/Error.h"
namespace paddle {
......@@ -48,7 +49,7 @@ public:
*
* Usually, act is Layer::output_
*/
virtual void forward(Argument& act) = 0;
virtual Error __must_check forward(Argument& act) = 0;
/**
* @brief Backward propagation
......@@ -57,7 +58,7 @@ public:
* - Before calling backward(), act.grad = dE / dy, where E is the error/cost
* - After backward() returns, act.grad = dE / dx = (dE/dy) * (dy/dx)
*/
virtual void backward(Argument& act) = 0;
virtual Error __must_check backward(Argument& act) = 0;
virtual const std::string& getName() const = 0;
};
......
......@@ -15,6 +15,7 @@ limitations under the License. */
#include "paddle/utils/Util.h"
#include "paddle/math/SparseMatrix.h"
#include "paddle/utils/Error.h"
#include "paddle/utils/Logging.h"
#include "AddtoLayer.h"
......@@ -334,7 +335,8 @@ void Layer::showOutputStats() {
void Layer::forwardActivation() {
/* activation */
activation_->forward(output_);
auto status = activation_->forward(output_);
status.check();
/* dropout */
if (config_.drop_rate() > 0) {
......@@ -372,7 +374,8 @@ void Layer::backwardActivation() {
oGrad->dotMul(*oGrad, *dropOutMask_);
}
activation_->backward(output_);
auto status = activation_->backward(output_);
status.check();
}
void Layer::forwardDropOut() {
......
......@@ -506,9 +506,12 @@ void MDLstmLayer::forwardGate2OutputSequence(int start,
*frameState_[start + preOffsetV[i]].value, *checkFgOneDim, 1.0, 1.0);
}
}
activationGate_->forward(frameInputGate_[idxCurr]);
activationGate_->forward(frameForgetGate_[idxCurr]);
activation_->forward(frameInputNode_[idxCurr]);
auto status = activationGate_->forward(frameInputGate_[idxCurr]);
status.check();
status = activationGate_->forward(frameForgetGate_[idxCurr]);
status.check();
status = activation_->forward(frameInputNode_[idxCurr]);
status.check();
frameState_[idxCurr].value->zeroMem();
for (int i = 0; i < numDims_; i++) {
......@@ -530,10 +533,12 @@ void MDLstmLayer::forwardGate2OutputSequence(int start,
frameOutputGate_[idxCurr].value->addDotMul(
*frameState_[idxCurr].value, *checkOg_, 1.0, 1.0);
activationGate_->forward(frameOutputGate_[idxCurr]);
status = activationGate_->forward(frameOutputGate_[idxCurr]);
status.check();
framePreOutput_[idxCurr].value->copyFrom(*(frameState_[idxCurr].value));
activationState_->forward(framePreOutput_[idxCurr]);
status = activationState_->forward(framePreOutput_[idxCurr]);
status.check();
frameOutput_[idxCurr].value->dotMul(*framePreOutput_[idxCurr].value,
*frameOutputGate_[idxCurr].value);
......@@ -640,12 +645,12 @@ void MDLstmLayer::backwardGate2OutputSequence(int start,
framePreOutput_[idxCurr].grad->dotMul(*frameOutput_[idxCurr].grad,
*frameOutputGate_[idxCurr].value);
activationState_->backward(framePreOutput_[idxCurr]);
activationState_->backward(framePreOutput_[idxCurr]).check();
frameState_[idxCurr].grad->copyFrom(*(framePreOutput_[idxCurr].grad));
frameOutputGate_[idxCurr].grad->dotMul(*frameOutput_[idxCurr].grad,
*framePreOutput_[idxCurr].value);
activationGate_->backward(frameOutputGate_[idxCurr]);
activationGate_->backward(frameOutputGate_[idxCurr]).check();
frameState_[idxCurr].grad->addDotMul(
*frameOutputGate_[idxCurr].grad, *checkOg_, 1.0, 1.0);
......@@ -702,9 +707,9 @@ void MDLstmLayer::backwardGate2OutputSequence(int start,
}
}
activationGate_->backward(frameInputGate_[idxCurr]);
activationGate_->backward(frameForgetGate_[idxCurr]);
activation_->backward(frameInputNode_[idxCurr]);
activationGate_->backward(frameInputGate_[idxCurr]).check();
activationGate_->backward(frameForgetGate_[idxCurr]).check();
activation_->backward(frameInputNode_[idxCurr]).check();
if (bias_->getWGrad()) {
for (int i = 0; i < numDims_; i++) {
......
......@@ -193,7 +193,8 @@ public:
forwardOneInput(l);
}
activation_->forward(sampleOut_);
auto status = activation_->forward(sampleOut_);
status.check();
forwardCost();
}
......@@ -207,7 +208,8 @@ public:
backwardCost();
activation_->backward(sampleOut_);
auto status = activation_->backward(sampleOut_);
status.check();
if (biases_->getWGrad()) {
backwardBias(callback);
......
......@@ -217,21 +217,22 @@ void RecurrentLayer::forwardOneSequence(int start, int length) {
if (prevOutput_) {
frameOutput_[start].value->mul(*prevOutput_, *weight_->getW(), 1, 1);
}
activation_->forward(frameOutput_[start]);
activation_->forward(frameOutput_[start]).check();
for (int i = 1; i < length; ++i) {
frameOutput_[start + i].value->mul(
*frameOutput_[start + i - 1].value, *weight_->getW(), 1, 1);
activation_->forward(frameOutput_[start + i]);
activation_->forward(frameOutput_[start + i]).check();
}
if (prevOutput_) {
prevOutput_->assign(*frameOutput_[start + length - 1].value);
}
} else {
activation_->forward(frameOutput_[start + length - 1]);
activation_->forward(frameOutput_[start + length - 1]).check();
for (int i = length - 2; i >= 0; --i) {
frameOutput_[start + i].value->mul(
*frameOutput_[start + i + 1].value, *weight_->getW(), 1, 1);
activation_->forward(frameOutput_[start + i]);
activation_->forward(frameOutput_[start + i]).check();
}
}
}
......@@ -280,11 +281,11 @@ void RecurrentLayer::backwardOneSequence(int start, int length) {
MatrixPtr weightT = weight_->getW()->getTranspose();
if (!reversed_) {
for (int i = length - 1; i > 0; --i) {
activation_->backward(frameOutput_[start + i]);
activation_->backward(frameOutput_[start + i]).check();
frameOutput_[start + i - 1].grad->mul(
*frameOutput_[start + i].grad, *weightT, 1, 1);
}
activation_->backward(frameOutput_[start]);
activation_->backward(frameOutput_[start]).check();
if (weight_->getWGrad()) {
weight_->getWGrad()->mul(
*output_.value->subMatrix(start, length - 1)->getTranspose(),
......@@ -294,11 +295,11 @@ void RecurrentLayer::backwardOneSequence(int start, int length) {
}
} else {
for (int i = 0; i < length - 1; ++i) {
activation_->backward(frameOutput_[start + i]);
activation_->backward(frameOutput_[start + i]).check();
frameOutput_[start + i + 1].grad->mul(
*frameOutput_[start + i].grad, *weightT, 1, 1);
}
activation_->backward(frameOutput_[start + length - 1]);
activation_->backward(frameOutput_[start + length - 1]).check();
if (weight_->getWGrad()) {
weight_->getWGrad()->mul(
*output_.value->subMatrix(start + 1, length - 1)->getTranspose(),
......@@ -333,7 +334,7 @@ void RecurrentLayer::forwardBatch(int batchSize,
}
Argument arg;
arg.value = batch2;
activation_->forward(arg);
activation_->forward(arg).check();
}
}
batchValue_->copyBackSeq(*output_.value);
......@@ -363,7 +364,7 @@ void RecurrentLayer::backwardBatch(int batchSize,
Argument arg;
arg.value = batch1;
arg.grad = batch2;
activation_->backward(arg);
activation_->backward(arg).check();
if (n != 0) {
batch1 = batchGrad_->getBatchValue(n - 1, batch2->getHeight());
......
......@@ -192,7 +192,8 @@ void SelectiveFullyConnectedLayer::forward(PassType passType) {
nnz,
/*trans=*/false,
/*useGpu=*/useGpu_);
activation_->forward(arg);
//! TODO(yuyang18): Why can't we invoke forwardActivation here?
activation_->forward(arg).check();
} else /* train and test in train, not generating */ {
// during training, this layer output value is *Matrix*, which is input of
// eg. multi-class-cross-entropy
......
......@@ -148,11 +148,11 @@ LayerPtr createCTCLayer(string name,
ActivationFunction* softmaxActivation = ActivationFunction::create("softmax");
softmaxActivation->forward(dataLayer->getOutput());
softmaxActivation->forward(dataLayer->getOutput()).check();
layer->forward(PASS_GC);
layer->backward();
softmaxActivation->backward(dataLayer->getOutput());
softmaxActivation->backward(dataLayer->getOutput()).check();
return layer;
}
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
/**
 * This header defines some useful attributes for each compiler. It is the
 * abstraction layer over compiler-specific extensions.
 */

/**
 * GCC_VERSION encodes the version of GCC (or of a compiler that defines the
 * __GNUC__ family of macros) as a single integer:
 *   major * 10000 + minor * 100 + patchlevel.
 *
 * When __GNUC__ is not defined it is 0 rather than empty, so that it can
 * always be used as an operand in an #if comparison; an empty expansion would
 * turn `#if GCC_VERSION >= 30400` into the ill-formed `#if >= 30400`.
 */
#ifdef __GNUC__
#define GCC_VERSION \
  (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
#else
#define GCC_VERSION 0
#endif

/**
 * __must_check macro. It requires the return value of the annotated function
 * to be used; otherwise the compiler raises a warning. Paddle treats all
 * compile warnings as errors, so an ignored return value fails the build.
 *
 * On compilers where GCC_VERSION is below 3.4.0 (including every compiler for
 * which GCC_VERSION is 0) the macro expands to nothing.
 */
#if GCC_VERSION >= 30400
#define __must_check __attribute__((warn_unused_result))
#else
#define __must_check
#endif
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <glog/logging.h>
#include <stdarg.h>
#include <stdio.h>
#include <memory>
#include <string>
#include "Compiler.h"
namespace paddle {
/**
 * Error is Paddle's error code. It only contains a std::string as the error
 * message; an Error without a message means "no error".
 *
 * There are two styles to return an error in Paddle.
 *
 * 1. Return Error
 *    When a method returns an Error, it must be declared with the
 *    `__must_check` attribute. Example as below.
 *    @code{cpp}
 *    Error __must_check foo();
 *
 *    Error __must_check bar() {
 *      // do something.
 *      Error s = foo();  // invoke another method that returns an Error.
 *      if (!s) return s;
 *      // do something else.
 *      return Error();
 *    }
 *    @endcode
 *
 * 2. Return by parameter.
 *    It is another way to return an error, by using a pointer parameter.
 *    Example as below.
 *
 *    @code{cpp}
 *    Error bar();
 *
 *    int foo(Error* error) {
 *      // Do something.
 *      Error s = bar();
 *      if (!s) {
 *        *error = s;
 *        return 0;
 *      }
 *      // Do something else.
 *      if (someInternalErrorHappened) {
 *        *error = Error("Some dimension is too large, %d", dimension);
 *        return 0;
 *      }
 *      // End of method.
 *      return someValue;
 *    }
 *
 *    Error foobar() {
 *      Error s;
 *      // do something.
 *      foo(&s);
 *      if (!s) return s;
 *      return Error();
 *    }
 *    @endcode
 *
 * Currently there is a helper method 'check' in Error, because Paddle has
 * always used log(FATAL) or CHECK to make the program exit before. Once all
 * log(FATAL) and CHECK are cleaned from Paddle, the 'check' method will be
 * removed.
 */
class Error {
public:
  /**
   * Construct a no-error value.
   */
  Error() {}

  /**
   * @brief Create an Error using printf syntax.
   * @param fmt printf-style format string; the variadic arguments that follow
   *            must match its conversion specifiers.
   *
   * Messages that do not fit into the on-stack buffer are formatted a second
   * time into a heap buffer of the exact size, so they are not truncated. If
   * formatting fails, the raw format string is stored as the message.
   */
  explicit Error(const char* fmt, ...) {
    va_list ap;
    va_start(ap, fmt);
    // A va_list can only be traversed once; keep a copy in case a second
    // formatting pass is required.
    va_list retry;
    va_copy(retry, ap);

    constexpr size_t kBufferSize = 1024;
    char buffer[kBufferSize];
    const int needed = vsnprintf(buffer, kBufferSize, fmt, ap);
    va_end(ap);

    if (needed < 0) {
      // Formatting failed, so the buffer content cannot be trusted. Keep the
      // unformatted text so that the object still reports an error.
      this->msg_.reset(new std::string(fmt));
    } else if (static_cast<size_t>(needed) < kBufferSize) {
      this->msg_.reset(new std::string(buffer));
    } else {
      // The message was truncated; `needed` excludes the terminating NUL.
      this->msg_.reset(
          new std::string(static_cast<size_t>(needed) + 1, '\0'));
      vsnprintf(&(*this->msg_)[0], this->msg_->size(), fmt, retry);
      this->msg_->resize(static_cast<size_t>(needed));
    }
    va_end(retry);
  }

  /**
   * @brief msg will return the error message. If no error, return nullptr.
   */
  const char* msg() const {
    if (msg_) {
      return msg_->c_str();
    } else {
      return nullptr;
    }
  }

  /**
   * @brief operator bool, return true if there is no error.
   * @note The conversion is implicit, so an Error can be used directly as a
   *       condition, e.g. `if (!err) return err;`.
   */
  operator bool() const { return msg_ == nullptr; }

  /**
   * @brief check this status by glog; the CHECK fails, streaming the error
   *        message, when this object holds an error.
   * @note It is a temp method used during cleaning Paddle code. It will be
   *       removed later.
   */
  void check() const { CHECK(*this) << msg(); }

private:
  // Null when there is no error; otherwise the formatted error message.
  std::shared_ptr<std::string> msg_;
};
} // namespace paddle
......@@ -4,6 +4,7 @@ add_simple_unittest(test_CustomStackTrace)
add_simple_unittest(test_ThreadBarrier)
add_simple_unittest(test_SpinLock)
add_simple_unittest(test_SIMDFlags)
add_simple_unittest(test_Error)
add_executable(
test_CustomStackTracePrint
......
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/utils/Error.h"
#include <gtest/gtest.h>
// Walks paddle::Error through its states: no error, an error built from a
// plain message, reassignment to another error, and an error built from a
// printf-style format with an argument.
TEST(Error, testAll) {
  // A default-constructed Error converts to true (no error).
  paddle::Error status;
  ASSERT_TRUE(status);

  // Assigning an Error that carries a message makes it convert to false and
  // exposes that message through msg().
  status = paddle::Error("I'm the error");
  ASSERT_FALSE(status);
  ASSERT_STREQ("I'm the error", status.msg());

  // Reassignment replaces the previously stored message.
  status = paddle::Error("error2");
  ASSERT_FALSE(status);
  ASSERT_STREQ("error2", status.msg());

  // The constructor substitutes printf-style arguments into the message.
  int suffix = 3;
  paddle::Error formatted("error%d", suffix);
  ASSERT_FALSE(formatted);
  ASSERT_STREQ("error3", formatted.msg());
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册