diff --git a/paddle/gserver/activations/ActivationFunction.cpp b/paddle/gserver/activations/ActivationFunction.cpp
index 8a938cf7e9d730a15064928eb84c20a2ccf7d50a..666c2e01c8bdaf412f53515aa86d89e8dfb9b1f0 100644
--- a/paddle/gserver/activations/ActivationFunction.cpp
+++ b/paddle/gserver/activations/ActivationFunction.cpp
@@ -69,11 +69,11 @@ static ClassRegistrar<ActivationFunction> gActivationRegistrar;
 class IdentityActivation : public ActivationFunction {
 public:
   static const std::string name;
-  Status forward(Argument& act) {
+  Status __must_check forward(Argument& act) {
     (void)act;
     return Status();
   }
-  Status backward(Argument& act) {
+  Status __must_check backward(Argument& act) {
     (void)act;
     return Status();
   }
@@ -92,11 +92,11 @@ static InitFunction __reg_activation__identity([] {
  * \f]
  */
 BEGIN_DEFINE_ACTIVATION(sigmoid)
-Status forward(Argument& act) {
+Status __must_check forward(Argument& act) {
   act.value->sigmoid(*act.value);
   return Status();
 }
-Status backward(Argument& act) {
+Status __must_check backward(Argument& act) {
   act.grad->sigmoidDerivative(*act.value);
   return Status();
 }
@@ -115,12 +115,12 @@ MatrixPtr sftMaxDot_;
 MatrixPtr one_;
 
 public:
-Status forward(Argument& act) {
+Status __must_check forward(Argument& act) {
   act.value->softmax(*act.value);
   return Status();
 }
 
-Status backward(Argument& act) {
+Status __must_check backward(Argument& act) {
   MatrixPtr outputV = act.value;
   MatrixPtr outputG = act.grad;
 
@@ -167,7 +167,7 @@ ACTIVATION_CLASS_NAME(softmax) softmax_;
 Argument argument_;
 
 public:
-Status forward(Argument& act) {
+Status __must_check forward(Argument& act) {
   if (act.value->getWidth() != 1UL) {
     return Status(
         "Input width for each timestep of sequence softmax should be 1");
@@ -191,7 +191,7 @@ Status forward(Argument& act) {
   return Status();
 }
 
-Status backward(Argument& act) {
+Status __must_check backward(Argument& act) {
   if (act.value->getWidth() != 1UL) {
     return Status(
         "Input width for each timestep of sequence softmax should be 1");
@@ -207,7 +208,8 @@ Status backward(Argument& act) {
     argument_.value->setData(act.value->getData() + offset, 1UL, size);
     argument_.grad->setData(act.grad->getData() + offset, 1UL, size);
 
-    softmax_.backward(argument_);
+    Status status = softmax_.backward(argument_);
+    if (!status.isOK()) return status;
   }
   return Status();
 }
@@ -224,12 +225,12 @@ END_DEFINE_ACTIVATION(sequence_softmax)
  * 0 otherwise.
  */
 BEGIN_DEFINE_ACTIVATION(relu)
-Status forward(Argument& act) {
+Status __must_check forward(Argument& act) {
   act.value->relu(*act.value);
   return Status();
 }
 
-Status backward(Argument& act) {
+Status __must_check backward(Argument& act) {
   act.grad->reluDerivative(*act.value);
   return Status();
 }
@@ -249,12 +250,12 @@ END_DEFINE_ACTIVATION(relu)
  * TODO(yuyang18): Remove magic number 24 or make it configuable.
  */
 BEGIN_DEFINE_ACTIVATION(brelu)
-Status forward(Argument& act) {
+Status __must_check forward(Argument& act) {
   act.value->brelu(*act.value);
   return Status();
 }
 
-Status backward(Argument& act) {
+Status __must_check backward(Argument& act) {
   act.grad->breluDerivative(*act.value);
   return Status();
 }
@@ -267,12 +268,12 @@ END_DEFINE_ACTIVATION(brelu)
  * \f]
 */
 BEGIN_DEFINE_ACTIVATION(tanh)
-Status forward(Argument& act) {
+Status __must_check forward(Argument& act) {
   act.value->tanh(*act.value);
   return Status();
 }
 
-Status backward(Argument& act) {
+Status __must_check backward(Argument& act) {
   act.grad->tanhDerivative(*act.value);
   return Status();
 }
@@ -290,12 +291,12 @@ real a, b;
 
 public:
 ACTIVATION_CLASS_NAME(stanh)() : a(1.7159), b(2. / 3.) {}
-Status forward(Argument& act) {
+Status __must_check forward(Argument& act) {
   act.value->scaledTanh(*act.value, a, b);
   return Status();
 }
 
-Status backward(Argument& act) {
+Status __must_check backward(Argument& act) {
   act.grad->scaledTanhDerivative(*act.value, a, b);
   return Status();
 }
@@ -308,12 +309,12 @@ END_DEFINE_ACTIVATION(stanh)
  * \f]
  */
 BEGIN_DEFINE_ACTIVATION(softrelu)
-Status forward(Argument& act) {
+Status __must_check forward(Argument& act) {
   act.value->softrelu(*act.value);
   return Status();
 }
 
-Status backward(Argument& act) {
+Status __must_check backward(Argument& act) {
   act.grad->softreluDerivative(*act.value);
   return Status();
 }
@@ -332,7 +333,7 @@ END_DEFINE_ACTIVATION(softrelu)
  * 0 if z=0
  */
 BEGIN_DEFINE_ACTIVATION(abs)
-Status forward(Argument& act) {
+Status __must_check forward(Argument& act) {
   SetDevice device(act.deviceId);
   Matrix::resizeOrCreate(act.in,
                          act.value->getHeight(),
@@ -345,7 +346,7 @@ Status forward(Argument& act) {
   return Status();
 }
 
-Status backward(Argument& act) {
+Status __must_check backward(Argument& act) {
   act.grad->absDerivative(*act.in);
   return Status();
 }
@@ -358,7 +359,7 @@ END_DEFINE_ACTIVATION(abs)
  * \f]
  */
 BEGIN_DEFINE_ACTIVATION(square)
-Status forward(Argument& act) {
+Status __must_check forward(Argument& act) {
   SetDevice device(act.deviceId);
   Matrix::resizeOrCreate(act.in,
                          act.value->getHeight(),
@@ -371,7 +372,7 @@ Status forward(Argument& act) {
   return Status();
 }
 
-Status backward(Argument& act) {
+Status __must_check backward(Argument& act) {
   act.grad->squareDerivative(*act.in);
   return Status();
 }
@@ -384,12 +385,12 @@ END_DEFINE_ACTIVATION(square)
  * \f]
  */
 BEGIN_DEFINE_ACTIVATION(exponential)
-Status forward(Argument& act) {
+Status __must_check forward(Argument& act) {
   act.value->exp2(*act.value);
   return Status();
 }
 
-Status backward(Argument& act) {
+Status __must_check backward(Argument& act) {
   act.grad->expDerivative(*act.value);
   return Status();
 }
@@ -402,7 +403,7 @@ END_DEFINE_ACTIVATION(exponential)
  * \f]
  */
 BEGIN_DEFINE_ACTIVATION(log)
-Status forward(Argument& act) {
+Status __must_check forward(Argument& act) {
   SetDevice device(act.deviceId);
   Matrix::resizeOrCreate(act.in,
                          act.value->getHeight(),
@@ -415,7 +416,7 @@ Status forward(Argument& act) {
   return Status();
 }
 
-Status backward(Argument& act) {
+Status __must_check backward(Argument& act) {
   act.grad->dotDiv(*act.grad, *act.in);
   return Status();
 }
diff --git a/paddle/gserver/activations/ActivationFunction.h b/paddle/gserver/activations/ActivationFunction.h
index ad395ac28da7d9328b4201079e05f5e3c396eba2..737df2219dd36f04c51463b5506d7dcf4338cc2f 100644
--- a/paddle/gserver/activations/ActivationFunction.h
+++ b/paddle/gserver/activations/ActivationFunction.h
@@ -49,7 +49,7 @@ public:
    *
    * Usually, act is Layer::output_
    */
-  virtual Status forward(Argument& act) = 0;
+  virtual Status __must_check forward(Argument& act) = 0;
 
   /**
    * @brief Backward propagaion
@@ -58,7 +58,7 @@ public:
    * - Before calling backward(), act.grad = dE / dy, where E is the error/cost
    * - After backward() returns, act.grad = dE / dx = (dE/dy) * (dy/dx)
    */
-  virtual Status backward(Argument& act) = 0;
+  virtual Status __must_check backward(Argument& act) = 0;
 
   virtual const std::string& getName() const = 0;
 };
diff --git a/paddle/gserver/layers/Layer.cpp b/paddle/gserver/layers/Layer.cpp
index 06c936c3aec67059abc6530acc9a57b348158924..f96070fe6e269b9fb53453050a04f7f2c23998a1 100644
--- a/paddle/gserver/layers/Layer.cpp
+++ b/paddle/gserver/layers/Layer.cpp
@@ -336,7 +336,7 @@ void Layer::showOutputStats() {
 void Layer::forwardActivation() {
   /* activation */
   auto status = activation_->forward(output_);
-  CHECK(status.isOK()) << status.what();
+  status.check();
 
   /* dropout */
   if (config_.drop_rate() > 0) {
@@ -375,7 +375,7 @@ void Layer::backwardActivation() {
   }
 
   auto status = activation_->backward(output_);
-  CHECK(status.isOK()) << status.what();
+  status.check();
 }
 
 void Layer::forwardDropOut() {
diff --git a/paddle/gserver/layers/MDLstmLayer.cpp b/paddle/gserver/layers/MDLstmLayer.cpp
index fb41af563195496a57eafcc52b49eadac697fa0a..88d934d782b549a984f1d7798e54bcc4436ea0cf 100644
--- a/paddle/gserver/layers/MDLstmLayer.cpp
+++ b/paddle/gserver/layers/MDLstmLayer.cpp
@@ -506,9 +506,12 @@ void MDLstmLayer::forwardGate2OutputSequence(int start,
           *frameState_[start + preOffsetV[i]].value, *checkFgOneDim, 1.0, 1.0);
     }
   }
-  activationGate_->forward(frameInputGate_[idxCurr]);
-  activationGate_->forward(frameForgetGate_[idxCurr]);
-  activation_->forward(frameInputNode_[idxCurr]);
+  auto status = activationGate_->forward(frameInputGate_[idxCurr]);
+  status.check();
+  status = activationGate_->forward(frameForgetGate_[idxCurr]);
+  status.check();
+  status = activation_->forward(frameInputNode_[idxCurr]);
+  status.check();
 
   frameState_[idxCurr].value->zeroMem();
   for (int i = 0; i < numDims_; i++) {
@@ -530,10 +533,12 @@ void MDLstmLayer::forwardGate2OutputSequence(int start,
   frameOutputGate_[idxCurr].value->addDotMul(
       *frameState_[idxCurr].value, *checkOg_, 1.0, 1.0);
 
-  activationGate_->forward(frameOutputGate_[idxCurr]);
+  status = activationGate_->forward(frameOutputGate_[idxCurr]);
+  status.check();
 
   framePreOutput_[idxCurr].value->copyFrom(*(frameState_[idxCurr].value));
-  activationState_->forward(framePreOutput_[idxCurr]);
+  status = activationState_->forward(framePreOutput_[idxCurr]);
+  status.check();
 
   frameOutput_[idxCurr].value->dotMul(*framePreOutput_[idxCurr].value,
                                       *frameOutputGate_[idxCurr].value);
@@ -640,12 +645,12 @@ void MDLstmLayer::backwardGate2OutputSequence(int start,
 
   framePreOutput_[idxCurr].grad->dotMul(*frameOutput_[idxCurr].grad,
                                         *frameOutputGate_[idxCurr].value);
-  activationState_->backward(framePreOutput_[idxCurr]);
+  activationState_->backward(framePreOutput_[idxCurr]).check();
   frameState_[idxCurr].grad->copyFrom(*(framePreOutput_[idxCurr].grad));
 
   frameOutputGate_[idxCurr].grad->dotMul(*frameOutput_[idxCurr].grad,
                                          *framePreOutput_[idxCurr].value);
-  activationGate_->backward(frameOutputGate_[idxCurr]);
+  activationGate_->backward(frameOutputGate_[idxCurr]).check();
 
   frameState_[idxCurr].grad->addDotMul(
       *frameOutputGate_[idxCurr].grad, *checkOg_, 1.0, 1.0);
@@ -702,9 +707,9 @@ void MDLstmLayer::backwardGate2OutputSequence(int start,
     }
   }
 
-  activationGate_->backward(frameInputGate_[idxCurr]);
-  activationGate_->backward(frameForgetGate_[idxCurr]);
-  activation_->backward(frameInputNode_[idxCurr]);
+  activationGate_->backward(frameInputGate_[idxCurr]).check();
+  activationGate_->backward(frameForgetGate_[idxCurr]).check();
+  activation_->backward(frameInputNode_[idxCurr]).check();
 
   if (bias_->getWGrad()) {
     for (int i = 0; i < numDims_; i++) {
diff --git a/paddle/gserver/layers/NCELayer.cpp b/paddle/gserver/layers/NCELayer.cpp
index 5ab765247f63dfe6e6651ca4d27dc7183a9f33e1..3542e739df8d03470bf2c455b4f3492a7f9e973a 100644
--- a/paddle/gserver/layers/NCELayer.cpp
+++ b/paddle/gserver/layers/NCELayer.cpp
@@ -193,7 +193,8 @@ public:
       forwardOneInput(l);
     }
 
-    activation_->forward(sampleOut_);
+    auto status = activation_->forward(sampleOut_);
+    status.check();
 
     forwardCost();
   }
@@ -207,7 +208,8 @@ public:
 
     backwardCost();
 
-    activation_->backward(sampleOut_);
+    auto status = activation_->backward(sampleOut_);
+    status.check();
 
     if (biases_->getWGrad()) {
       backwardBias(callback);
diff --git a/paddle/gserver/layers/RecurrentLayer.cpp b/paddle/gserver/layers/RecurrentLayer.cpp
index 55e0fdfb9048c02b2dcd474c6887eee180328260..b843fa1265cf3c0ad0814fb90f69e245ee5ab4ad 100644
--- a/paddle/gserver/layers/RecurrentLayer.cpp
+++ b/paddle/gserver/layers/RecurrentLayer.cpp
@@ -217,21 +217,22 @@ void RecurrentLayer::forwardOneSequence(int start, int length) {
     if (prevOutput_) {
       frameOutput_[start].value->mul(*prevOutput_, *weight_->getW(), 1, 1);
     }
-    activation_->forward(frameOutput_[start]);
+    activation_->forward(frameOutput_[start]).check();
+
     for (int i = 1; i < length; ++i) {
       frameOutput_[start + i].value->mul(
           *frameOutput_[start + i - 1].value, *weight_->getW(), 1, 1);
-      activation_->forward(frameOutput_[start + i]);
+      activation_->forward(frameOutput_[start + i]).check();
     }
     if (prevOutput_) {
       prevOutput_->assign(*frameOutput_[start + length - 1].value);
     }
   } else {
-    activation_->forward(frameOutput_[start + length - 1]);
+    activation_->forward(frameOutput_[start + length - 1]).check();
     for (int i = length - 2; i >= 0; --i) {
       frameOutput_[start + i].value->mul(
           *frameOutput_[start + i + 1].value, *weight_->getW(), 1, 1);
-      activation_->forward(frameOutput_[start + i]);
+      activation_->forward(frameOutput_[start + i]).check();
     }
   }
 }
@@ -280,11 +281,11 @@ void RecurrentLayer::backwardOneSequence(int start, int length) {
   MatrixPtr weightT = weight_->getW()->getTranspose();
   if (!reversed_) {
     for (int i = length - 1; i > 0; --i) {
-      activation_->backward(frameOutput_[start + i]);
+      activation_->backward(frameOutput_[start + i]).check();
       frameOutput_[start + i - 1].grad->mul(
           *frameOutput_[start + i].grad, *weightT, 1, 1);
     }
-    activation_->backward(frameOutput_[start]);
+    activation_->backward(frameOutput_[start]).check();
     if (weight_->getWGrad()) {
       weight_->getWGrad()->mul(
           *output_.value->subMatrix(start, length - 1)->getTranspose(),
@@ -294,11 +295,11 @@ void RecurrentLayer::backwardOneSequence(int start, int length) {
     }
   } else {
     for (int i = 0; i < length - 1; ++i) {
-      activation_->backward(frameOutput_[start + i]);
+      activation_->backward(frameOutput_[start + i]).check();
       frameOutput_[start + i + 1].grad->mul(
           *frameOutput_[start + i].grad, *weightT, 1, 1);
     }
-    activation_->backward(frameOutput_[start + length - 1]);
+    activation_->backward(frameOutput_[start + length - 1]).check();
     if (weight_->getWGrad()) {
       weight_->getWGrad()->mul(
           *output_.value->subMatrix(start + 1, length - 1)->getTranspose(),
@@ -333,7 +334,7 @@ void RecurrentLayer::forwardBatch(int batchSize,
       }
       Argument arg;
       arg.value = batch2;
-      activation_->forward(arg);
+      activation_->forward(arg).check();
     }
   }
   batchValue_->copyBackSeq(*output_.value);
@@ -363,7 +364,7 @@ void RecurrentLayer::backwardBatch(int batchSize,
       Argument arg;
       arg.value = batch1;
       arg.grad = batch2;
-      activation_->backward(arg);
+      activation_->backward(arg).check();
 
       if (n != 0) {
         batch1 = batchGrad_->getBatchValue(n - 1, batch2->getHeight());
diff --git a/paddle/gserver/layers/SelectiveFullyConnectedLayer.cpp b/paddle/gserver/layers/SelectiveFullyConnectedLayer.cpp
index 5eacff6b7143996130bea64766ef42c66f4c7310..d9a91de8a6f4daf514f089a3d63cb519223bfdd0 100644
--- a/paddle/gserver/layers/SelectiveFullyConnectedLayer.cpp
+++ b/paddle/gserver/layers/SelectiveFullyConnectedLayer.cpp
@@ -192,7 +192,8 @@ void SelectiveFullyConnectedLayer::forward(PassType passType) {
                           nnz,
                           /*trans=*/false,
                           /*useGpu=*/useGpu_);
-    activation_->forward(arg);
+    //! TODO(yuyang18): Why can't we invoke forwardActivation here?
+    activation_->forward(arg).check();
   } else /* train and test in train, not generating */ {
     // during training, this layer output value is *Matrix*, which is input of
     // eg. multi-class-cross-entropy
diff --git a/paddle/gserver/tests/test_WarpCTCLayer.cpp b/paddle/gserver/tests/test_WarpCTCLayer.cpp
index 23ae95852e84216c9065f1b123d35ce868fbb90f..55427e2f12fd7b77c6eea1f65b3229e6fd29d71d 100644
--- a/paddle/gserver/tests/test_WarpCTCLayer.cpp
+++ b/paddle/gserver/tests/test_WarpCTCLayer.cpp
@@ -148,11 +148,11 @@ LayerPtr createCTCLayer(string name,
   ActivationFunction* softmaxActivation =
       ActivationFunction::create("softmax");
 
-  softmaxActivation->forward(dataLayer->getOutput());
+  softmaxActivation->forward(dataLayer->getOutput()).check();
   layer->forward(PASS_GC);
 
   layer->backward();
-  softmaxActivation->backward(dataLayer->getOutput());
+  softmaxActivation->backward(dataLayer->getOutput()).check();
 
   return layer;
 }
diff --git a/paddle/utils/Compiler.h b/paddle/utils/Compiler.h
index e69de29bb2d1d6434b8b29ae775ad8c2e48c5391..22812e83987d8dab7efdc93e30a2c28e7bcbc6ec 100644
--- a/paddle/utils/Compiler.h
+++ b/paddle/utils/Compiler.h
@@ -0,0 +1,25 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+    http://www.apache.org/licenses/LICENSE-2.0
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#ifdef __GNUC__
+#define GCC_VERSION \
+  (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
+#else
+#define GCC_VERSION 0
+#endif
+
+#if GCC_VERSION >= 30400
+#define __must_check __attribute__((warn_unused_result))
+#else
+#define __must_check
+#endif
diff --git a/paddle/utils/Status.h b/paddle/utils/Status.h
index cb66e4b225e92c8b2319435ba1ea15b56a8d2e5e..26329f8d19bae60d5a4835528888393b6de620b4 100644
--- a/paddle/utils/Status.h
+++ b/paddle/utils/Status.h
@@ -14,9 +14,11 @@ limitations under the License. */
 
 #pragma once
 
+#include <glog/logging.h>
 #include <memory>
 #include <stdexcept>
 #include <string>
+#include "Compiler.h"
 
 namespace paddle {
 
@@ -29,8 +31,55 @@ namespace paddle {
  * There are two styles to return status in Paddle.
 *
  * 1. Return Status
+ * When a method returns a Status, the return type must be marked with the
+ * `__must_check` attribute, for example:
+ * @code{cpp}
+ * Status __must_check foo();
 *
+ * Status __must_check bar() {
+ *   // do something.
+ *   Status s = foo();  // invoke another method that returns a Status.
+ *   if (!s.isOK()) return s;
+ *   // do something else.
+ *   return Status();
+ * }
+ * @endcode{cpp}
 *
+ * 2. Return by parameter.
+ * Alternatively, a status can be returned through a pointer parameter,
+ * as in the example below.
+ *
+ * @code{cpp}
+ * Status bar();
+ *
+ * int foo(Status* status) {
+ *   // Do something.
+ *   Status s = bar();
+ *   if (!s.isOK()) {
+ *     *status = s;
+ *     return 0;
+ *   }
+ *   // Do something else.
+ *   if (someInternalErrorHappened) {
+ *     status->setByPrintf("Some dimension is too large, %d", dimension);
+ *     return 0;
+ *   }
+ *   // End of method.
+ *   return someValue;
+ * }
+ *
+ * Status foobar() {
+ *   Status s;
+ *   // do something.
+ *   foo(&s);
+ *   return s;
+ * }
+ * @endcode{cpp}
+ *
+ *
+ * Status also provides a helper method 'check', because Paddle has so far
+ * used LOG(FATAL) or CHECK to terminate the program on error. Once all
+ * LOG(FATAL) and CHECK calls are cleaned up, 'check' will be removed.
  */
 class Status final : public std::exception {
 public:
@@ -92,6 +141,13 @@ public:
    */
   inline bool isOK() const noexcept { return errMsg_ == nullptr; }
 
+  /**
+   * @brief Check this status with glog; aborts if the status is not OK.
+   * @note This is a temporary helper used while Paddle's error handling is
+   * being cleaned up. It will be removed later.
+   */
+  inline void check() const { CHECK(isOK()) << what(); }
+
 private:
   std::shared_ptr<std::string> errMsg_;
 };
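The sketch below is a small, self-contained illustration (not part of the patch, and not Paddle's actual headers) of the error-handling convention this change enforces: a status-producing function is annotated with __must_check so the compiler warns when a caller silently drops the result, and callers either propagate the status or call a check()-style helper that aborts on failure, mirroring paddle::Status::check(). All names here (DemoStatus, doForward, run) are illustrative only.

#include <cstdio>
#include <cstdlib>
#include <memory>
#include <string>
#include <utility>

#if defined(__GNUC__)
#define __must_check __attribute__((warn_unused_result))
#else
#define __must_check
#endif

// Illustrative stand-in for paddle::Status.
class DemoStatus {
public:
  DemoStatus() = default;               // default-constructed status means "OK".
  explicit DemoStatus(std::string msg)  // a non-empty message means "error".
      : msg_(std::make_shared<std::string>(std::move(msg))) {}

  bool isOK() const { return msg_ == nullptr; }
  const char* what() const { return msg_ ? msg_->c_str() : ""; }

  // Mirrors Status::check(): abort the program if the status is an error.
  void check() const {
    if (!isOK()) {
      std::fprintf(stderr, "Check failed: %s\n", what());
      std::abort();
    }
  }

private:
  std::shared_ptr<std::string> msg_;
};

// Producer: __must_check makes the compiler warn if the result is ignored.
DemoStatus __must_check doForward(int width) {
  if (width != 1) return DemoStatus("input width should be 1");
  return DemoStatus();
}

// Style 1 from the Status.h comment: propagate the status to the caller.
DemoStatus __must_check run(int width) {
  DemoStatus s = doForward(width);
  if (!s.isOK()) return s;
  // ... do more work ...
  return DemoStatus();
}

int main() {
  run(1).check();    // OK path: nothing happens.
  // run(2).check(); // would print the message and abort, like CHECK().
  return 0;
}

On GCC and Clang, a bare statement such as doForward(2); then triggers an unused-result warning under typical warning settings, which is the effect the new Compiler.h macro adds to ActivationFunction::forward and backward.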