diff --git a/cmake/external/glog.cmake b/cmake/external/glog.cmake index 71e20c85276b014c2e33735c3199c3772526c6c7..ab105611c812a4f4b642ac5b1213fdfe93fab97d 100644 --- a/cmake/external/glog.cmake +++ b/cmake/external/glog.cmake @@ -1,11 +1,11 @@ # Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. -# +# # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. # You may obtain a copy of the License at -# +# # http://www.apache.org/licenses/LICENSE-2.0 -# +# # Unless required by applicable law or agreed to in writing, software # distributed under the License is distributed on an "AS IS" BASIS, # WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -29,12 +29,14 @@ INCLUDE_DIRECTORIES(${GLOG_INCLUDE_DIR}) ExternalProject_Add( glog ${EXTERNAL_PROJECT_LOG_ARGS} + DEPENDS gflags GIT_REPOSITORY "https://github.com/google/glog.git" PREFIX ${GLOG_SOURCES_DIR} UPDATE_COMMAND "" CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${GLOG_INSTALL_DIR} CMAKE_ARGS -DCMAKE_POSITION_INDEPENDENT_CODE=ON - CMAKE_ARGS -DWITH_GFLAGS=OFF + CMAKE_ARGS -DWITH_GFLAGS=ON + CMAKE_ARGS -Dgflags_DIR=${GFLAGS_INSTALL_DIR}/lib/cmake/gflags CMAKE_ARGS -DBUILD_TESTING=OFF ) diff --git a/cmake/external/protobuf.cmake b/cmake/external/protobuf.cmake index 613614c0e3d42fac4147f78edbc1bd6d62847419..84f459033f06f89d3b150317793c7e62274468b2 100644 --- a/cmake/external/protobuf.cmake +++ b/cmake/external/protobuf.cmake @@ -29,17 +29,12 @@ IF(WIN32) "${PROTOBUF_INSTALL_DIR}/lib/libprotoc.lib" CACHE FILEPATH "protoc library." FORCE) SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc.exe" CACHE FILEPATH "protobuf executable." FORCE) ELSE(WIN32) - IF(${HOST_SYSTEM} STREQUAL "centos") - SET(LIB "lib64") - ELSE() - SET(LIB "lib") - ENDIF() SET(PROTOBUF_LITE_LIBRARY - "${PROTOBUF_INSTALL_DIR}/${LIB}/libprotobuf-lite.a" CACHE FILEPATH "protobuf lite library." 
FORCE) + "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite.a" CACHE FILEPATH "protobuf lite library." FORCE) SET(PROTOBUF_LIBRARY - "${PROTOBUF_INSTALL_DIR}/${LIB}/libprotobuf.a" CACHE FILEPATH "protobuf library." FORCE) + "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf.a" CACHE FILEPATH "protobuf library." FORCE) SET(PROTOBUF_PROTOC_LIBRARY - "${PROTOBUF_INSTALL_DIR}/${LIB}/libprotoc.a" CACHE FILEPATH "protoc library." FORCE) + "${PROTOBUF_INSTALL_DIR}/lib/libprotoc.a" CACHE FILEPATH "protoc library." FORCE) SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc" CACHE FILEPATH "protobuf executable." FORCE) ENDIF(WIN32) @@ -58,6 +53,7 @@ ExternalProject_Add( -DCMAKE_POSITION_INDEPENDENT_CODE=ON -DCMAKE_BUILD_TYPE=Release -DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR} + -DCMAKE_INSTALL_LIBDIR=lib ) LIST(APPEND external_project_dependencies protobuf) diff --git a/cmake/external/python.cmake b/cmake/external/python.cmake index 209e679f2cb2178423bf20dec73a0bccef199fcb..6372a9a768e580f74f837ccb6c57d4f4395eb779 100644 --- a/cmake/external/python.cmake +++ b/cmake/external/python.cmake @@ -26,10 +26,10 @@ IF(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND) find_python_module(wheel REQUIRED) find_python_module(google.protobuf REQUIRED) FIND_PACKAGE(NumPy REQUIRED) - IF(${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.0.0") + IF(PY_GOOGLE.PROTOBUF_VERSION AND "${PY_GOOGLE.PROTOBUF_VERSION}" VERSION_LESS "3.0.0") MESSAGE(FATAL_ERROR "Found Python Protobuf ${PY_GOOGLE.PROTOBUF_VERSION} < 3.0.0, " - "please use pip to upgrade protobuf.") - ENDIF(${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.0.0") + "please use pip to upgrade protobuf. 
pip install -U protobuf") + ENDIF() ELSE(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND) MESSAGE(FATAL_ERROR "Please install python 2.7 before building PaddlePaddle.") ##################################### PYTHON ######################################## diff --git a/cmake/external/warpctc.cmake b/cmake/external/warpctc.cmake index f5e4b3e1eb39acbe8dbcd0023956ca7e52c1ecd8..172c318b35d611d0432b78f2a18eb58a7d272b90 100644 --- a/cmake/external/warpctc.cmake +++ b/cmake/external/warpctc.cmake @@ -54,6 +54,7 @@ ExternalProject_Add( CMAKE_ARGS -DWITH_GPU=${WITH_GPU} CMAKE_ARGS -DWITH_OMP=${USE_OMP} CMAKE_ARGS -DWITH_TORCH=OFF + CMAKE_ARGS -DCMAKE_DISABLE_FIND_PACKAGE_Torch=TRUE CMAKE_ARGS -DBUILD_SHARED=ON ) diff --git a/cmake/system.cmake b/cmake/system.cmake index ab124a89dcdc1cc5b865f6a15c1693b2f72ea39a..3e472da7e0bd9c433f92f3e8b52970cd2cc6dcba 100644 --- a/cmake/system.cmake +++ b/cmake/system.cmake @@ -12,6 +12,14 @@ # See the License for the specific language governing permissions and # limitations under the License. +# Detects the OS and sets the appropriate variables. +# CMAKE_SYSTEM_NAME only gives us a coarse-grained name, +# but a name like centos is necessary in some scenarios +# to distinguish systems for customization. +# +# For instance, the protobuf libs path is /lib64 +# on CentOS, but /lib on other systems. 
+ IF(WIN32) SET(HOST_SYSTEM "win32") ELSE(WIN32) @@ -30,6 +38,10 @@ ELSE(WIN32) SET(HOST_SYSTEM "debian") ELSEIF(LINUX_ISSUE MATCHES "Ubuntu") SET(HOST_SYSTEM "ubuntu") + ELSEIF(LINUX_ISSUE MATCHES "Red Hat") + SET(HOST_SYSTEM "redhat") + ELSEIF(LINUX_ISSUE MATCHES "Fedora") + SET(HOST_SYSTEM "fedora") ENDIF() ENDIF(EXISTS "/etc/issue") @@ -40,6 +52,10 @@ ELSE(WIN32) ENDIF() ENDIF(EXISTS "/etc/redhat-release") + IF(NOT HOST_SYSTEM) + SET(HOST_SYSTEM ${CMAKE_SYSTEM_NAME}) + ENDIF() + ENDIF(APPLE) ENDIF(WIN32) diff --git a/paddle/gserver/activations/ActivationFunction.cpp b/paddle/gserver/activations/ActivationFunction.cpp index f8c4bcac2f8eb41400659dc24ba81768e7ae3640..c541b72e104bf2b81e2ac222d4af13ea2f90d289 100644 --- a/paddle/gserver/activations/ActivationFunction.cpp +++ b/paddle/gserver/activations/ActivationFunction.cpp @@ -69,8 +69,14 @@ static ClassRegistrar gActivationRegistrar; class IdentityActivation : public ActivationFunction { public: static const std::string name; - void forward(Argument& act) { (void)act; } - void backward(Argument& act) { (void)act; } + Error __must_check forward(Argument& act) { + (void)act; + return Error(); + } + Error __must_check backward(Argument& act) { + (void)act; + return Error(); + } const std::string& getName() const { return name; } }; const std::string IdentityActivation::name = ""; @@ -86,8 +92,14 @@ static InitFunction __reg_activation__identity([] { * \f] */ BEGIN_DEFINE_ACTIVATION(sigmoid) -void forward(Argument& act) { act.value->sigmoid(*act.value); } -void backward(Argument& act) { act.grad->sigmoidDerivative(*act.value); } +Error __must_check forward(Argument& act) { + act.value->sigmoid(*act.value); + return Error(); +} +Error __must_check backward(Argument& act) { + act.grad->sigmoidDerivative(*act.value); + return Error(); +} END_DEFINE_ACTIVATION(sigmoid) /** @@ -103,9 +115,12 @@ MatrixPtr sftMaxDot_; MatrixPtr one_; public: -void forward(Argument& act) { act.value->softmax(*act.value); } +Error 
__must_check forward(Argument& act) { + act.value->softmax(*act.value); + return Error(); +} -void backward(Argument& act) { +Error __must_check backward(Argument& act) { MatrixPtr outputV = act.value; MatrixPtr outputG = act.grad; @@ -137,6 +152,7 @@ void backward(Argument& act) { act.grad->softmaxDerivative(*act.value, *sftMaxSum_); } + return Error(); } END_DEFINE_ACTIVATION(softmax) @@ -151,8 +167,11 @@ ACTIVATION_CLASS_NAME(softmax) softmax_; Argument argument_; public: -void forward(Argument& act) { - CHECK_EQ(act.value->getWidth(), 1UL); +Error __must_check forward(Argument& act) { + if (act.value->getWidth() != 1UL) { + return Error( + "Input width for each timestep of sequence softmax should be 1"); + } if (!argument_.value) { argument_.value = Matrix::create(nullptr, @@ -169,10 +188,14 @@ void forward(Argument& act) { auto starts = act.sequenceStartPositions->getVector(useGpu(act.deviceId)); act.value->sequenceSoftmax(*act.value, *starts); + return Error(); } -void backward(Argument& act) { - CHECK_EQ(act.grad->getWidth(), 1UL); +Error __must_check backward(Argument& act) { + if (act.grad->getWidth() != 1UL) { + return Error( + "Input width for each timestep of sequence softmax should be 1"); + } size_t numSequences = act.getNumSequences(); const int* starts = act.sequenceStartPositions->getData(false); @@ -184,8 +207,10 @@ void backward(Argument& act) { argument_.value->setData(act.value->getData() + offset, 1UL, size); argument_.grad->setData(act.grad->getData() + offset, 1UL, size); - softmax_.backward(argument_); + Error status = softmax_.backward(argument_); + if (!status) return status; } + return Error(); } END_DEFINE_ACTIVATION(sequence_softmax) @@ -200,9 +225,15 @@ END_DEFINE_ACTIVATION(sequence_softmax) * 0 otherwise. 
*/ BEGIN_DEFINE_ACTIVATION(relu) -void forward(Argument& act) { act.value->relu(*act.value); } +Error __must_check forward(Argument& act) { + act.value->relu(*act.value); + return Error(); +} -void backward(Argument& act) { act.grad->reluDerivative(*act.value); } +Error __must_check backward(Argument& act) { + act.grad->reluDerivative(*act.value); + return Error(); +} END_DEFINE_ACTIVATION(relu) /** @@ -219,9 +250,15 @@ END_DEFINE_ACTIVATION(relu) * TODO(yuyang18): Remove magic number 24 or make it configuable. */ BEGIN_DEFINE_ACTIVATION(brelu) -void forward(Argument& act) { act.value->brelu(*act.value); } +Error __must_check forward(Argument& act) { + act.value->brelu(*act.value); + return Error(); +} -void backward(Argument& act) { act.grad->breluDerivative(*act.value); } +Error __must_check backward(Argument& act) { + act.grad->breluDerivative(*act.value); + return Error(); +} END_DEFINE_ACTIVATION(brelu) /** @@ -231,9 +268,15 @@ END_DEFINE_ACTIVATION(brelu) * \f] */ BEGIN_DEFINE_ACTIVATION(tanh) -void forward(Argument& act) { act.value->tanh(*act.value); } +Error __must_check forward(Argument& act) { + act.value->tanh(*act.value); + return Error(); +} -void backward(Argument& act) { act.grad->tanhDerivative(*act.value); } +Error __must_check backward(Argument& act) { + act.grad->tanhDerivative(*act.value); + return Error(); +} END_DEFINE_ACTIVATION(tanh) /** @@ -248,10 +291,14 @@ real a, b; public: ACTIVATION_CLASS_NAME(stanh)() : a(1.7159), b(2. / 3.) 
{} -void forward(Argument& act) { act.value->scaledTanh(*act.value, a, b); } +Error __must_check forward(Argument& act) { + act.value->scaledTanh(*act.value, a, b); + return Error(); +} -void backward(Argument& act) { +Error __must_check backward(Argument& act) { act.grad->scaledTanhDerivative(*act.value, a, b); + return Error(); } END_DEFINE_ACTIVATION(stanh) @@ -262,9 +309,15 @@ END_DEFINE_ACTIVATION(stanh) * \f] */ BEGIN_DEFINE_ACTIVATION(softrelu) -void forward(Argument& act) { act.value->softrelu(*act.value); } +Error __must_check forward(Argument& act) { + act.value->softrelu(*act.value); + return Error(); +} -void backward(Argument& act) { act.grad->softreluDerivative(*act.value); } +Error __must_check backward(Argument& act) { + act.grad->softreluDerivative(*act.value); + return Error(); +} END_DEFINE_ACTIVATION(softrelu) /** @@ -280,7 +333,7 @@ END_DEFINE_ACTIVATION(softrelu) * 0 if z=0 */ BEGIN_DEFINE_ACTIVATION(abs) -void forward(Argument& act) { +Error __must_check forward(Argument& act) { SetDevice device(act.deviceId); Matrix::resizeOrCreate(act.in, act.value->getHeight(), @@ -290,9 +343,13 @@ void forward(Argument& act) { act.in->copyFrom(*act.value); act.value->abs2(*act.value); + return Error(); } -void backward(Argument& act) { act.grad->absDerivative(*act.in); } +Error __must_check backward(Argument& act) { + act.grad->absDerivative(*act.in); + return Error(); +} END_DEFINE_ACTIVATION(abs) /** @@ -302,7 +359,7 @@ END_DEFINE_ACTIVATION(abs) * \f] */ BEGIN_DEFINE_ACTIVATION(square) -void forward(Argument& act) { +Error __must_check forward(Argument& act) { SetDevice device(act.deviceId); Matrix::resizeOrCreate(act.in, act.value->getHeight(), @@ -312,9 +369,13 @@ void forward(Argument& act) { act.in->copyFrom(*act.value); act.value->square2(*act.value); + return Error(); } -void backward(Argument& act) { act.grad->squareDerivative(*act.in); } +Error __must_check backward(Argument& act) { + act.grad->squareDerivative(*act.in); + return Error(); +} 
END_DEFINE_ACTIVATION(square) /** @@ -324,9 +385,15 @@ END_DEFINE_ACTIVATION(square) * \f] */ BEGIN_DEFINE_ACTIVATION(exponential) -void forward(Argument& act) { act.value->exp2(*act.value); } +Error __must_check forward(Argument& act) { + act.value->exp2(*act.value); + return Error(); +} -void backward(Argument& act) { act.grad->expDerivative(*act.value); } +Error __must_check backward(Argument& act) { + act.grad->expDerivative(*act.value); + return Error(); +} END_DEFINE_ACTIVATION(exponential) /** @@ -336,7 +403,7 @@ END_DEFINE_ACTIVATION(exponential) * \f] */ BEGIN_DEFINE_ACTIVATION(log) -void forward(Argument& act) { +Error __must_check forward(Argument& act) { SetDevice device(act.deviceId); Matrix::resizeOrCreate(act.in, act.value->getHeight(), @@ -346,9 +413,13 @@ void forward(Argument& act) { act.in->copyFrom(*act.value); act.value->log2(*act.value); + return Error(); } -void backward(Argument& act) { act.grad->dotDiv(*act.grad, *act.in); } +Error __must_check backward(Argument& act) { + act.grad->dotDiv(*act.grad, *act.in); + return Error(); +} END_DEFINE_ACTIVATION(log) ActivationFunction* ActivationFunction::create(const std::string& type) { diff --git a/paddle/gserver/activations/ActivationFunction.h b/paddle/gserver/activations/ActivationFunction.h index 601e3b6c0cd401ec007e8cf51e44416f82832e58..f208224e304a79125679c6f3a5c0be09552465ef 100644 --- a/paddle/gserver/activations/ActivationFunction.h +++ b/paddle/gserver/activations/ActivationFunction.h @@ -15,6 +15,7 @@ limitations under the License. 
*/ #pragma once #include #include +#include "paddle/utils/Error.h" namespace paddle { @@ -48,7 +49,7 @@ public: * * Usually, act is Layer::output_ */ - virtual void forward(Argument& act) = 0; + virtual Error __must_check forward(Argument& act) = 0; /** * @brief Backward propagaion @@ -57,7 +58,7 @@ public: * - Before calling backward(), act.grad = dE / dy, where E is the error/cost * - After backward() returns, act.grad = dE / dx = (dE/dy) * (dy/dx) */ - virtual void backward(Argument& act) = 0; + virtual Error __must_check backward(Argument& act) = 0; virtual const std::string& getName() const = 0; }; diff --git a/paddle/gserver/layers/GatedRecurrentLayer.cpp b/paddle/gserver/layers/GatedRecurrentLayer.cpp index 930d9a056164e7c677adb53b7b67901364da1309..d3aeea921801da301b2829736059130aec14cef6 100644 --- a/paddle/gserver/layers/GatedRecurrentLayer.cpp +++ b/paddle/gserver/layers/GatedRecurrentLayer.cpp @@ -314,13 +314,13 @@ void GatedRecurrentLayer::forwardBatch(int batchSize, batchValue_->resizeOrCreate(*output_.value); batchValue_->copy(*inputValue, *gate_.value, /* seq2batch */ true); - if (bias_ && bias_->getWGrad()) { + if (bias_) { gate_.value->addBias(*(bias_->getW()), 1); } { int numBatch = batchValue_->getNumBatch(); - int batchSize = 0; + int curBatchSize = 0; AsyncGpuBlock asyncGpuBlock; for (int n = 0; n < numBatch; n++) { MatrixPtr outputValueTmp = batchValue_->getBatchValue(n); @@ -330,16 +330,17 @@ void GatedRecurrentLayer::forwardBatch(int batchSize, gruValue.resetOutputValue = (batchValue_->getBatchValue(*resetOutput_.value, n))->getData(); - batchSize = outputValueTmp->getHeight(); + curBatchSize = outputValueTmp->getHeight(); gruValue.prevOutValue = - (n == 0 ? nullptr - : (batchValue_->getBatchValue(n - 1, batchSize))->getData()); + (n == 0 + ? 
nullptr + : (batchValue_->getBatchValue(n - 1, curBatchSize))->getData()); { if (useGpu_) { - GruCompute::forward<1>(gruValue, getSize(), batchSize); + GruCompute::forward<1>(gruValue, getSize(), curBatchSize); } else { - GruCompute::forward<0>(gruValue, getSize(), batchSize); + GruCompute::forward<0>(gruValue, getSize(), curBatchSize); } } } diff --git a/paddle/gserver/layers/Layer.cpp b/paddle/gserver/layers/Layer.cpp index c47943f81c01589eada4b825d54be5c69314b6fa..f76d41ad3e8a3b1730f9d50c0773ee4f61ddb541 100644 --- a/paddle/gserver/layers/Layer.cpp +++ b/paddle/gserver/layers/Layer.cpp @@ -15,6 +15,7 @@ limitations under the License. */ #include "paddle/utils/Util.h" #include "paddle/math/SparseMatrix.h" +#include "paddle/utils/Error.h" #include "paddle/utils/Logging.h" #include "AddtoLayer.h" @@ -334,7 +335,8 @@ void Layer::showOutputStats() { void Layer::forwardActivation() { /* activation */ - activation_->forward(output_); + auto status = activation_->forward(output_); + status.check(); /* dropout */ if (config_.drop_rate() > 0) { @@ -372,7 +374,8 @@ void Layer::backwardActivation() { oGrad->dotMul(*oGrad, *dropOutMask_); } - activation_->backward(output_); + auto status = activation_->backward(output_); + status.check(); } void Layer::forwardDropOut() { diff --git a/paddle/gserver/layers/MDLstmLayer.cpp b/paddle/gserver/layers/MDLstmLayer.cpp index fb41af563195496a57eafcc52b49eadac697fa0a..88d934d782b549a984f1d7798e54bcc4436ea0cf 100644 --- a/paddle/gserver/layers/MDLstmLayer.cpp +++ b/paddle/gserver/layers/MDLstmLayer.cpp @@ -506,9 +506,12 @@ void MDLstmLayer::forwardGate2OutputSequence(int start, *frameState_[start + preOffsetV[i]].value, *checkFgOneDim, 1.0, 1.0); } } - activationGate_->forward(frameInputGate_[idxCurr]); - activationGate_->forward(frameForgetGate_[idxCurr]); - activation_->forward(frameInputNode_[idxCurr]); + auto status = activationGate_->forward(frameInputGate_[idxCurr]); + status.check(); + status = 
activationGate_->forward(frameForgetGate_[idxCurr]); + status.check(); + status = activation_->forward(frameInputNode_[idxCurr]); + status.check(); frameState_[idxCurr].value->zeroMem(); for (int i = 0; i < numDims_; i++) { @@ -530,10 +533,12 @@ void MDLstmLayer::forwardGate2OutputSequence(int start, frameOutputGate_[idxCurr].value->addDotMul( *frameState_[idxCurr].value, *checkOg_, 1.0, 1.0); - activationGate_->forward(frameOutputGate_[idxCurr]); + status = activationGate_->forward(frameOutputGate_[idxCurr]); + status.check(); framePreOutput_[idxCurr].value->copyFrom(*(frameState_[idxCurr].value)); - activationState_->forward(framePreOutput_[idxCurr]); + status = activationState_->forward(framePreOutput_[idxCurr]); + status.check(); frameOutput_[idxCurr].value->dotMul(*framePreOutput_[idxCurr].value, *frameOutputGate_[idxCurr].value); @@ -640,12 +645,12 @@ void MDLstmLayer::backwardGate2OutputSequence(int start, framePreOutput_[idxCurr].grad->dotMul(*frameOutput_[idxCurr].grad, *frameOutputGate_[idxCurr].value); - activationState_->backward(framePreOutput_[idxCurr]); + activationState_->backward(framePreOutput_[idxCurr]).check(); frameState_[idxCurr].grad->copyFrom(*(framePreOutput_[idxCurr].grad)); frameOutputGate_[idxCurr].grad->dotMul(*frameOutput_[idxCurr].grad, *framePreOutput_[idxCurr].value); - activationGate_->backward(frameOutputGate_[idxCurr]); + activationGate_->backward(frameOutputGate_[idxCurr]).check(); frameState_[idxCurr].grad->addDotMul( *frameOutputGate_[idxCurr].grad, *checkOg_, 1.0, 1.0); @@ -702,9 +707,9 @@ void MDLstmLayer::backwardGate2OutputSequence(int start, } } - activationGate_->backward(frameInputGate_[idxCurr]); - activationGate_->backward(frameForgetGate_[idxCurr]); - activation_->backward(frameInputNode_[idxCurr]); + activationGate_->backward(frameInputGate_[idxCurr]).check(); + activationGate_->backward(frameForgetGate_[idxCurr]).check(); + activation_->backward(frameInputNode_[idxCurr]).check(); if (bias_->getWGrad()) { for (int i 
= 0; i < numDims_; i++) { diff --git a/paddle/gserver/layers/NCELayer.cpp b/paddle/gserver/layers/NCELayer.cpp index 5ab765247f63dfe6e6651ca4d27dc7183a9f33e1..3542e739df8d03470bf2c455b4f3492a7f9e973a 100644 --- a/paddle/gserver/layers/NCELayer.cpp +++ b/paddle/gserver/layers/NCELayer.cpp @@ -193,7 +193,8 @@ public: forwardOneInput(l); } - activation_->forward(sampleOut_); + auto status = activation_->forward(sampleOut_); + status.check(); forwardCost(); } @@ -207,7 +208,8 @@ public: backwardCost(); - activation_->backward(sampleOut_); + auto status = activation_->backward(sampleOut_); + status.check(); if (biases_->getWGrad()) { backwardBias(callback); diff --git a/paddle/gserver/layers/RecurrentLayer.cpp b/paddle/gserver/layers/RecurrentLayer.cpp index 55e0fdfb9048c02b2dcd474c6887eee180328260..b843fa1265cf3c0ad0814fb90f69e245ee5ab4ad 100644 --- a/paddle/gserver/layers/RecurrentLayer.cpp +++ b/paddle/gserver/layers/RecurrentLayer.cpp @@ -217,21 +217,22 @@ void RecurrentLayer::forwardOneSequence(int start, int length) { if (prevOutput_) { frameOutput_[start].value->mul(*prevOutput_, *weight_->getW(), 1, 1); } - activation_->forward(frameOutput_[start]); + activation_->forward(frameOutput_[start]).check(); + for (int i = 1; i < length; ++i) { frameOutput_[start + i].value->mul( *frameOutput_[start + i - 1].value, *weight_->getW(), 1, 1); - activation_->forward(frameOutput_[start + i]); + activation_->forward(frameOutput_[start + i]).check(); } if (prevOutput_) { prevOutput_->assign(*frameOutput_[start + length - 1].value); } } else { - activation_->forward(frameOutput_[start + length - 1]); + activation_->forward(frameOutput_[start + length - 1]).check(); for (int i = length - 2; i >= 0; --i) { frameOutput_[start + i].value->mul( *frameOutput_[start + i + 1].value, *weight_->getW(), 1, 1); - activation_->forward(frameOutput_[start + i]); + activation_->forward(frameOutput_[start + i]).check(); } } } @@ -280,11 +281,11 @@ void RecurrentLayer::backwardOneSequence(int 
start, int length) { MatrixPtr weightT = weight_->getW()->getTranspose(); if (!reversed_) { for (int i = length - 1; i > 0; --i) { - activation_->backward(frameOutput_[start + i]); + activation_->backward(frameOutput_[start + i]).check(); frameOutput_[start + i - 1].grad->mul( *frameOutput_[start + i].grad, *weightT, 1, 1); } - activation_->backward(frameOutput_[start]); + activation_->backward(frameOutput_[start]).check(); if (weight_->getWGrad()) { weight_->getWGrad()->mul( *output_.value->subMatrix(start, length - 1)->getTranspose(), @@ -294,11 +295,11 @@ void RecurrentLayer::backwardOneSequence(int start, int length) { } } else { for (int i = 0; i < length - 1; ++i) { - activation_->backward(frameOutput_[start + i]); + activation_->backward(frameOutput_[start + i]).check(); frameOutput_[start + i + 1].grad->mul( *frameOutput_[start + i].grad, *weightT, 1, 1); } - activation_->backward(frameOutput_[start + length - 1]); + activation_->backward(frameOutput_[start + length - 1]).check(); if (weight_->getWGrad()) { weight_->getWGrad()->mul( *output_.value->subMatrix(start + 1, length - 1)->getTranspose(), @@ -333,7 +334,7 @@ void RecurrentLayer::forwardBatch(int batchSize, } Argument arg; arg.value = batch2; - activation_->forward(arg); + activation_->forward(arg).check(); } } batchValue_->copyBackSeq(*output_.value); @@ -363,7 +364,7 @@ void RecurrentLayer::backwardBatch(int batchSize, Argument arg; arg.value = batch1; arg.grad = batch2; - activation_->backward(arg); + activation_->backward(arg).check(); if (n != 0) { batch1 = batchGrad_->getBatchValue(n - 1, batch2->getHeight()); diff --git a/paddle/gserver/layers/SelectiveFullyConnectedLayer.cpp b/paddle/gserver/layers/SelectiveFullyConnectedLayer.cpp index 5eacff6b7143996130bea64766ef42c66f4c7310..d9a91de8a6f4daf514f089a3d63cb519223bfdd0 100644 --- a/paddle/gserver/layers/SelectiveFullyConnectedLayer.cpp +++ b/paddle/gserver/layers/SelectiveFullyConnectedLayer.cpp @@ -192,7 +192,8 @@ void 
SelectiveFullyConnectedLayer::forward(PassType passType) { nnz, /*trans=*/false, /*useGpu=*/useGpu_); - activation_->forward(arg); + //! TODO(yuyang18): Why we cannot invoke forwardActivation here? + activation_->forward(arg).check(); } else /* train and test in train, not generating */ { // during training, this layer output value is *Matrix*, which is input of // eg. multi-class-cross-entropy diff --git a/paddle/gserver/tests/test_WarpCTCLayer.cpp b/paddle/gserver/tests/test_WarpCTCLayer.cpp index 23ae95852e84216c9065f1b123d35ce868fbb90f..55427e2f12fd7b77c6eea1f65b3229e6fd29d71d 100644 --- a/paddle/gserver/tests/test_WarpCTCLayer.cpp +++ b/paddle/gserver/tests/test_WarpCTCLayer.cpp @@ -148,11 +148,11 @@ LayerPtr createCTCLayer(string name, ActivationFunction* softmaxActivation = ActivationFunction::create("softmax"); - softmaxActivation->forward(dataLayer->getOutput()); + softmaxActivation->forward(dataLayer->getOutput()).check(); layer->forward(PASS_GC); layer->backward(); - softmaxActivation->backward(dataLayer->getOutput()); + softmaxActivation->backward(dataLayer->getOutput()).check(); return layer; } diff --git a/paddle/utils/Compiler.h b/paddle/utils/Compiler.h new file mode 100644 index 0000000000000000000000000000000000000000..cebca5a2a3766110b83231eb0705e48800a7bda6 --- /dev/null +++ b/paddle/utils/Compiler.h @@ -0,0 +1,33 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved. +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + http://www.apache.org/licenses/LICENSE-2.0 +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. 
*/ + +#pragma once +/** + * This header defines some useful attributes for each compiler. It is the + * abstract layer of compilers. + */ +#ifdef __GNUC__ +#define GCC_VERSION \ + (__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__) +#else +#define GCC_VERSION 0 +#endif + +/** + * __must_check macro. It requires the function's return value to be used, + * otherwise it will raise a compile warning. Paddle also treats all compile + * warnings as errors. + */ +#if GCC_VERSION >= 30400 +#define __must_check __attribute__((warn_unused_result)) +#else +#define __must_check +#endif diff --git a/paddle/utils/Error.h b/paddle/utils/Error.h new file mode 100644 index 0000000000000000000000000000000000000000..2b4fbef4e015e7c6895745f220bd444f3883c121 --- /dev/null +++ b/paddle/utils/Error.h @@ -0,0 +1,130 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. +You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#pragma once + +#include +#include +#include +#include +#include +#include "Compiler.h" + +namespace paddle { + +/** + * Error is Paddle error code. It only contains a std::string as error message. + * + * + * There are two styles to return error in Paddle. + * + * 1. Return Error + * When method return a status, the return must use `__must_check` attribute. + * Example as below. + * @code{cpp} + * Error __must_check foo(); + * + * Error __must_check bar() { + * // do something. + * Error s = foo(); // invoke other method return status. 
+ * if (!s) return s; + * // do something else. + * return Error(); + * } + * @endcode{cpp} + * + * 2. Return by parameter. + * It is another way to return an error, by using a pointer parameter. + * Example as below. + * + * @code{cpp} + * Error bar(); + * + * int foo(Error* error) { + * // Do something. + * Error s = bar(); + * if (!s) { + * *error = s; + * return 0; + * } + * // Do something else. + * if (someInternalErrorHappened) { + * *error = Error("Some dimension is too large, %d", dimension); + * return 0; + * } + * // End of method. + * return someValue; + * } + * + * Error foobar() { + * Error s; + * // do something. + * foo(&s); + * if (!s) return s; + * } + * @endcode{cpp} + * + * + * Currently there is a helper method 'check' in Error, because Paddle always + * used log(FATAL) or CHECK to make the program exit before. When we clean all + * log(FATAL) and CHECK in Paddle, 'check' method will be removed. + */ +class Error { +public: + /** + * Construct a no-error value. + */ + Error() {} + + /** + * @brief Create an Error using printf syntax. The message is formatted into a 1024-byte buffer, so longer messages are truncated. + */ + explicit Error(const char* fmt, ...) { + va_list ap; + va_start(ap, fmt); + constexpr size_t kBufferSize = 1024; + char buffer[kBufferSize]; + vsnprintf(buffer, kBufferSize, fmt, ap); + this->msg_.reset(new std::string(buffer)); + va_end(ap); + } + + /** + * @brief msg will return the error message. If no error, return nullptr. + */ + const char* msg() const { + if (msg_) { + return msg_->c_str(); + } else { + return nullptr; + } + } + + /** + * @brief operator bool, returns true if there is no error. + */ + operator bool() const { return msg_ == nullptr; } + + /** + * @brief check this status by glog. + * @note It is a temp method used during cleaning Paddle code. It will be + * removed later. 
+ */ + void check() const { CHECK(*this) << msg(); } + +private: + std::shared_ptr msg_; +}; + +} // namespace paddle diff --git a/paddle/utils/Util.cpp b/paddle/utils/Util.cpp index 411a64aa8d0737a8d57e62fbd0788ffaacfbc9f7..220aac1ff11e0ff263df8459f539237944b94c81 100644 --- a/paddle/utils/Util.cpp +++ b/paddle/utils/Util.cpp @@ -144,20 +144,20 @@ void runInitFunctions() { } void initMain(int argc, char** argv) { - initializeLogging(argc, argv); installLayerStackTracer(); std::string line; for (int i = 0; i < argc; ++i) { line += argv[i]; line += ' '; } - LOG(INFO) << "commandline: " << line; #ifndef GFLAGS_GFLAGS_H_ namespace gflags = google; #endif gflags::ParseCommandLineFlags(&argc, &argv, true); + initializeLogging(argc, argv); + LOG(INFO) << "commandline: " << line; CHECK_EQ(argc, 1) << "Unknown commandline argument: " << argv[1]; installProfilerSwitch(); diff --git a/paddle/utils/tests/CMakeLists.txt b/paddle/utils/tests/CMakeLists.txt index 26fafbd1ab3f2967b765b8bcb973fb745c0e6422..aa923b355377752f9b297a125f5c43c364ba9b06 100644 --- a/paddle/utils/tests/CMakeLists.txt +++ b/paddle/utils/tests/CMakeLists.txt @@ -4,6 +4,7 @@ add_simple_unittest(test_CustomStackTrace) add_simple_unittest(test_ThreadBarrier) add_simple_unittest(test_SpinLock) add_simple_unittest(test_SIMDFlags) +add_simple_unittest(test_Error) add_executable( test_CustomStackTracePrint diff --git a/paddle/utils/tests/test_Error.cpp b/paddle/utils/tests/test_Error.cpp new file mode 100644 index 0000000000000000000000000000000000000000..85156466e2cafd36d49941836c066a542dbbd60e --- /dev/null +++ b/paddle/utils/tests/test_Error.cpp @@ -0,0 +1,34 @@ +/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve. + +Licensed under the Apache License, Version 2.0 (the "License"); +you may not use this file except in compliance with the License. 
+You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, software +distributed under the License is distributed on an "AS IS" BASIS, +WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +See the License for the specific language governing permissions and +limitations under the License. */ + +#include "paddle/utils/Error.h" + +#include + +TEST(Error, testAll) { + paddle::Error error; + ASSERT_TRUE(error); + error = paddle::Error("I'm the error"); + ASSERT_FALSE(error); + ASSERT_STREQ("I'm the error", error.msg()); + + error = paddle::Error("error2"); + ASSERT_FALSE(error); + ASSERT_STREQ("error2", error.msg()); + + int i = 3; + auto error3 = paddle::Error("error%d", i); + ASSERT_FALSE(error3); + ASSERT_STREQ("error3", error3.msg()); +}