Commit f56084b4 authored by dangqingqing

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into pad_op

# Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
#     http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
@@ -29,12 +29,14 @@ INCLUDE_DIRECTORIES(${GLOG_INCLUDE_DIR})
ExternalProject_Add(
    glog
    ${EXTERNAL_PROJECT_LOG_ARGS}
+   DEPENDS gflags
    GIT_REPOSITORY  "https://github.com/google/glog.git"
    PREFIX          ${GLOG_SOURCES_DIR}
    UPDATE_COMMAND  ""
    CMAKE_ARGS      -DCMAKE_INSTALL_PREFIX=${GLOG_INSTALL_DIR}
    CMAKE_ARGS      -DCMAKE_POSITION_INDEPENDENT_CODE=ON
-   CMAKE_ARGS      -DWITH_GFLAGS=OFF
+   CMAKE_ARGS      -DWITH_GFLAGS=ON
+   CMAKE_ARGS      -Dgflags_DIR=${GFLAGS_INSTALL_DIR}/lib/cmake/gflags
    CMAKE_ARGS      -DBUILD_TESTING=OFF
)
...
@@ -29,17 +29,12 @@ IF(WIN32)
        "${PROTOBUF_INSTALL_DIR}/lib/libprotoc.lib" CACHE FILEPATH "protoc library." FORCE)
    SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc.exe" CACHE FILEPATH "protobuf executable." FORCE)
ELSE(WIN32)
-   IF(${HOST_SYSTEM} STREQUAL "centos")
-       SET(LIB "lib64")
-   ELSE()
-       SET(LIB "lib")
-   ENDIF()
    SET(PROTOBUF_LITE_LIBRARY
-       "${PROTOBUF_INSTALL_DIR}/${LIB}/libprotobuf-lite.a" CACHE FILEPATH "protobuf lite library." FORCE)
+       "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf-lite.a" CACHE FILEPATH "protobuf lite library." FORCE)
    SET(PROTOBUF_LIBRARY
-       "${PROTOBUF_INSTALL_DIR}/${LIB}/libprotobuf.a" CACHE FILEPATH "protobuf library." FORCE)
+       "${PROTOBUF_INSTALL_DIR}/lib/libprotobuf.a" CACHE FILEPATH "protobuf library." FORCE)
    SET(PROTOBUF_PROTOC_LIBRARY
-       "${PROTOBUF_INSTALL_DIR}/${LIB}/libprotoc.a" CACHE FILEPATH "protoc library." FORCE)
+       "${PROTOBUF_INSTALL_DIR}/lib/libprotoc.a" CACHE FILEPATH "protoc library." FORCE)
    SET(PROTOBUF_PROTOC_EXECUTABLE "${PROTOBUF_INSTALL_DIR}/bin/protoc" CACHE FILEPATH "protobuf executable." FORCE)
ENDIF(WIN32)
@@ -58,6 +53,7 @@ ExternalProject_Add(
               -DCMAKE_POSITION_INDEPENDENT_CODE=ON
               -DCMAKE_BUILD_TYPE=Release
               -DCMAKE_INSTALL_PREFIX=${PROTOBUF_INSTALL_DIR}
+              -DCMAKE_INSTALL_LIBDIR=lib
)
LIST(APPEND external_project_dependencies protobuf)
...
@@ -26,10 +26,10 @@ IF(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND)
    find_python_module(wheel REQUIRED)
    find_python_module(google.protobuf REQUIRED)
    FIND_PACKAGE(NumPy REQUIRED)
-   IF(${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.0.0")
+   IF(${PY_GOOGLE.PROTOBUF_VERSION} AND ${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.0.0")
        MESSAGE(FATAL_ERROR "Found Python Protobuf ${PY_GOOGLE.PROTOBUF_VERSION} < 3.0.0, "
-       "please use pip to upgrade protobuf.")
+       "please use pip to upgrade protobuf. pip install -U protobuf")
-   ENDIF(${PY_GOOGLE.PROTOBUF_VERSION} VERSION_LESS "3.0.0")
+   ENDIF()
ELSE(PYTHONLIBS_FOUND AND PYTHONINTERP_FOUND)
    MESSAGE(FATAL_ERROR "Please install python 2.7 before building PaddlePaddle.")
##################################### PYTHON ########################################
...
@@ -54,6 +54,7 @@ ExternalProject_Add(
    CMAKE_ARGS -DWITH_GPU=${WITH_GPU}
    CMAKE_ARGS -DWITH_OMP=${USE_OMP}
    CMAKE_ARGS -DWITH_TORCH=OFF
+   CMAKE_ARGS -DCMAKE_DISABLE_FIND_PACKAGE_Torch=TRUE
    CMAKE_ARGS -DBUILD_SHARED=ON
)
...
@@ -12,6 +12,14 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+# Detects the OS and sets appropriate variables.
+# CMAKE_SYSTEM_NAME only gives us a coarse-grained name,
+# but a name like centos is necessary in some cases
+# to distinguish the system for customization.
+#
+# For instance, the protobuf libs path is <install_dir>/lib64
+# on CentOS, but <install_dir>/lib on other systems.
IF(WIN32)
    SET(HOST_SYSTEM "win32")
ELSE(WIN32)
@@ -30,6 +38,10 @@ ELSE(WIN32)
            SET(HOST_SYSTEM "debian")
        ELSEIF(LINUX_ISSUE MATCHES "Ubuntu")
            SET(HOST_SYSTEM "ubuntu")
+       ELSEIF(LINUX_ISSUE MATCHES "Red Hat")
+           SET(HOST_SYSTEM "redhat")
+       ELSEIF(LINUX_ISSUE MATCHES "Fedora")
+           SET(HOST_SYSTEM "fedora")
        ENDIF()
    ENDIF(EXISTS "/etc/issue")
@@ -40,6 +52,10 @@ ELSE(WIN32)
        ENDIF()
    ENDIF(EXISTS "/etc/redhat-release")
+   IF(NOT HOST_SYSTEM)
+       SET(HOST_SYSTEM ${CMAKE_SYSTEM_NAME})
+   ENDIF()
ENDIF(APPLE)
ENDIF(WIN32)
...
@@ -69,8 +69,14 @@ static ClassRegistrar<ActivationFunction> gActivationRegistrar;
class IdentityActivation : public ActivationFunction {
public:
  static const std::string name;
-  void forward(Argument& act) { (void)act; }
-  void backward(Argument& act) { (void)act; }
+  Error __must_check forward(Argument& act) {
+    (void)act;
+    return Error();
+  }
+  Error __must_check backward(Argument& act) {
+    (void)act;
+    return Error();
+  }
  const std::string& getName() const { return name; }
};
const std::string IdentityActivation::name = "";
@@ -86,8 +92,14 @@ static InitFunction __reg_activation__identity([] {
 * \f]
 */
BEGIN_DEFINE_ACTIVATION(sigmoid)
-void forward(Argument& act) { act.value->sigmoid(*act.value); }
-void backward(Argument& act) { act.grad->sigmoidDerivative(*act.value); }
+Error __must_check forward(Argument& act) {
+  act.value->sigmoid(*act.value);
+  return Error();
+}
+Error __must_check backward(Argument& act) {
+  act.grad->sigmoidDerivative(*act.value);
+  return Error();
+}
END_DEFINE_ACTIVATION(sigmoid)
/**
@@ -103,9 +115,12 @@ MatrixPtr sftMaxDot_;
MatrixPtr one_;
public:
-void forward(Argument& act) { act.value->softmax(*act.value); }
+Error __must_check forward(Argument& act) {
+  act.value->softmax(*act.value);
+  return Error();
+}
-void backward(Argument& act) {
+Error __must_check backward(Argument& act) {
  MatrixPtr outputV = act.value;
  MatrixPtr outputG = act.grad;
@@ -137,6 +152,7 @@ void backward(Argument& act) {
    act.grad->softmaxDerivative(*act.value, *sftMaxSum_);
  }
+  return Error();
}
END_DEFINE_ACTIVATION(softmax)
@@ -151,8 +167,11 @@ ACTIVATION_CLASS_NAME(softmax) softmax_;
Argument argument_;
public:
-void forward(Argument& act) {
-  CHECK_EQ(act.value->getWidth(), 1UL);
+Error __must_check forward(Argument& act) {
+  if (act.value->getWidth() != 1UL) {
+    return Error(
+        "Input width for each timestep of sequence softmax should be 1");
+  }
  if (!argument_.value) {
    argument_.value = Matrix::create(nullptr,
@@ -169,10 +188,14 @@ void forward(Argument& act) {
  auto starts = act.sequenceStartPositions->getVector(useGpu(act.deviceId));
  act.value->sequenceSoftmax(*act.value, *starts);
+  return Error();
}
-void backward(Argument& act) {
-  CHECK_EQ(act.grad->getWidth(), 1UL);
+Error __must_check backward(Argument& act) {
+  if (act.value->getWidth() != 1UL) {
+    return Error(
+        "Input width for each timestep of sequence softmax should be 1");
+  }
  size_t numSequences = act.getNumSequences();
  const int* starts = act.sequenceStartPositions->getData(false);
@@ -184,8 +207,10 @@ void backward(Argument& act) {
    argument_.value->setData(act.value->getData() + offset, 1UL, size);
    argument_.grad->setData(act.grad->getData() + offset, 1UL, size);
-    softmax_.backward(argument_);
+    Error status = softmax_.backward(argument_);
+    if (!status) return status;
  }
+  return Error();
}
END_DEFINE_ACTIVATION(sequence_softmax)
@@ -200,9 +225,15 @@ END_DEFINE_ACTIVATION(sequence_softmax)
 *      0 otherwise.
 */
BEGIN_DEFINE_ACTIVATION(relu)
-void forward(Argument& act) { act.value->relu(*act.value); }
-void backward(Argument& act) { act.grad->reluDerivative(*act.value); }
+Error __must_check forward(Argument& act) {
+  act.value->relu(*act.value);
+  return Error();
+}
+Error __must_check backward(Argument& act) {
+  act.grad->reluDerivative(*act.value);
+  return Error();
+}
END_DEFINE_ACTIVATION(relu)
/**
@@ -219,9 +250,15 @@ END_DEFINE_ACTIVATION(relu)
 * TODO(yuyang18): Remove magic number 24 or make it configuable.
 */
BEGIN_DEFINE_ACTIVATION(brelu)
-void forward(Argument& act) { act.value->brelu(*act.value); }
-void backward(Argument& act) { act.grad->breluDerivative(*act.value); }
+Error __must_check forward(Argument& act) {
+  act.value->brelu(*act.value);
+  return Error();
+}
+Error __must_check backward(Argument& act) {
+  act.grad->breluDerivative(*act.value);
+  return Error();
+}
END_DEFINE_ACTIVATION(brelu)
/**
@@ -231,9 +268,15 @@ END_DEFINE_ACTIVATION(brelu)
 * \f]
 */
BEGIN_DEFINE_ACTIVATION(tanh)
-void forward(Argument& act) { act.value->tanh(*act.value); }
-void backward(Argument& act) { act.grad->tanhDerivative(*act.value); }
+Error __must_check forward(Argument& act) {
+  act.value->tanh(*act.value);
+  return Error();
+}
+Error __must_check backward(Argument& act) {
+  act.grad->tanhDerivative(*act.value);
+  return Error();
+}
END_DEFINE_ACTIVATION(tanh)
/**
@@ -248,10 +291,14 @@ real a, b;
public:
  ACTIVATION_CLASS_NAME(stanh)() : a(1.7159), b(2. / 3.) {}
-void forward(Argument& act) { act.value->scaledTanh(*act.value, a, b); }
+Error __must_check forward(Argument& act) {
+  act.value->scaledTanh(*act.value, a, b);
+  return Error();
+}
-void backward(Argument& act) {
+Error __must_check backward(Argument& act) {
  act.grad->scaledTanhDerivative(*act.value, a, b);
+  return Error();
}
END_DEFINE_ACTIVATION(stanh)
@@ -262,9 +309,15 @@ END_DEFINE_ACTIVATION(stanh)
 * \f]
 */
BEGIN_DEFINE_ACTIVATION(softrelu)
-void forward(Argument& act) { act.value->softrelu(*act.value); }
-void backward(Argument& act) { act.grad->softreluDerivative(*act.value); }
+Error __must_check forward(Argument& act) {
+  act.value->softrelu(*act.value);
+  return Error();
+}
+Error __must_check backward(Argument& act) {
+  act.grad->softreluDerivative(*act.value);
+  return Error();
+}
END_DEFINE_ACTIVATION(softrelu)
/**
@@ -280,7 +333,7 @@ END_DEFINE_ACTIVATION(softrelu)
 *      0 if z=0
 */
BEGIN_DEFINE_ACTIVATION(abs)
-void forward(Argument& act) {
+Error __must_check forward(Argument& act) {
  SetDevice device(act.deviceId);
  Matrix::resizeOrCreate(act.in,
                         act.value->getHeight(),
@@ -290,9 +343,13 @@ void forward(Argument& act) {
  act.in->copyFrom(*act.value);
  act.value->abs2(*act.value);
+  return Error();
}
-void backward(Argument& act) { act.grad->absDerivative(*act.in); }
+Error __must_check backward(Argument& act) {
+  act.grad->absDerivative(*act.in);
+  return Error();
+}
END_DEFINE_ACTIVATION(abs)
/**
@@ -302,7 +359,7 @@ END_DEFINE_ACTIVATION(abs)
 * \f]
 */
BEGIN_DEFINE_ACTIVATION(square)
-void forward(Argument& act) {
+Error __must_check forward(Argument& act) {
  SetDevice device(act.deviceId);
  Matrix::resizeOrCreate(act.in,
                         act.value->getHeight(),
@@ -312,9 +369,13 @@ void forward(Argument& act) {
  act.in->copyFrom(*act.value);
  act.value->square2(*act.value);
+  return Error();
}
-void backward(Argument& act) { act.grad->squareDerivative(*act.in); }
+Error __must_check backward(Argument& act) {
+  act.grad->squareDerivative(*act.in);
+  return Error();
+}
END_DEFINE_ACTIVATION(square)
/**
@@ -324,9 +385,15 @@ END_DEFINE_ACTIVATION(square)
 * \f]
 */
BEGIN_DEFINE_ACTIVATION(exponential)
-void forward(Argument& act) { act.value->exp2(*act.value); }
-void backward(Argument& act) { act.grad->expDerivative(*act.value); }
+Error __must_check forward(Argument& act) {
+  act.value->exp2(*act.value);
+  return Error();
+}
+Error __must_check backward(Argument& act) {
+  act.grad->expDerivative(*act.value);
+  return Error();
+}
END_DEFINE_ACTIVATION(exponential)
/**
@@ -336,7 +403,7 @@ END_DEFINE_ACTIVATION(exponential)
 * \f]
 */
BEGIN_DEFINE_ACTIVATION(log)
-void forward(Argument& act) {
+Error __must_check forward(Argument& act) {
  SetDevice device(act.deviceId);
  Matrix::resizeOrCreate(act.in,
                         act.value->getHeight(),
@@ -346,9 +413,13 @@ void forward(Argument& act) {
  act.in->copyFrom(*act.value);
  act.value->log2(*act.value);
+  return Error();
}
-void backward(Argument& act) { act.grad->dotDiv(*act.grad, *act.in); }
+Error __must_check backward(Argument& act) {
+  act.grad->dotDiv(*act.grad, *act.in);
+  return Error();
+}
END_DEFINE_ACTIVATION(log)
ActivationFunction* ActivationFunction::create(const std::string& type) {
...
@@ -15,6 +15,7 @@ limitations under the License. */
#pragma once
#include <string>
#include <vector>
+#include "paddle/utils/Error.h"
namespace paddle {
@@ -48,7 +49,7 @@ public:
 *
 * Usually, act is Layer::output_
 */
-  virtual void forward(Argument& act) = 0;
+  virtual Error __must_check forward(Argument& act) = 0;
  /**
   * @brief Backward propagaion
@@ -57,7 +58,7 @@ public:
   * - Before calling backward(), act.grad = dE / dy, where E is the error/cost
   * - After backward() returns, act.grad = dE / dx = (dE/dy) * (dy/dx)
   */
-  virtual void backward(Argument& act) = 0;
+  virtual Error __must_check backward(Argument& act) = 0;
  virtual const std::string& getName() const = 0;
};
...
@@ -314,13 +314,13 @@ void GatedRecurrentLayer::forwardBatch(int batchSize,
    batchValue_->resizeOrCreate(*output_.value);
    batchValue_->copy(*inputValue, *gate_.value, /* seq2batch */ true);
-   if (bias_ && bias_->getWGrad()) {
+   if (bias_) {
      gate_.value->addBias(*(bias_->getW()), 1);
    }
    {
      int numBatch = batchValue_->getNumBatch();
-     int batchSize = 0;
+     int curBatchSize = 0;
      AsyncGpuBlock asyncGpuBlock;
      for (int n = 0; n < numBatch; n++) {
        MatrixPtr outputValueTmp = batchValue_->getBatchValue(n);
@@ -330,16 +330,17 @@ void GatedRecurrentLayer::forwardBatch(int batchSize,
        gruValue.resetOutputValue =
            (batchValue_->getBatchValue(*resetOutput_.value, n))->getData();
-       batchSize = outputValueTmp->getHeight();
+       curBatchSize = outputValueTmp->getHeight();
        gruValue.prevOutValue =
-           (n == 0 ? nullptr
-                   : (batchValue_->getBatchValue(n - 1, batchSize))->getData());
+           (n == 0
+                ? nullptr
+                : (batchValue_->getBatchValue(n - 1, curBatchSize))->getData());
        {
          if (useGpu_) {
-           GruCompute::forward<1>(gruValue, getSize(), batchSize);
+           GruCompute::forward<1>(gruValue, getSize(), curBatchSize);
          } else {
-           GruCompute::forward<0>(gruValue, getSize(), batchSize);
+           GruCompute::forward<0>(gruValue, getSize(), curBatchSize);
          }
        }
      }
...
@@ -15,6 +15,7 @@ limitations under the License. */
#include "paddle/utils/Util.h"
#include "paddle/math/SparseMatrix.h"
+#include "paddle/utils/Error.h"
#include "paddle/utils/Logging.h"
#include "AddtoLayer.h"
@@ -334,7 +335,8 @@ void Layer::showOutputStats() {
void Layer::forwardActivation() {
  /* activation */
-  activation_->forward(output_);
+  auto status = activation_->forward(output_);
+  status.check();
  /* dropout */
  if (config_.drop_rate() > 0) {
@@ -372,7 +374,8 @@ void Layer::backwardActivation() {
    oGrad->dotMul(*oGrad, *dropOutMask_);
  }
-  activation_->backward(output_);
+  auto status = activation_->backward(output_);
+  status.check();
}
void Layer::forwardDropOut() {
...
@@ -506,9 +506,12 @@ void MDLstmLayer::forwardGate2OutputSequence(int start,
          *frameState_[start + preOffsetV[i]].value, *checkFgOneDim, 1.0, 1.0);
    }
  }
-  activationGate_->forward(frameInputGate_[idxCurr]);
-  activationGate_->forward(frameForgetGate_[idxCurr]);
-  activation_->forward(frameInputNode_[idxCurr]);
+  auto status = activationGate_->forward(frameInputGate_[idxCurr]);
+  status.check();
+  status = activationGate_->forward(frameForgetGate_[idxCurr]);
+  status.check();
+  status = activation_->forward(frameInputNode_[idxCurr]);
+  status.check();
  frameState_[idxCurr].value->zeroMem();
  for (int i = 0; i < numDims_; i++) {
@@ -530,10 +533,12 @@ void MDLstmLayer::forwardGate2OutputSequence(int start,
  frameOutputGate_[idxCurr].value->addDotMul(
      *frameState_[idxCurr].value, *checkOg_, 1.0, 1.0);
-  activationGate_->forward(frameOutputGate_[idxCurr]);
+  status = activationGate_->forward(frameOutputGate_[idxCurr]);
+  status.check();
  framePreOutput_[idxCurr].value->copyFrom(*(frameState_[idxCurr].value));
-  activationState_->forward(framePreOutput_[idxCurr]);
+  status = activationState_->forward(framePreOutput_[idxCurr]);
+  status.check();
  frameOutput_[idxCurr].value->dotMul(*framePreOutput_[idxCurr].value,
                                      *frameOutputGate_[idxCurr].value);
@@ -640,12 +645,12 @@ void MDLstmLayer::backwardGate2OutputSequence(int start,
  framePreOutput_[idxCurr].grad->dotMul(*frameOutput_[idxCurr].grad,
                                        *frameOutputGate_[idxCurr].value);
-  activationState_->backward(framePreOutput_[idxCurr]);
+  activationState_->backward(framePreOutput_[idxCurr]).check();
  frameState_[idxCurr].grad->copyFrom(*(framePreOutput_[idxCurr].grad));
  frameOutputGate_[idxCurr].grad->dotMul(*frameOutput_[idxCurr].grad,
                                         *framePreOutput_[idxCurr].value);
-  activationGate_->backward(frameOutputGate_[idxCurr]);
+  activationGate_->backward(frameOutputGate_[idxCurr]).check();
  frameState_[idxCurr].grad->addDotMul(
      *frameOutputGate_[idxCurr].grad, *checkOg_, 1.0, 1.0);
@@ -702,9 +707,9 @@ void MDLstmLayer::backwardGate2OutputSequence(int start,
    }
  }
-  activationGate_->backward(frameInputGate_[idxCurr]);
-  activationGate_->backward(frameForgetGate_[idxCurr]);
-  activation_->backward(frameInputNode_[idxCurr]);
+  activationGate_->backward(frameInputGate_[idxCurr]).check();
+  activationGate_->backward(frameForgetGate_[idxCurr]).check();
+  activation_->backward(frameInputNode_[idxCurr]).check();
  if (bias_->getWGrad()) {
    for (int i = 0; i < numDims_; i++) {
...
@@ -193,7 +193,8 @@ public:
      forwardOneInput(l);
    }
-    activation_->forward(sampleOut_);
+    auto status = activation_->forward(sampleOut_);
+    status.check();
    forwardCost();
  }
@@ -207,7 +208,8 @@ public:
    backwardCost();
-    activation_->backward(sampleOut_);
+    auto status = activation_->backward(sampleOut_);
+    status.check();
    if (biases_->getWGrad()) {
      backwardBias(callback);
...
@@ -217,21 +217,22 @@ void RecurrentLayer::forwardOneSequence(int start, int length) {
    if (prevOutput_) {
      frameOutput_[start].value->mul(*prevOutput_, *weight_->getW(), 1, 1);
    }
-    activation_->forward(frameOutput_[start]);
+    activation_->forward(frameOutput_[start]).check();
    for (int i = 1; i < length; ++i) {
      frameOutput_[start + i].value->mul(
          *frameOutput_[start + i - 1].value, *weight_->getW(), 1, 1);
-      activation_->forward(frameOutput_[start + i]);
+      activation_->forward(frameOutput_[start + i]).check();
    }
    if (prevOutput_) {
      prevOutput_->assign(*frameOutput_[start + length - 1].value);
    }
  } else {
-    activation_->forward(frameOutput_[start + length - 1]);
+    activation_->forward(frameOutput_[start + length - 1]).check();
    for (int i = length - 2; i >= 0; --i) {
      frameOutput_[start + i].value->mul(
          *frameOutput_[start + i + 1].value, *weight_->getW(), 1, 1);
-      activation_->forward(frameOutput_[start + i]);
+      activation_->forward(frameOutput_[start + i]).check();
    }
  }
}
@@ -280,11 +281,11 @@ void RecurrentLayer::backwardOneSequence(int start, int length) {
  MatrixPtr weightT = weight_->getW()->getTranspose();
  if (!reversed_) {
    for (int i = length - 1; i > 0; --i) {
-      activation_->backward(frameOutput_[start + i]);
+      activation_->backward(frameOutput_[start + i]).check();
      frameOutput_[start + i - 1].grad->mul(
          *frameOutput_[start + i].grad, *weightT, 1, 1);
    }
-    activation_->backward(frameOutput_[start]);
+    activation_->backward(frameOutput_[start]).check();
    if (weight_->getWGrad()) {
      weight_->getWGrad()->mul(
          *output_.value->subMatrix(start, length - 1)->getTranspose(),
@@ -294,11 +295,11 @@ void RecurrentLayer::backwardOneSequence(int start, int length) {
    }
  } else {
    for (int i = 0; i < length - 1; ++i) {
-      activation_->backward(frameOutput_[start + i]);
+      activation_->backward(frameOutput_[start + i]).check();
      frameOutput_[start + i + 1].grad->mul(
          *frameOutput_[start + i].grad, *weightT, 1, 1);
    }
-    activation_->backward(frameOutput_[start + length - 1]);
+    activation_->backward(frameOutput_[start + length - 1]).check();
    if (weight_->getWGrad()) {
      weight_->getWGrad()->mul(
          *output_.value->subMatrix(start + 1, length - 1)->getTranspose(),
@@ -333,7 +334,7 @@ void RecurrentLayer::forwardBatch(int batchSize,
      }
      Argument arg;
      arg.value = batch2;
-      activation_->forward(arg);
+      activation_->forward(arg).check();
    }
  }
  batchValue_->copyBackSeq(*output_.value);
@@ -363,7 +364,7 @@ void RecurrentLayer::backwardBatch(int batchSize,
      Argument arg;
      arg.value = batch1;
      arg.grad = batch2;
-      activation_->backward(arg);
+      activation_->backward(arg).check();
      if (n != 0) {
        batch1 = batchGrad_->getBatchValue(n - 1, batch2->getHeight());
...
@@ -192,7 +192,8 @@ void SelectiveFullyConnectedLayer::forward(PassType passType) {
        nnz,
        /*trans=*/false,
        /*useGpu=*/useGpu_);
-    activation_->forward(arg);
+    //! TODO(yuyang18): Why we cannot invoke forwardActivation here?
+    activation_->forward(arg).check();
  } else /* train and test in train, not generating */ {
    // during training, this layer output value is *Matrix*, which is input of
    // eg. multi-class-cross-entropy
...
@@ -148,11 +148,11 @@ LayerPtr createCTCLayer(string name,
  ActivationFunction* softmaxActivation = ActivationFunction::create("softmax");
-  softmaxActivation->forward(dataLayer->getOutput());
+  softmaxActivation->forward(dataLayer->getOutput()).check();
  layer->forward(PASS_GC);
  layer->backward();
-  softmaxActivation->backward(dataLayer->getOutput());
+  softmaxActivation->backward(dataLayer->getOutput()).check();
  return layer;
}
...
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
/**
 * This header defines some useful attributes provided by each compiler. It is
 * an abstraction layer over compiler-specific extensions.
*/
#ifdef __GNUC__
#define GCC_VERSION \
(__GNUC__ * 10000 + __GNUC_MINOR__ * 100 + __GNUC_PATCHLEVEL__)
#else
#define GCC_VERSION
#endif
/**
 * The __must_check macro marks a function so that its return value must be
 * used; otherwise the compiler raises a warning. Paddle also treats all
 * compile warnings as errors.
*/
#if GCC_VERSION >= 30400
#define __must_check __attribute__((warn_unused_result))
#else
#define __must_check
#endif
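The sketch below is editor-added illustration, not part of this commit; it shows how a __must_check function behaves under GCC, using a hypothetical function name checkedOp.

// Editor's illustrative sketch (hypothetical function, not from this commit).
#include "paddle/utils/Compiler.h"

int __must_check checkedOp() { return 0; }

void caller() {
  int rc = checkedOp();  // fine: the return value is consumed
  (void)rc;
  // checkedOp();        // would trigger -Wunused-result; Paddle compiles
  //                     // with warnings treated as errors, so this fails
}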
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#pragma once
#include <glog/logging.h>
#include <stdarg.h>
#include <stdio.h>
#include <memory>
#include <string>
#include "Compiler.h"
namespace paddle {
/**
 * Error is the Paddle error code. It only contains a std::string as the error
 * message.
 *
 *
 * There are two styles to return an error in Paddle.
 *
 * 1. Return Error
 *    When a method returns a status, the return type must use the
 *    `__must_check` attribute. Example as below.
 * @code{cpp}
 * Error __must_check foo();
 *
 * Error __must_check bar() {
 *   // do something.
 *   Error s = foo();  // invoke another method that returns a status.
 *   if (!s) return s;
 *   // do something else.
 *   return Error();
 * }
 * @endcode{cpp}
 *
 * 2. Return by parameter.
 *    It is another way to return an error, by using a pointer parameter.
 *    Example as below.
 *
 * @code{cpp}
 * Error bar();
 *
 * int foo(Error* error) {
 *   // Do something.
 *   Error s = bar();
 *   if (!s) {
 *     *error = s;
 *     return 0;
 *   }
 *   // Do something else.
 *   if (someInternalErrorHappened) {
 *     *error = Error("Some dimension is too large, %d", dimension);
 *     return 0;
 *   }
 *   // End of method.
 *   return someValue;
 * }
 *
 * Error foobar() {
 *   Error s;
 *   // do something.
 *   foo(&s);
 *   return s;
 * }
 * @endcode{cpp}
 *
 *
 * Currently there is a helper method 'check' in Error, because Paddle used to
 * rely on LOG(FATAL) or CHECK to exit the program on failure. Once all
 * LOG(FATAL) and CHECK calls are cleaned out of Paddle, the 'check' method
 * will be removed.
*/
class Error {
public:
/**
* Construct a no-error value.
*/
Error() {}
/**
 * @brief Create an Error using printf-style formatting.
*/
explicit Error(const char* fmt, ...) {
va_list ap;
va_start(ap, fmt);
constexpr size_t kBufferSize = 1024;
char buffer[kBufferSize];
vsnprintf(buffer, kBufferSize, fmt, ap);
this->msg_.reset(new std::string(buffer));
va_end(ap);
}
/**
 * @brief msg returns the error message, or nullptr if there is no error.
*/
const char* msg() const {
if (msg_) {
return msg_->c_str();
} else {
return nullptr;
}
}
/**
 * @brief operator bool; returns true if there is no error.
*/
operator bool() const { return msg_ == nullptr; }
/**
 * @brief check this status with glog.
 * @note It is a temporary method used while cleaning up Paddle code. It will
 * be removed later.
*/
void check() const { CHECK(*this) << msg(); }
private:
std::shared_ptr<std::string> msg_;
};
} // namespace paddle
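A brief usage sketch, added by the editor and not part of this commit (loadWeights and initLayer are hypothetical names): Error pairs with __must_check so callers either propagate a failure or explicitly check() it.

// Editor's illustrative sketch; the helper functions are hypothetical.
#include <cstddef>
#include "paddle/utils/Error.h"

using paddle::Error;

Error __must_check loadWeights(size_t dim) {
  if (dim == 0) return Error("dimension must be positive, got %zu", dim);
  return Error();  // a default-constructed Error means success
}

Error __must_check initLayer(size_t dim) {
  Error s = loadWeights(dim);
  if (!s) return s;  // operator bool() is false on failure, so propagate
  return Error();
}

void setup() {
  // The transitional style used throughout this commit: abort via glog
  // if initialization fails.
  initLayer(128).check();
}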
@@ -144,20 +144,20 @@ void runInitFunctions() {
}
void initMain(int argc, char** argv) {
-  initializeLogging(argc, argv);
  installLayerStackTracer();
  std::string line;
  for (int i = 0; i < argc; ++i) {
    line += argv[i];
    line += ' ';
  }
-  LOG(INFO) << "commandline: " << line;
#ifndef GFLAGS_GFLAGS_H_
namespace gflags = google;
#endif
  gflags::ParseCommandLineFlags(&argc, &argv, true);
+  initializeLogging(argc, argv);
+  LOG(INFO) << "commandline: " << line;
  CHECK_EQ(argc, 1) << "Unknown commandline argument: " << argv[1];
  installProfilerSwitch();
...
@@ -4,6 +4,7 @@ add_simple_unittest(test_CustomStackTrace)
add_simple_unittest(test_ThreadBarrier)
add_simple_unittest(test_SpinLock)
add_simple_unittest(test_SIMDFlags)
+add_simple_unittest(test_Error)
add_executable(
    test_CustomStackTracePrint
...
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserve.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/utils/Error.h"
#include <gtest/gtest.h>
TEST(Error, testAll) {
paddle::Error error;
ASSERT_TRUE(error);
error = paddle::Error("I'm the error");
ASSERT_FALSE(error);
ASSERT_STREQ("I'm the error", error.msg());
error = paddle::Error("error2");
ASSERT_FALSE(error);
ASSERT_STREQ("error2", error.msg());
int i = 3;
auto error3 = paddle::Error("error%d", i);
ASSERT_FALSE(error3);
ASSERT_STREQ("error3", error3.msg());
}