提交 9580c450 编写于 作者: P peterzhang2029

Merge branch 'develop' of https://github.com/PaddlePaddle/Paddle into add_bn_eq

...@@ -76,11 +76,9 @@ set(IOS_PLATFORM ${IOS_PLATFORM} CACHE STRING "Type of iOS Platform") ...@@ -76,11 +76,9 @@ set(IOS_PLATFORM ${IOS_PLATFORM} CACHE STRING "Type of iOS Platform")
# Set the architecture for iOS # Set the architecture for iOS
if(NOT DEFINED IOS_ARCH) if(NOT DEFINED IOS_ARCH)
if(IOS_PLATFORM STREQUAL "OS") if(IOS_PLATFORM STREQUAL "OS")
# FIXME(liuyiqun): support "armv7;armv7s;arm64" future set(IOS_ARCH "armv7;armv7s;arm64")
set(IOS_ARCH "arm64")
elseif(IOS_PLATFORM STREQUAL "SIMULATOR") elseif(IOS_PLATFORM STREQUAL "SIMULATOR")
# FIXME(liuyiqun): support "i386;x86_64" future set(IOS_ARCH "i386;x86_64")
set(IOS_ARCH "x86_64")
endif() endif()
endif() endif()
set(CMAKE_OSX_ARCHITECTURES ${IOS_ARCH} CACHE string "Build architecture for iOS") set(CMAKE_OSX_ARCHITECTURES ${IOS_ARCH} CACHE string "Build architecture for iOS")
...@@ -248,7 +246,7 @@ set(IOS_COMPILER_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} ${XCODE_IOS_BITCODE_ ...@@ -248,7 +246,7 @@ set(IOS_COMPILER_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} ${XCODE_IOS_BITCODE_
# Hidden visibilty is required for cxx on iOS # Hidden visibilty is required for cxx on iOS
set(CMAKE_C_FLAGS "${IOS_COMPILER_FLAGS} ${CMAKE_C_FLAGS}" CACHE STRING "C flags") set(CMAKE_C_FLAGS "${IOS_COMPILER_FLAGS} ${CMAKE_C_FLAGS}" CACHE STRING "C flags")
set(CMAKE_CXX_FLAGS "${IOS_COMPILER_FLAGS} -fvisibility-inlines-hidden ${CMAKE_CXX_FLAGS}" CACHE STRING "CXX flags") set(CMAKE_CXX_FLAGS "${IOS_COMPILER_FLAGS} -fvisibility=hidden -fvisibility-inlines-hidden ${CMAKE_CXX_FLAGS}" CACHE STRING "CXX flags")
set(IOS_LINK_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first") set(IOS_LINK_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first")
......
...@@ -45,15 +45,14 @@ IF(NOT ${CBLAS_FOUND}) ...@@ -45,15 +45,14 @@ IF(NOT ${CBLAS_FOUND})
SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV8 BINARY=64 USE_THREAD=0) SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV8 BINARY=64 USE_THREAD=0)
ENDIF() ENDIF()
ELSEIF(IOS) ELSEIF(IOS)
# FIXME(liuyiqun): support multiple architectures IF(CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
SET(OPENBLAS_COMMIT "b5c96fcfcdc82945502a2303116a64d89985daf5") SET(OPENBLAS_COMMIT "b5c96fcfcdc82945502a2303116a64d89985daf5")
SET(OPENBLAS_CC "${OPENBLAS_CC} ${CMAKE_C_FLAGS} -isysroot ${CMAKE_OSX_SYSROOT}") SET(OPENBLAS_CC "${OPENBLAS_CC} ${CMAKE_C_FLAGS} -isysroot ${CMAKE_OSX_SYSROOT}")
IF(CMAKE_OSX_ARCHITECTURES MATCHES "armv7")
SET(OPENBLAS_CC "${OPENBLAS_CC} -arch armv7")
SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV7 ARM_SOFTFP_ABI=1 USE_THREAD=0)
ELSEIF(CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
SET(OPENBLAS_CC "${OPENBLAS_CC} -arch arm64") SET(OPENBLAS_CC "${OPENBLAS_CC} -arch arm64")
SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV8 BINARY=64 USE_THREAD=0 CROSS_SUFFIX=${CROSS_SUFFIX}) SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV8 BINARY=64 USE_THREAD=0 CROSS_SUFFIX=${CROSS_SUFFIX})
ELSE()
MESSAGE(FATAL_ERROR "OpenBLAS only support arm64 architectures on iOS. "
"You can set IOS_USE_VECLIB_FOR_BLAS=ON or USE_EIGEN_FOR_BLAS=ON to use other blas library instead.")
ENDIF() ENDIF()
ELSEIF(RPI) ELSEIF(RPI)
# use hardfp # use hardfp
......
...@@ -12,6 +12,10 @@ ...@@ -12,6 +12,10 @@
# See the License for the specific language governing permissions and # See the License for the specific language governing permissions and
# limitations under the License. # limitations under the License.
IF(MOBILE_INFERENCE)
return()
ENDIF()
INCLUDE(ExternalProject) INCLUDE(ExternalProject)
SET(WARPCTC_SOURCES_DIR ${THIRD_PARTY_PATH}/warpctc) SET(WARPCTC_SOURCES_DIR ${THIRD_PARTY_PATH}/warpctc)
......
# 构建Android平台上的PaddlePaddle库 # Android平台编译指南
用户可通过如下两种方式,交叉编译Android平台上适用的PaddlePaddle库: 用户可通过如下两种方式,交叉编译Android平台上适用的PaddlePaddle库:
- 基于Docker容器的编译方式 - 基于Docker容器的编译方式
......
# 构建iOS平台上的PaddlePaddle库 # iOS平台编译指南
交叉编译iOS平台上适用的PaddlePaddle库,需要在MacOS系统上进行。本文的将介绍在MacOS上,从源码交叉编译iOS平台上适用的PaddlePaddle库。 交叉编译iOS平台上适用的PaddlePaddle库,需要在MacOS系统上进行。本文的将介绍在MacOS上,从源码交叉编译iOS平台上适用的PaddlePaddle库。
## 准备交叉编译环境 ## 准备交叉编译环境
...@@ -25,7 +25,7 @@ iOS平台可选配置参数: ...@@ -25,7 +25,7 @@ iOS平台可选配置参数:
- `IOS_PLATFORM`,可设置为`OS/SIMULATOR`,默认值为`OS` - `IOS_PLATFORM`,可设置为`OS/SIMULATOR`,默认值为`OS`
- `OS`,构建目标为`arm`架构的iPhone或者iPad等物理设备。 - `OS`,构建目标为`arm`架构的iPhone或者iPad等物理设备。
- `SIMULATOR`,构建目标为`x86`架构的模拟器平台。 - `SIMULATOR`,构建目标为`x86`架构的模拟器平台。
- `IOS_ARCH`,目标架构。针对不同的`IOS_PLATFORM`,可设置的目标架构如下表所示: - `IOS_ARCH`,目标架构。针对不同的`IOS_PLATFORM`,可设置的目标架构如下表所示,默认编译所有架构
<table class="docutils"> <table class="docutils">
<colgroup> <colgroup>
...@@ -41,11 +41,11 @@ iOS平台可选配置参数: ...@@ -41,11 +41,11 @@ iOS平台可选配置参数:
<tbody valign="top"> <tbody valign="top">
<tr class="row-even"> <tr class="row-even">
<td>OS</td> <td>OS</td>
<td>armv7, armv7s, arm64 (默认)</td> <td>armv7, armv7s, arm64 </td>
</tr> </tr>
<tr class="row-odd"> <tr class="row-odd">
<td>SIMULATOR</td> <td>SIMULATOR</td>
<td>i386, x86_64 (默认)</td> <td>i386, x86_64 </td>
</tr> </tr>
</tbody> </tbody>
</table> </table>
...@@ -66,7 +66,7 @@ iOS平台可选配置参数: ...@@ -66,7 +66,7 @@ iOS平台可选配置参数:
```bash ```bash
cmake -DCMAKE_SYSTEM_NAME=iOS \ cmake -DCMAKE_SYSTEM_NAME=iOS \
-DIOS_PLATFORM=OS \ -DIOS_PLATFORM=OS \
-DIOS_ARCH="arm64" \ -DIOS_ARCH="armv7;arm64" \
-DIOS_ENABLE_BITCODE=ON \ -DIOS_ENABLE_BITCODE=ON \
-DIOS_USE_VECLIB_FOR_BLAS=ON \ -DIOS_USE_VECLIB_FOR_BLAS=ON \
-DCMAKE_INSTALL_PREFIX=your/path/to/install \ -DCMAKE_INSTALL_PREFIX=your/path/to/install \
...@@ -112,6 +112,6 @@ $ make install ...@@ -112,6 +112,6 @@ $ make install
- `lib`目录,其中包含PaddlePaddle的C-API静态库 - `lib`目录,其中包含PaddlePaddle的C-API静态库
- `third_party`目录,其中包含所依赖的所有第三方库 - `third_party`目录,其中包含所依赖的所有第三方库
注意,不同架构的PaddlePaddle库建议安装到不同的目录下,然后使用`lipo`工具将多个静态库合并成一个支持多个架构的fat库。 注意,如果PaddlePaddle库需要同时支持真机和模拟器,则需要分别编译真机和模拟器版本,然后使用`lipo`工具合并fat库。
自此,PaddlePaddle库已经安装完成,用户可将合成的fat库用于深度学习相关的iOS App中,调用方法见C-API文档。 自此,PaddlePaddle库已经安装完成,用户可将合成的fat库用于深度学习相关的iOS App中,调用方法见C-API文档。
# 构建Raspberry Pi平台上的PaddlePaddle库 # Raspberry Pi平台编译指南
通常有两个方法来构建基于 Rasspberry Pi 的版本: 通常有两个方法来构建基于 Rasspberry Pi 的版本:
......
...@@ -25,7 +25,9 @@ limitations under the License. */ ...@@ -25,7 +25,9 @@ limitations under the License. */
#include "hl_matrix.h" #include "hl_matrix.h"
#include "hl_sequence.h" #include "hl_sequence.h"
#include "hl_sparse.h" #include "hl_sparse.h"
#ifndef PADDLE_MOBILE_INFERENCE
#include "hl_warpctc_wrap.h" #include "hl_warpctc_wrap.h"
#endif
#ifdef HPPL_STUB_FUNC #ifdef HPPL_STUB_FUNC
#include "stub/hl_aggregate_stub.h" #include "stub/hl_aggregate_stub.h"
......
...@@ -41,7 +41,7 @@ bool BatchNormBaseLayer::init(const LayerMap& layerMap, ...@@ -41,7 +41,7 @@ bool BatchNormBaseLayer::init(const LayerMap& layerMap,
useGlobalStats_ = config_.use_global_stats(); useGlobalStats_ = config_.use_global_stats();
} }
movingAvgFraction_ = config_.moving_average_fraction(); movingAvgFraction_ = config_.moving_average_fraction();
EPS = config_.epsilon(); epsilon_ = config_.epsilon();
weight_.reset(new Weight(1, channels_, parameters_[0])); weight_.reset(new Weight(1, channels_, parameters_[0]));
movingMean_.reset(new Weight(1, channels_, parameters_[1])); movingMean_.reset(new Weight(1, channels_, parameters_[1]));
......
...@@ -94,8 +94,8 @@ protected: ...@@ -94,8 +94,8 @@ protected:
bool useGlobalStats_; bool useGlobalStats_;
// use to compute moving mean and variance. // use to compute moving mean and variance.
real movingAvgFraction_; real movingAvgFraction_;
// Epsilon value used in the batch normalization formula. // Epsilon is a small random noise used in batch normalization for stability.
real EPS; real epsilon_;
}; };
} // namespace paddle } // namespace paddle
...@@ -51,7 +51,7 @@ void BatchNormalizationLayer::calMeanAndStd(const MatrixPtr& mat) { ...@@ -51,7 +51,7 @@ void BatchNormalizationLayer::calMeanAndStd(const MatrixPtr& mat) {
calMovingMeanAndVar(); calMovingMeanAndVar();
savedInvVar_->subScalar(-EPS); savedInvVar_->subScalar(-epsilon_);
savedInvVar_->sqrt2(*savedInvVar_); savedInvVar_->sqrt2(*savedInvVar_);
} }
...@@ -72,7 +72,7 @@ void BatchNormalizationLayer::setMeanAndStd() { ...@@ -72,7 +72,7 @@ void BatchNormalizationLayer::setMeanAndStd() {
savedInvVar_->copyFrom(*(movingVar_->getW())); savedInvVar_->copyFrom(*(movingVar_->getW()));
savedInvVar_->downClip(real(0.0)); savedInvVar_->downClip(real(0.0));
savedInvVar_->subScalar(-EPS); savedInvVar_->subScalar(-epsilon_);
savedInvVar_->sqrt2(*savedInvVar_); savedInvVar_->sqrt2(*savedInvVar_);
} }
......
...@@ -60,7 +60,15 @@ void CudnnBatchNormLayer::forward(PassType passType) { ...@@ -60,7 +60,15 @@ void CudnnBatchNormLayer::forward(PassType passType) {
real* beta = biases_->getW()->getData(); real* beta = biases_->getW()->getData();
real* movingMean = movingMean_->getW()->getData(); real* movingMean = movingMean_->getW()->getData();
real* movingVar = movingVar_->getW()->getData(); real* movingVar = movingVar_->getW()->getData();
EPS_ = std::max(MIN_EPS, static_cast<double>(EPS));
/**
* If epsilon_ equals to 1e-5 and eps_ is assigned the value of
* static_cast<double>(epsilon_), The CUDNN_STATUS_BAD_PARAM error
* will occur due to eps_ value is less than
* CUDNN_BN_MIN_EPSILON.
* The following code is to ensure that the eps_ meets requirement.
*/
eps_ = std::max(MIN_EPS, static_cast<double>(epsilon_));
if (!useGlobalStats_) { if (!useGlobalStats_) {
REGISTER_TIMER_INFO("CudnnBatchFwTimer", getName().c_str()); REGISTER_TIMER_INFO("CudnnBatchFwTimer", getName().c_str());
...@@ -76,7 +84,7 @@ void CudnnBatchNormLayer::forward(PassType passType) { ...@@ -76,7 +84,7 @@ void CudnnBatchNormLayer::forward(PassType passType) {
1.0 - movingAvgFraction_, 1.0 - movingAvgFraction_,
movingMean, movingMean,
movingVar, movingVar,
EPS_, eps_,
savedMean, savedMean,
savedInvVar); savedInvVar);
} else { } else {
...@@ -91,7 +99,7 @@ void CudnnBatchNormLayer::forward(PassType passType) { ...@@ -91,7 +99,7 @@ void CudnnBatchNormLayer::forward(PassType passType) {
beta, beta,
movingMean, movingMean,
movingVar, movingVar,
EPS_); eps_);
} else { } else {
// There is a limitation in cudnn library. // There is a limitation in cudnn library.
// When the batch size is larger than 1024 in cuDNN v5.1, // When the batch size is larger than 1024 in cuDNN v5.1,
...@@ -102,7 +110,7 @@ void CudnnBatchNormLayer::forward(PassType passType) { ...@@ -102,7 +110,7 @@ void CudnnBatchNormLayer::forward(PassType passType) {
beta, beta,
movingMean, movingMean,
movingVar, movingVar,
EPS_, eps_,
batchSize, batchSize,
channels_, channels_,
imageH_ * imageD_, imageH_ * imageD_,
...@@ -128,7 +136,15 @@ void CudnnBatchNormLayer::backward(const UpdateCallback& callback) { ...@@ -128,7 +136,15 @@ void CudnnBatchNormLayer::backward(const UpdateCallback& callback) {
real* gamma = weight_->getW()->getData(); real* gamma = weight_->getW()->getData();
real* savedMean = savedMean_->getData(); real* savedMean = savedMean_->getData();
real* savedInvVar = savedInvVar_->getData(); real* savedInvVar = savedInvVar_->getData();
EPS_ = std::max(MIN_EPS, static_cast<double>(EPS));
/**
* If epsilon_ equals to 1e-5 and eps_ is assigned the value of
* static_cast<double>(epsilon_), The CUDNN_STATUS_BAD_PARAM error
* will occur due to eps_ value is less than
* CUDNN_BN_MIN_EPSILON.
* The following code is to ensure that the eps_ meets requirement.
*/
eps_ = std::max(MIN_EPS, static_cast<double>(epsilon_));
auto create = [](MatrixPtr& m, size_t h, size_t w, real** p) { auto create = [](MatrixPtr& m, size_t h, size_t w, real** p) {
Matrix::resizeOrCreate(m, h, w, false, true); Matrix::resizeOrCreate(m, h, w, false, true);
...@@ -159,7 +175,7 @@ void CudnnBatchNormLayer::backward(const UpdateCallback& callback) { ...@@ -159,7 +175,7 @@ void CudnnBatchNormLayer::backward(const UpdateCallback& callback) {
gamma, gamma,
gammaGrad, gammaGrad,
betaGrad, betaGrad,
EPS_, eps_,
savedMean, savedMean,
savedInvVar); savedInvVar);
......
...@@ -46,15 +46,12 @@ public: ...@@ -46,15 +46,12 @@ public:
void backward(const UpdateCallback& callback = nullptr) override; void backward(const UpdateCallback& callback = nullptr) override;
protected: protected:
/** /// Minimum allowed value is CUDNN_BN_MIN_EPSILON defined in cudnn.h.
* Minimum allowed value is CUDNN_BN_MIN_EPSILON defined in cudnn.h.
* Same epsilon value should be used in forward and backward functions.
*/
static const double MIN_EPS; static const double MIN_EPS;
/// Epsilon value used in the batch normalization formula. /// Epsilon value used in the batch normalization formula.
/// If EPS_ is smaller than MIN_EPS, MIN_EPS will be used. /// Same epsilon value should be used in forward and backward functions.
double EPS_; double eps_;
/// Input/output tensor descriptor desc /// Input/output tensor descriptor desc
hl_tensor_descriptor ioDesc_; hl_tensor_descriptor ioDesc_;
......
...@@ -48,7 +48,7 @@ bool MKLDNNBatchNormLayer::init(const LayerMap& layerMap, ...@@ -48,7 +48,7 @@ bool MKLDNNBatchNormLayer::init(const LayerMap& layerMap,
useGlobalStats_ = config_.use_global_stats(); useGlobalStats_ = config_.use_global_stats();
} }
movingAvgFraction_ = config_.moving_average_fraction(); movingAvgFraction_ = config_.moving_average_fraction();
EPS = config_.epsilon(); epsilon_ = config_.epsilon();
VLOG(MKLDNN_BASE) << "--- " << (useGlobalStats_ ? "use" : "do not use") VLOG(MKLDNN_BASE) << "--- " << (useGlobalStats_ ? "use" : "do not use")
<< " --- global stats"; << " --- global stats";
...@@ -213,7 +213,7 @@ void MKLDNNBatchNormLayer::resetFwdPD( ...@@ -213,7 +213,7 @@ void MKLDNNBatchNormLayer::resetFwdPD(
if (wgt) { if (wgt) {
flags_ = (flags_ | batch_normalization_flag::use_scale_shift); flags_ = (flags_ | batch_normalization_flag::use_scale_shift);
} }
auto fwdDesc = bn_fwd::desc(pk, in->getMemoryDesc(), EPS, flags_); auto fwdDesc = bn_fwd::desc(pk, in->getMemoryDesc(), epsilon_, flags_);
pd.reset(new bn_fwd::primitive_desc(fwdDesc, engine_)); pd.reset(new bn_fwd::primitive_desc(fwdDesc, engine_));
CHECK_PRIMITIVE_DESC_EQ(out, pd->dst_primitive_desc()); CHECK_PRIMITIVE_DESC_EQ(out, pd->dst_primitive_desc());
if (wgt) { if (wgt) {
...@@ -280,7 +280,7 @@ void MKLDNNBatchNormLayer::resetBwdPD( ...@@ -280,7 +280,7 @@ void MKLDNNBatchNormLayer::resetBwdPD(
} }
CHECK_PRIMITIVE_DESC_EQ(out, in->getPrimitiveDesc()); CHECK_PRIMITIVE_DESC_EQ(out, in->getPrimitiveDesc());
auto md = in->getMemoryDesc(); auto md = in->getMemoryDesc();
auto bwdDesc = bn_bwd::desc(prop_kind::backward, md, md, EPS, flags_); auto bwdDesc = bn_bwd::desc(prop_kind::backward, md, md, epsilon_, flags_);
pd.reset(new bn_bwd::primitive_desc(bwdDesc, engine_, *fwdPD_)); pd.reset(new bn_bwd::primitive_desc(bwdDesc, engine_, *fwdPD_));
CHECK(pd->weights_primitive_desc() == fwdPD_->weights_primitive_desc()); CHECK(pd->weights_primitive_desc() == fwdPD_->weights_primitive_desc());
CHECK_PRIMITIVE_DESC_EQ(wgt, pd->diff_weights_primitive_desc()); CHECK_PRIMITIVE_DESC_EQ(wgt, pd->diff_weights_primitive_desc());
......
...@@ -32,7 +32,7 @@ protected: ...@@ -32,7 +32,7 @@ protected:
std::shared_ptr<bn_fwd::primitive_desc> fwdPD_; std::shared_ptr<bn_fwd::primitive_desc> fwdPD_;
// Epsilon value used in the batch normalization formula. // Epsilon value used in the batch normalization formula.
real EPS; real epsilon_;
// weight and bias in paddle // weight and bias in paddle
std::unique_ptr<Weight> weight_; std::unique_ptr<Weight> weight_;
......
...@@ -2483,8 +2483,8 @@ class BatchNormLayer(LayerBase): ...@@ -2483,8 +2483,8 @@ class BatchNormLayer(LayerBase):
self.config.use_global_stats = use_global_stats self.config.use_global_stats = use_global_stats
if moving_average_fraction is not None: if moving_average_fraction is not None:
self.config.moving_average_fraction = moving_average_fraction self.config.moving_average_fraction = moving_average_fraction
if epsilon is not None:
self.config.epsilon = epsilon self.config.epsilon = epsilon
input_layer = self.get_input_layer(0) input_layer = self.get_input_layer(0)
image_conf = self.config.inputs[0].image_conf image_conf = self.config.inputs[0].image_conf
......
...@@ -3127,7 +3127,7 @@ def batch_norm_layer(input, ...@@ -3127,7 +3127,7 @@ def batch_norm_layer(input,
(batch_norm_type == "mkldnn_batch_norm") or \ (batch_norm_type == "mkldnn_batch_norm") or \
(batch_norm_type == "cudnn_batch_norm") (batch_norm_type == "cudnn_batch_norm")
assert epsilon >= 1e-5, "Parameter epsilon must be no less than 1e-5." assert epsilon >= 1e-5, "epsilon must be no less than 1e-5."
l = Layer( l = Layer(
name=name, name=name,
......
...@@ -65,6 +65,7 @@ layers { ...@@ -65,6 +65,7 @@ layers {
height: 227 height: 227
width: 227 width: 227
depth: 1 depth: 1
epsilon: 1e-05
} }
layers { layers {
name: "__crmnorm_0__" name: "__crmnorm_0__"
......
...@@ -65,6 +65,7 @@ layers { ...@@ -65,6 +65,7 @@ layers {
height: 256 height: 256
width: 256 width: 256
depth: 1 depth: 1
epsilon: 1e-05
} }
layers { layers {
name: "__crmnorm_0__" name: "__crmnorm_0__"
......
...@@ -36,6 +36,7 @@ layers { ...@@ -36,6 +36,7 @@ layers {
height: 6 height: 6
width: 20 width: 20
depth: 3 depth: 3
epsilon: 1e-05
} }
parameters { parameters {
name: "___batch_norm_0__.w0" name: "___batch_norm_0__.w0"
......
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册