diff --git a/cmake/cross_compiling/ios.cmake b/cmake/cross_compiling/ios.cmake
index 310450f7d009dc0cdae9c0079a96445af8ec8f95..d3f5bf6852b3b295f3b5806b0577a880b0ce6ba6 100644
--- a/cmake/cross_compiling/ios.cmake
+++ b/cmake/cross_compiling/ios.cmake
@@ -76,11 +76,9 @@ set(IOS_PLATFORM ${IOS_PLATFORM} CACHE STRING "Type of iOS Platform")
# Set the architecture for iOS
if(NOT DEFINED IOS_ARCH)
if(IOS_PLATFORM STREQUAL "OS")
- # FIXME(liuyiqun): support "armv7;armv7s;arm64" future
- set(IOS_ARCH "arm64")
+ set(IOS_ARCH "armv7;armv7s;arm64")
elseif(IOS_PLATFORM STREQUAL "SIMULATOR")
- # FIXME(liuyiqun): support "i386;x86_64" future
- set(IOS_ARCH "x86_64")
+ set(IOS_ARCH "i386;x86_64")
endif()
endif()
set(CMAKE_OSX_ARCHITECTURES ${IOS_ARCH} CACHE string "Build architecture for iOS")
@@ -248,7 +246,7 @@ set(IOS_COMPILER_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} ${XCODE_IOS_BITCODE_
# Hidden visibilty is required for cxx on iOS
set(CMAKE_C_FLAGS "${IOS_COMPILER_FLAGS} ${CMAKE_C_FLAGS}" CACHE STRING "C flags")
-set(CMAKE_CXX_FLAGS "${IOS_COMPILER_FLAGS} -fvisibility-inlines-hidden ${CMAKE_CXX_FLAGS}" CACHE STRING "CXX flags")
+set(CMAKE_CXX_FLAGS "${IOS_COMPILER_FLAGS} -fvisibility=hidden -fvisibility-inlines-hidden ${CMAKE_CXX_FLAGS}" CACHE STRING "CXX flags")
set(IOS_LINK_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first")
diff --git a/cmake/external/openblas.cmake b/cmake/external/openblas.cmake
index 225380798112ba5a15b5989b01207b1b072feedf..4c4f59656dae68739f2f07f3febd510e727fe2dd 100644
--- a/cmake/external/openblas.cmake
+++ b/cmake/external/openblas.cmake
@@ -45,15 +45,14 @@ IF(NOT ${CBLAS_FOUND})
SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV8 BINARY=64 USE_THREAD=0)
ENDIF()
ELSEIF(IOS)
- # FIXME(liuyiqun): support multiple architectures
- SET(OPENBLAS_COMMIT "b5c96fcfcdc82945502a2303116a64d89985daf5")
- SET(OPENBLAS_CC "${OPENBLAS_CC} ${CMAKE_C_FLAGS} -isysroot ${CMAKE_OSX_SYSROOT}")
- IF(CMAKE_OSX_ARCHITECTURES MATCHES "armv7")
- SET(OPENBLAS_CC "${OPENBLAS_CC} -arch armv7")
- SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV7 ARM_SOFTFP_ABI=1 USE_THREAD=0)
- ELSEIF(CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
+ IF(CMAKE_OSX_ARCHITECTURES MATCHES "arm64")
+ SET(OPENBLAS_COMMIT "b5c96fcfcdc82945502a2303116a64d89985daf5")
+ SET(OPENBLAS_CC "${OPENBLAS_CC} ${CMAKE_C_FLAGS} -isysroot ${CMAKE_OSX_SYSROOT}")
SET(OPENBLAS_CC "${OPENBLAS_CC} -arch arm64")
SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV8 BINARY=64 USE_THREAD=0 CROSS_SUFFIX=${CROSS_SUFFIX})
+ ELSE()
+      MESSAGE(FATAL_ERROR "OpenBLAS only supports the arm64 architecture on iOS. "
+              "You can set IOS_USE_VECLIB_FOR_BLAS=ON or USE_EIGEN_FOR_BLAS=ON to use another BLAS library instead.")
ENDIF()
ELSEIF(RPI)
# use hardfp
diff --git a/cmake/external/warpctc.cmake b/cmake/external/warpctc.cmake
index 8bd058222880b4df3b08da09c02f9fe7f1d0ee66..a8e1aca49c97df256b1269c286b0bce7732fa932 100644
--- a/cmake/external/warpctc.cmake
+++ b/cmake/external/warpctc.cmake
@@ -12,6 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
+IF(MOBILE_INFERENCE)
+ return()
+ENDIF()
+
INCLUDE(ExternalProject)
SET(WARPCTC_SOURCES_DIR ${THIRD_PARTY_PATH}/warpctc)
diff --git a/doc/mobile/cross_compiling_for_android_cn.md b/doc/mobile/cross_compiling_for_android_cn.md
index 882066f23714f7ab3bba9199b5fa5ff2325ce849..424d7718c64438496cf0895397babd5408e1ca02 100644
--- a/doc/mobile/cross_compiling_for_android_cn.md
+++ b/doc/mobile/cross_compiling_for_android_cn.md
@@ -1,4 +1,4 @@
-# 构建Android平台上的PaddlePaddle库
+# Android平台编译指南
用户可通过如下两种方式,交叉编译Android平台上适用的PaddlePaddle库:
- 基于Docker容器的编译方式
diff --git a/doc/mobile/cross_compiling_for_ios_cn.md b/doc/mobile/cross_compiling_for_ios_cn.md
index cda636a67de712e072f4cc7ad859dda75211eaa8..9da48e7f2119ce901fbb3abab73400df27be16d2 100644
--- a/doc/mobile/cross_compiling_for_ios_cn.md
+++ b/doc/mobile/cross_compiling_for_ios_cn.md
@@ -1,4 +1,4 @@
-# 构建iOS平台上的PaddlePaddle库
+# iOS平台编译指南
交叉编译iOS平台上适用的PaddlePaddle库,需要在MacOS系统上进行。本文的将介绍在MacOS上,从源码交叉编译iOS平台上适用的PaddlePaddle库。
## 准备交叉编译环境
@@ -25,7 +25,7 @@ iOS平台可选配置参数:
- `IOS_PLATFORM`,可设置为`OS/SIMULATOR`,默认值为`OS`。
- `OS`,构建目标为`arm`架构的iPhone或者iPad等物理设备。
- `SIMULATOR`,构建目标为`x86`架构的模拟器平台。
-- `IOS_ARCH`,目标架构。针对不同的`IOS_PLATFORM`,可设置的目标架构如下表所示:
+- `IOS_ARCH`,目标架构。针对不同的`IOS_PLATFORM`,可设置的目标架构如下表所示,默认编译所有架构:
@@ -41,11 +41,11 @@ iOS平台可选配置参数:
OS |
- armv7, armv7s, arm64 (默认) |
+ armv7, armv7s, arm64 |
SIMULATOR |
- i386, x86_64 (默认) |
+ i386, x86_64 |
@@ -66,7 +66,7 @@ iOS平台可选配置参数:
```bash
cmake -DCMAKE_SYSTEM_NAME=iOS \
-DIOS_PLATFORM=OS \
- -DIOS_ARCH="arm64" \
+ -DIOS_ARCH="armv7;arm64" \
-DIOS_ENABLE_BITCODE=ON \
-DIOS_USE_VECLIB_FOR_BLAS=ON \
-DCMAKE_INSTALL_PREFIX=your/path/to/install \
@@ -112,6 +112,6 @@ $ make install
- `lib`目录,其中包含PaddlePaddle的C-API静态库
- `third_party`目录,其中包含所依赖的所有第三方库
-注意,不同架构的PaddlePaddle库建议安装到不同的目录下,然后使用`lipo`工具将多个静态库合并成一个支持多个架构的fat库。
+注意,如果PaddlePaddle库需要同时支持真机和模拟器,则需要分别编译真机和模拟器版本,然后使用`lipo`工具合并fat库。
自此,PaddlePaddle库已经安装完成,用户可将合成的fat库用于深度学习相关的iOS App中,调用方法见C-API文档。
diff --git a/doc/mobile/cross_compiling_for_raspberry_cn.md b/doc/mobile/cross_compiling_for_raspberry_cn.md
index 6e983645faaed1f67edaeeb82ddbef9cef6bb85f..f8ef9dc8031613831437745995268f3abc392f5b 100644
--- a/doc/mobile/cross_compiling_for_raspberry_cn.md
+++ b/doc/mobile/cross_compiling_for_raspberry_cn.md
@@ -1,4 +1,4 @@
-# 构建Raspberry Pi平台上的PaddlePaddle库
+# Raspberry Pi平台编译指南
通常有两个方法来构建基于 Rasspberry Pi 的版本:
diff --git a/paddle/cuda/include/hl_gpu.h b/paddle/cuda/include/hl_gpu.h
index ede2670882ee2b93f610a2261a4ecc1784bc2d0c..4ab8de80d1c7be0f8e3eb848955373dd5e21bc18 100644
--- a/paddle/cuda/include/hl_gpu.h
+++ b/paddle/cuda/include/hl_gpu.h
@@ -25,7 +25,9 @@ limitations under the License. */
#include "hl_matrix.h"
#include "hl_sequence.h"
#include "hl_sparse.h"
+#ifndef PADDLE_MOBILE_INFERENCE
#include "hl_warpctc_wrap.h"
+#endif
#ifdef HPPL_STUB_FUNC
#include "stub/hl_aggregate_stub.h"
diff --git a/paddle/gserver/layers/BatchNormBaseLayer.cpp b/paddle/gserver/layers/BatchNormBaseLayer.cpp
index d56f70ada3b6b6700e445e9d5ba4ee1e7a9c7843..925af31289d0c8ca534a30a16b14bfd2df90b013 100644
--- a/paddle/gserver/layers/BatchNormBaseLayer.cpp
+++ b/paddle/gserver/layers/BatchNormBaseLayer.cpp
@@ -41,7 +41,7 @@ bool BatchNormBaseLayer::init(const LayerMap& layerMap,
useGlobalStats_ = config_.use_global_stats();
}
movingAvgFraction_ = config_.moving_average_fraction();
- EPS = config_.epsilon();
+ epsilon_ = config_.epsilon();
weight_.reset(new Weight(1, channels_, parameters_[0]));
movingMean_.reset(new Weight(1, channels_, parameters_[1]));
diff --git a/paddle/gserver/layers/BatchNormBaseLayer.h b/paddle/gserver/layers/BatchNormBaseLayer.h
index 78f476024ad8fe809ef8e7c7f8e6ab0757eaec7f..2ac3cd9d670d0fcf9c40ad2f117d5a72479663a3 100644
--- a/paddle/gserver/layers/BatchNormBaseLayer.h
+++ b/paddle/gserver/layers/BatchNormBaseLayer.h
@@ -94,8 +94,8 @@ protected:
bool useGlobalStats_;
// use to compute moving mean and variance.
real movingAvgFraction_;
- // Epsilon value used in the batch normalization formula.
- real EPS;
+  // Epsilon is a small constant added to the variance in batch normalization for numerical stability.
+ real epsilon_;
};
} // namespace paddle
diff --git a/paddle/gserver/layers/BatchNormalizationLayer.cpp b/paddle/gserver/layers/BatchNormalizationLayer.cpp
index aaf59b050616f377b398b290d8d23e997e8a1509..25ab5cd927792d18f78bc1fa33eee4029b427cc7 100644
--- a/paddle/gserver/layers/BatchNormalizationLayer.cpp
+++ b/paddle/gserver/layers/BatchNormalizationLayer.cpp
@@ -51,7 +51,7 @@ void BatchNormalizationLayer::calMeanAndStd(const MatrixPtr& mat) {
calMovingMeanAndVar();
- savedInvVar_->subScalar(-EPS);
+ savedInvVar_->subScalar(-epsilon_);
savedInvVar_->sqrt2(*savedInvVar_);
}
@@ -72,7 +72,7 @@ void BatchNormalizationLayer::setMeanAndStd() {
savedInvVar_->copyFrom(*(movingVar_->getW()));
savedInvVar_->downClip(real(0.0));
- savedInvVar_->subScalar(-EPS);
+ savedInvVar_->subScalar(-epsilon_);
savedInvVar_->sqrt2(*savedInvVar_);
}
diff --git a/paddle/gserver/layers/CudnnBatchNormLayer.cpp b/paddle/gserver/layers/CudnnBatchNormLayer.cpp
index 5b3d07eed1c41bd4975609e75ece28b310753cac..c25960d681a62af1069b23f66f8ca5608808cd6f 100644
--- a/paddle/gserver/layers/CudnnBatchNormLayer.cpp
+++ b/paddle/gserver/layers/CudnnBatchNormLayer.cpp
@@ -60,7 +60,15 @@ void CudnnBatchNormLayer::forward(PassType passType) {
real* beta = biases_->getW()->getData();
real* movingMean = movingMean_->getW()->getData();
real* movingVar = movingVar_->getW()->getData();
-  EPS_ = std::max(MIN_EPS, static_cast<double>(EPS));
+
+ /**
+   * If epsilon_ equals 1e-5 and eps_ is assigned the value of
+   * static_cast<double>(epsilon_), a CUDNN_STATUS_BAD_PARAM error
+   * will occur because the eps_ value is less than
+   * CUDNN_BN_MIN_EPSILON.
+   * The following code ensures that eps_ meets this requirement.
+   */
+  eps_ = std::max(MIN_EPS, static_cast<double>(epsilon_));
if (!useGlobalStats_) {
REGISTER_TIMER_INFO("CudnnBatchFwTimer", getName().c_str());
@@ -76,7 +84,7 @@ void CudnnBatchNormLayer::forward(PassType passType) {
1.0 - movingAvgFraction_,
movingMean,
movingVar,
- EPS_,
+ eps_,
savedMean,
savedInvVar);
} else {
@@ -91,7 +99,7 @@ void CudnnBatchNormLayer::forward(PassType passType) {
beta,
movingMean,
movingVar,
- EPS_);
+ eps_);
} else {
// There is a limitation in cudnn library.
// When the batch size is larger than 1024 in cuDNN v5.1,
@@ -102,7 +110,7 @@ void CudnnBatchNormLayer::forward(PassType passType) {
beta,
movingMean,
movingVar,
- EPS_,
+ eps_,
batchSize,
channels_,
imageH_ * imageD_,
@@ -128,7 +136,15 @@ void CudnnBatchNormLayer::backward(const UpdateCallback& callback) {
real* gamma = weight_->getW()->getData();
real* savedMean = savedMean_->getData();
real* savedInvVar = savedInvVar_->getData();
-  EPS_ = std::max(MIN_EPS, static_cast<double>(EPS));
+
+ /**
+   * If epsilon_ equals 1e-5 and eps_ is assigned the value of
+   * static_cast<double>(epsilon_), a CUDNN_STATUS_BAD_PARAM error
+   * will occur because the eps_ value is less than
+   * CUDNN_BN_MIN_EPSILON.
+   * The following code ensures that eps_ meets this requirement.
+   */
+  eps_ = std::max(MIN_EPS, static_cast<double>(epsilon_));
auto create = [](MatrixPtr& m, size_t h, size_t w, real** p) {
Matrix::resizeOrCreate(m, h, w, false, true);
@@ -159,7 +175,7 @@ void CudnnBatchNormLayer::backward(const UpdateCallback& callback) {
gamma,
gammaGrad,
betaGrad,
- EPS_,
+ eps_,
savedMean,
savedInvVar);
diff --git a/paddle/gserver/layers/CudnnBatchNormLayer.h b/paddle/gserver/layers/CudnnBatchNormLayer.h
index 4916a9ce80aa356ed431b0a7888ed4f385b2ebde..fb7dbc01d178192441c6c19edddf4b9d4e8fc134 100644
--- a/paddle/gserver/layers/CudnnBatchNormLayer.h
+++ b/paddle/gserver/layers/CudnnBatchNormLayer.h
@@ -46,15 +46,12 @@ public:
void backward(const UpdateCallback& callback = nullptr) override;
protected:
- /**
- * Minimum allowed value is CUDNN_BN_MIN_EPSILON defined in cudnn.h.
- * Same epsilon value should be used in forward and backward functions.
- */
+ /// Minimum allowed value is CUDNN_BN_MIN_EPSILON defined in cudnn.h.
static const double MIN_EPS;
/// Epsilon value used in the batch normalization formula.
- /// If EPS_ is smaller than MIN_EPS, MIN_EPS will be used.
- double EPS_;
+ /// Same epsilon value should be used in forward and backward functions.
+ double eps_;
/// Input/output tensor descriptor desc
hl_tensor_descriptor ioDesc_;
diff --git a/paddle/gserver/layers/MKLDNNBatchNormLayer.cpp b/paddle/gserver/layers/MKLDNNBatchNormLayer.cpp
index f5bd430098247655a29765226243419c50700d4f..4d49d637764df131708793df1906dcdb6d98658c 100644
--- a/paddle/gserver/layers/MKLDNNBatchNormLayer.cpp
+++ b/paddle/gserver/layers/MKLDNNBatchNormLayer.cpp
@@ -48,7 +48,7 @@ bool MKLDNNBatchNormLayer::init(const LayerMap& layerMap,
useGlobalStats_ = config_.use_global_stats();
}
movingAvgFraction_ = config_.moving_average_fraction();
- EPS = config_.epsilon();
+ epsilon_ = config_.epsilon();
VLOG(MKLDNN_BASE) << "--- " << (useGlobalStats_ ? "use" : "do not use")
<< " --- global stats";
@@ -213,7 +213,7 @@ void MKLDNNBatchNormLayer::resetFwdPD(
if (wgt) {
flags_ = (flags_ | batch_normalization_flag::use_scale_shift);
}
- auto fwdDesc = bn_fwd::desc(pk, in->getMemoryDesc(), EPS, flags_);
+ auto fwdDesc = bn_fwd::desc(pk, in->getMemoryDesc(), epsilon_, flags_);
pd.reset(new bn_fwd::primitive_desc(fwdDesc, engine_));
CHECK_PRIMITIVE_DESC_EQ(out, pd->dst_primitive_desc());
if (wgt) {
@@ -280,7 +280,7 @@ void MKLDNNBatchNormLayer::resetBwdPD(
}
CHECK_PRIMITIVE_DESC_EQ(out, in->getPrimitiveDesc());
auto md = in->getMemoryDesc();
- auto bwdDesc = bn_bwd::desc(prop_kind::backward, md, md, EPS, flags_);
+ auto bwdDesc = bn_bwd::desc(prop_kind::backward, md, md, epsilon_, flags_);
pd.reset(new bn_bwd::primitive_desc(bwdDesc, engine_, *fwdPD_));
CHECK(pd->weights_primitive_desc() == fwdPD_->weights_primitive_desc());
CHECK_PRIMITIVE_DESC_EQ(wgt, pd->diff_weights_primitive_desc());
diff --git a/paddle/gserver/layers/MKLDNNBatchNormLayer.h b/paddle/gserver/layers/MKLDNNBatchNormLayer.h
index 769af2dfc7115fee54dc909e488d3d8c8e8f28a6..afd41a28ac4bf4b102aa3d66bef30544ff24d10b 100644
--- a/paddle/gserver/layers/MKLDNNBatchNormLayer.h
+++ b/paddle/gserver/layers/MKLDNNBatchNormLayer.h
@@ -32,7 +32,7 @@ protected:
  std::shared_ptr<bn_fwd::primitive_desc> fwdPD_;
// Epsilon value used in the batch normalization formula.
- real EPS;
+ real epsilon_;
// weight and bias in paddle
  std::unique_ptr<Weight> weight_;
diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py
index da768ee547c427c0fac3d83cfb6dfed70f9e79d6..fd232f94159318dd42a84fdfc560bd61973fbd91 100644
--- a/python/paddle/trainer/config_parser.py
+++ b/python/paddle/trainer/config_parser.py
@@ -2483,8 +2483,8 @@ class BatchNormLayer(LayerBase):
self.config.use_global_stats = use_global_stats
if moving_average_fraction is not None:
self.config.moving_average_fraction = moving_average_fraction
- if epsilon is not None:
- self.config.epsilon = epsilon
+
+ self.config.epsilon = epsilon
input_layer = self.get_input_layer(0)
image_conf = self.config.inputs[0].image_conf
diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py
index 77fa5f8640ba28fc95f9f243fb5ea2df30a260fc..fa5e851390462f745b8467e49f0ebb1edbdb4826 100644
--- a/python/paddle/trainer_config_helpers/layers.py
+++ b/python/paddle/trainer_config_helpers/layers.py
@@ -3127,7 +3127,7 @@ def batch_norm_layer(input,
(batch_norm_type == "mkldnn_batch_norm") or \
(batch_norm_type == "cudnn_batch_norm")
- assert epsilon >= 1e-5, "Parameter epsilon must be no less than 1e-5."
+ assert epsilon >= 1e-5, "epsilon must be no less than 1e-5."
l = Layer(
name=name,
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr
index b14121e82cb7d9516c4771fc896b9b3b9e01d1c8..3e0f957648879d4350d662b336c953273bac1378 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr
@@ -65,6 +65,7 @@ layers {
height: 227
width: 227
depth: 1
+ epsilon: 1e-05
}
layers {
name: "__crmnorm_0__"
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr
index c7a487a11231cba6182b654108773037bdb0ec35..a18a4652e14c0cfc4dbca87e67d31aa663ee756b 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr
@@ -65,6 +65,7 @@ layers {
height: 256
width: 256
depth: 1
+ epsilon: 1e-05
}
layers {
name: "__crmnorm_0__"
diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_BatchNorm3D.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_BatchNorm3D.protostr
index 832ed24a31dd2bedba9a4fce77d7a088d1796fdb..9b69ae4a3b3cbcc7c0c69a2d5b3728e2f0204f33 100644
--- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_BatchNorm3D.protostr
+++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_BatchNorm3D.protostr
@@ -36,6 +36,7 @@ layers {
height: 6
width: 20
depth: 3
+ epsilon: 1e-05
}
parameters {
name: "___batch_norm_0__.w0"