diff --git a/cmake/cross_compiling/ios.cmake b/cmake/cross_compiling/ios.cmake index 310450f7d009dc0cdae9c0079a96445af8ec8f95..d3f5bf6852b3b295f3b5806b0577a880b0ce6ba6 100644 --- a/cmake/cross_compiling/ios.cmake +++ b/cmake/cross_compiling/ios.cmake @@ -76,11 +76,9 @@ set(IOS_PLATFORM ${IOS_PLATFORM} CACHE STRING "Type of iOS Platform") # Set the architecture for iOS if(NOT DEFINED IOS_ARCH) if(IOS_PLATFORM STREQUAL "OS") - # FIXME(liuyiqun): support "armv7;armv7s;arm64" future - set(IOS_ARCH "arm64") + set(IOS_ARCH "armv7;armv7s;arm64") elseif(IOS_PLATFORM STREQUAL "SIMULATOR") - # FIXME(liuyiqun): support "i386;x86_64" future - set(IOS_ARCH "x86_64") + set(IOS_ARCH "i386;x86_64") endif() endif() set(CMAKE_OSX_ARCHITECTURES ${IOS_ARCH} CACHE string "Build architecture for iOS") @@ -248,7 +246,7 @@ set(IOS_COMPILER_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} ${XCODE_IOS_BITCODE_ # Hidden visibilty is required for cxx on iOS set(CMAKE_C_FLAGS "${IOS_COMPILER_FLAGS} ${CMAKE_C_FLAGS}" CACHE STRING "C flags") -set(CMAKE_CXX_FLAGS "${IOS_COMPILER_FLAGS} -fvisibility-inlines-hidden ${CMAKE_CXX_FLAGS}" CACHE STRING "CXX flags") +set(CMAKE_CXX_FLAGS "${IOS_COMPILER_FLAGS} -fvisibility=hidden -fvisibility-inlines-hidden ${CMAKE_CXX_FLAGS}" CACHE STRING "CXX flags") set(IOS_LINK_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} -Wl,-search_paths_first") diff --git a/cmake/external/openblas.cmake b/cmake/external/openblas.cmake index 225380798112ba5a15b5989b01207b1b072feedf..4c4f59656dae68739f2f07f3febd510e727fe2dd 100644 --- a/cmake/external/openblas.cmake +++ b/cmake/external/openblas.cmake @@ -45,15 +45,14 @@ IF(NOT ${CBLAS_FOUND}) SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV8 BINARY=64 USE_THREAD=0) ENDIF() ELSEIF(IOS) - # FIXME(liuyiqun): support multiple architectures - SET(OPENBLAS_COMMIT "b5c96fcfcdc82945502a2303116a64d89985daf5") - SET(OPENBLAS_CC "${OPENBLAS_CC} ${CMAKE_C_FLAGS} -isysroot ${CMAKE_OSX_SYSROOT}") - IF(CMAKE_OSX_ARCHITECTURES MATCHES "armv7") - SET(OPENBLAS_CC "${OPENBLAS_CC} -arch armv7") - SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV7 ARM_SOFTFP_ABI=1 USE_THREAD=0) - ELSEIF(CMAKE_OSX_ARCHITECTURES MATCHES "arm64") + IF(CMAKE_OSX_ARCHITECTURES MATCHES "arm64") + SET(OPENBLAS_COMMIT "b5c96fcfcdc82945502a2303116a64d89985daf5") + SET(OPENBLAS_CC "${OPENBLAS_CC} ${CMAKE_C_FLAGS} -isysroot ${CMAKE_OSX_SYSROOT}") SET(OPENBLAS_CC "${OPENBLAS_CC} -arch arm64") SET(OPTIONAL_ARGS ${OPTIONAL_ARGS} TARGET=ARMV8 BINARY=64 USE_THREAD=0 CROSS_SUFFIX=${CROSS_SUFFIX}) + ELSE() + MESSAGE(FATAL_ERROR "OpenBLAS only support arm64 architectures on iOS. " + "You can set IOS_USE_VECLIB_FOR_BLAS=ON or USE_EIGEN_FOR_BLAS=ON to use other blas library instead.") ENDIF() ELSEIF(RPI) # use hardfp diff --git a/cmake/external/warpctc.cmake b/cmake/external/warpctc.cmake index 8bd058222880b4df3b08da09c02f9fe7f1d0ee66..a8e1aca49c97df256b1269c286b0bce7732fa932 100644 --- a/cmake/external/warpctc.cmake +++ b/cmake/external/warpctc.cmake @@ -12,6 +12,10 @@ # See the License for the specific language governing permissions and # limitations under the License. +IF(MOBILE_INFERENCE) + return() +ENDIF() + INCLUDE(ExternalProject) SET(WARPCTC_SOURCES_DIR ${THIRD_PARTY_PATH}/warpctc) diff --git a/doc/mobile/cross_compiling_for_android_cn.md b/doc/mobile/cross_compiling_for_android_cn.md index 882066f23714f7ab3bba9199b5fa5ff2325ce849..424d7718c64438496cf0895397babd5408e1ca02 100644 --- a/doc/mobile/cross_compiling_for_android_cn.md +++ b/doc/mobile/cross_compiling_for_android_cn.md @@ -1,4 +1,4 @@ -# 构建Android平台上的PaddlePaddle库 +# Android平台编译指南 用户可通过如下两种方式,交叉编译Android平台上适用的PaddlePaddle库: - 基于Docker容器的编译方式 diff --git a/doc/mobile/cross_compiling_for_ios_cn.md b/doc/mobile/cross_compiling_for_ios_cn.md index cda636a67de712e072f4cc7ad859dda75211eaa8..9da48e7f2119ce901fbb3abab73400df27be16d2 100644 --- a/doc/mobile/cross_compiling_for_ios_cn.md +++ b/doc/mobile/cross_compiling_for_ios_cn.md @@ -1,4 +1,4 @@ -# 构建iOS平台上的PaddlePaddle库 +# iOS平台编译指南 交叉编译iOS平台上适用的PaddlePaddle库,需要在MacOS系统上进行。本文的将介绍在MacOS上,从源码交叉编译iOS平台上适用的PaddlePaddle库。 ## 准备交叉编译环境 @@ -25,7 +25,7 @@ iOS平台可选配置参数: - `IOS_PLATFORM`,可设置为`OS/SIMULATOR`,默认值为`OS`。 - `OS`,构建目标为`arm`架构的iPhone或者iPad等物理设备。 - `SIMULATOR`,构建目标为`x86`架构的模拟器平台。 -- `IOS_ARCH`,目标架构。针对不同的`IOS_PLATFORM`,可设置的目标架构如下表所示: +- `IOS_ARCH`,目标架构。针对不同的`IOS_PLATFORM`,可设置的目标架构如下表所示,默认编译所有架构: @@ -41,11 +41,11 @@ iOS平台可选配置参数: - + - +
OSarmv7, armv7s, arm64 (默认)armv7, armv7s, arm64
SIMULATORi386, x86_64 (默认)i386, x86_64
@@ -66,7 +66,7 @@ iOS平台可选配置参数: ```bash cmake -DCMAKE_SYSTEM_NAME=iOS \ -DIOS_PLATFORM=OS \ - -DIOS_ARCH="arm64" \ + -DIOS_ARCH="armv7;arm64" \ -DIOS_ENABLE_BITCODE=ON \ -DIOS_USE_VECLIB_FOR_BLAS=ON \ -DCMAKE_INSTALL_PREFIX=your/path/to/install \ @@ -112,6 +112,6 @@ $ make install - `lib`目录,其中包含PaddlePaddle的C-API静态库 - `third_party`目录,其中包含所依赖的所有第三方库 -注意,不同架构的PaddlePaddle库建议安装到不同的目录下,然后使用`lipo`工具将多个静态库合并成一个支持多个架构的fat库。 +注意,如果PaddlePaddle库需要同时支持真机和模拟器,则需要分别编译真机和模拟器版本,然后使用`lipo`工具合并fat库。 自此,PaddlePaddle库已经安装完成,用户可将合成的fat库用于深度学习相关的iOS App中,调用方法见C-API文档。 diff --git a/doc/mobile/cross_compiling_for_raspberry_cn.md b/doc/mobile/cross_compiling_for_raspberry_cn.md index 6e983645faaed1f67edaeeb82ddbef9cef6bb85f..f8ef9dc8031613831437745995268f3abc392f5b 100644 --- a/doc/mobile/cross_compiling_for_raspberry_cn.md +++ b/doc/mobile/cross_compiling_for_raspberry_cn.md @@ -1,4 +1,4 @@ -# 构建Raspberry Pi平台上的PaddlePaddle库 +# Raspberry Pi平台编译指南 通常有两个方法来构建基于 Rasspberry Pi 的版本: diff --git a/paddle/cuda/include/hl_gpu.h b/paddle/cuda/include/hl_gpu.h index ede2670882ee2b93f610a2261a4ecc1784bc2d0c..4ab8de80d1c7be0f8e3eb848955373dd5e21bc18 100644 --- a/paddle/cuda/include/hl_gpu.h +++ b/paddle/cuda/include/hl_gpu.h @@ -25,7 +25,9 @@ limitations under the License. */ #include "hl_matrix.h" #include "hl_sequence.h" #include "hl_sparse.h" +#ifndef PADDLE_MOBILE_INFERENCE #include "hl_warpctc_wrap.h" +#endif #ifdef HPPL_STUB_FUNC #include "stub/hl_aggregate_stub.h" diff --git a/paddle/gserver/layers/BatchNormBaseLayer.cpp b/paddle/gserver/layers/BatchNormBaseLayer.cpp index d56f70ada3b6b6700e445e9d5ba4ee1e7a9c7843..925af31289d0c8ca534a30a16b14bfd2df90b013 100644 --- a/paddle/gserver/layers/BatchNormBaseLayer.cpp +++ b/paddle/gserver/layers/BatchNormBaseLayer.cpp @@ -41,7 +41,7 @@ bool BatchNormBaseLayer::init(const LayerMap& layerMap, useGlobalStats_ = config_.use_global_stats(); } movingAvgFraction_ = config_.moving_average_fraction(); - EPS = config_.epsilon(); + epsilon_ = config_.epsilon(); weight_.reset(new Weight(1, channels_, parameters_[0])); movingMean_.reset(new Weight(1, channels_, parameters_[1])); diff --git a/paddle/gserver/layers/BatchNormBaseLayer.h b/paddle/gserver/layers/BatchNormBaseLayer.h index 78f476024ad8fe809ef8e7c7f8e6ab0757eaec7f..2ac3cd9d670d0fcf9c40ad2f117d5a72479663a3 100644 --- a/paddle/gserver/layers/BatchNormBaseLayer.h +++ b/paddle/gserver/layers/BatchNormBaseLayer.h @@ -94,8 +94,8 @@ protected: bool useGlobalStats_; // use to compute moving mean and variance. real movingAvgFraction_; - // Epsilon value used in the batch normalization formula. - real EPS; + // Epsilon is a small random noise used in batch normalization for stability. + real epsilon_; }; } // namespace paddle diff --git a/paddle/gserver/layers/BatchNormalizationLayer.cpp b/paddle/gserver/layers/BatchNormalizationLayer.cpp index aaf59b050616f377b398b290d8d23e997e8a1509..25ab5cd927792d18f78bc1fa33eee4029b427cc7 100644 --- a/paddle/gserver/layers/BatchNormalizationLayer.cpp +++ b/paddle/gserver/layers/BatchNormalizationLayer.cpp @@ -51,7 +51,7 @@ void BatchNormalizationLayer::calMeanAndStd(const MatrixPtr& mat) { calMovingMeanAndVar(); - savedInvVar_->subScalar(-EPS); + savedInvVar_->subScalar(-epsilon_); savedInvVar_->sqrt2(*savedInvVar_); } @@ -72,7 +72,7 @@ void BatchNormalizationLayer::setMeanAndStd() { savedInvVar_->copyFrom(*(movingVar_->getW())); savedInvVar_->downClip(real(0.0)); - savedInvVar_->subScalar(-EPS); + savedInvVar_->subScalar(-epsilon_); savedInvVar_->sqrt2(*savedInvVar_); } diff --git a/paddle/gserver/layers/CudnnBatchNormLayer.cpp b/paddle/gserver/layers/CudnnBatchNormLayer.cpp index 5b3d07eed1c41bd4975609e75ece28b310753cac..c25960d681a62af1069b23f66f8ca5608808cd6f 100644 --- a/paddle/gserver/layers/CudnnBatchNormLayer.cpp +++ b/paddle/gserver/layers/CudnnBatchNormLayer.cpp @@ -60,7 +60,15 @@ void CudnnBatchNormLayer::forward(PassType passType) { real* beta = biases_->getW()->getData(); real* movingMean = movingMean_->getW()->getData(); real* movingVar = movingVar_->getW()->getData(); - EPS_ = std::max(MIN_EPS, static_cast(EPS)); + + /** + * If epsilon_ equals to 1e-5 and eps_ is assigned the value of + * static_cast(epsilon_), The CUDNN_STATUS_BAD_PARAM error + * will occur due to eps_ value is less than + * CUDNN_BN_MIN_EPSILON. + * The following code is to ensure that the eps_ meets requirement. + */ + eps_ = std::max(MIN_EPS, static_cast(epsilon_)); if (!useGlobalStats_) { REGISTER_TIMER_INFO("CudnnBatchFwTimer", getName().c_str()); @@ -76,7 +84,7 @@ void CudnnBatchNormLayer::forward(PassType passType) { 1.0 - movingAvgFraction_, movingMean, movingVar, - EPS_, + eps_, savedMean, savedInvVar); } else { @@ -91,7 +99,7 @@ void CudnnBatchNormLayer::forward(PassType passType) { beta, movingMean, movingVar, - EPS_); + eps_); } else { // There is a limitation in cudnn library. // When the batch size is larger than 1024 in cuDNN v5.1, @@ -102,7 +110,7 @@ void CudnnBatchNormLayer::forward(PassType passType) { beta, movingMean, movingVar, - EPS_, + eps_, batchSize, channels_, imageH_ * imageD_, @@ -128,7 +136,15 @@ void CudnnBatchNormLayer::backward(const UpdateCallback& callback) { real* gamma = weight_->getW()->getData(); real* savedMean = savedMean_->getData(); real* savedInvVar = savedInvVar_->getData(); - EPS_ = std::max(MIN_EPS, static_cast(EPS)); + + /** + * If epsilon_ equals to 1e-5 and eps_ is assigned the value of + * static_cast(epsilon_), The CUDNN_STATUS_BAD_PARAM error + * will occur due to eps_ value is less than + * CUDNN_BN_MIN_EPSILON. + * The following code is to ensure that the eps_ meets requirement. + */ + eps_ = std::max(MIN_EPS, static_cast(epsilon_)); auto create = [](MatrixPtr& m, size_t h, size_t w, real** p) { Matrix::resizeOrCreate(m, h, w, false, true); @@ -159,7 +175,7 @@ void CudnnBatchNormLayer::backward(const UpdateCallback& callback) { gamma, gammaGrad, betaGrad, - EPS_, + eps_, savedMean, savedInvVar); diff --git a/paddle/gserver/layers/CudnnBatchNormLayer.h b/paddle/gserver/layers/CudnnBatchNormLayer.h index 4916a9ce80aa356ed431b0a7888ed4f385b2ebde..fb7dbc01d178192441c6c19edddf4b9d4e8fc134 100644 --- a/paddle/gserver/layers/CudnnBatchNormLayer.h +++ b/paddle/gserver/layers/CudnnBatchNormLayer.h @@ -46,15 +46,12 @@ public: void backward(const UpdateCallback& callback = nullptr) override; protected: - /** - * Minimum allowed value is CUDNN_BN_MIN_EPSILON defined in cudnn.h. - * Same epsilon value should be used in forward and backward functions. - */ + /// Minimum allowed value is CUDNN_BN_MIN_EPSILON defined in cudnn.h. static const double MIN_EPS; /// Epsilon value used in the batch normalization formula. - /// If EPS_ is smaller than MIN_EPS, MIN_EPS will be used. - double EPS_; + /// Same epsilon value should be used in forward and backward functions. + double eps_; /// Input/output tensor descriptor desc hl_tensor_descriptor ioDesc_; diff --git a/paddle/gserver/layers/MKLDNNBatchNormLayer.cpp b/paddle/gserver/layers/MKLDNNBatchNormLayer.cpp index f5bd430098247655a29765226243419c50700d4f..4d49d637764df131708793df1906dcdb6d98658c 100644 --- a/paddle/gserver/layers/MKLDNNBatchNormLayer.cpp +++ b/paddle/gserver/layers/MKLDNNBatchNormLayer.cpp @@ -48,7 +48,7 @@ bool MKLDNNBatchNormLayer::init(const LayerMap& layerMap, useGlobalStats_ = config_.use_global_stats(); } movingAvgFraction_ = config_.moving_average_fraction(); - EPS = config_.epsilon(); + epsilon_ = config_.epsilon(); VLOG(MKLDNN_BASE) << "--- " << (useGlobalStats_ ? "use" : "do not use") << " --- global stats"; @@ -213,7 +213,7 @@ void MKLDNNBatchNormLayer::resetFwdPD( if (wgt) { flags_ = (flags_ | batch_normalization_flag::use_scale_shift); } - auto fwdDesc = bn_fwd::desc(pk, in->getMemoryDesc(), EPS, flags_); + auto fwdDesc = bn_fwd::desc(pk, in->getMemoryDesc(), epsilon_, flags_); pd.reset(new bn_fwd::primitive_desc(fwdDesc, engine_)); CHECK_PRIMITIVE_DESC_EQ(out, pd->dst_primitive_desc()); if (wgt) { @@ -280,7 +280,7 @@ void MKLDNNBatchNormLayer::resetBwdPD( } CHECK_PRIMITIVE_DESC_EQ(out, in->getPrimitiveDesc()); auto md = in->getMemoryDesc(); - auto bwdDesc = bn_bwd::desc(prop_kind::backward, md, md, EPS, flags_); + auto bwdDesc = bn_bwd::desc(prop_kind::backward, md, md, epsilon_, flags_); pd.reset(new bn_bwd::primitive_desc(bwdDesc, engine_, *fwdPD_)); CHECK(pd->weights_primitive_desc() == fwdPD_->weights_primitive_desc()); CHECK_PRIMITIVE_DESC_EQ(wgt, pd->diff_weights_primitive_desc()); diff --git a/paddle/gserver/layers/MKLDNNBatchNormLayer.h b/paddle/gserver/layers/MKLDNNBatchNormLayer.h index 769af2dfc7115fee54dc909e488d3d8c8e8f28a6..afd41a28ac4bf4b102aa3d66bef30544ff24d10b 100644 --- a/paddle/gserver/layers/MKLDNNBatchNormLayer.h +++ b/paddle/gserver/layers/MKLDNNBatchNormLayer.h @@ -32,7 +32,7 @@ protected: std::shared_ptr fwdPD_; // Epsilon value used in the batch normalization formula. - real EPS; + real epsilon_; // weight and bias in paddle std::unique_ptr weight_; diff --git a/python/paddle/trainer/config_parser.py b/python/paddle/trainer/config_parser.py index da768ee547c427c0fac3d83cfb6dfed70f9e79d6..fd232f94159318dd42a84fdfc560bd61973fbd91 100644 --- a/python/paddle/trainer/config_parser.py +++ b/python/paddle/trainer/config_parser.py @@ -2483,8 +2483,8 @@ class BatchNormLayer(LayerBase): self.config.use_global_stats = use_global_stats if moving_average_fraction is not None: self.config.moving_average_fraction = moving_average_fraction - if epsilon is not None: - self.config.epsilon = epsilon + + self.config.epsilon = epsilon input_layer = self.get_input_layer(0) image_conf = self.config.inputs[0].image_conf diff --git a/python/paddle/trainer_config_helpers/layers.py b/python/paddle/trainer_config_helpers/layers.py index 77fa5f8640ba28fc95f9f243fb5ea2df30a260fc..fa5e851390462f745b8467e49f0ebb1edbdb4826 100644 --- a/python/paddle/trainer_config_helpers/layers.py +++ b/python/paddle/trainer_config_helpers/layers.py @@ -3127,7 +3127,7 @@ def batch_norm_layer(input, (batch_norm_type == "mkldnn_batch_norm") or \ (batch_norm_type == "cudnn_batch_norm") - assert epsilon >= 1e-5, "Parameter epsilon must be no less than 1e-5." + assert epsilon >= 1e-5, "epsilon must be no less than 1e-5." l = Layer( name=name, diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr index b14121e82cb7d9516c4771fc896b9b3b9e01d1c8..3e0f957648879d4350d662b336c953273bac1378 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr @@ -65,6 +65,7 @@ layers { height: 227 width: 227 depth: 1 + epsilon: 1e-05 } layers { name: "__crmnorm_0__" diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr index c7a487a11231cba6182b654108773037bdb0ec35..a18a4652e14c0cfc4dbca87e67d31aa663ee756b 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr @@ -65,6 +65,7 @@ layers { height: 256 width: 256 depth: 1 + epsilon: 1e-05 } layers { name: "__crmnorm_0__" diff --git a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_BatchNorm3D.protostr b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_BatchNorm3D.protostr index 832ed24a31dd2bedba9a4fce77d7a088d1796fdb..9b69ae4a3b3cbcc7c0c69a2d5b3728e2f0204f33 100644 --- a/python/paddle/trainer_config_helpers/tests/configs/protostr/test_BatchNorm3D.protostr +++ b/python/paddle/trainer_config_helpers/tests/configs/protostr/test_BatchNorm3D.protostr @@ -36,6 +36,7 @@ layers { height: 6 width: 20 depth: 3 + epsilon: 1e-05 } parameters { name: "___batch_norm_0__.w0"