Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
机器未来
Paddle
提交
9580c450
P
Paddle
项目概览
机器未来
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1
Issue
1
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
9580c450
编写于
11月 17, 2017
作者:
P
peterzhang2029
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into add_bn_eq
上级
27d7b2cb
c808fbbf
变更
19
显示空白变更内容
内联
并排
Showing
19 changed file
with
63 addition
and
44 deletion
+63
-44
cmake/cross_compiling/ios.cmake
cmake/cross_compiling/ios.cmake
+3
-5
cmake/external/openblas.cmake
cmake/external/openblas.cmake
+6
-7
cmake/external/warpctc.cmake
cmake/external/warpctc.cmake
+4
-0
doc/mobile/cross_compiling_for_android_cn.md
doc/mobile/cross_compiling_for_android_cn.md
+1
-1
doc/mobile/cross_compiling_for_ios_cn.md
doc/mobile/cross_compiling_for_ios_cn.md
+6
-6
doc/mobile/cross_compiling_for_raspberry_cn.md
doc/mobile/cross_compiling_for_raspberry_cn.md
+1
-1
paddle/cuda/include/hl_gpu.h
paddle/cuda/include/hl_gpu.h
+2
-0
paddle/gserver/layers/BatchNormBaseLayer.cpp
paddle/gserver/layers/BatchNormBaseLayer.cpp
+1
-1
paddle/gserver/layers/BatchNormBaseLayer.h
paddle/gserver/layers/BatchNormBaseLayer.h
+2
-2
paddle/gserver/layers/BatchNormalizationLayer.cpp
paddle/gserver/layers/BatchNormalizationLayer.cpp
+2
-2
paddle/gserver/layers/CudnnBatchNormLayer.cpp
paddle/gserver/layers/CudnnBatchNormLayer.cpp
+22
-6
paddle/gserver/layers/CudnnBatchNormLayer.h
paddle/gserver/layers/CudnnBatchNormLayer.h
+3
-6
paddle/gserver/layers/MKLDNNBatchNormLayer.cpp
paddle/gserver/layers/MKLDNNBatchNormLayer.cpp
+3
-3
paddle/gserver/layers/MKLDNNBatchNormLayer.h
paddle/gserver/layers/MKLDNNBatchNormLayer.h
+1
-1
python/paddle/trainer/config_parser.py
python/paddle/trainer/config_parser.py
+2
-2
python/paddle/trainer_config_helpers/layers.py
python/paddle/trainer_config_helpers/layers.py
+1
-1
python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr
...config_helpers/tests/configs/protostr/img_layers.protostr
+1
-0
python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr
..._helpers/tests/configs/protostr/img_trans_layers.protostr
+1
-0
python/paddle/trainer_config_helpers/tests/configs/protostr/test_BatchNorm3D.protostr
..._helpers/tests/configs/protostr/test_BatchNorm3D.protostr
+1
-0
未找到文件。
cmake/cross_compiling/ios.cmake
浏览文件 @
9580c450
...
...
@@ -76,11 +76,9 @@ set(IOS_PLATFORM ${IOS_PLATFORM} CACHE STRING "Type of iOS Platform")
# Set the architecture for iOS
if
(
NOT DEFINED IOS_ARCH
)
if
(
IOS_PLATFORM STREQUAL
"OS"
)
# FIXME(liuyiqun): support "armv7;armv7s;arm64" future
set
(
IOS_ARCH
"arm64"
)
set
(
IOS_ARCH
"armv7;armv7s;arm64"
)
elseif
(
IOS_PLATFORM STREQUAL
"SIMULATOR"
)
# FIXME(liuyiqun): support "i386;x86_64" future
set
(
IOS_ARCH
"x86_64"
)
set
(
IOS_ARCH
"i386;x86_64"
)
endif
()
endif
()
set
(
CMAKE_OSX_ARCHITECTURES
${
IOS_ARCH
}
CACHE string
"Build architecture for iOS"
)
...
...
@@ -248,7 +246,7 @@ set(IOS_COMPILER_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} ${XCODE_IOS_BITCODE_
# Hidden visibility is required for cxx on iOS
set
(
CMAKE_C_FLAGS
"
${
IOS_COMPILER_FLAGS
}
${
CMAKE_C_FLAGS
}
"
CACHE STRING
"C flags"
)
set
(
CMAKE_CXX_FLAGS
"
${
IOS_COMPILER_FLAGS
}
-fvisibility-inlines-hidden
${
CMAKE_CXX_FLAGS
}
"
CACHE STRING
"CXX flags"
)
set
(
CMAKE_CXX_FLAGS
"
${
IOS_COMPILER_FLAGS
}
-fvisibility
=hidden -fvisibility
-inlines-hidden
${
CMAKE_CXX_FLAGS
}
"
CACHE STRING
"CXX flags"
)
set
(
IOS_LINK_FLAGS
"
${
XCODE_IOS_PLATFORM_VERSION_FLAGS
}
-Wl,-search_paths_first"
)
...
...
cmake/external/openblas.cmake
浏览文件 @
9580c450
...
...
@@ -45,15 +45,14 @@ IF(NOT ${CBLAS_FOUND})
SET
(
OPTIONAL_ARGS
${
OPTIONAL_ARGS
}
TARGET=ARMV8 BINARY=64 USE_THREAD=0
)
ENDIF
()
ELSEIF
(
IOS
)
# FIXME(liuyiqun): support multiple architectures
IF
(
CMAKE_OSX_ARCHITECTURES MATCHES
"arm64"
)
SET
(
OPENBLAS_COMMIT
"b5c96fcfcdc82945502a2303116a64d89985daf5"
)
SET
(
OPENBLAS_CC
"
${
OPENBLAS_CC
}
${
CMAKE_C_FLAGS
}
-isysroot
${
CMAKE_OSX_SYSROOT
}
"
)
IF
(
CMAKE_OSX_ARCHITECTURES MATCHES
"armv7"
)
SET
(
OPENBLAS_CC
"
${
OPENBLAS_CC
}
-arch armv7"
)
SET
(
OPTIONAL_ARGS
${
OPTIONAL_ARGS
}
TARGET=ARMV7 ARM_SOFTFP_ABI=1 USE_THREAD=0
)
ELSEIF
(
CMAKE_OSX_ARCHITECTURES MATCHES
"arm64"
)
SET
(
OPENBLAS_CC
"
${
OPENBLAS_CC
}
-arch arm64"
)
SET
(
OPTIONAL_ARGS
${
OPTIONAL_ARGS
}
TARGET=ARMV8 BINARY=64 USE_THREAD=0 CROSS_SUFFIX=
${
CROSS_SUFFIX
}
)
ELSE
()
MESSAGE
(
FATAL_ERROR
"OpenBLAS only support arm64 architectures on iOS. "
"You can set IOS_USE_VECLIB_FOR_BLAS=ON or USE_EIGEN_FOR_BLAS=ON to use other blas library instead."
)
ENDIF
()
ELSEIF
(
RPI
)
# use hardfp
...
...
cmake/external/warpctc.cmake
浏览文件 @
9580c450
...
...
@@ -12,6 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
IF
(
MOBILE_INFERENCE
)
return
()
ENDIF
()
INCLUDE
(
ExternalProject
)
SET
(
WARPCTC_SOURCES_DIR
${
THIRD_PARTY_PATH
}
/warpctc
)
...
...
doc/mobile/cross_compiling_for_android_cn.md
浏览文件 @
9580c450
#
构建Android平台上的PaddlePaddle库
#
Android平台编译指南
用户可通过如下两种方式,交叉编译Android平台上适用的PaddlePaddle库:
-
基于Docker容器的编译方式
...
...
doc/mobile/cross_compiling_for_ios_cn.md
浏览文件 @
9580c450
#
构建iOS平台上的PaddlePaddle库
#
iOS平台编译指南
交叉编译iOS平台上适用的PaddlePaddle库,需要在MacOS系统上进行。本文将介绍在MacOS上,从源码交叉编译iOS平台上适用的PaddlePaddle库。
## 准备交叉编译环境
...
...
@@ -25,7 +25,7 @@ iOS平台可选配置参数:
-
`IOS_PLATFORM`
,可设置为
`OS/SIMULATOR`
,默认值为
`OS`
。
-
`OS`
,构建目标为
`arm`
架构的iPhone或者iPad等物理设备。
-
`SIMULATOR`
,构建目标为
`x86`
架构的模拟器平台。
-
`IOS_ARCH`
,目标架构。针对不同的
`IOS_PLATFORM`
,可设置的目标架构如下表所示:
-
`IOS_ARCH`
,目标架构。针对不同的
`IOS_PLATFORM`
,可设置的目标架构如下表所示
,默认编译所有架构
:
<table class="docutils">
<colgroup>
...
...
@@ -41,11 +41,11 @@ iOS平台可选配置参数:
<tbody valign="top">
<tr class="row-even">
<td>OS</td>
<td>armv7, armv7s, arm64
(默认)
</td>
<td>armv7, armv7s, arm64 </td>
</tr>
<tr class="row-odd">
<td>SIMULATOR</td>
<td>i386, x86_64
(默认)
</td>
<td>i386, x86_64 </td>
</tr>
</tbody>
</table>
...
...
@@ -66,7 +66,7 @@ iOS平台可选配置参数:
```
bash
cmake
-DCMAKE_SYSTEM_NAME
=
iOS
\
-DIOS_PLATFORM
=
OS
\
-DIOS_ARCH
=
"arm64"
\
-DIOS_ARCH
=
"arm
v7;arm
64"
\
-DIOS_ENABLE_BITCODE
=
ON
\
-DIOS_USE_VECLIB_FOR_BLAS
=
ON
\
-DCMAKE_INSTALL_PREFIX
=
your/path/to/install
\
...
...
@@ -112,6 +112,6 @@ $ make install
-
`lib`
目录,其中包含PaddlePaddle的C-API静态库
-
`third_party`
目录,其中包含所依赖的所有第三方库
注意,
不同架构的PaddlePaddle库建议安装到不同的目录下,然后使用
`lipo`
工具将多个静态库合并成一个支持多个架构的
fat库。
注意,
如果PaddlePaddle库需要同时支持真机和模拟器,则需要分别编译真机和模拟器版本,然后使用
`lipo`
工具合并
fat库。
自此,PaddlePaddle库已经安装完成,用户可将合成的fat库用于深度学习相关的iOS App中,调用方法见C-API文档。
doc/mobile/cross_compiling_for_raspberry_cn.md
浏览文件 @
9580c450
#
构建Raspberry Pi平台上的PaddlePaddle库
#
Raspberry Pi平台编译指南
通常有两个方法来构建基于 Raspberry Pi 的版本:
...
...
paddle/cuda/include/hl_gpu.h
浏览文件 @
9580c450
...
...
@@ -25,7 +25,9 @@ limitations under the License. */
#include "hl_matrix.h"
#include "hl_sequence.h"
#include "hl_sparse.h"
#ifndef PADDLE_MOBILE_INFERENCE
#include "hl_warpctc_wrap.h"
#endif
#ifdef HPPL_STUB_FUNC
#include "stub/hl_aggregate_stub.h"
...
...
paddle/gserver/layers/BatchNormBaseLayer.cpp
浏览文件 @
9580c450
...
...
@@ -41,7 +41,7 @@ bool BatchNormBaseLayer::init(const LayerMap& layerMap,
useGlobalStats_
=
config_
.
use_global_stats
();
}
movingAvgFraction_
=
config_
.
moving_average_fraction
();
EPS
=
config_
.
epsilon
();
epsilon_
=
config_
.
epsilon
();
weight_
.
reset
(
new
Weight
(
1
,
channels_
,
parameters_
[
0
]));
movingMean_
.
reset
(
new
Weight
(
1
,
channels_
,
parameters_
[
1
]));
...
...
paddle/gserver/layers/BatchNormBaseLayer.h
浏览文件 @
9580c450
...
...
@@ -94,8 +94,8 @@ protected:
bool
useGlobalStats_
;
// use to compute moving mean and variance.
real
movingAvgFraction_
;
// Epsilon
value used in the batch normalization formula
.
real
EPS
;
// Epsilon
is a small constant added to the variance in batch normalization for numerical stability
.
real
epsilon_
;
};
}
// namespace paddle
paddle/gserver/layers/BatchNormalizationLayer.cpp
浏览文件 @
9580c450
...
...
@@ -51,7 +51,7 @@ void BatchNormalizationLayer::calMeanAndStd(const MatrixPtr& mat) {
calMovingMeanAndVar
();
savedInvVar_
->
subScalar
(
-
EPS
);
savedInvVar_
->
subScalar
(
-
epsilon_
);
savedInvVar_
->
sqrt2
(
*
savedInvVar_
);
}
...
...
@@ -72,7 +72,7 @@ void BatchNormalizationLayer::setMeanAndStd() {
savedInvVar_
->
copyFrom
(
*
(
movingVar_
->
getW
()));
savedInvVar_
->
downClip
(
real
(
0.0
));
savedInvVar_
->
subScalar
(
-
EPS
);
savedInvVar_
->
subScalar
(
-
epsilon_
);
savedInvVar_
->
sqrt2
(
*
savedInvVar_
);
}
...
...
paddle/gserver/layers/CudnnBatchNormLayer.cpp
浏览文件 @
9580c450
...
...
@@ -60,7 +60,15 @@ void CudnnBatchNormLayer::forward(PassType passType) {
real
*
beta
=
biases_
->
getW
()
->
getData
();
real
*
movingMean
=
movingMean_
->
getW
()
->
getData
();
real
*
movingVar
=
movingVar_
->
getW
()
->
getData
();
EPS_
=
std
::
max
(
MIN_EPS
,
static_cast
<
double
>
(
EPS
));
/**
* If epsilon_ equals 1e-5 and eps_ is assigned the value of
* static_cast<double>(epsilon_), a CUDNN_STATUS_BAD_PARAM error
* will occur because the eps_ value is less than
* CUDNN_BN_MIN_EPSILON.
* The following code ensures that eps_ meets the requirement.
*/
eps_
=
std
::
max
(
MIN_EPS
,
static_cast
<
double
>
(
epsilon_
));
if
(
!
useGlobalStats_
)
{
REGISTER_TIMER_INFO
(
"CudnnBatchFwTimer"
,
getName
().
c_str
());
...
...
@@ -76,7 +84,7 @@ void CudnnBatchNormLayer::forward(PassType passType) {
1.0
-
movingAvgFraction_
,
movingMean
,
movingVar
,
EPS
_
,
eps
_
,
savedMean
,
savedInvVar
);
}
else
{
...
...
@@ -91,7 +99,7 @@ void CudnnBatchNormLayer::forward(PassType passType) {
beta
,
movingMean
,
movingVar
,
EPS
_
);
eps
_
);
}
else
{
// There is a limitation in cudnn library.
// When the batch size is larger than 1024 in cuDNN v5.1,
...
...
@@ -102,7 +110,7 @@ void CudnnBatchNormLayer::forward(PassType passType) {
beta
,
movingMean
,
movingVar
,
EPS
_
,
eps
_
,
batchSize
,
channels_
,
imageH_
*
imageD_
,
...
...
@@ -128,7 +136,15 @@ void CudnnBatchNormLayer::backward(const UpdateCallback& callback) {
real
*
gamma
=
weight_
->
getW
()
->
getData
();
real
*
savedMean
=
savedMean_
->
getData
();
real
*
savedInvVar
=
savedInvVar_
->
getData
();
EPS_
=
std
::
max
(
MIN_EPS
,
static_cast
<
double
>
(
EPS
));
/**
* If epsilon_ equals 1e-5 and eps_ is assigned the value of
* static_cast<double>(epsilon_), a CUDNN_STATUS_BAD_PARAM error
* will occur because the eps_ value is less than
* CUDNN_BN_MIN_EPSILON.
* The following code ensures that eps_ meets the requirement.
*/
eps_
=
std
::
max
(
MIN_EPS
,
static_cast
<
double
>
(
epsilon_
));
auto
create
=
[](
MatrixPtr
&
m
,
size_t
h
,
size_t
w
,
real
**
p
)
{
Matrix
::
resizeOrCreate
(
m
,
h
,
w
,
false
,
true
);
...
...
@@ -159,7 +175,7 @@ void CudnnBatchNormLayer::backward(const UpdateCallback& callback) {
gamma
,
gammaGrad
,
betaGrad
,
EPS
_
,
eps
_
,
savedMean
,
savedInvVar
);
...
...
paddle/gserver/layers/CudnnBatchNormLayer.h
浏览文件 @
9580c450
...
...
@@ -46,15 +46,12 @@ public:
void
backward
(
const
UpdateCallback
&
callback
=
nullptr
)
override
;
protected:
/**
* Minimum allowed value is CUDNN_BN_MIN_EPSILON defined in cudnn.h.
* Same epsilon value should be used in forward and backward functions.
*/
/// Minimum allowed value is CUDNN_BN_MIN_EPSILON defined in cudnn.h.
static
const
double
MIN_EPS
;
/// Epsilon value used in the batch normalization formula.
///
If EPS_ is smaller than MIN_EPS, MIN_EPS will be used
.
double
EPS
_
;
///
Same epsilon value should be used in forward and backward functions
.
double
eps
_
;
/// Input/output tensor descriptor desc
hl_tensor_descriptor
ioDesc_
;
...
...
paddle/gserver/layers/MKLDNNBatchNormLayer.cpp
浏览文件 @
9580c450
...
...
@@ -48,7 +48,7 @@ bool MKLDNNBatchNormLayer::init(const LayerMap& layerMap,
useGlobalStats_
=
config_
.
use_global_stats
();
}
movingAvgFraction_
=
config_
.
moving_average_fraction
();
EPS
=
config_
.
epsilon
();
epsilon_
=
config_
.
epsilon
();
VLOG
(
MKLDNN_BASE
)
<<
"--- "
<<
(
useGlobalStats_
?
"use"
:
"do not use"
)
<<
" --- global stats"
;
...
...
@@ -213,7 +213,7 @@ void MKLDNNBatchNormLayer::resetFwdPD(
if
(
wgt
)
{
flags_
=
(
flags_
|
batch_normalization_flag
::
use_scale_shift
);
}
auto
fwdDesc
=
bn_fwd
::
desc
(
pk
,
in
->
getMemoryDesc
(),
EPS
,
flags_
);
auto
fwdDesc
=
bn_fwd
::
desc
(
pk
,
in
->
getMemoryDesc
(),
epsilon_
,
flags_
);
pd
.
reset
(
new
bn_fwd
::
primitive_desc
(
fwdDesc
,
engine_
));
CHECK_PRIMITIVE_DESC_EQ
(
out
,
pd
->
dst_primitive_desc
());
if
(
wgt
)
{
...
...
@@ -280,7 +280,7 @@ void MKLDNNBatchNormLayer::resetBwdPD(
}
CHECK_PRIMITIVE_DESC_EQ
(
out
,
in
->
getPrimitiveDesc
());
auto
md
=
in
->
getMemoryDesc
();
auto
bwdDesc
=
bn_bwd
::
desc
(
prop_kind
::
backward
,
md
,
md
,
EPS
,
flags_
);
auto
bwdDesc
=
bn_bwd
::
desc
(
prop_kind
::
backward
,
md
,
md
,
epsilon_
,
flags_
);
pd
.
reset
(
new
bn_bwd
::
primitive_desc
(
bwdDesc
,
engine_
,
*
fwdPD_
));
CHECK
(
pd
->
weights_primitive_desc
()
==
fwdPD_
->
weights_primitive_desc
());
CHECK_PRIMITIVE_DESC_EQ
(
wgt
,
pd
->
diff_weights_primitive_desc
());
...
...
paddle/gserver/layers/MKLDNNBatchNormLayer.h
浏览文件 @
9580c450
...
...
@@ -32,7 +32,7 @@ protected:
std
::
shared_ptr
<
bn_fwd
::
primitive_desc
>
fwdPD_
;
// Epsilon value used in the batch normalization formula.
real
EPS
;
real
epsilon_
;
// weight and bias in paddle
std
::
unique_ptr
<
Weight
>
weight_
;
...
...
python/paddle/trainer/config_parser.py
浏览文件 @
9580c450
...
...
@@ -2483,7 +2483,7 @@ class BatchNormLayer(LayerBase):
self
.
config
.
use_global_stats
=
use_global_stats
if
moving_average_fraction
is
not
None
:
self
.
config
.
moving_average_fraction
=
moving_average_fraction
if
epsilon
is
not
None
:
self
.
config
.
epsilon
=
epsilon
input_layer
=
self
.
get_input_layer
(
0
)
...
...
python/paddle/trainer_config_helpers/layers.py
浏览文件 @
9580c450
...
...
@@ -3127,7 +3127,7 @@ def batch_norm_layer(input,
(
batch_norm_type
==
"mkldnn_batch_norm"
)
or
\
(
batch_norm_type
==
"cudnn_batch_norm"
)
assert
epsilon
>=
1e-5
,
"
Parameter
epsilon must be no less than 1e-5."
assert
epsilon
>=
1e-5
,
"epsilon must be no less than 1e-5."
l
=
Layer
(
name
=
name
,
...
...
python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr
浏览文件 @
9580c450
...
...
@@ -65,6 +65,7 @@ layers {
height: 227
width: 227
depth: 1
epsilon: 1e-05
}
layers {
name: "__crmnorm_0__"
...
...
python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr
浏览文件 @
9580c450
...
...
@@ -65,6 +65,7 @@ layers {
height: 256
width: 256
depth: 1
epsilon: 1e-05
}
layers {
name: "__crmnorm_0__"
...
...
python/paddle/trainer_config_helpers/tests/configs/protostr/test_BatchNorm3D.protostr
浏览文件 @
9580c450
...
...
@@ -36,6 +36,7 @@ layers {
height: 6
width: 20
depth: 3
epsilon: 1e-05
}
parameters {
name: "___batch_norm_0__.w0"
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录