Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
Paddle
提交
9580c450
P
Paddle
项目概览
PaddlePaddle
/
Paddle
1 年多 前同步成功
通知
2310
Star
20933
Fork
5423
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
1423
列表
看板
标记
里程碑
合并请求
543
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
1,423
Issue
1,423
列表
看板
标记
里程碑
合并请求
543
合并请求
543
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
9580c450
编写于
11月 17, 2017
作者:
P
peterzhang2029
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into add_bn_eq
上级
27d7b2cb
c808fbbf
变更
19
隐藏空白更改
内联
并排
Showing
19 changed file
with
63 addition
and
44 deletion
+63
-44
cmake/cross_compiling/ios.cmake
cmake/cross_compiling/ios.cmake
+3
-5
cmake/external/openblas.cmake
cmake/external/openblas.cmake
+6
-7
cmake/external/warpctc.cmake
cmake/external/warpctc.cmake
+4
-0
doc/mobile/cross_compiling_for_android_cn.md
doc/mobile/cross_compiling_for_android_cn.md
+1
-1
doc/mobile/cross_compiling_for_ios_cn.md
doc/mobile/cross_compiling_for_ios_cn.md
+6
-6
doc/mobile/cross_compiling_for_raspberry_cn.md
doc/mobile/cross_compiling_for_raspberry_cn.md
+1
-1
paddle/cuda/include/hl_gpu.h
paddle/cuda/include/hl_gpu.h
+2
-0
paddle/gserver/layers/BatchNormBaseLayer.cpp
paddle/gserver/layers/BatchNormBaseLayer.cpp
+1
-1
paddle/gserver/layers/BatchNormBaseLayer.h
paddle/gserver/layers/BatchNormBaseLayer.h
+2
-2
paddle/gserver/layers/BatchNormalizationLayer.cpp
paddle/gserver/layers/BatchNormalizationLayer.cpp
+2
-2
paddle/gserver/layers/CudnnBatchNormLayer.cpp
paddle/gserver/layers/CudnnBatchNormLayer.cpp
+22
-6
paddle/gserver/layers/CudnnBatchNormLayer.h
paddle/gserver/layers/CudnnBatchNormLayer.h
+3
-6
paddle/gserver/layers/MKLDNNBatchNormLayer.cpp
paddle/gserver/layers/MKLDNNBatchNormLayer.cpp
+3
-3
paddle/gserver/layers/MKLDNNBatchNormLayer.h
paddle/gserver/layers/MKLDNNBatchNormLayer.h
+1
-1
python/paddle/trainer/config_parser.py
python/paddle/trainer/config_parser.py
+2
-2
python/paddle/trainer_config_helpers/layers.py
python/paddle/trainer_config_helpers/layers.py
+1
-1
python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr
...config_helpers/tests/configs/protostr/img_layers.protostr
+1
-0
python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr
..._helpers/tests/configs/protostr/img_trans_layers.protostr
+1
-0
python/paddle/trainer_config_helpers/tests/configs/protostr/test_BatchNorm3D.protostr
..._helpers/tests/configs/protostr/test_BatchNorm3D.protostr
+1
-0
未找到文件。
cmake/cross_compiling/ios.cmake
浏览文件 @
9580c450
...
...
@@ -76,11 +76,9 @@ set(IOS_PLATFORM ${IOS_PLATFORM} CACHE STRING "Type of iOS Platform")
# Set the architecture for iOS
if
(
NOT DEFINED IOS_ARCH
)
if
(
IOS_PLATFORM STREQUAL
"OS"
)
# FIXME(liuyiqun): support "armv7;armv7s;arm64" future
set
(
IOS_ARCH
"arm64"
)
set
(
IOS_ARCH
"armv7;armv7s;arm64"
)
elseif
(
IOS_PLATFORM STREQUAL
"SIMULATOR"
)
# FIXME(liuyiqun): support "i386;x86_64" future
set
(
IOS_ARCH
"x86_64"
)
set
(
IOS_ARCH
"i386;x86_64"
)
endif
()
endif
()
set
(
CMAKE_OSX_ARCHITECTURES
${
IOS_ARCH
}
CACHE string
"Build architecture for iOS"
)
...
...
@@ -248,7 +246,7 @@ set(IOS_COMPILER_FLAGS "${XCODE_IOS_PLATFORM_VERSION_FLAGS} ${XCODE_IOS_BITCODE_
# Hidden visibility is required for cxx on iOS
set
(
CMAKE_C_FLAGS
"
${
IOS_COMPILER_FLAGS
}
${
CMAKE_C_FLAGS
}
"
CACHE STRING
"C flags"
)
set
(
CMAKE_CXX_FLAGS
"
${
IOS_COMPILER_FLAGS
}
-fvisibility-inlines-hidden
${
CMAKE_CXX_FLAGS
}
"
CACHE STRING
"CXX flags"
)
set
(
CMAKE_CXX_FLAGS
"
${
IOS_COMPILER_FLAGS
}
-fvisibility
=hidden -fvisibility
-inlines-hidden
${
CMAKE_CXX_FLAGS
}
"
CACHE STRING
"CXX flags"
)
set
(
IOS_LINK_FLAGS
"
${
XCODE_IOS_PLATFORM_VERSION_FLAGS
}
-Wl,-search_paths_first"
)
...
...
cmake/external/openblas.cmake
浏览文件 @
9580c450
...
...
@@ -45,15 +45,14 @@ IF(NOT ${CBLAS_FOUND})
SET
(
OPTIONAL_ARGS
${
OPTIONAL_ARGS
}
TARGET=ARMV8 BINARY=64 USE_THREAD=0
)
ENDIF
()
ELSEIF
(
IOS
)
# FIXME(liuyiqun): support multiple architectures
SET
(
OPENBLAS_COMMIT
"b5c96fcfcdc82945502a2303116a64d89985daf5"
)
SET
(
OPENBLAS_CC
"
${
OPENBLAS_CC
}
${
CMAKE_C_FLAGS
}
-isysroot
${
CMAKE_OSX_SYSROOT
}
"
)
IF
(
CMAKE_OSX_ARCHITECTURES MATCHES
"armv7"
)
SET
(
OPENBLAS_CC
"
${
OPENBLAS_CC
}
-arch armv7"
)
SET
(
OPTIONAL_ARGS
${
OPTIONAL_ARGS
}
TARGET=ARMV7 ARM_SOFTFP_ABI=1 USE_THREAD=0
)
ELSEIF
(
CMAKE_OSX_ARCHITECTURES MATCHES
"arm64"
)
IF
(
CMAKE_OSX_ARCHITECTURES MATCHES
"arm64"
)
SET
(
OPENBLAS_COMMIT
"b5c96fcfcdc82945502a2303116a64d89985daf5"
)
SET
(
OPENBLAS_CC
"
${
OPENBLAS_CC
}
${
CMAKE_C_FLAGS
}
-isysroot
${
CMAKE_OSX_SYSROOT
}
"
)
SET
(
OPENBLAS_CC
"
${
OPENBLAS_CC
}
-arch arm64"
)
SET
(
OPTIONAL_ARGS
${
OPTIONAL_ARGS
}
TARGET=ARMV8 BINARY=64 USE_THREAD=0 CROSS_SUFFIX=
${
CROSS_SUFFIX
}
)
ELSE
()
MESSAGE
(
FATAL_ERROR
"OpenBLAS only support arm64 architectures on iOS. "
"You can set IOS_USE_VECLIB_FOR_BLAS=ON or USE_EIGEN_FOR_BLAS=ON to use other blas library instead."
)
ENDIF
()
ELSEIF
(
RPI
)
# use hardfp
...
...
cmake/external/warpctc.cmake
浏览文件 @
9580c450
...
...
@@ -12,6 +12,10 @@
# See the License for the specific language governing permissions and
# limitations under the License.
IF
(
MOBILE_INFERENCE
)
return
()
ENDIF
()
INCLUDE
(
ExternalProject
)
SET
(
WARPCTC_SOURCES_DIR
${
THIRD_PARTY_PATH
}
/warpctc
)
...
...
doc/mobile/cross_compiling_for_android_cn.md
浏览文件 @
9580c450
#
构建Android平台上的PaddlePaddle库
#
Android平台编译指南
用户可通过如下两种方式,交叉编译Android平台上适用的PaddlePaddle库:
-
基于Docker容器的编译方式
...
...
doc/mobile/cross_compiling_for_ios_cn.md
浏览文件 @
9580c450
#
构建iOS平台上的PaddlePaddle库
#
iOS平台编译指南
交叉编译iOS平台上适用的PaddlePaddle库,需要在MacOS系统上进行。本文将介绍在MacOS上,从源码交叉编译iOS平台上适用的PaddlePaddle库。
## 准备交叉编译环境
...
...
@@ -25,7 +25,7 @@ iOS平台可选配置参数:
-
`IOS_PLATFORM`
,可设置为
`OS/SIMULATOR`
,默认值为
`OS`
。
-
`OS`
,构建目标为
`arm`
架构的iPhone或者iPad等物理设备。
-
`SIMULATOR`
,构建目标为
`x86`
架构的模拟器平台。
-
`IOS_ARCH`
,目标架构。针对不同的
`IOS_PLATFORM`
,可设置的目标架构如下表所示:
-
`IOS_ARCH`
,目标架构。针对不同的
`IOS_PLATFORM`
,可设置的目标架构如下表所示
,默认编译所有架构
:
<table class="docutils">
<colgroup>
...
...
@@ -41,11 +41,11 @@ iOS平台可选配置参数:
<tbody valign="top">
<tr class="row-even">
<td>OS</td>
<td>armv7, armv7s, arm64
(默认)
</td>
<td>armv7, armv7s, arm64 </td>
</tr>
<tr class="row-odd">
<td>SIMULATOR</td>
<td>i386, x86_64
(默认)
</td>
<td>i386, x86_64 </td>
</tr>
</tbody>
</table>
...
...
@@ -66,7 +66,7 @@ iOS平台可选配置参数:
```
bash
cmake
-DCMAKE_SYSTEM_NAME
=
iOS
\
-DIOS_PLATFORM
=
OS
\
-DIOS_ARCH
=
"arm64"
\
-DIOS_ARCH
=
"arm
v7;arm
64"
\
-DIOS_ENABLE_BITCODE
=
ON
\
-DIOS_USE_VECLIB_FOR_BLAS
=
ON
\
-DCMAKE_INSTALL_PREFIX
=
your/path/to/install
\
...
...
@@ -112,6 +112,6 @@ $ make install
-
`lib`
目录,其中包含PaddlePaddle的C-API静态库
-
`third_party`
目录,其中包含所依赖的所有第三方库
注意,
不同架构的PaddlePaddle库建议安装到不同的目录下,然后使用
`lipo`
工具将多个静态库合并成一个支持多个架构的
fat库。
注意,
如果PaddlePaddle库需要同时支持真机和模拟器,则需要分别编译真机和模拟器版本,然后使用
`lipo`
工具合并
fat库。
自此,PaddlePaddle库已经安装完成,用户可将合成的fat库用于深度学习相关的iOS App中,调用方法见C-API文档。
doc/mobile/cross_compiling_for_raspberry_cn.md
浏览文件 @
9580c450
#
构建Raspberry Pi平台上的PaddlePaddle库
#
Raspberry Pi平台编译指南
通常有两个方法来构建基于 Raspberry Pi 的版本:
...
...
paddle/cuda/include/hl_gpu.h
浏览文件 @
9580c450
...
...
@@ -25,7 +25,9 @@ limitations under the License. */
#include "hl_matrix.h"
#include "hl_sequence.h"
#include "hl_sparse.h"
#ifndef PADDLE_MOBILE_INFERENCE
#include "hl_warpctc_wrap.h"
#endif
#ifdef HPPL_STUB_FUNC
#include "stub/hl_aggregate_stub.h"
...
...
paddle/gserver/layers/BatchNormBaseLayer.cpp
浏览文件 @
9580c450
...
...
@@ -41,7 +41,7 @@ bool BatchNormBaseLayer::init(const LayerMap& layerMap,
useGlobalStats_
=
config_
.
use_global_stats
();
}
movingAvgFraction_
=
config_
.
moving_average_fraction
();
EPS
=
config_
.
epsilon
();
epsilon_
=
config_
.
epsilon
();
weight_
.
reset
(
new
Weight
(
1
,
channels_
,
parameters_
[
0
]));
movingMean_
.
reset
(
new
Weight
(
1
,
channels_
,
parameters_
[
1
]));
...
...
paddle/gserver/layers/BatchNormBaseLayer.h
浏览文件 @
9580c450
...
...
@@ -94,8 +94,8 @@ protected:
bool
useGlobalStats_
;
// use to compute moving mean and variance.
real
movingAvgFraction_
;
// Epsilon
value used in the batch normalization formula
.
real
EPS
;
// Epsilon
is a small constant added to the variance in batch normalization for numerical stability
.
real
epsilon_
;
};
}
// namespace paddle
paddle/gserver/layers/BatchNormalizationLayer.cpp
浏览文件 @
9580c450
...
...
@@ -51,7 +51,7 @@ void BatchNormalizationLayer::calMeanAndStd(const MatrixPtr& mat) {
calMovingMeanAndVar
();
savedInvVar_
->
subScalar
(
-
EPS
);
savedInvVar_
->
subScalar
(
-
epsilon_
);
savedInvVar_
->
sqrt2
(
*
savedInvVar_
);
}
...
...
@@ -72,7 +72,7 @@ void BatchNormalizationLayer::setMeanAndStd() {
savedInvVar_
->
copyFrom
(
*
(
movingVar_
->
getW
()));
savedInvVar_
->
downClip
(
real
(
0.0
));
savedInvVar_
->
subScalar
(
-
EPS
);
savedInvVar_
->
subScalar
(
-
epsilon_
);
savedInvVar_
->
sqrt2
(
*
savedInvVar_
);
}
...
...
paddle/gserver/layers/CudnnBatchNormLayer.cpp
浏览文件 @
9580c450
...
...
@@ -60,7 +60,15 @@ void CudnnBatchNormLayer::forward(PassType passType) {
real
*
beta
=
biases_
->
getW
()
->
getData
();
real
*
movingMean
=
movingMean_
->
getW
()
->
getData
();
real
*
movingVar
=
movingVar_
->
getW
()
->
getData
();
EPS_
=
std
::
max
(
MIN_EPS
,
static_cast
<
double
>
(
EPS
));
/**
* If epsilon_ equals 1e-5 and eps_ is assigned the value of
* static_cast<double>(epsilon_), a CUDNN_STATUS_BAD_PARAM error
* occurs because the resulting eps_ value is less than
* CUDNN_BN_MIN_EPSILON.
* The following code ensures that eps_ meets the requirement.
*/
eps_
=
std
::
max
(
MIN_EPS
,
static_cast
<
double
>
(
epsilon_
));
if
(
!
useGlobalStats_
)
{
REGISTER_TIMER_INFO
(
"CudnnBatchFwTimer"
,
getName
().
c_str
());
...
...
@@ -76,7 +84,7 @@ void CudnnBatchNormLayer::forward(PassType passType) {
1.0
-
movingAvgFraction_
,
movingMean
,
movingVar
,
EPS
_
,
eps
_
,
savedMean
,
savedInvVar
);
}
else
{
...
...
@@ -91,7 +99,7 @@ void CudnnBatchNormLayer::forward(PassType passType) {
beta
,
movingMean
,
movingVar
,
EPS
_
);
eps
_
);
}
else
{
// There is a limitation in cudnn library.
// When the batch size is larger than 1024 in cuDNN v5.1,
...
...
@@ -102,7 +110,7 @@ void CudnnBatchNormLayer::forward(PassType passType) {
beta
,
movingMean
,
movingVar
,
EPS
_
,
eps
_
,
batchSize
,
channels_
,
imageH_
*
imageD_
,
...
...
@@ -128,7 +136,15 @@ void CudnnBatchNormLayer::backward(const UpdateCallback& callback) {
real
*
gamma
=
weight_
->
getW
()
->
getData
();
real
*
savedMean
=
savedMean_
->
getData
();
real
*
savedInvVar
=
savedInvVar_
->
getData
();
EPS_
=
std
::
max
(
MIN_EPS
,
static_cast
<
double
>
(
EPS
));
/**
* If epsilon_ equals 1e-5 and eps_ is assigned the value of
* static_cast<double>(epsilon_), a CUDNN_STATUS_BAD_PARAM error
* occurs because the resulting eps_ value is less than
* CUDNN_BN_MIN_EPSILON.
* The following code ensures that eps_ meets the requirement.
*/
eps_
=
std
::
max
(
MIN_EPS
,
static_cast
<
double
>
(
epsilon_
));
auto
create
=
[](
MatrixPtr
&
m
,
size_t
h
,
size_t
w
,
real
**
p
)
{
Matrix
::
resizeOrCreate
(
m
,
h
,
w
,
false
,
true
);
...
...
@@ -159,7 +175,7 @@ void CudnnBatchNormLayer::backward(const UpdateCallback& callback) {
gamma
,
gammaGrad
,
betaGrad
,
EPS
_
,
eps
_
,
savedMean
,
savedInvVar
);
...
...
paddle/gserver/layers/CudnnBatchNormLayer.h
浏览文件 @
9580c450
...
...
@@ -46,15 +46,12 @@ public:
void
backward
(
const
UpdateCallback
&
callback
=
nullptr
)
override
;
protected:
/**
* Minimum allowed value is CUDNN_BN_MIN_EPSILON defined in cudnn.h.
* Same epsilon value should be used in forward and backward functions.
*/
/// Minimum allowed value is CUDNN_BN_MIN_EPSILON defined in cudnn.h.
static
const
double
MIN_EPS
;
/// Epsilon value used in the batch normalization formula.
///
If EPS_ is smaller than MIN_EPS, MIN_EPS will be used
.
double
EPS
_
;
///
Same epsilon value should be used in forward and backward functions
.
double
eps
_
;
/// Input/output tensor descriptor desc
hl_tensor_descriptor
ioDesc_
;
...
...
paddle/gserver/layers/MKLDNNBatchNormLayer.cpp
浏览文件 @
9580c450
...
...
@@ -48,7 +48,7 @@ bool MKLDNNBatchNormLayer::init(const LayerMap& layerMap,
useGlobalStats_
=
config_
.
use_global_stats
();
}
movingAvgFraction_
=
config_
.
moving_average_fraction
();
EPS
=
config_
.
epsilon
();
epsilon_
=
config_
.
epsilon
();
VLOG
(
MKLDNN_BASE
)
<<
"--- "
<<
(
useGlobalStats_
?
"use"
:
"do not use"
)
<<
" --- global stats"
;
...
...
@@ -213,7 +213,7 @@ void MKLDNNBatchNormLayer::resetFwdPD(
if
(
wgt
)
{
flags_
=
(
flags_
|
batch_normalization_flag
::
use_scale_shift
);
}
auto
fwdDesc
=
bn_fwd
::
desc
(
pk
,
in
->
getMemoryDesc
(),
EPS
,
flags_
);
auto
fwdDesc
=
bn_fwd
::
desc
(
pk
,
in
->
getMemoryDesc
(),
epsilon_
,
flags_
);
pd
.
reset
(
new
bn_fwd
::
primitive_desc
(
fwdDesc
,
engine_
));
CHECK_PRIMITIVE_DESC_EQ
(
out
,
pd
->
dst_primitive_desc
());
if
(
wgt
)
{
...
...
@@ -280,7 +280,7 @@ void MKLDNNBatchNormLayer::resetBwdPD(
}
CHECK_PRIMITIVE_DESC_EQ
(
out
,
in
->
getPrimitiveDesc
());
auto
md
=
in
->
getMemoryDesc
();
auto
bwdDesc
=
bn_bwd
::
desc
(
prop_kind
::
backward
,
md
,
md
,
EPS
,
flags_
);
auto
bwdDesc
=
bn_bwd
::
desc
(
prop_kind
::
backward
,
md
,
md
,
epsilon_
,
flags_
);
pd
.
reset
(
new
bn_bwd
::
primitive_desc
(
bwdDesc
,
engine_
,
*
fwdPD_
));
CHECK
(
pd
->
weights_primitive_desc
()
==
fwdPD_
->
weights_primitive_desc
());
CHECK_PRIMITIVE_DESC_EQ
(
wgt
,
pd
->
diff_weights_primitive_desc
());
...
...
paddle/gserver/layers/MKLDNNBatchNormLayer.h
浏览文件 @
9580c450
...
...
@@ -32,7 +32,7 @@ protected:
std
::
shared_ptr
<
bn_fwd
::
primitive_desc
>
fwdPD_
;
// Epsilon value used in the batch normalization formula.
real
EPS
;
real
epsilon_
;
// weight and bias in paddle
std
::
unique_ptr
<
Weight
>
weight_
;
...
...
python/paddle/trainer/config_parser.py
浏览文件 @
9580c450
...
...
@@ -2483,8 +2483,8 @@ class BatchNormLayer(LayerBase):
self
.
config
.
use_global_stats
=
use_global_stats
if
moving_average_fraction
is
not
None
:
self
.
config
.
moving_average_fraction
=
moving_average_fraction
if
epsilon
is
not
None
:
self
.
config
.
epsilon
=
epsilon
self
.
config
.
epsilon
=
epsilon
input_layer
=
self
.
get_input_layer
(
0
)
image_conf
=
self
.
config
.
inputs
[
0
].
image_conf
...
...
python/paddle/trainer_config_helpers/layers.py
浏览文件 @
9580c450
...
...
@@ -3127,7 +3127,7 @@ def batch_norm_layer(input,
(
batch_norm_type
==
"mkldnn_batch_norm"
)
or
\
(
batch_norm_type
==
"cudnn_batch_norm"
)
assert
epsilon
>=
1e-5
,
"
Parameter
epsilon must be no less than 1e-5."
assert
epsilon
>=
1e-5
,
"epsilon must be no less than 1e-5."
l
=
Layer
(
name
=
name
,
...
...
python/paddle/trainer_config_helpers/tests/configs/protostr/img_layers.protostr
浏览文件 @
9580c450
...
...
@@ -65,6 +65,7 @@ layers {
height: 227
width: 227
depth: 1
epsilon: 1e-05
}
layers {
name: "__crmnorm_0__"
...
...
python/paddle/trainer_config_helpers/tests/configs/protostr/img_trans_layers.protostr
浏览文件 @
9580c450
...
...
@@ -65,6 +65,7 @@ layers {
height: 256
width: 256
depth: 1
epsilon: 1e-05
}
layers {
name: "__crmnorm_0__"
...
...
python/paddle/trainer_config_helpers/tests/configs/protostr/test_BatchNorm3D.protostr
浏览文件 @
9580c450
...
...
@@ -36,6 +36,7 @@ layers {
height: 6
width: 20
depth: 3
epsilon: 1e-05
}
parameters {
name: "___batch_norm_0__.w0"
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录