Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
PaddlePaddle
PaddleDetection
提交
8567d042
P
PaddleDetection
项目概览
PaddlePaddle
/
PaddleDetection
大约 2 年 前同步成功
通知
708
Star
11112
Fork
2696
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
184
列表
看板
标记
里程碑
合并请求
40
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
PaddleDetection
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
184
Issue
184
列表
看板
标记
里程碑
合并请求
40
合并请求
40
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
8567d042
编写于
6月 20, 2018
作者:
F
fengjiayi
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of
https://github.com/PaddlePaddle/Paddle
into dev_refine_bilinear_interp
上级
5d33481c
25241e9e
变更
44
隐藏空白更改
内联
并排
Showing
44 changed file
with
1635 addition
and
346 deletion
+1635
-346
benchmark/fluid/fluid_benchmark.py
benchmark/fluid/fluid_benchmark.py
+1
-1
benchmark/fluid/kube_gen_job.py
benchmark/fluid/kube_gen_job.py
+11
-5
cmake/external/mkldnn.cmake
cmake/external/mkldnn.cmake
+2
-1
doc/fluid/howto/cluster/fluid_cluster_train_cn.md
doc/fluid/howto/cluster/fluid_cluster_train_cn.md
+2
-2
doc/fluid/howto/cluster/fluid_recordio.md
doc/fluid/howto/cluster/fluid_recordio.md
+2
-2
paddle/fluid/operators/activation_op.cc
paddle/fluid/operators/activation_op.cc
+2
-2
paddle/fluid/operators/detection_map_op.cc
paddle/fluid/operators/detection_map_op.cc
+6
-6
paddle/fluid/operators/gaussian_random_mkldnn_op.cc
paddle/fluid/operators/gaussian_random_mkldnn_op.cc
+55
-0
paddle/fluid/operators/gaussian_random_op.cc
paddle/fluid/operators/gaussian_random_op.cc
+19
-2
paddle/fluid/operators/math/concat.cu
paddle/fluid/operators/math/concat.cu
+12
-31
paddle/fluid/operators/parallel_do_op.cc
paddle/fluid/operators/parallel_do_op.cc
+1
-1
paddle/fluid/operators/recurrent_op.cc
paddle/fluid/operators/recurrent_op.cc
+2
-1
paddle/fluid/operators/sum_mkldnn_op.cc
paddle/fluid/operators/sum_mkldnn_op.cc
+240
-0
paddle/fluid/operators/sum_op.cc
paddle/fluid/operators/sum_op.cc
+26
-6
paddle/fluid/operators/while_op.cc
paddle/fluid/operators/while_op.cc
+2
-2
paddle/fluid/platform/mkldnn_helper.h
paddle/fluid/platform/mkldnn_helper.h
+6
-0
python/paddle/fluid/backward.py
python/paddle/fluid/backward.py
+6
-5
python/paddle/fluid/clip.py
python/paddle/fluid/clip.py
+123
-11
python/paddle/fluid/inferencer.py
python/paddle/fluid/inferencer.py
+37
-9
python/paddle/fluid/initializer.py
python/paddle/fluid/initializer.py
+141
-91
python/paddle/fluid/layers/control_flow.py
python/paddle/fluid/layers/control_flow.py
+30
-3
python/paddle/fluid/layers/detection.py
python/paddle/fluid/layers/detection.py
+43
-2
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+196
-77
python/paddle/fluid/layers/tensor.py
python/paddle/fluid/layers/tensor.py
+17
-13
python/paddle/fluid/optimizer.py
python/paddle/fluid/optimizer.py
+295
-19
python/paddle/fluid/profiler.py
python/paddle/fluid/profiler.py
+112
-5
python/paddle/fluid/regularizer.py
python/paddle/fluid/regularizer.py
+43
-3
python/paddle/fluid/tests/book/notest_understand_sentiment.py
...on/paddle/fluid/tests/book/notest_understand_sentiment.py
+5
-5
python/paddle/fluid/tests/book/test_fit_a_line.py
python/paddle/fluid/tests/book/test_fit_a_line.py
+5
-5
python/paddle/fluid/tests/book/test_image_classification.py
python/paddle/fluid/tests/book/test_image_classification.py
+5
-5
python/paddle/fluid/tests/book/test_label_semantic_roles.py
python/paddle/fluid/tests/book/test_label_semantic_roles.py
+5
-5
python/paddle/fluid/tests/book/test_machine_translation.py
python/paddle/fluid/tests/book/test_machine_translation.py
+5
-5
python/paddle/fluid/tests/book/test_recognize_digits.py
python/paddle/fluid/tests/book/test_recognize_digits.py
+5
-5
python/paddle/fluid/tests/book/test_recommender_system.py
python/paddle/fluid/tests/book/test_recommender_system.py
+5
-5
python/paddle/fluid/tests/book/test_word2vec.py
python/paddle/fluid/tests/book/test_word2vec.py
+5
-5
python/paddle/fluid/tests/unittests/test_concat_op.py
python/paddle/fluid/tests/unittests/test_concat_op.py
+12
-1
python/paddle/fluid/tests/unittests/test_gaussian_random_mkldnn_op.py
...e/fluid/tests/unittests/test_gaussian_random_mkldnn_op.py
+26
-0
python/paddle/fluid/tests/unittests/test_gaussian_random_op.py
...n/paddle/fluid/tests/unittests/test_gaussian_random_op.py
+12
-1
python/paddle/fluid/tests/unittests/test_layers.py
python/paddle/fluid/tests/unittests/test_layers.py
+9
-0
python/paddle/fluid/tests/unittests/test_optimizer.py
python/paddle/fluid/tests/unittests/test_optimizer.py
+66
-0
python/paddle/fluid/tests/unittests/test_sum_mkldnn_op.py
python/paddle/fluid/tests/unittests/test_sum_mkldnn_op.py
+26
-0
python/paddle/fluid/tests/unittests/test_sum_op.py
python/paddle/fluid/tests/unittests/test_sum_op.py
+6
-0
python/paddle/fluid/transpiler/distribute_transpiler.py
python/paddle/fluid/transpiler/distribute_transpiler.py
+4
-2
python/paddle/reader/decorator.py
python/paddle/reader/decorator.py
+2
-2
未找到文件。
benchmark/fluid/fluid_benchmark.py
浏览文件 @
8567d042
...
@@ -97,7 +97,7 @@ def dist_transpile(trainer_id, args):
...
@@ -97,7 +97,7 @@ def dist_transpile(trainer_id, args):
return
train_program
,
fluid
.
default_startup_program
()
return
train_program
,
fluid
.
default_startup_program
()
else
:
else
:
raise
ValueError
(
raise
ValueError
(
'TRAINING_ROLE environment variable must be either TRAINER or PSERVER'
'
PADDLE_
TRAINING_ROLE environment variable must be either TRAINER or PSERVER'
)
)
...
...
benchmark/fluid/kube_gen_job.py
浏览文件 @
8567d042
...
@@ -108,10 +108,10 @@ def gen_job():
...
@@ -108,10 +108,10 @@ def gen_job():
tn_container
[
"ports"
][
0
][
"containerPort"
]
=
spreadport
tn_container
[
"ports"
][
0
][
"containerPort"
]
=
spreadport
envs
.
append
({
"name"
:
"PADDLE_JOB_NAME"
,
"value"
:
args
.
jobname
})
envs
.
append
({
"name"
:
"PADDLE_JOB_NAME"
,
"value"
:
args
.
jobname
})
envs
.
append
({
"name"
:
"TRAINERS"
,
"value"
:
str
(
args
.
trainers
)})
envs
.
append
({
"name"
:
"
PADDLE_
TRAINERS"
,
"value"
:
str
(
args
.
trainers
)})
envs
.
append
({
"name"
:
"PSERVERS"
,
"value"
:
str
(
args
.
pservers
)})
envs
.
append
({
"name"
:
"PSERVERS"
,
"value"
:
str
(
args
.
pservers
)})
envs
.
append
({
"name"
:
"ENTRY"
,
"value"
:
args
.
entry
})
envs
.
append
({
"name"
:
"ENTRY"
,
"value"
:
args
.
entry
})
envs
.
append
({
"name"
:
"PADDLE_
INIT
_PORT"
,
"value"
:
str
(
args
.
port
)})
envs
.
append
({
"name"
:
"PADDLE_
PSERVER
_PORT"
,
"value"
:
str
(
args
.
port
)})
envs
.
append
({
"name"
:
"PADDLE_PSERVER_PORT"
,
"value"
:
str
(
args
.
port
)})
envs
.
append
({
"name"
:
"PADDLE_PSERVER_PORT"
,
"value"
:
str
(
args
.
port
)})
# NOTE: these directories below are cluster specific, please modify
# NOTE: these directories below are cluster specific, please modify
# these settings before you run on your own cluster.
# these settings before you run on your own cluster.
...
@@ -167,16 +167,22 @@ def gen_job():
...
@@ -167,16 +167,22 @@ def gen_job():
tn_container
[
"volumeMounts"
]
=
volumeMounts
tn_container
[
"volumeMounts"
]
=
volumeMounts
ps_container
[
"env"
]
=
envs
ps_container
[
"env"
]
=
envs
ps_container
[
"env"
].
append
({
"name"
:
"TRAINING_ROLE"
,
"value"
:
"PSERVER"
})
ps_container
[
"env"
].
append
({
"name"
:
"PADDLE_TRAINING_ROLE"
,
"value"
:
"PSERVER"
})
tn_container
[
"env"
]
=
envs
tn_container
[
"env"
]
=
envs
if
args
.
disttype
==
"pserver"
:
if
args
.
disttype
==
"pserver"
:
tn_container
[
"env"
].
append
({
tn_container
[
"env"
].
append
({
"name"
:
"TRAINING_ROLE"
,
"name"
:
"
PADDLE_
TRAINING_ROLE"
,
"value"
:
"TRAINER"
"value"
:
"TRAINER"
})
})
elif
args
.
disttype
==
"nccl2"
or
args
.
disttype
==
"local"
:
elif
args
.
disttype
==
"nccl2"
or
args
.
disttype
==
"local"
:
# NCCL2 has no training role, set to plain WORKER
# NCCL2 has no training role, set to plain WORKER
tn_container
[
"env"
].
append
({
"name"
:
"TRAINING_ROLE"
,
"value"
:
"WORKER"
})
tn_container
[
"env"
].
append
({
"name"
:
"PADDLE_TRAINING_ROLE"
,
"value"
:
"WORKER"
})
os
.
mkdir
(
args
.
jobname
)
os
.
mkdir
(
args
.
jobname
)
if
args
.
disttype
==
"pserver"
:
if
args
.
disttype
==
"pserver"
:
...
...
cmake/external/mkldnn.cmake
浏览文件 @
8567d042
...
@@ -45,7 +45,8 @@ IF(${CBLAS_PROVIDER} STREQUAL "MKLML")
...
@@ -45,7 +45,8 @@ IF(${CBLAS_PROVIDER} STREQUAL "MKLML")
ELSE
()
ELSE
()
MESSAGE
(
FATAL_ERROR
"Should enable MKLML when build MKLDNN"
)
MESSAGE
(
FATAL_ERROR
"Should enable MKLML when build MKLDNN"
)
ENDIF
()
ENDIF
()
SET
(
MKLDNN_FLAG
"-Wno-error=strict-overflow -Wno-error=unused-result -Wno-unused-result"
)
SET
(
MKLDNN_FLAG
"-Wno-error=strict-overflow -Wno-error=unused-result"
)
SET
(
MKLDNN_FLAG
"
${
MKLDNN_FLAG
}
-Wno-unused-result -Wno-unused-value"
)
SET
(
MKLDNN_CFLAG
"
${
CMAKE_C_FLAGS
}
${
MKLDNN_FLAG
}
"
)
SET
(
MKLDNN_CFLAG
"
${
CMAKE_C_FLAGS
}
${
MKLDNN_FLAG
}
"
)
SET
(
MKLDNN_CXXFLAG
"
${
CMAKE_CXX_FLAGS
}
${
MKLDNN_FLAG
}
"
)
SET
(
MKLDNN_CXXFLAG
"
${
CMAKE_CXX_FLAGS
}
${
MKLDNN_FLAG
}
"
)
ExternalProject_Add
(
ExternalProject_Add
(
...
...
doc/fluid/howto/cluster/fluid_cluster_train_cn.md
浏览文件 @
8567d042
...
@@ -168,13 +168,13 @@ cd /paddle/python/paddle/fluid/tests/book
...
@@ -168,13 +168,13 @@ cd /paddle/python/paddle/fluid/tests/book
第二步,启动Parameter Server:
第二步,启动Parameter Server:
```
bash
```
bash
PADDLE_
INIT_PORT
=
6174
PADDLE_INIT_PSERVERS
=
192.168.1.2
TRAINERS
=
2
POD_IP
=
192.168.1.2
PADDLE_INIT_TRAINER_ID
=
1
TRAINING_ROLE
=
PSERVER python test_fit_a_line.py
PADDLE_
PSERVER_PORT
=
6174
PADDLE_PSERVER_IPS
=
192.168.1.2
PADDLE_TRAINERS
=
2
PADDLE_CURRENT_IP
=
192.168.1.2
PADDLE_TRAINER_ID
=
1
PADDLE_
TRAINING_ROLE
=
PSERVER python test_fit_a_line.py
```
```
执行命令后请等待出现提示:
```Server listening on 192.168.1.2:6174 ```
, 表示Parameter Server已经正常启动。
执行命令后请等待出现提示:
```Server listening on 192.168.1.2:6174 ```
, 表示Parameter Server已经正常启动。
第三步,启动Trainer:
第三步,启动Trainer:
```
bash
```
bash
PADDLE_
INIT_PORT
=
6174
PADDLE_INIT_PSERVERS
=
192.168.1.3
TRAINERS
=
2
POD_IP
=
192.168.1.3
PADDLE_INIT_TRAINER_ID
=
1
TRAINING_ROLE
=
TRAINER python test_fit_a_line.py
PADDLE_
PSERVER_PORT
=
6174
PADDLE_PSERVER_IPS
=
192.168.1.3
PADDLE_TRAINERS
=
2
PADDLE_CURRENT_IP
=
192.168.1.3
PADDLE_TRAINER_ID
=
1
PADDLE_
TRAINING_ROLE
=
TRAINER python test_fit_a_line.py
```
```
由于我们定义的Trainer的数量是2个,因此需要在另外一个计算节点上再启动一个Trainer。
由于我们定义的Trainer的数量是2个,因此需要在另外一个计算节点上再启动一个Trainer。
...
...
doc/fluid/howto/cluster/fluid_recordio.md
浏览文件 @
8567d042
...
@@ -114,8 +114,8 @@ def gen_train_list(file_pattern, trainers, trainer_id):
...
@@ -114,8 +114,8 @@ def gen_train_list(file_pattern, trainers, trainer_id):
ret_list
.
append
(
f
)
ret_list
.
append
(
f
)
return
ret_list
return
ret_list
trainers
=
int
(
os
.
getenv
(
"TRAINERS"
))
trainers
=
int
(
os
.
getenv
(
"
PADDLE_
TRAINERS"
))
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_
INIT_
TRAINER_ID"
))
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_TRAINER_ID"
))
data_file
=
fluid
.
layers
.
io
.
open_files
(
data_file
=
fluid
.
layers
.
io
.
open_files
(
filenames
=
gen_train_list
(
"./mnist-[0-9]*.recordio"
,
2
,
0
),
filenames
=
gen_train_list
(
"./mnist-[0-9]*.recordio"
,
2
,
0
),
thread_num
=
1
,
thread_num
=
1
,
...
...
paddle/fluid/operators/activation_op.cc
浏览文件 @
8567d042
...
@@ -143,7 +143,7 @@ $$out = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
...
@@ -143,7 +143,7 @@ $$out = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
__attribute__
((
unused
))
constexpr
char
TanhShrinkDoc
[]
=
R"DOC(
__attribute__
((
unused
))
constexpr
char
TanhShrinkDoc
[]
=
R"DOC(
TanhShrink Activation Operator.
TanhShrink Activation Operator.
$$out = x - \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
$$out = x - \
\
frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
)DOC"
;
)DOC"
;
...
@@ -385,7 +385,7 @@ class STanhOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -385,7 +385,7 @@ class STanhOpMaker : public framework::OpProtoAndCheckerMaker {
AddComment
(
R"DOC(
AddComment
(
R"DOC(
STanh Activation Operator.
STanh Activation Operator.
$$out = b * \frac{e^{a * x} - e^{-a * x}}{e^{a * x} + e^{-a * x}}$$
$$out = b * \
\
frac{e^{a * x} - e^{-a * x}}{e^{a * x} + e^{-a * x}}$$
)DOC"
);
)DOC"
);
}
}
...
...
paddle/fluid/operators/detection_map_op.cc
浏览文件 @
8567d042
...
@@ -175,12 +175,12 @@ class DetectionMAPOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -175,12 +175,12 @@ class DetectionMAPOpMaker : public framework::OpProtoAndCheckerMaker {
AddComment
(
R"DOC(
AddComment
(
R"DOC(
Detection mAP evaluate operator.
Detection mAP evaluate operator.
The general steps are as follows. First, calculate the true positive and
The general steps are as follows. First, calculate the true positive and
false positive according to the input of detection and labels, then
false positive according to the input of detection and labels, then
calculate the mAP evaluate value.
calculate the mAP evaluate value.
Supporting '11 point' and 'integral' mAP algorithm. Please get more information
Supporting '11 point' and 'integral' mAP algorithm. Please get more information
from the following articles:
from the following articles:
https://sanchom.wordpress.com/tag/average-precision/
https://sanchom.wordpress.com/tag/average-precision/
https://arxiv.org/abs/1512.02325
https://arxiv.org/abs/1512.02325
)DOC"
);
)DOC"
);
}
}
...
...
paddle/fluid/operators/gaussian_random_mkldnn_op.cc
0 → 100644
浏览文件 @
8567d042
/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include <string>
#include "paddle/fluid/operators/mean_op.h"
namespace
paddle
{
namespace
operators
{
using
framework
::
DataLayout
;
// Kernel registered for the gaussian_random operator under the MKLDNN
// library type. It fills the "Out" tensor with normally distributed samples
// and then marks the tensor as MKLDNN-laid-out.
template <typename T>
class GaussianMKLDNNKernel : public paddle::framework::OpKernel<T> {
 public:
  // Reads the "mean", "std" and "seed" attributes from `context`, writes one
  // sample per element into the "Out" tensor, and sets its layout/format.
  void Compute(const framework::ExecutionContext& context) const override {
    const float mean = context.Attr<float>("mean");
    // Named `stddev` locally so the variable does not hide the `std` name.
    const float stddev = context.Attr<float>("std");

    auto* out = context.Output<framework::Tensor>("Out");
    T* out_data = out->mutable_data<T>(context.GetPlace());

    // A seed attribute equal to 0 is replaced by a value obtained from
    // std::random_device before the engine is seeded.
    unsigned int seed = static_cast<unsigned int>(context.Attr<int>("seed"));
    if (seed == 0) {
      seed = std::random_device()();
    }
    std::minstd_rand engine;
    engine.seed(seed);

    std::normal_distribution<T> dist(mean, stddev);
    const int64_t numel = out->numel();
    int64_t idx = 0;
    while (idx < numel) {
      out_data[idx] = dist(engine);
      ++idx;
    }

    // The format of output is set as the mkldnn's format
    // TODO(@mozga-intel) The format of matrix sets inside the another layers.
    out->set_layout(DataLayout::kMKLDNN);
    out->set_format(mkldnn::memory::format::oihw);
  }
};
}
// namespace operators
}
// namespace paddle
namespace
ops
=
paddle
::
operators
;
REGISTER_OP_KERNEL
(
gaussian_random
,
MKLDNN
,
::
paddle
::
platform
::
CPUPlace
,
ops
::
GaussianMKLDNNKernel
<
float
>
);
paddle/fluid/operators/gaussian_random_op.cc
浏览文件 @
8567d042
...
@@ -15,6 +15,10 @@ limitations under the License. */
...
@@ -15,6 +15,10 @@ limitations under the License. */
#include <random>
#include <random>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/op_registry.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
...
@@ -62,9 +66,20 @@ class GaussianRandomOp : public framework::OperatorWithKernel {
...
@@ -62,9 +66,20 @@ class GaussianRandomOp : public framework::OperatorWithKernel {
protected:
protected:
framework
::
OpKernelType
GetExpectedKernelType
(
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
framework
::
LibraryType
library
{
framework
::
LibraryType
::
kPlain
};
framework
::
DataLayout
layout
{
framework
::
DataLayout
::
kAnyLayout
};
#ifdef PADDLE_WITH_MKLDNN
if
(
library
==
framework
::
LibraryType
::
kPlain
&&
platform
::
CanMKLDNNBeUsed
(
ctx
))
{
library
=
framework
::
LibraryType
::
kMKLDNN
;
layout
=
framework
::
DataLayout
::
kMKLDNN
;
}
#endif
return
framework
::
OpKernelType
(
return
framework
::
OpKernelType
(
static_cast
<
framework
::
proto
::
VarType
::
Type
>
(
ctx
.
Attr
<
int
>
(
"dtype"
)),
static_cast
<
framework
::
proto
::
VarType
::
Type
>
(
ctx
.
Attr
<
int
>
(
"dtype"
)),
ctx
.
device_context
());
ctx
.
device_context
()
,
layout
,
library
);
}
}
};
};
...
@@ -95,7 +110,9 @@ class GaussianRandomOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -95,7 +110,9 @@ class GaussianRandomOpMaker : public framework::OpProtoAndCheckerMaker {
"(int, default 5(FP32)) "
"(int, default 5(FP32)) "
"Output data type."
)
"Output data type."
)
.
SetDefault
(
framework
::
proto
::
VarType
::
FP32
);
.
SetDefault
(
framework
::
proto
::
VarType
::
FP32
);
AddAttr
<
bool
>
(
"use_mkldnn"
,
"(bool, default false) Only used in mkldnn kernel"
)
.
SetDefault
(
false
);
AddComment
(
R"DOC(
AddComment
(
R"DOC(
GaussianRandom Operator.
GaussianRandom Operator.
...
...
paddle/fluid/operators/math/concat.cu
浏览文件 @
8567d042
...
@@ -22,43 +22,24 @@ namespace paddle {
...
@@ -22,43 +22,24 @@ namespace paddle {
namespace
operators
{
namespace
operators
{
namespace
math
{
namespace
math
{
template
<
typename
T
>
__device__
T
upper_bound
(
const
T
*
first
,
T
count
,
T
val
)
{
const
T
*
orig
=
first
;
const
T
*
it
=
nullptr
;
T
step
=
0
;
while
(
count
>
0
)
{
it
=
first
;
step
=
count
/
2
;
it
+=
step
;
if
(
!
(
val
<
*
it
))
{
first
=
++
it
;
count
-=
step
+
1
;
}
else
{
count
=
step
;
}
}
return
first
-
orig
;
}
template
<
typename
T
>
template
<
typename
T
>
__global__
void
KernelConcat
(
T
**
inputs
,
const
int
*
input_cols
,
int
col_size
,
__global__
void
KernelConcat
(
T
**
inputs
,
const
int
*
input_cols
,
int
col_size
,
const
int
output_rows
,
const
int
output_cols
,
const
int
output_rows
,
const
int
output_cols
,
T
*
output
)
{
T
*
output
)
{
int
tid_x
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
tid_x
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
segment
=
upper_bound
<
int
>
(
input_cols
,
col_size
,
tid_x
)
-
1
;
int
curr_segment
=
0
;
int
curr_offset
=
input_cols
[
0
];
int
curr_offset
=
input_cols
[
segment
];
int
curr_segment
=
segment
;
for
(;
tid_x
<
output_cols
;
tid_x
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(;
tid_x
<
output_cols
;
tid_x
+=
blockDim
.
x
*
gridDim
.
x
)
{
T
curr_col_offset
;
int
curr_col_offset
=
input_cols
[
curr_segment
+
1
]
;
while
(
(
curr_col_offset
=
input_cols
[
curr_segment
+
1
])
<=
tid_x
)
{
while
(
curr_col_offset
<=
tid_x
)
{
curr_offset
=
curr_col_offset
;
curr_offset
=
curr_col_offset
;
++
curr_segment
;
++
curr_segment
;
curr_col_offset
=
input_cols
[
curr_segment
+
1
];
}
}
int
local_col
=
tid_x
-
curr_offset
;
int
local_col
=
tid_x
-
curr_offset
;
int
segment_width
=
curr_col_offset
-
curr_offset
;
int
segment_width
=
curr_col_offset
-
curr_offset
;
T
*
input_ptr
=
inputs
[
curr_segment
];
T
*
input_ptr
=
inputs
[
curr_segment
];
int
tid_y
=
blockIdx
.
y
*
blockDim
.
y
+
threadIdx
.
y
;
int
tid_y
=
blockIdx
.
y
*
blockDim
.
y
+
threadIdx
.
y
;
for
(;
tid_y
<
output_rows
;
tid_y
+=
blockDim
.
y
*
gridDim
.
y
)
for
(;
tid_y
<
output_rows
;
tid_y
+=
blockDim
.
y
*
gridDim
.
y
)
...
@@ -89,14 +70,14 @@ __global__ void KernelConcatGrad(const T* input_data, const int in_row,
...
@@ -89,14 +70,14 @@ __global__ void KernelConcatGrad(const T* input_data, const int in_row,
const
int
in_col
,
const
int
*
out_cols
,
const
int
in_col
,
const
int
*
out_cols
,
int
out_cols_size
,
T
**
outputs_data
)
{
int
out_cols_size
,
T
**
outputs_data
)
{
int
tid_x
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
tid_x
=
blockIdx
.
x
*
blockDim
.
x
+
threadIdx
.
x
;
int
segment
=
upper_bound
<
int
>
(
out_cols
,
out_cols_size
,
tid_x
)
-
1
;
int
curr_segment
=
0
;
int
curr_offset
=
out_cols
[
segment
];
int
curr_offset
=
out_cols
[
0
];
int
curr_segment
=
segment
;
for
(;
tid_x
<
in_col
;
tid_x
+=
blockDim
.
x
*
gridDim
.
x
)
{
for
(;
tid_x
<
in_col
;
tid_x
+=
blockDim
.
x
*
gridDim
.
x
)
{
T
curr_col_offset
;
int
curr_col_offset
=
out_cols
[
curr_segment
+
1
]
;
while
(
(
curr_col_offset
=
out_cols
[
curr_segment
+
1
])
<=
tid_x
)
{
while
(
curr_col_offset
<=
tid_x
)
{
curr_offset
=
curr_col_offset
;
curr_offset
=
curr_col_offset
;
++
curr_segment
;
++
curr_segment
;
curr_col_offset
=
out_cols
[
curr_segment
+
1
];
}
}
int
local_col
=
tid_x
-
curr_offset
;
int
local_col
=
tid_x
-
curr_offset
;
...
@@ -228,7 +209,7 @@ class ConcatGradFunctor<platform::CUDADeviceContext, T> {
...
@@ -228,7 +209,7 @@ class ConcatGradFunctor<platform::CUDADeviceContext, T> {
outputs_cols
[
0
]
=
0
;
outputs_cols
[
0
]
=
0
;
for
(
int
i
=
0
;
i
<
o_num
;
++
i
)
{
for
(
int
i
=
0
;
i
<
o_num
;
++
i
)
{
int
t_col
=
outputs
->
at
(
i
)
->
numel
()
/
out_row
;
int
t_col
=
ref_inputs
.
at
(
i
)
->
numel
()
/
out_row
;
if
(
sameShape
)
{
if
(
sameShape
)
{
if
(
t_col
!=
out0_col
)
sameShape
=
false
;
if
(
t_col
!=
out0_col
)
sameShape
=
false
;
}
}
...
...
paddle/fluid/operators/parallel_do_op.cc
浏览文件 @
8567d042
...
@@ -295,7 +295,7 @@ class ParallelDoGradOp : public framework::OperatorBase {
...
@@ -295,7 +295,7 @@ class ParallelDoGradOp : public framework::OperatorBase {
auto
sum_op
=
framework
::
OpRegistry
::
CreateOp
(
auto
sum_op
=
framework
::
OpRegistry
::
CreateOp
(
"sum"
,
{{
"X"
,
{
s
,
tmp_name
}}},
{{
"Out"
,
{
s
}}},
"sum"
,
{{
"X"
,
{
s
,
tmp_name
}}},
{{
"Out"
,
{
s
}}},
framework
::
AttributeMap
{});
framework
::
AttributeMap
{
{
"use_mkldnn"
,
{
false
}}
});
VLOG
(
10
)
<<
sum_op
->
DebugStringEx
(
sub_scopes
[
0
]);
VLOG
(
10
)
<<
sum_op
->
DebugStringEx
(
sub_scopes
[
0
]);
sum_op
->
Run
(
*
sub_scopes
[
0
],
places
[
0
]);
sum_op
->
Run
(
*
sub_scopes
[
0
],
places
[
0
]);
WaitOnPlace
(
places
[
0
]);
WaitOnPlace
(
places
[
0
]);
...
...
paddle/fluid/operators/recurrent_op.cc
浏览文件 @
8567d042
...
@@ -429,7 +429,8 @@ class RecurrentGradOp : public RecurrentBase {
...
@@ -429,7 +429,8 @@ class RecurrentGradOp : public RecurrentBase {
auto
sum_op
=
framework
::
OpRegistry
::
CreateOp
(
auto
sum_op
=
framework
::
OpRegistry
::
CreateOp
(
"sum"
,
{{
"X"
,
{
pg_names
[
param_id
],
new_inside_name
}}},
"sum"
,
{{
"X"
,
{
pg_names
[
param_id
],
new_inside_name
}}},
{{
"Out"
,
{
pg_names
[
param_id
]}}},
framework
::
AttributeMap
{});
{{
"Out"
,
{
pg_names
[
param_id
]}}},
framework
::
AttributeMap
{{
"use_mkldnn"
,
{
false
}}});
sum_op
->
Run
(
cur_scope
,
place
);
sum_op
->
Run
(
cur_scope
,
place
);
cur_scope
.
Rename
(
new_inside_name
,
inside_grad_name
);
cur_scope
.
Rename
(
new_inside_name
,
inside_grad_name
);
...
...
paddle/fluid/operators/sum_mkldnn_op.cc
0 → 100644
浏览文件 @
8567d042
// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.
/*Licensed under the Apache License, Version 2.0(the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License. */
#include "mkldnn.hpp"
#include "paddle/fluid/framework/tensor.h"
#include "paddle/fluid/operators/math/selected_rows_functor.h"
#include "paddle/fluid/operators/sum_op.h"
#include "paddle/fluid/platform/device_context.h"
#include "paddle/fluid/platform/mkldnn_helper.h"
namespace
paddle
{
namespace
operators
{
using
paddle
::
framework
::
Tensor
;
using
paddle
::
platform
::
MKLDNNDeviceContext
;
using
paddle
::
platform
::
CPUDeviceContext
;
using
framework
::
DataLayout
;
using
mkldnn
::
memory
;
using
mkldnn
::
primitive
;
using
mkldnn
::
stream
;
using
mkldnn
::
sum
;
using
mkldnn
::
reorder
;
using
platform
::
to_void_cast
;
// MKLDNN-registered CPU kernel for the sum operator.
//
// Dispatches on the runtime type of the "Out" variable:
//   * LoDTensor       - summed with the mkldnn::sum primitive;
//   * SelectedRows    - merged with math::SelectedRowsAddTo on the CPU context;
//   * LoDTensorArray  - accumulated element by element (copy or Eigen add).
// Any other output type raises through PADDLE_THROW.
//
// "In place" means the output variable is the same variable as input 0.
template <typename T>
class SumMKLDNNOpKernel : public paddle::framework::OpKernel<T> {
 public:
  void Compute(const paddle::framework::ExecutionContext& ctx) const override {
    PADDLE_ENFORCE(paddle::platform::is_cpu_place(ctx.GetPlace()),
                   "It must use CPUPlace.");
    auto& dev_ctx = ctx.template device_context<MKLDNNDeviceContext>();
    const auto& mkldnn_engine = dev_ctx.GetEngine();
    auto in_vars = ctx.MultiInputVar("X");

    const int N = in_vars.size();
    auto out_var = ctx.OutputVar("Out");
    bool in_place = out_var == in_vars[0];

    if (out_var->IsType<framework::LoDTensor>()) {
      LoDTensor* output = ctx.Output<LoDTensor>("Out");
      T* output_data = output->mutable_data<T>(ctx.GetPlace());

      std::vector<int> dst_tz = framework::vectorize2int(output->dims());
      auto src_tz = dst_tz;
      memory::format output_format{memory::format::format_undef};
      std::vector<float> scales;
      std::vector<memory::primitive_desc> srcs_mpd;
      std::vector<mkldnn::memory> srcs_mem;

      PADDLE_ENFORCE(in_vars[0]->IsType<LoDTensor>(),
                     "Input[0] must be LoDTensors");
      auto& input0 = in_vars[0]->Get<LoDTensor>();
      PADDLE_ENFORCE(input0.layout() == DataLayout::kMKLDNN &&
                         input0.format() != memory::format::format_undef,
                     "Wrong layout/format for inputs[0]");

      memory::format input_format = input0.format();

      // 1-D and 2-D tensors tagged with a 4-D format (nchw/nhwc) are remapped
      // to the format matching their actual rank.
      if (src_tz.size() == 1 && (input_format == memory::format::nchw ||
                                 input_format == memory::format::nhwc)) {
        input_format = memory::format::x;
      }
      if (src_tz.size() == 2 && (input_format == memory::format::nchw ||
                                 input_format == memory::format::nhwc)) {
        input_format = memory::format::nc;
      }

      // Every input, including input 0 in the in-place case, must be a source
      // of the sum primitive. The primitive writes to a separate temporary
      // buffer when running in place (see dst_mem below) and the result is
      // then reordered over the output buffer, so skipping input 0 would
      // drop its contribution and overwrite it with the sum of the rest.
      for (int i = 0; i < N; i++) {
        PADDLE_ENFORCE(in_vars[i]->IsType<LoDTensor>(),
                       "all inputs must be all LoDTensors");
        auto& input = in_vars[i]->Get<LoDTensor>();
        PADDLE_ENFORCE(input.layout() == DataLayout::kMKLDNN &&
                           input.format() != memory::format::format_undef,
                       "Wrong layout/format for inputs");

        // Empty inputs contribute nothing to the sum.
        if (input.numel() == 0) {
          continue;
        }

        const T* input_data = input.data<T>();

        auto src_md =
            memory::desc(src_tz, memory::data_type::f32, input_format);
        auto src_mpd = memory::primitive_desc(src_md, mkldnn_engine);
        auto src_mem = memory(src_mpd, to_void_cast(input_data));
        srcs_mpd.push_back(src_mpd);
        srcs_mem.push_back(src_mem);
        scales.push_back(1.0);
      }

      // Destination format is left to the primitive (format::any).
      auto dst_md =
          memory::desc(dst_tz, memory::data_type::f32, memory::format::any);

      auto sum_pd = sum::primitive_desc(dst_md, scales, srcs_mpd);

      // In place: sum into a primitive-owned temporary and reorder afterwards.
      // Otherwise: sum directly into the output buffer.
      std::shared_ptr<memory> dst_mem;
      if (in_place) {
        dst_mem.reset(new memory(sum_pd.dst_primitive_desc()));
      } else {
        dst_mem.reset(new memory(sum_pd.dst_primitive_desc(), output_data));
      }
      std::vector<mkldnn::primitive::at> inputs;
      for (size_t i = 0; i < srcs_mem.size(); ++i) {
        inputs.push_back(srcs_mem[i]);
      }

      auto sum_prim = mkldnn::sum(sum_pd, inputs, *dst_mem);
      output_format = (memory::format)platform::GetMKLDNNFormat(sum_pd);

      primitive reorder_prim;
      std::shared_ptr<memory> target_mem;
      if (in_place) {
        // The in-place result keeps the (possibly remapped) input format.
        output_format = input_format;
        target_mem.reset(new memory(
            {{{src_tz}, memory::data_type::f32, output_format}, mkldnn_engine},
            output_data));
        reorder_prim = reorder(*dst_mem, *target_mem);
      }

      std::vector<primitive> pipeline;
      pipeline.push_back(sum_prim);
      if (in_place) pipeline.push_back(reorder_prim);
      stream(stream::kind::eager).submit(pipeline).wait();

      output->set_layout(DataLayout::kMKLDNN);
      output->set_format(output_format);
    } else if (out_var->IsType<framework::SelectedRows>()) {
      // TODO(@mozga-intel) Add MKLDNN SelectedRows support
      std::unique_ptr<framework::SelectedRows> in0;
      if (in_place) {
        // If is in_place, we store the input[0] to in0
        auto& in_sel0 = in_vars[0]->Get<SelectedRows>();
        auto& rows = in_sel0.rows();
        in0.reset(new framework::SelectedRows(rows, in_sel0.height()));
        in0->mutable_value()->ShareDataWith(in_sel0.value());
      }

      // Returns the saved copy for index 0 when running in place, otherwise
      // the i-th input variable's SelectedRows.
      auto get_selected_row = [&](size_t i) -> const SelectedRows& {
        if (i == 0 && in0) {
          return *in0.get();
        } else {
          return in_vars[i]->Get<SelectedRows>();
        }
      };
      auto* out = ctx.Output<SelectedRows>("Out");
      out->mutable_rows()->clear();
      auto* out_value = out->mutable_value();

      // Runtime InferShape
      size_t first_dim = 0;
      for (int i = 0; i < N; i++) {
        auto& sel_row = get_selected_row(i);
        first_dim += sel_row.rows().size();
      }
      auto in_dim =
          framework::vectorize(get_selected_row(N - 1).value().dims());
      in_dim[0] = static_cast<int64_t>(first_dim);

      out_value->Resize(framework::make_ddim(in_dim));

      // if all the input sparse vars are empty, no need to
      // merge these vars.
      if (first_dim == 0UL) {
        return;
      }
      out_value->mutable_data<T>(ctx.GetPlace());
      math::SelectedRowsAddTo<CPUDeviceContext, T> functor;
      int64_t offset = 0;
      for (int i = 0; i < N; i++) {
        auto& sel_row = get_selected_row(i);
        if (sel_row.rows().size() == 0) {
          continue;
        }
        PADDLE_ENFORCE_EQ(out->height(), sel_row.height());
        functor(ctx.template device_context<CPUDeviceContext>(), sel_row,
                offset, out);
        offset += sel_row.value().numel();
      }
    } else if (out_var->IsType<framework::LoDTensorArray>()) {
      // TODO(@mozga-intel) Add MKLDNN LoDTensorArray support
      auto& out_array = *out_var->GetMutable<framework::LoDTensorArray>();
      // Here the accumulation happens directly in out_array, so when running
      // in place input 0 is already present and is skipped.
      for (size_t i = in_place ? 1 : 0; i < in_vars.size(); ++i) {
        PADDLE_ENFORCE(in_vars[i]->IsType<framework::LoDTensorArray>(),
                       "Only support all inputs are TensorArray");
        auto& in_array = in_vars[i]->Get<framework::LoDTensorArray>();

        for (size_t j = 0; j < in_array.size(); ++j) {
          if (in_array[j].numel() != 0) {
            if (j >= out_array.size()) {
              out_array.resize(j + 1);
            }
            if (out_array[j].numel() == 0) {
              // First non-empty contribution for this slot: copy it over.
              framework::TensorCopy(in_array[j], in_array[j].place(),
                                    ctx.device_context(), &out_array[j]);
              out_array[j].set_lod(in_array[j].lod());
            } else {
              PADDLE_ENFORCE(out_array[j].lod() == in_array[j].lod());
              auto in = EigenVector<T>::Flatten(in_array[j]);
              auto result = EigenVector<T>::Flatten(out_array[j]);
              result.device(
                  *ctx.template device_context<MKLDNNDeviceContext>()
                       .eigen_device()) = result + in;
            }
          }
        }
      }
    } else {
      PADDLE_THROW("Unexpected branch, output variable type is %s",
                   out_var->Type().name());
    }
  }
};
}
// namespace operators
}
// namespace paddle
REGISTER_OP_KERNEL
(
sum
,
MKLDNN
,
::
paddle
::
platform
::
CPUPlace
,
paddle
::
operators
::
SumMKLDNNOpKernel
<
float
>
);
paddle/fluid/operators/sum_op.cc
浏览文件 @
8567d042
...
@@ -18,6 +18,10 @@ limitations under the License. */
...
@@ -18,6 +18,10 @@ limitations under the License. */
#include "paddle/fluid/framework/var_type_inference.h"
#include "paddle/fluid/framework/var_type_inference.h"
#include "paddle/fluid/operators/detail/safe_ref.h"
#include "paddle/fluid/operators/detail/safe_ref.h"
#ifdef PADDLE_WITH_MKLDNN
#include "paddle/fluid/platform/mkldnn_helper.h"
#endif
namespace
paddle
{
namespace
paddle
{
namespace
operators
{
namespace
operators
{
using
framework
::
Tensor
;
using
framework
::
Tensor
;
...
@@ -63,6 +67,18 @@ class SumOp : public framework::OperatorWithKernel {
...
@@ -63,6 +67,18 @@ class SumOp : public framework::OperatorWithKernel {
framework
::
OpKernelType
GetExpectedKernelType
(
framework
::
OpKernelType
GetExpectedKernelType
(
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
const
framework
::
ExecutionContext
&
ctx
)
const
override
{
auto
x_vars
=
ctx
.
MultiInputVar
(
"X"
);
auto
x_vars
=
ctx
.
MultiInputVar
(
"X"
);
framework
::
LibraryType
library
{
framework
::
LibraryType
::
kPlain
};
framework
::
DataLayout
layout
{
framework
::
DataLayout
::
kAnyLayout
};
#ifdef PADDLE_WITH_MKLDNN
if
(
library
==
framework
::
LibraryType
::
kPlain
&&
platform
::
CanMKLDNNBeUsed
(
ctx
))
{
library
=
framework
::
LibraryType
::
kMKLDNN
;
layout
=
framework
::
DataLayout
::
kMKLDNN
;
}
#endif
if
(
x_vars
[
0
]
->
IsType
<
framework
::
LoDTensor
>
())
{
if
(
x_vars
[
0
]
->
IsType
<
framework
::
LoDTensor
>
())
{
int
dtype
=
-
1
;
int
dtype
=
-
1
;
for
(
auto
&
x_var
:
x_vars
)
{
for
(
auto
&
x_var
:
x_vars
)
{
...
@@ -80,26 +96,27 @@ class SumOp : public framework::OperatorWithKernel {
...
@@ -80,26 +96,27 @@ class SumOp : public framework::OperatorWithKernel {
"Sum operator should have at least one tensor"
);
"Sum operator should have at least one tensor"
);
return
framework
::
OpKernelType
(
return
framework
::
OpKernelType
(
static_cast
<
framework
::
proto
::
VarType
::
Type
>
(
dtype
),
static_cast
<
framework
::
proto
::
VarType
::
Type
>
(
dtype
),
ctx
.
GetPlace
(),
ctx
.
device_context
()
);
layout
,
library
);
}
else
if
(
x_vars
[
0
]
->
IsType
<
framework
::
SelectedRows
>
())
{
}
else
if
(
x_vars
[
0
]
->
IsType
<
framework
::
SelectedRows
>
())
{
for
(
auto
&
var
:
x_vars
)
{
for
(
auto
&
var
:
x_vars
)
{
auto
&
value
=
var
->
Get
<
framework
::
SelectedRows
>
().
value
();
auto
&
value
=
var
->
Get
<
framework
::
SelectedRows
>
().
value
();
if
(
value
.
IsInitialized
())
{
if
(
value
.
IsInitialized
())
{
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
value
.
type
()),
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
value
.
type
()),
ctx
.
device_context
());
ctx
.
device_context
()
,
layout
,
library
);
}
}
}
}
// if input sparse vars are not initialized, use a default kernel type.
// if input sparse vars are not initialized, use a default kernel type.
return
framework
::
OpKernelType
(
framework
::
proto
::
VarType
::
FP32
,
return
framework
::
OpKernelType
(
framework
::
proto
::
VarType
::
FP32
,
ctx
.
device_context
());
ctx
.
device_context
()
,
layout
,
library
);
}
else
if
(
x_vars
[
0
]
->
IsType
<
framework
::
LoDTensorArray
>
())
{
}
else
if
(
x_vars
[
0
]
->
IsType
<
framework
::
LoDTensorArray
>
())
{
for
(
auto
&
x_var
:
x_vars
)
{
for
(
auto
&
x_var
:
x_vars
)
{
auto
&
array
=
x_var
->
Get
<
framework
::
LoDTensorArray
>
();
auto
&
array
=
x_var
->
Get
<
framework
::
LoDTensorArray
>
();
for
(
auto
&
each
:
array
)
{
for
(
auto
&
each
:
array
)
{
if
(
each
.
numel
()
!=
0
)
{
if
(
each
.
numel
()
!=
0
)
{
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
each
.
type
()),
return
framework
::
OpKernelType
(
framework
::
ToDataType
(
each
.
type
()),
ctx
.
device_context
());
ctx
.
device_context
(),
layout
,
library
);
}
}
}
}
}
}
...
@@ -116,6 +133,9 @@ class SumOpMaker : public framework::OpProtoAndCheckerMaker {
...
@@ -116,6 +133,9 @@ class SumOpMaker : public framework::OpProtoAndCheckerMaker {
AddInput
(
"X"
,
"(vector<Tensor>) The input tensors of sum operator."
)
AddInput
(
"X"
,
"(vector<Tensor>) The input tensors of sum operator."
)
.
AsDuplicable
();
.
AsDuplicable
();
AddOutput
(
"Out"
,
"(Tensor) The output tensor of sum operator."
).
Reuse
(
"X"
);
AddOutput
(
"Out"
,
"(Tensor) The output tensor of sum operator."
).
Reuse
(
"X"
);
AddAttr
<
bool
>
(
"use_mkldnn"
,
"(bool, default false) Only used in mkldnn kernel"
)
.
SetDefault
(
false
);
AddComment
(
R"DOC(
AddComment
(
R"DOC(
Sum operator.
Sum operator.
...
@@ -132,7 +152,6 @@ class SumOpVarTypeInference : public framework::VarTypeInference {
...
@@ -132,7 +152,6 @@ class SumOpVarTypeInference : public framework::VarTypeInference {
framework
::
BlockDesc
*
block
)
const
override
{
framework
::
BlockDesc
*
block
)
const
override
{
auto
&
inputs
=
op_desc
.
Input
(
"X"
);
auto
&
inputs
=
op_desc
.
Input
(
"X"
);
auto
var_type
=
framework
::
proto
::
VarType
::
SELECTED_ROWS
;
auto
var_type
=
framework
::
proto
::
VarType
::
SELECTED_ROWS
;
for
(
auto
&
name
:
op_desc
.
Input
(
"X"
))
{
for
(
auto
&
name
:
op_desc
.
Input
(
"X"
))
{
VLOG
(
10
)
<<
name
<<
" "
VLOG
(
10
)
<<
name
<<
" "
<<
block
->
FindRecursiveOrCreateVar
(
name
).
GetType
();
<<
block
->
FindRecursiveOrCreateVar
(
name
).
GetType
();
...
@@ -206,6 +225,7 @@ namespace ops = paddle::operators;
...
@@ -206,6 +225,7 @@ namespace ops = paddle::operators;
REGISTER_OPERATOR
(
sum
,
ops
::
SumOp
,
ops
::
SumOpMaker
,
ops
::
SumGradMaker
,
REGISTER_OPERATOR
(
sum
,
ops
::
SumOp
,
ops
::
SumOpMaker
,
ops
::
SumGradMaker
,
ops
::
SumOpVarTypeInference
);
ops
::
SumOpVarTypeInference
);
REGISTER_OP_CPU_KERNEL
(
REGISTER_OP_CPU_KERNEL
(
sum
,
ops
::
SumKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
sum
,
ops
::
SumKernel
<
paddle
::
platform
::
CPUDeviceContext
,
float
>
,
ops
::
SumKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
,
ops
::
SumKernel
<
paddle
::
platform
::
CPUDeviceContext
,
double
>
,
...
...
paddle/fluid/operators/while_op.cc
浏览文件 @
8567d042
...
@@ -203,11 +203,11 @@ class WhileGradOp : public framework::OperatorBase {
...
@@ -203,11 +203,11 @@ class WhileGradOp : public framework::OperatorBase {
->
set_lod
(
inside_tensor
.
lod
());
->
set_lod
(
inside_tensor
.
lod
());
}
}
}
}
auto
new_inside_name
=
cur_scope
.
Rename
(
inside_grad_name
);
auto
new_inside_name
=
cur_scope
.
Rename
(
inside_grad_name
);
auto
sum_op
=
framework
::
OpRegistry
::
CreateOp
(
auto
sum_op
=
framework
::
OpRegistry
::
CreateOp
(
"sum"
,
{{
"X"
,
{
pg_names
[
param_id
],
new_inside_name
}}},
"sum"
,
{{
"X"
,
{
pg_names
[
param_id
],
new_inside_name
}}},
{{
"Out"
,
{
pg_names
[
param_id
]}}},
framework
::
AttributeMap
{});
{{
"Out"
,
{
pg_names
[
param_id
]}}},
framework
::
AttributeMap
{{
"use_mkldnn"
,
{
false
}}});
sum_op
->
Run
(
cur_scope
,
dev_place
);
sum_op
->
Run
(
cur_scope
,
dev_place
);
cur_scope
.
Rename
(
new_inside_name
,
inside_grad_name
);
cur_scope
.
Rename
(
new_inside_name
,
inside_grad_name
);
}
}
...
...
paddle/fluid/platform/mkldnn_helper.h
浏览文件 @
8567d042
...
@@ -99,5 +99,11 @@ inline mkldnn::memory::format GetMKLDNNFormat(const mkldnn::memory memory) {
...
@@ -99,5 +99,11 @@ inline mkldnn::memory::format GetMKLDNNFormat(const mkldnn::memory memory) {
memory
.
get_primitive_desc
().
desc
().
data
.
format
);
memory
.
get_primitive_desc
().
desc
().
data
.
format
);
}
}
// Overload of GetMKLDNNFormat for an MKLDNN sum primitive descriptor.
// Reads the raw format field from the descriptor of the sum's destination
// (dst_primitive_desc().desc().data.format) and casts it to
// mkldnn::memory::format.
// NOTE: the parameter is named `memory`, but it is a sum::primitive_desc,
// not an mkldnn::memory object.
inline
mkldnn
::
memory
::
format
GetMKLDNNFormat
(
const
mkldnn
::
sum
::
primitive_desc
&
memory
)
{
return
static_cast
<
mkldnn
::
memory
::
format
>
(
memory
.
dst_primitive_desc
().
desc
().
data
.
format
);
}
}
// namespace platform
}
// namespace platform
}
// namespace paddle
}
// namespace paddle
python/paddle/fluid/backward.py
浏览文件 @
8567d042
...
@@ -132,9 +132,9 @@ def _addup_repetitive_outputs_(op_descs):
...
@@ -132,9 +132,9 @@ def _addup_repetitive_outputs_(op_descs):
for
idx
,
op_desc
in
enumerate
(
op_descs
):
for
idx
,
op_desc
in
enumerate
(
op_descs
):
for
var_name
in
op_desc
.
input_arg_names
():
for
var_name
in
op_desc
.
input_arg_names
():
if
len
(
renamed_vars
[
var_name
])
>
1
:
if
len
(
renamed_vars
[
var_name
])
>
1
:
pending_sum_ops
.
append
(
pending_sum_ops
.
append
(
(
_create_op_desc_
(
(
_create_op_desc_
(
"sum"
,
{
"X"
:
renamed_vars
[
var_name
]},
"sum"
,
{
"X"
:
renamed_vars
[
var_name
]},
{
"Out"
:
[
var_name
]},
{
"Out"
:
[
var_name
]},
{
}),
idx
))
{
"use_mkldnn"
:
False
}),
idx
))
renamed_vars
[
var_name
]
=
[
var_name
]
renamed_vars
[
var_name
]
=
[
var_name
]
for
var_name
in
op_desc
.
output_arg_names
():
for
var_name
in
op_desc
.
output_arg_names
():
if
var_name
==
core
.
empty_var_name
(
if
var_name
==
core
.
empty_var_name
(
...
@@ -161,8 +161,9 @@ def _addup_repetitive_outputs_(op_descs):
...
@@ -161,8 +161,9 @@ def _addup_repetitive_outputs_(op_descs):
renamed_vars
[
var_name
].
append
(
new_name
)
renamed_vars
[
var_name
].
append
(
new_name
)
for
var_name
,
inputs
in
renamed_vars
.
iteritems
():
for
var_name
,
inputs
in
renamed_vars
.
iteritems
():
if
len
(
inputs
)
>
1
:
if
len
(
inputs
)
>
1
:
pending_sum_ops
.
append
((
_create_op_desc_
(
pending_sum_ops
.
append
(
"sum"
,
{
"X"
:
inputs
},
{
"Out"
:
[
var_name
]},
{}),
len
(
op_descs
)))
(
_create_op_desc_
(
"sum"
,
{
"X"
:
inputs
},
{
"Out"
:
[
var_name
]},
{
"use_mkldnn"
:
False
}),
len
(
op_descs
)))
# sum_op descs are sorted according to their insert position
# sum_op descs are sorted according to their insert position
for
p
in
reversed
(
pending_sum_ops
):
for
p
in
reversed
(
pending_sum_ops
):
op_descs
.
insert
(
p
[
1
],
p
[
0
])
op_descs
.
insert
(
p
[
1
],
p
[
0
])
...
...
python/paddle/fluid/clip.py
浏览文件 @
8567d042
...
@@ -24,8 +24,6 @@ __all__ = [
...
@@ -24,8 +24,6 @@ __all__ = [
'GradientClipByValue'
,
'GradientClipByValue'
,
'GradientClipByNorm'
,
'GradientClipByNorm'
,
'GradientClipByGlobalNorm'
,
'GradientClipByGlobalNorm'
,
'append_gradient_clip_ops'
,
'error_clip_callback'
,
]
]
...
@@ -38,6 +36,25 @@ class BaseErrorClipAttr(object):
...
@@ -38,6 +36,25 @@ class BaseErrorClipAttr(object):
class
ErrorClipByValue
(
BaseErrorClipAttr
):
class
ErrorClipByValue
(
BaseErrorClipAttr
):
"""
Clips tensor values to the range [min, max].
Given a tensor t, this operation clips its value to min and max inplace.
- Any values less than min are set to min.
- Any values greater than max are set to max.
Args:
max (float): The maximum value to clip by.
min (float, optional): The minimum value to clip by. if not set by user,
\
will be set to -max by framework.
Examples:
.. code-block:: python
var = fluid.framework.Variable(..., error_clip=ErrorClipByValue(max=5.0), ...)
"""
def
__init__
(
self
,
max
,
min
=
None
):
def
__init__
(
self
,
max
,
min
=
None
):
max
=
float
(
max
)
max
=
float
(
max
)
if
min
is
None
:
if
min
is
None
:
...
@@ -99,6 +116,31 @@ class NullGradientClipAttr(BaseGradientClipAttr):
...
@@ -99,6 +116,31 @@ class NullGradientClipAttr(BaseGradientClipAttr):
class
GradientClipByValue
(
BaseGradientClipAttr
):
class
GradientClipByValue
(
BaseGradientClipAttr
):
"""
Clips gradient values to the range [min, max].
Given a tensor t, this operation clips its value to min and max inplace.
- Any values less than min are set to min.
- Any values greater than max are set to max.
Args:
max (float): The maximum value to clip by.
min (float, optional): The minimum value to clip by. if not set by user,
\
will be set to -max by framework.
Examples:
.. code-block:: python
w_param_attrs = ParamAttr(name=None,
initializer=UniformInitializer(low=-1.0, high=1.0, seed=0),
learning_rate=1.0,
regularizer=L1Decay(1.0),
trainable=True,
clip=GradientClipByValue(max=1.0, min=-1.0))
y_predict = fluid.layers.fc(input=x, size=1, param_attr=w_param_attrs)
"""
def
__init__
(
self
,
max
,
min
=
None
):
def
__init__
(
self
,
max
,
min
=
None
):
max
=
float
(
max
)
max
=
float
(
max
)
if
min
is
None
:
if
min
is
None
:
...
@@ -120,6 +162,37 @@ class GradientClipByValue(BaseGradientClipAttr):
...
@@ -120,6 +162,37 @@ class GradientClipByValue(BaseGradientClipAttr):
class
GradientClipByNorm
(
BaseGradientClipAttr
):
class
GradientClipByNorm
(
BaseGradientClipAttr
):
"""
Clips tensor values to a maximum L2-norm.
This operator limits the L2 norm of the input :math:`X` within :math:`max\_norm`.
If the L2 norm of :math:`X` is less than or equal to :math:`max\_norm`, :math:`Out`
will be the same as :math:`X`. If the L2 norm of :math:`X` is greater than
:math:`max\_norm`, :math:`X` will be linearly scaled to make the L2 norm of
:math:`Out` equal to :math:`max\_norm`, as shown in the following formula:
.. math::
Out =
\\
frac{max\_norm * X}{norm(X)},
where :math:`norm(X)` represents the L2 norm of :math:`X`.
Args:
clip_norm (float): The maximum norm value
Examples:
.. code-block:: python
w_param_attrs = ParamAttr(name=None,
initializer=UniformInitializer(low=-1.0, high=1.0, seed=0),
learning_rate=1.0,
regularizer=L1Decay(1.0),
trainable=True,
clip=GradientClipByNorm(clip_norm=2.0))
y_predict = fluid.layers.fc(input=x, size=1, param_attr=w_param_attrs)
"""
def
__init__
(
self
,
clip_norm
):
def
__init__
(
self
,
clip_norm
):
self
.
clip_norm
=
clip_norm
self
.
clip_norm
=
clip_norm
...
@@ -135,6 +208,44 @@ class GradientClipByNorm(BaseGradientClipAttr):
...
@@ -135,6 +208,44 @@ class GradientClipByNorm(BaseGradientClipAttr):
class
GradientClipByGlobalNorm
(
BaseGradientClipAttr
):
class
GradientClipByGlobalNorm
(
BaseGradientClipAttr
):
"""
Clips values of multiple tensors by the ratio of the sum of their norms.
Given a list of tensors t_list, and a clipping ratio clip_norm, this
operation returns a list of clipped tensors list_clipped and the global
norm (global_norm) of all tensors in t_list.
To perform the clipping, the values :math:`t\_list[i]` are set to:
.. math::
t\_list[i] = t\_list[i] *
\\
frac{clip\_norm}{\max(global\_norm, clip\_norm)}
where:
.. math::
global\_norm = \sqrt{\sum_{i=0}^{N-1}(l2norm(t\_list[i]))^2}
If :math:`clip\_norm > global\_norm` then the entries in t_list remain as they are,
otherwise they're all shrunk by the global ratio.
Args:
clip_norm (float): The maximum norm value
group_name (str, optional): The group name for this clip.
Examples:
.. code-block:: python
p_g_clip = fluid.backward.append_backward(loss=avg_cost_clip)
with fluid.program_guard(main_program=prog_clip):
fluid.clip.set_gradient_clip(
fluid.clip.GradientClipByGlobalNorm(clip_norm=2.0))
p_g_clip = fluid.clip.append_gradient_clip_ops(p_g_clip)
"""
def
__init__
(
self
,
clip_norm
,
group_name
=
"default_group"
):
def
__init__
(
self
,
clip_norm
,
group_name
=
"default_group"
):
if
not
isinstance
(
group_name
,
basestring
):
if
not
isinstance
(
group_name
,
basestring
):
raise
TypeError
(
"'group_name' must be a basestring."
)
raise
TypeError
(
"'group_name' must be a basestring."
)
...
@@ -183,15 +294,16 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):
...
@@ -183,15 +294,16 @@ class GradientClipByGlobalNorm(BaseGradientClipAttr):
def
set_gradient_clip
(
clip
,
param_list
=
None
,
program
=
None
):
def
set_gradient_clip
(
clip
,
param_list
=
None
,
program
=
None
):
"""
"""
To specify parameters that require gradient clip.
To specify parameters that require gradient clip.
Args:
clip(BaseGradientClipAttr): An instance of some derived class of BaseGradientClipAttr,
Args:
which describes the type and detailed attributes of required gradient clip.
clip(BaseGradientClipAttr): An instance of some derived class of BaseGradientClipAttr,
param_list(list, None by default): Parameters that require gradient clip.
which describes the type and detailed attributes of required gradient clip.
It can be a list of parameter or a list of parameter's name.
param_list(list(Variable)): Parameters that require gradient clip.
When it's None, all parameters in the program will be included.
It can be a list of parameter or a list of parameter's name.
program(Program, None by default): The program where parameters are.
When it's None, all parameters in the program will be included.
Will be the default main program when assigned with None.
program(Program): The program where parameters are.
Will be the default main program when assigned with None.
"""
"""
if
not
isinstance
(
clip
,
BaseGradientClipAttr
):
if
not
isinstance
(
clip
,
BaseGradientClipAttr
):
raise
TypeError
(
raise
TypeError
(
...
...
python/paddle/fluid/inferencer.py
浏览文件 @
8567d042
...
@@ -27,13 +27,30 @@ __all__ = ['Inferencer', ]
...
@@ -27,13 +27,30 @@ __all__ = ['Inferencer', ]
class
Inferencer
(
object
):
class
Inferencer
(
object
):
"""
Inferencer High Level API.
Args:
infer_func (Python func): Infer function that will return predict Variable
param_path (str): The path where the inference model is saved by fluid.io.save_params
place (Place): place to do the inference
parallel (bool): use parallel_executor to run the inference, it will use multi CPU/GPU.
Examples:
.. code-block:: python
def inference_program():
x = fluid.layers.data(name='x', shape=[13], dtype='float32')
y_predict = fluid.layers.fc(input=x, size=1, act=None)
return y_predict
place = fluid.CPUPlace()
inferencer = fluid.Inferencer(
infer_func=inference_program, param_path="/tmp/model", place=place)
"""
def
__init__
(
self
,
infer_func
,
param_path
,
place
=
None
,
parallel
=
False
):
def
__init__
(
self
,
infer_func
,
param_path
,
place
=
None
,
parallel
=
False
):
"""
:param infer_func: a function that will return predict Variable
:param param_path: the path where the inference model is saved by fluid.io.save_params
:param place: place to do the inference
:param parallel: use parallel_executor to run the inference, it will use multi CPU/GPU.
"""
self
.
param_path
=
param_path
self
.
param_path
=
param_path
self
.
scope
=
core
.
Scope
()
self
.
scope
=
core
.
Scope
()
self
.
parallel
=
parallel
self
.
parallel
=
parallel
...
@@ -60,9 +77,20 @@ class Inferencer(object):
...
@@ -60,9 +77,20 @@ class Inferencer(object):
def
infer
(
self
,
inputs
,
return_numpy
=
True
):
def
infer
(
self
,
inputs
,
return_numpy
=
True
):
"""
"""
:param inputs: a map of {"input_name": input_var} that will be feed into the inference program
Do Inference for Inputs
to get the predict value
:return: the predict value of the inference model
Args:
inputs (map): a map of {"input_name": input_var} that will be fed into the inference program
return_numpy (bool): transform return value into numpy or not
Returns:
Tensor or Numpy: the predict value of the inference model for the inputs
Examples:
.. code-block:: python
tensor_x = numpy.random.uniform(0, 10, [batch_size, 13]).astype("float32")
results = inferencer.infer({'x': tensor_x})
"""
"""
if
not
isinstance
(
inputs
,
dict
):
if
not
isinstance
(
inputs
,
dict
):
raise
ValueError
(
raise
ValueError
(
...
...
python/paddle/fluid/initializer.py
浏览文件 @
8567d042
...
@@ -19,26 +19,39 @@ from framework import convert_np_dtype_to_dtype_
...
@@ -19,26 +19,39 @@ from framework import convert_np_dtype_to_dtype_
from
core
import
VarDesc
from
core
import
VarDesc
__all__
=
[
__all__
=
[
'Constant'
,
'Uniform'
,
'Normal'
,
'Xavier'
,
'Bilinear'
,
'force_init_on_cpu'
,
'Constant'
,
'Uniform'
,
'Normal'
,
'Xavier'
,
'Bilinear'
,
'MSRA'
,
'init_on_cpu'
,
'ConstantInitializer'
,
'UniformInitializer'
,
'force_init_on_cpu'
,
'init_on_cpu'
,
'ConstantInitializer'
,
'NormalInitializer'
,
'XavierInitializer'
,
'BilinearInitializer'
'UniformInitializer'
,
'NormalInitializer'
,
'XavierInitializer'
,
'BilinearInitializer'
,
'MSRAInitializer'
]
]
_force_init_on_cpu_
=
False
_force_init_on_cpu_
=
False
def
force_init_on_cpu
():
def
force_init_on_cpu
():
"""
The flag indicating whether variables are forced to be initialized on CPU.
Examples:
.. code-block:: python
if force_init_on_cpu():
pass
"""
return
_force_init_on_cpu_
return
_force_init_on_cpu_
@
contextlib
.
contextmanager
@
contextlib
.
contextmanager
def
init_on_cpu
():
def
init_on_cpu
():
"""
"""
Switch program with `with` statement
Force the variable to be inited on CPU.
Examples:
Examples:
>>> with init_on_cpu():
.. code-block:: python
>>> step = layers.create_global_var()
with init_on_cpu():
step = layers.create_global_var()
"""
"""
global
_force_init_on_cpu_
global
_force_init_on_cpu_
...
@@ -104,14 +117,18 @@ class Initializer(object):
...
@@ -104,14 +117,18 @@ class Initializer(object):
class
ConstantInitializer
(
Initializer
):
class
ConstantInitializer
(
Initializer
):
"""Implements the constant initializer
"""Implements the constant initializer
Args:
value (float): constant value to initialize the variable
Examples:
.. code-block:: python
fc = fluid.layers.fc(input=x, size=10,
param_attr=fluid.initializer.Constant(value=2.0))
"""
"""
def
__init__
(
self
,
value
=
0.0
,
force_cpu
=
False
):
def
__init__
(
self
,
value
=
0.0
,
force_cpu
=
False
):
"""Constructor for ConstantInitializer
Args:
value: constant value to initialize the variable
"""
assert
value
is
not
None
assert
value
is
not
None
super
(
ConstantInitializer
,
self
).
__init__
()
super
(
ConstantInitializer
,
self
).
__init__
()
self
.
_value
=
value
self
.
_value
=
value
...
@@ -146,16 +163,20 @@ class ConstantInitializer(Initializer):
...
@@ -146,16 +163,20 @@ class ConstantInitializer(Initializer):
class
UniformInitializer
(
Initializer
):
class
UniformInitializer
(
Initializer
):
"""Implements the random uniform distribution initializer
"""Implements the random uniform distribution initializer
Args:
low (float): lower boundary of the uniform distribution
high (float): upper boundary of the uniform distribution
seed (int): random seed
Examples:
.. code-block:: python
fc = fluid.layers.fc(input=x, size=10,
param_attr=fluid.initializer.Uniform(low=-0.5, high=0.5))
"""
"""
def
__init__
(
self
,
low
=-
1.0
,
high
=
1.0
,
seed
=
0
):
def
__init__
(
self
,
low
=-
1.0
,
high
=
1.0
,
seed
=
0
):
"""Constructor for UniformInitializer
Args:
low: lower boundary of the uniform distribution
high: upper boundary of the uniform distribution
seed: random seed
"""
assert
low
is
not
None
assert
low
is
not
None
assert
high
is
not
None
assert
high
is
not
None
assert
high
>=
low
assert
high
>=
low
...
@@ -196,17 +217,21 @@ class UniformInitializer(Initializer):
...
@@ -196,17 +217,21 @@ class UniformInitializer(Initializer):
class
NormalInitializer
(
Initializer
):
class
NormalInitializer
(
Initializer
):
"""Implements the random Normal(Gaussian) distribution initializer
"""Implements the Random Normal(Gaussian) distribution initializer
Args:
loc (float): mean of the normal distribution
scale (float): standard deviation of the normal distribution
seed (int): random seed
Examples:
.. code-block:: python
fc = fluid.layers.fc(input=x, size=10,
param_attr=fluid.initializer.Normal(loc=0.0, scale=2.0))
"""
"""
def
__init__
(
self
,
loc
=
0.0
,
scale
=
1.0
,
seed
=
0
):
def
__init__
(
self
,
loc
=
0.0
,
scale
=
1.0
,
seed
=
0
):
"""Constructor for NormalInitializer
Args:
loc: mean of the normal distribution
scale: standard deviation of the normal distribution
seed: random seed
"""
assert
loc
is
not
None
assert
loc
is
not
None
assert
scale
is
not
None
assert
scale
is
not
None
assert
seed
is
not
None
assert
seed
is
not
None
...
@@ -246,39 +271,49 @@ class NormalInitializer(Initializer):
...
@@ -246,39 +271,49 @@ class NormalInitializer(Initializer):
class
XavierInitializer
(
Initializer
):
class
XavierInitializer
(
Initializer
):
"""Implements the Xavier initializer
"""
This class implements the Xavier weight initializer from the paper
This class implements the Xavier weight initializer from the paper
Understanding the difficulty of training deep feedforward neural
`Understanding the difficulty of training deep feedforward neural
networks[1] by Xavier Glorot and Yoshua Bengio.
networks <http://proceedings.mlr.press/v9/glorot10a/glorot10a.pdf>`_
by Xavier Glorot and Yoshua Bengio.
This initializer is designed to keep the scale of the gradients
This initializer is designed to keep the scale of the gradients
approximately same in all the layers. In case of Uniform distribution,
approximately same in all the layers. In case of Uniform distribution,
the range is [-x, x], where x = sqrt(6 / (fan_in + fan_out)).
the range is [-x, x], where
.. math::
x = \sqrt{
\\
frac{6.0}{fan\_in + fan\_out}}
In case of Normal distribution, the mean is 0 and the standard deviation
In case of Normal distribution, the mean is 0 and the standard deviation
is sqrt(2/ (fan_in + fan_out)).
is
.. math::
\sqrt{
\\
frac{2.0}{fan\_in + fan\_out}}
Args:
uniform (bool): whether to use uniform or normal distribution
fan_in (float): fan_in for Xavier initialization. If None, it is
inferred from the variable.
fan_out (float): fan_out for Xavier initialization. If None, it is
inferred from the variable.
seed (int): random seed
Note:
It is recommended to set fan_in and fan_out to None for most cases.
Examples:
.. code-block:: python
fc = fluid.layers.fc(
input=queries, size=10,
param_attr=fluid.initializer.Xavier(uniform=False))
References:
[1] Understanding the difficulty of training deep feedforward neural
networks. International conference on artificial intelligence and
statistics.
(http://proceedings.mlr.press/v9/glorot10a.html)
"""
"""
def
__init__
(
self
,
uniform
=
True
,
fan_in
=
None
,
fan_out
=
None
,
seed
=
0
):
def
__init__
(
self
,
uniform
=
True
,
fan_in
=
None
,
fan_out
=
None
,
seed
=
0
):
"""Constructor for XavierInitializer
Args:
uniform: whether to use uniform or normal distribution
fan_in: fan_in for Xavier initialization. If None, it is
inferred from the variable.
fan_out: fan_out for Xavier initialization. If None, it is
inferred from the variable.
seed: random seed
Note: It is recommended to set fan_in and fan_out to None for
most cases.
"""
assert
uniform
is
not
None
assert
uniform
is
not
None
assert
seed
is
not
None
assert
seed
is
not
None
super
(
XavierInitializer
,
self
).
__init__
()
super
(
XavierInitializer
,
self
).
__init__
()
...
@@ -342,30 +377,42 @@ class MSRAInitializer(Initializer):
...
@@ -342,30 +377,42 @@ class MSRAInitializer(Initializer):
"""Implements the MSRA initializer a.k.a. Kaiming Initializer
"""Implements the MSRA initializer a.k.a. Kaiming Initializer
This class implements the weight initialization from the paper
This class implements the weight initialization from the paper
Delving Deep into Rectifiers: Surpassing Human-Level Performance on
`Delving Deep into Rectifiers: Surpassing Human-Level Performance on
ImageNet Classification[1] by Kaiming He, Xiangyu Zhang, Shaoqing Ren
ImageNet Classification <https://arxiv.org/abs/1502.01852>`_
and Jian Sun. This is a robust initialization method that particularly
by Kaiming He, Xiangyu Zhang, Shaoqing Ren and Jian Sun. This is a
considers the rectifier nonlinearities. In case of Uniform distribution,
robust initialization method that particularly considers the rectifier
the range is [-x, x], where x = sqrt(6 / fan_in). In case of Normal
nonlinearities. In case of Uniform distribution, the range is [-x, x], where
distribution, the mean is 0 and the standard deviation
is sqrt(2/ fan_in).
.. math::
References:
x = \sqrt{
\\
frac{6.0}{fan\_in}}
[1] Delving Deep into Rectifiers: Surpassing Human-Level Performance
on ImageNet Classification
In case of Normal distribution, the mean is 0 and the standard deviation
(https://arxiv.org/abs/1502.01852)
is
.. math::
\sqrt{
\\
frac{2.0}{fan\_in}}
Args:
uniform (bool): whether to use uniform or normal distribution
fan_in (float): fan_in for MSRAInitializer. If None, it is
\
inferred from the variable.
seed (int): random seed
Note:
It is recommended to set fan_in to None for most cases.
Examples:
.. code-block:: python
fc = fluid.layers.fc(
input=queries, size=10,
param_attr=fluid.initializer.MSRA(uniform=False))
"""
"""
def
__init__
(
self
,
uniform
=
True
,
fan_in
=
None
,
seed
=
0
):
def
__init__
(
self
,
uniform
=
True
,
fan_in
=
None
,
seed
=
0
):
"""Constructor for MSRAInitializer
"""Constructor for MSRAInitializer
Args:
uniform: whether to use uniform or normal distribution
fan_in: fan_in for MSRAInitializer. If None, it is
inferred from the variable.
seed: random seed
Note: It is recommended to set fan_in to None for most cases.
"""
"""
assert
uniform
is
not
None
assert
uniform
is
not
None
assert
seed
is
not
None
assert
seed
is
not
None
...
@@ -425,34 +472,37 @@ class MSRAInitializer(Initializer):
...
@@ -425,34 +472,37 @@ class MSRAInitializer(Initializer):
class
BilinearInitializer
(
Initializer
):
class
BilinearInitializer
(
Initializer
):
"""Implements the bilinear initializer.
"""
This initializer can be used in transposed convolution operator to
This initializer can be used in transposed convolution operator to
act as upsampling. Users can upsample a feature map with shape of
act as upsampling. Users can upsample a feature map with shape of
(B, C, H, W) by any integer factor. The usage is:
(B, C, H, W) by any integer factor. The usage is:
>>> factor = 2
Examples:
>>> w_attr = ParamAttr(learning_rate=0., regularizer=L2Decay(0.),
>>> initializer=Bilinear())
.. code-block:: python
>>> conv_up = fluid.layers.conv2d_transpose(
>>> input,
factor = 2
>>> num_filters=C,
w_attr = ParamAttr(learning_rate=0., regularizer=L2Decay(0.),
>>> output_size=None,
initializer=Bilinear())
>>> filter_size=2 * factor - factor % 2,
conv_up = fluid.layers.conv2d_transpose(
>>> padding=ceil((factor - 1) / 2.),
input,
>>> stride=factor,
num_filters=C,
>>> groups=C,
output_size=None,
>>> param_attr=w_attr,
filter_size=2 * factor - factor % 2,
>>> bias_attr=False)
padding=ceil((factor - 1) / 2.),
stride=factor,
groups=C,
Where, `num_filters=C` and `groups=C` means this is channel-wise tranposed
param_attr=w_attr,
bias_attr=False)
Where, `num_filters=C` and `groups=C` means this is channel-wise transposed
convolution. The filter shape will be (C, 1, K, K) where K is `filter_size`,
convolution. The filter shape will be (C, 1, K, K) where K is `filter_size`,
This initializer will set a (K, K) interpolation kernel for every channel
This initializer will set a (K, K) interpolation kernel for every channel
of the filter identically. The resulting shape of the output feature map
of the filter identically. The resulting shape of the output feature map
will be (B, C, factor * H, factor * W). Note that the learning rate and the
will be (B, C, factor * H, factor * W). Note that the learning rate and the
weight decay are set to 0 in order to keep coefficient values of bilinear
weight decay are set to 0 in order to keep coefficient values of bilinear
interpolation unchanged during training.
interpolation unchanged during training.
"""
"""
def
__init__
(
self
):
def
__init__
(
self
):
...
@@ -469,7 +519,7 @@ class BilinearInitializer(Initializer):
...
@@ -469,7 +519,7 @@ class BilinearInitializer(Initializer):
be added.
be added.
Returns:
Returns:
the initialization op
Operator:
the initialization op
Raises:
Raises:
ValueError: If type of `var` and `block` is not right.
ValueError: If type of `var` and `block` is not right.
...
...
python/paddle/fluid/layers/control_flow.py
浏览文件 @
8567d042
...
@@ -185,12 +185,14 @@ def Print(input,
...
@@ -185,12 +185,14 @@ def Print(input,
Returns:
Returns:
Variable: Output tensor, same data with input tensor.
Variable: Output tensor, same data with input tensor.
Examples:
Examples:
.. code-block:: python
.. code-block:: python
value = some_layer(...)
value = some_layer(...)
Print(value, summarize=10,
Print(value, summarize=10,
message="The content of some_layer: ")
message="The content of some_layer: ")
'''
'''
helper
=
LayerHelper
(
'print'
,
**
locals
())
helper
=
LayerHelper
(
'print'
,
**
locals
())
out
=
helper
.
create_tmp_variable
(
dtype
=
helper
.
input_dtype
())
out
=
helper
.
create_tmp_variable
(
dtype
=
helper
.
input_dtype
())
...
@@ -1201,6 +1203,31 @@ class ConditionalBlockGuard(BlockGuard):
...
@@ -1201,6 +1203,31 @@ class ConditionalBlockGuard(BlockGuard):
class
ConditionalBlock
(
object
):
class
ConditionalBlock
(
object
):
'''
**ConditionalBlock**
ConditionalBlock is an operator that binds a block to a specific condition,
if the condition matches, the corresponding block will be executed.
Args:
inputs (Variable): bool conditions.
is_scalar_condition (bool): whether the branch is controlled by a scalar.
name(str): name of this ConditionalBlock.
Examples:
.. code-block:: python
cond = layers.less_than(x=label, y=limit)
true_image, false_image = layers.split_lod_tensor(
input=image, mask=cond)
true_cond = layers.ConditionalBlock([true_image])
with true_cond.block():
...
false_cond = layers.ConditionalBlock([false_image])
with false_cond.block():
...
'''
def
__init__
(
self
,
inputs
,
is_scalar_condition
=
False
,
name
=
None
):
def
__init__
(
self
,
inputs
,
is_scalar_condition
=
False
,
name
=
None
):
for
each_input
in
inputs
:
for
each_input
in
inputs
:
if
not
isinstance
(
each_input
,
Variable
):
if
not
isinstance
(
each_input
,
Variable
):
...
...
python/paddle/fluid/layers/detection.py
浏览文件 @
8567d042
...
@@ -16,7 +16,7 @@ All layers just related to the detection neural network.
...
@@ -16,7 +16,7 @@ All layers just related to the detection neural network.
"""
"""
from
layer_function_generator
import
generate_layer_fn
from
layer_function_generator
import
generate_layer_fn
from
layer_function_generator
import
autodoc
from
layer_function_generator
import
autodoc
,
templatedoc
from
..layer_helper
import
LayerHelper
from
..layer_helper
import
LayerHelper
import
tensor
import
tensor
import
nn
import
nn
...
@@ -155,7 +155,7 @@ def detection_output(loc,
...
@@ -155,7 +155,7 @@ def detection_output(loc,
return
nmsed_outs
return
nmsed_outs
@
auto
doc
()
@
template
doc
()
def
detection_map
(
detect_res
,
def
detection_map
(
detect_res
,
label
,
label
,
class_num
,
class_num
,
...
@@ -166,6 +166,47 @@ def detection_map(detect_res,
...
@@ -166,6 +166,47 @@ def detection_map(detect_res,
input_states
=
None
,
input_states
=
None
,
out_states
=
None
,
out_states
=
None
,
ap_version
=
'integral'
):
ap_version
=
'integral'
):
"""
${comment}
Args:
detect_res: ${detect_res_comment}
label: ${label_comment}
class_num: ${class_num_comment}
background_label: ${background_label_comment}
overlap_threshold: ${overlap_threshold_comment}
evaluate_difficult: ${evaluate_difficult_comment}
has_state: ${has_state_comment}
input_states: If not None, it contains 3 elements:
1. pos_count ${pos_count_comment}.
2. true_pos ${true_pos_comment}.
3. false_pos ${false_pos_comment}.
out_states: If not None, it contains 3 elements:
1. accum_pos_count ${accum_pos_count_comment}.
2. accum_true_pos ${accum_true_pos_comment}.
3. accum_false_pos ${accum_false_pos_comment}.
ap_version: ${ap_type_comment}
Returns:
${map_comment}
Examples:
.. code-block:: python
detect_res = fluid.layers.data(
name='detect_res',
shape=[10, 6],
append_batch_size=False,
dtype='float32')
label = fluid.layers.data(
name='label',
shape=[10, 6],
append_batch_size=False,
dtype='float32')
map_out = fluid.layers.detection_map(detect_res, label, 21)
"""
helper
=
LayerHelper
(
"detection_map"
,
**
locals
())
helper
=
LayerHelper
(
"detection_map"
,
**
locals
())
def
__create_var
(
type
):
def
__create_var
(
type
):
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
8567d042
...
@@ -12,7 +12,7 @@
...
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# See the License for the specific language governing permissions and
# limitations under the License.
# limitations under the License.
"""
"""
All layers just related to the neural network.
All layers just related to the neural network.
"""
"""
from
..layer_helper
import
LayerHelper
from
..layer_helper
import
LayerHelper
...
@@ -93,6 +93,7 @@ __all__ = [
...
@@ -93,6 +93,7 @@ __all__ = [
'mean_iou'
,
'mean_iou'
,
'relu'
,
'relu'
,
'log'
,
'log'
,
'crop'
,
]
]
...
@@ -108,14 +109,14 @@ def fc(input,
...
@@ -108,14 +109,14 @@ def fc(input,
"""
"""
**Fully Connected Layer**
**Fully Connected Layer**
This function creates a fully connected layer in the network. It can take
This function creates a fully connected layer in the network. It can take
multiple tensors as its inputs. It creates a variable called weights for
multiple tensors as its inputs. It creates a variable called weights for
each input tensor, which represents a fully connected weight matrix from
each input tensor, which represents a fully connected weight matrix from
each input unit to each output unit. The fully connected layer multiplies
each input unit to each output unit. The fully connected layer multiplies
each input tensor with its corresponding weight to produce an output Tensor.
each input tensor with its corresponding weight to produce an output Tensor.
If multiple input tensors are given, the results of multiple multiplications
If multiple input tensors are given, the results of multiple multiplications
will be summed up. If bias_attr is not None, a bias variable will be created
will be summed up. If bias_attr is not None, a bias variable will be created
and added to the output. Finally, if activation is not None, it will be applied
and added to the output. Finally, if activation is not None, it will be applied
to the output as well.
to the output as well.
This process can be formulated as follows:
This process can be formulated as follows:
...
@@ -197,7 +198,10 @@ def fc(input,
...
@@ -197,7 +198,10 @@ def fc(input,
else
:
else
:
pre_bias
=
helper
.
create_tmp_variable
(
dtype
)
pre_bias
=
helper
.
create_tmp_variable
(
dtype
)
helper
.
append_op
(
helper
.
append_op
(
type
=
"sum"
,
inputs
=
{
"X"
:
mul_results
},
outputs
=
{
"Out"
:
pre_bias
})
type
=
"sum"
,
inputs
=
{
"X"
:
mul_results
},
outputs
=
{
"Out"
:
pre_bias
},
attrs
=
{
"use_mkldnn"
:
use_mkldnn
})
# add bias
# add bias
pre_activation
=
helper
.
append_bias_op
(
pre_bias
,
dim_start
=
num_flatten_dims
)
pre_activation
=
helper
.
append_bias_op
(
pre_bias
,
dim_start
=
num_flatten_dims
)
# add activation
# add activation
...
@@ -846,7 +850,7 @@ def crf_decoding(input, param_attr, label=None):
...
@@ -846,7 +850,7 @@ def crf_decoding(input, param_attr, label=None):
Returns:
Returns:
Variable: ${viterbi_path_comment}
Variable: ${viterbi_path_comment}
Examples:
Examples:
.. code-block:: python
.. code-block:: python
...
@@ -1084,7 +1088,7 @@ def chunk_eval(input,
...
@@ -1084,7 +1088,7 @@ def chunk_eval(input,
Here is a NER example of labeling for these tagging schemes:
Here is a NER example of labeling for these tagging schemes:
.. code-block:: python
.. code-block:: python
====== ====== ====== ===== == ============ ===== ===== ===== == =========
====== ====== ====== ===== == ============ ===== ===== ===== == =========
Li Ming works at Agricultural Bank of China in Beijing.
Li Ming works at Agricultural Bank of China in Beijing.
====== ====== ====== ===== == ============ ===== ===== ===== == =========
====== ====== ====== ===== == ============ ===== ===== ===== == =========
...
@@ -1110,7 +1114,7 @@ def chunk_eval(input,
...
@@ -1110,7 +1114,7 @@ def chunk_eval(input,
is the num of chunk types, and `tag_type` get its value from the following table.
is the num of chunk types, and `tag_type` get its value from the following table.
.. code-block:: python
.. code-block:: python
Scheme Begin Inside End Single
Scheme Begin Inside End Single
plain 0 - - -
plain 0 - - -
IOB 0 1 - -
IOB 0 1 - -
...
@@ -1146,7 +1150,7 @@ def chunk_eval(input,
...
@@ -1146,7 +1150,7 @@ def chunk_eval(input,
tuple: tuple containing: precision, recall, f1_score,
tuple: tuple containing: precision, recall, f1_score,
num_infer_chunks, num_label_chunks,
num_infer_chunks, num_label_chunks,
num_correct_chunks
num_correct_chunks
Examples:
Examples:
.. code-block:: python
.. code-block:: python
...
@@ -1246,7 +1250,7 @@ def sequence_softmax(input, param_attr=None, bias_attr=None, use_cudnn=True):
...
@@ -1246,7 +1250,7 @@ def sequence_softmax(input, param_attr=None, bias_attr=None, use_cudnn=True):
"""
"""
This function computes the softmax activation among all time-steps for each
This function computes the softmax activation among all time-steps for each
sequence. The dimension of each time-step should be 1. Thus, the shape of
sequence. The dimension of each time-step should be 1. Thus, the shape of
input Tensor can be either :math:`[N, 1]` or :math:`[N]`, where :math:`N`
input Tensor can be either :math:`[N, 1]` or :math:`[N]`, where :math:`N`
is the sum of the length of all sequences.
is the sum of the length of all sequences.
For i-th sequence in a mini-batch:
For i-th sequence in a mini-batch:
...
@@ -1266,7 +1270,7 @@ def sequence_softmax(input, param_attr=None, bias_attr=None, use_cudnn=True):
...
@@ -1266,7 +1270,7 @@ def sequence_softmax(input, param_attr=None, bias_attr=None, use_cudnn=True):
param_attr (ParamAttr|None): attributes for parameter
param_attr (ParamAttr|None): attributes for parameter
use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn
\
use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn
\
library is installed. Default: True
library is installed. Default: True
Returns:
Returns:
Variable: output of sequence_softmax
Variable: output of sequence_softmax
...
@@ -1827,11 +1831,11 @@ def pool2d(input,
...
@@ -1827,11 +1831,11 @@ def pool2d(input,
${comment}
${comment}
Args:
Args:
input (Variable): The input tensor of pooling operator. The format of
input (Variable): The input tensor of pooling operator. The format of
input tensor is NCHW, where N is batch size, C is
input tensor is NCHW, where N is batch size, C is
the number of channels, H is the height of the
the number of channels, H is the height of the
feature, and W is the width of the feature.
feature, and W is the width of the feature.
pool_size (int): The side length of pooling windows. All pooling
pool_size (int): The side length of pooling windows. All pooling
windows are squares with pool_size on a side.
windows are squares with pool_size on a side.
pool_type: ${pooling_type_comment}
pool_type: ${pooling_type_comment}
pool_stride (int): stride of the pooling layer.
pool_stride (int): stride of the pooling layer.
...
@@ -1840,7 +1844,7 @@ def pool2d(input,
...
@@ -1840,7 +1844,7 @@ def pool2d(input,
use_cudnn: ${use_cudnn_comment}
use_cudnn: ${use_cudnn_comment}
ceil_mode: ${ceil_mode_comment}
ceil_mode: ${ceil_mode_comment}
use_mkldnn: ${use_mkldnn_comment}
use_mkldnn: ${use_mkldnn_comment}
name (str|None): A name for this layer(optional). If set None, the
name (str|None): A name for this layer(optional). If set None, the
layer will be named automatically.
layer will be named automatically.
Returns:
Returns:
...
@@ -1858,10 +1862,10 @@ def pool2d(input,
...
@@ -1858,10 +1862,10 @@ def pool2d(input,
data = fluid.layers.data(
data = fluid.layers.data(
name='data', shape=[3, 32, 32], dtype='float32')
name='data', shape=[3, 32, 32], dtype='float32')
conv2d = fluid.layers.pool2d(
conv2d = fluid.layers.pool2d(
input=data,
input=data,
pool_size=2,
pool_size=2,
pool_type='max',
pool_type='max',
pool_stride=1,
pool_stride=1,
global_pooling=False)
global_pooling=False)
"""
"""
if
pool_type
not
in
[
"max"
,
"avg"
]:
if
pool_type
not
in
[
"max"
,
"avg"
]:
...
@@ -2226,14 +2230,14 @@ def beam_search_decode(ids, scores, name=None):
...
@@ -2226,14 +2230,14 @@ def beam_search_decode(ids, scores, name=None):
This layer is to pack the output of beam search layer into sentences and
This layer is to pack the output of beam search layer into sentences and
associated scores. It is usually called after the beam search layer.
associated scores. It is usually called after the beam search layer.
Typically, the output of beam search layer is a tensor of selected ids, with
Typically, the output of beam search layer is a tensor of selected ids, with
a tensor of the score of each id. Beam search layer's output ids, however,
a tensor of the score of each id. Beam search layer's output ids, however,
are generated directly during the tree search, and they are stacked by each
are generated directly during the tree search, and they are stacked by each
level of the search tree. Thus we need to reorganize them into sentences,
level of the search tree. Thus we need to reorganize them into sentences,
based on the score of each id. This layer takes the output of beam search
based on the score of each id. This layer takes the output of beam search
layer as input and repack them into sentences.
layer as input and repack them into sentences.
Args:
Args:
ids (Variable): The selected ids, output of beam search layer.
ids (Variable): The selected ids, output of beam search layer.
scores (Variable): The associated scores of the ids, output of beam
scores (Variable): The associated scores of the ids, output of beam
search layer.
search layer.
name (str): The name of this layer. It is optional.
name (str): The name of this layer. It is optional.
...
@@ -2241,7 +2245,7 @@ def beam_search_decode(ids, scores, name=None):
...
@@ -2241,7 +2245,7 @@ def beam_search_decode(ids, scores, name=None):
Returns:
Returns:
tuple(Variable): a tuple of two output tensors: sentence_ids, sentence_scores.
tuple(Variable): a tuple of two output tensors: sentence_ids, sentence_scores.
sentence_ids is a tensor with shape [size, length], where size is the
sentence_ids is a tensor with shape [size, length], where size is the
beam size of beam search, and length is the length of each sentence.
beam size of beam search, and length is the length of each sentence.
Note that the length of sentences may vary.
Note that the length of sentences may vary.
sentence_scores is a tensor with the same shape as sentence_ids.
sentence_scores is a tensor with the same shape as sentence_ids.
...
@@ -2674,18 +2678,35 @@ def sequence_expand(x, y, ref_level=-1, name=None):
...
@@ -2674,18 +2678,35 @@ def sequence_expand(x, y, ref_level=-1, name=None):
def
beam_search
(
pre_ids
,
ids
,
scores
,
beam_size
,
end_id
,
level
=
0
):
def
beam_search
(
pre_ids
,
ids
,
scores
,
beam_size
,
end_id
,
level
=
0
):
'''
'''
**beam search**
This function implements the beam search algorithm.
This function implements the beam search algorithm.
Beam search is a classical algorithm for selecting candidate words
in a machine translation task.
Refer to `Beam search <https://en.wikipedia.org/wiki/Beam_search>`_
for more details.
Args:
Args:
pre_ids (Variable):
${pre_ids_comment}
pre_ids (Variable):
ids in previous step.
ids (Variable):
${ids_comment}
ids (Variable):
a LoDTensor of shape of [None,k]
scores (Variable):
${scores_comment}
scores (Variable):
a LoDTensor that has the same shape and LoD with `ids`
beam_size (int):
${beam_size_comment}
beam_size (int):
beam size for beam search
end_id (int):
${end_id_comment}
end_id (int):
the token id which indicates the end of a sequence
level (int):
${level_comment}
level (int):
the level of LoDTensor
Returns:
Returns:
tuple: a tuple of beam_search output variables: selected_ids, selected_scores
tuple: a tuple of beam_search output variables: `selected_ids`, `selected_scores`
Examples:
.. code-block:: python
# current_score is a Tensor of shape (num_batch_size, embed_size), which
# contains the score of each candidate word.
topk_scores, topk_indices = pd.topk(current_score, k=50)
selected_ids, selected_scores = pd.beam_search(
pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0)
'''
'''
helper
=
LayerHelper
(
'beam_search'
,
**
locals
())
helper
=
LayerHelper
(
'beam_search'
,
**
locals
())
score_type
=
scores
.
dtype
score_type
=
scores
.
dtype
...
@@ -2901,7 +2922,7 @@ def reduce_mean(input, dim=None, keep_dim=False, name=None):
...
@@ -2901,7 +2922,7 @@ def reduce_mean(input, dim=None, keep_dim=False, name=None):
`None`, compute the mean over all elements of :attr:`input`
`None`, compute the mean over all elements of :attr:`input`
and return a variable with a single element, otherwise it
and return a variable with a single element, otherwise it
must be in the range :math:`[-rank(input), rank(input))`. If
must be in the range :math:`[-rank(input), rank(input))`. If
:math:`dim[i] < 0`, the dimension to reduce is
:math:`dim[i] < 0`, the dimension to reduce is
:math:`rank(input) + dim[i]`.
:math:`rank(input) + dim[i]`.
keep_dim (bool): Whether to reserve the reduced dimension in the
keep_dim (bool): Whether to reserve the reduced dimension in the
output Tensor. The result tensor will have one fewer dimension
output Tensor. The result tensor will have one fewer dimension
...
@@ -3372,16 +3393,16 @@ def topk(input, k, name=None):
...
@@ -3372,16 +3393,16 @@ def topk(input, k, name=None):
Args:
Args:
input(Variable): The input variable which can be a vector or Tensor with
input(Variable): The input variable which can be a vector or Tensor with
higher rank.
higher rank.
k(int): The number of top elements to look for along the last dimension
k(int): The number of top elements to look for along the last dimension
of input.
of input.
name(str|None): A name for this layer(optional). If set None, the layer
name(str|None): A name for this layer(optional). If set None, the layer
will be named automatically.
will be named automatically.
Default: None
Default: None
Returns:
Returns:
Tuple[Variable]: A tuple with two elements. Each element is a Variable.
Tuple[Variable]: A tuple with two elements. Each element is a Variable.
The first one is k largest elements along each last
The first one is k largest elements along each last
dimensional slice. The second one is indices of values
dimensional slice. The second one is indices of values
within the last dimension of input.
within the last dimension of input.
Raises:
Raises:
...
@@ -3576,15 +3597,15 @@ def warpctc(input, label, blank=0, norm_by_times=False):
...
@@ -3576,15 +3597,15 @@ def warpctc(input, label, blank=0, norm_by_times=False):
Its shape is [Lp, num_classes + 1], where Lp is the sum of all input
Its shape is [Lp, num_classes + 1], where Lp is the sum of all input
sequences' length and num_classes is the true number of classes.
sequences' length and num_classes is the true number of classes.
(not including the blank label).
(not including the blank label).
label (Variable): The ground truth of variable-length sequence,
label (Variable): The ground truth of variable-length sequence,
which is a 2-D Tensor with LoD information. It is of the shape [Lg, 1],
which is a 2-D Tensor with LoD information. It is of the shape [Lg, 1],
where Lg is the sum of all labels' length.
where Lg is the sum of all labels' length.
blank (int, default 0): The blank label index of Connectionist
blank (int, default 0): The blank label index of Connectionist
Temporal Classification (CTC) loss, which is in the
Temporal Classification (CTC) loss, which is in the
half-opened interval [0, num_classes + 1).
half-opened interval [0, num_classes + 1).
norm_by_times(bool, default false): Whether to normalize the gradients
norm_by_times(bool, default false): Whether to normalize the gradients
by the number of time-step, which is also the sequence's length.
by the number of time-step, which is also the sequence's length.
There is no need to normalize the gradients if warpctc layer was
There is no need to normalize the gradients if warpctc layer was
followed by a mean_op.
followed by a mean_op.
Returns:
Returns:
...
@@ -3690,8 +3711,8 @@ def nce(input,
...
@@ -3690,8 +3711,8 @@ def nce(input,
input (Variable): input variable.
input (Variable): input variable.
label (Variable): label.
label (Variable): label.
num_total_classes (int):${num_total_classes_comment}
num_total_classes (int):${num_total_classes_comment}
sample_weight (Variable|None): A Variable of shape [batch_size, 1]
sample_weight (Variable|None): A Variable of shape [batch_size, 1]
storing a weight for each sample. The default weight for each
storing a weight for each sample. The default weight for each
sample is 1.0.
sample is 1.0.
param_attr (ParamAttr|None): attributes for parameter
param_attr (ParamAttr|None): attributes for parameter
bias_attr (ParamAttr|None): attributes for bias
bias_attr (ParamAttr|None): attributes for bias
...
@@ -4081,7 +4102,7 @@ def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None):
...
@@ -4081,7 +4102,7 @@ def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None):
This layer computes the smooth L1 loss for Variable :attr:`x` and :attr:`y`.
This layer computes the smooth L1 loss for Variable :attr:`x` and :attr:`y`.
It takes the first dimension of :attr:`x` and :attr:`y` as batch size.
It takes the first dimension of :attr:`x` and :attr:`y` as batch size.
For each instance, it computes the smooth L1 loss element by element first
For each instance, it computes the smooth L1 loss element by element first
and then sums all the losses. So the shape of output Variable is
and then sums all the losses. So the shape of output Variable is
[batch_size, 1].
[batch_size, 1].
Args:
Args:
...
@@ -4090,14 +4111,14 @@ def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None):
...
@@ -4090,14 +4111,14 @@ def smooth_l1(x, y, inside_weight=None, outside_weight=None, sigma=None):
y (Variable): A tensor with rank at least 2. The target value of smooth
y (Variable): A tensor with rank at least 2. The target value of smooth
L1 loss op with same shape as :attr:`x`.
L1 loss op with same shape as :attr:`x`.
inside_weight (Variable|None): A tensor with rank at least 2. This
inside_weight (Variable|None): A tensor with rank at least 2. This
input is optional and should have same shape with :attr:`x`. If
input is optional and should have same shape with :attr:`x`. If
provided, the result of (:attr:`x` - :attr:`y`) will be multiplied
provided, the result of (:attr:`x` - :attr:`y`) will be multiplied
by this tensor element by element.
by this tensor element by element.
outside_weight (Variable|None): A tensor with rank at least 2. This
outside_weight (Variable|None): A tensor with rank at least 2. This
input is optional and should have same shape with :attr:`x`. If
input is optional and should have same shape with :attr:`x`. If
provided, the out smooth L1 loss will be multiplied by this tensor
provided, the out smooth L1 loss will be multiplied by this tensor
element by element.
element by element.
sigma (float|None): Hyper parameter of smooth L1 loss layer. A float
sigma (float|None): Hyper parameter of smooth L1 loss layer. A float
scalar with default value 1.0.
scalar with default value 1.0.
Returns:
Returns:
...
@@ -4143,7 +4164,7 @@ def one_hot(input, depth):
...
@@ -4143,7 +4164,7 @@ def one_hot(input, depth):
Examples:
Examples:
.. code-block:: python
.. code-block:: python
label = layers.data(name="label", shape=[1], dtype="float32")
label = layers.data(name="label", shape=[1], dtype="float32")
one_hot_label = layers.one_hot(input=label, depth=10)
one_hot_label = layers.one_hot(input=label, depth=10)
"""
"""
...
@@ -4297,10 +4318,10 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None):
...
@@ -4297,10 +4318,10 @@ def reshape(x, shape, actual_shape=None, act=None, inplace=True, name=None):
def
lod_reset
(
x
,
y
=
None
,
target_lod
=
None
):
def
lod_reset
(
x
,
y
=
None
,
target_lod
=
None
):
"""
"""
Set LoD of :attr:`x` to a new one specified by :attr:`y` or
Set LoD of :attr:`x` to a new one specified by :attr:`y` or
:attr:`target_lod`. When :attr:`y` provided, :attr:`y.lod` would be
:attr:`target_lod`. When :attr:`y` provided, :attr:`y.lod` would be
considered as target LoD first, otherwise :attr:`y.data` would be
considered as target LoD first, otherwise :attr:`y.data` would be
considered as target LoD. If :attr:`y` is not provided, target LoD should
considered as target LoD. If :attr:`y` is not provided, target LoD should
be specified by :attr:`target_lod`. If target LoD is specified by
be specified by :attr:`target_lod`. If target LoD is specified by
:attr:`Y.data` or :attr:`target_lod`, only one level LoD is supported.
:attr:`Y.data` or :attr:`target_lod`, only one level LoD is supported.
.. code-block:: text
.. code-block:: text
...
@@ -4354,7 +4375,7 @@ def lod_reset(x, y=None, target_lod=None):
...
@@ -4354,7 +4375,7 @@ def lod_reset(x, y=None, target_lod=None):
Args:
Args:
x (Variable): Input variable which could be a Tensor or LodTensor.
x (Variable): Input variable which could be a Tensor or LodTensor.
y (Variable|None): If provided, output's LoD would be derived
y (Variable|None): If provided, output's LoD would be derived
from :attr:`y`.
from :attr:`y`.
target_lod (list|tuple|None): One level LoD which should be considered
target_lod (list|tuple|None): One level LoD which should be considered
as target LoD when :attr:`y` not provided.
as target LoD when :attr:`y` not provided.
...
@@ -4670,7 +4691,7 @@ def image_resize(input,
...
@@ -4670,7 +4691,7 @@ def image_resize(input,
"""
"""
**Resize a Batch of Images**
**Resize a Batch of Images**
The input must be a tensor of the shape (num_batches, channels, in_h, in_w),
The input must be a tensor of the shape (num_batches, channels, in_h, in_w),
and the resizing only applies on the last two dimensions (height and width).
and the resizing only applies on the last two dimensions (height and width).
Supporting resample methods:
Supporting resample methods:
...
@@ -4766,9 +4787,9 @@ def resize_bilinear(input, out_shape=None, scale=None, name=None):
...
@@ -4766,9 +4787,9 @@ def resize_bilinear(input, out_shape=None, scale=None, name=None):
def
image_resize_short
(
input
,
out_short_len
,
resample
=
'BILINEAR'
):
def
image_resize_short
(
input
,
out_short_len
,
resample
=
'BILINEAR'
):
"""
"""
Resize a batch of images. The short edge of input images will be
Resize a batch of images. The short edge of input images will be
resized to the given 'out_short_len'. The long edge of input images
resized to the given 'out_short_len'. The long edge of input images
will be resized proportionately to make images' length-width ratio
will be resized proportionately to make images' length-width ratio
constant.
constant.
Args:
Args:
...
@@ -4801,7 +4822,7 @@ def gather(input, index):
...
@@ -4801,7 +4822,7 @@ def gather(input, index):
"""
"""
**Gather Layer**
**Gather Layer**
Output is obtained by gathering entries of the outer-most dimension
Output is obtained by gathering entries of the outer-most dimension
of X indexed by `index` and concatenate them together.
of X indexed by `index` and concatenate them together.
.. math::
.. math::
...
@@ -4826,7 +4847,7 @@ def gather(input, index):
...
@@ -4826,7 +4847,7 @@ def gather(input, index):
[5, 6]]
[5, 6]]
Args:
Args:
input (Variable): The source input with rank>=1.
input (Variable): The source input with rank>=1.
index (Variable): The index input with rank=1.
index (Variable): The index input with rank=1.
Returns:
Returns:
...
@@ -4862,7 +4883,7 @@ def random_crop(x, shape, seed=None):
...
@@ -4862,7 +4883,7 @@ def random_crop(x, shape, seed=None):
Returns:
Returns:
${out_comment}
${out_comment}
Examples:
Examples:
>>> img = fluid.layers.data("img", [3, 256, 256])
>>> img = fluid.layers.data("img", [3, 256, 256])
>>> cropped_img = fluid.layers.random_crop(img, shape=[3, 224, 224])
>>> cropped_img = fluid.layers.random_crop(img, shape=[3, 224, 224])
...
@@ -4908,7 +4929,7 @@ def log(x):
...
@@ -4908,7 +4929,7 @@ def log(x):
Out =
\\
ln(x)
Out =
\\
ln(x)
Args:
Args:
x (Variable): Input tensor.
x (Variable): Input tensor.
Returns:
Returns:
Variable: The natural log of the input tensor computed element-wise.
Variable: The natural log of the input tensor computed element-wise.
...
@@ -4937,7 +4958,7 @@ def relu(x):
...
@@ -4937,7 +4958,7 @@ def relu(x):
Out =
\\
max(0, x)
Out =
\\
max(0, x)
Args:
Args:
x (Variable): The input tensor.
x (Variable): The input tensor.
Returns:
Returns:
Variable: The output tensor with the same shape as input.
Variable: The output tensor with the same shape as input.
...
@@ -4958,15 +4979,15 @@ def relu(x):
...
@@ -4958,15 +4979,15 @@ def relu(x):
def
mean_iou
(
input
,
label
,
num_classes
):
def
mean_iou
(
input
,
label
,
num_classes
):
"""
"""
Mean Intersection-Over-Union is a common evaluation metric for
Mean Intersection-Over-Union is a common evaluation metric for
semantic image segmentation, which first computes the IOU for each
semantic image segmentation, which first computes the IOU for each
semantic class and then computes the average over classes.
semantic class and then computes the average over classes.
IOU is defined as follows:
IOU is defined as follows:
.. math::
.. math::
IOU =
\\
frac{true\_positive}{(true\_positive + false\_positive + false\_negative)}.
IOU =
\\
frac{true\_positive}{(true\_positive + false\_positive + false\_negative)}.
The predictions are accumulated in a confusion matrix and mean-IOU
The predictions are accumulated in a confusion matrix and mean-IOU
is then calculated from it.
is then calculated from it.
...
@@ -4979,12 +5000,12 @@ def mean_iou(input, label, num_classes):
...
@@ -4979,12 +5000,12 @@ def mean_iou(input, label, num_classes):
Returns:
Returns:
mean_iou (Variable): A Tensor representing the mean intersection-over-union with shape [1].
mean_iou (Variable): A Tensor representing the mean intersection-over-union with shape [1].
out_wrong(Variable): A Tensor with shape [num_classes]. The wrong numbers of each class.
out_wrong(Variable): A Tensor with shape [num_classes]. The wrong numbers of each class.
out_correct(Variable): A Tensor with shape [num_classes]. The correct numbers of each class.
out_correct(Variable): A Tensor with shape [num_classes]. The correct numbers of each class.
Examples:
Examples:
.. code-block:: python
.. code-block:: python
iou, wrongs, corrects = fluid.layers.mean_iou(predict, label, num_classes)
iou, wrongs, corrects = fluid.layers.mean_iou(predict, label, num_classes)
"""
"""
helper
=
LayerHelper
(
'mean_iou'
,
**
locals
())
helper
=
LayerHelper
(
'mean_iou'
,
**
locals
())
...
@@ -5003,3 +5024,101 @@ def mean_iou(input, label, num_classes):
...
@@ -5003,3 +5024,101 @@ def mean_iou(input, label, num_classes):
},
},
attrs
=
{
"num_classes"
:
num_classes
})
attrs
=
{
"num_classes"
:
num_classes
})
return
out_mean_iou
,
out_wrong
,
out_correct
return
out_mean_iou
,
out_wrong
,
out_correct
def crop(x, shape=None, offsets=None, name=None):
    """
    Crop input into output, as specified by offsets and shape.

    .. code-block:: text

        * Case 1:
            Given
                X = [[0, 1, 2, 0, 0]
                     [0, 3, 4, 0, 0]
                     [0, 0, 0, 0, 0]],
            and
                shape = [2, 2],
                offsets = [0, 1],
            output is:
                Out = [[1, 2],
                       [3, 4]].
        * Case 2:
            Given
                X = [[0, 1, 2, 5, 0]
                     [0, 3, 4, 6, 0]
                     [0, 0, 0, 0, 0]],
            and shape is tensor
                shape = [[0, 0, 0]
                         [0, 0, 0]]
            and
                offsets = [0, 1],
            output is:
                Out = [[1, 2, 5],
                       [3, 4, 6]].

    Args:
        x (Variable): The input tensor variable.
        shape (Variable|list/tuple of integer): The output shape. It can be
            a Variable or a list/tuple of integers. If it is a tensor
            Variable, its rank must be the same as that of `x`; this form is
            suitable when the output shape may change on each iteration. If
            it is a list/tuple of integers, its length must be the same as
            the rank of `x`.
        offsets (Variable|list/tuple of integer|None): The cropping offsets
            at each dimension. It can be a Variable or a list/tuple of
            integers. If it is a tensor Variable, its rank must be the same
            as that of `x`; this form is suitable when the offsets may change
            on each iteration. If it is a list/tuple of integers, its length
            must be the same as the rank of `x`. If None, the offsets are 0
            at each dimension.
        name(str|None): A name for this layer (optional). If set to None,
            the layer will be named automatically.

    Returns:
        Variable: The cropped tensor variable.

    Raises:
        ValueError: If shape is not a list, tuple or Variable.

    Examples:

        .. code-block:: python

            x = fluid.layers.data(name="x", shape=[3, 5], dtype="float32")
            y = fluid.layers.data(name="y", shape=[2, 3], dtype="float32")
            crop = fluid.layers.crop(x, shape=y)

            # or
            z = fluid.layers.data(name="z", shape=[3, 5], dtype="float32")
            crop = fluid.layers.crop(z, shape=[2, 3])

    """
    # Must stay the first statement: locals() is forwarded to the helper and
    # should contain only the call arguments at this point.
    helper = LayerHelper('crop', **locals())

    if not isinstance(shape, (list, tuple, Variable)):
        raise ValueError("The shape should be a list, tuple or Variable.")

    # Default to a zero offset for every dimension of the input.
    if offsets is None:
        offsets = [0] * len(x.shape)

    cropped = helper.create_tmp_variable(x.dtype)

    # A Variable argument is wired in as an op input; a plain list/tuple is
    # passed as an op attribute instead.
    op_inputs = {'X': x}
    op_attrs = {}

    if isinstance(shape, Variable):
        op_inputs['Y'] = shape
    else:
        op_attrs['shape'] = shape

    if isinstance(offsets, Variable):
        op_inputs['Offsets'] = offsets
    else:
        op_attrs['offsets'] = offsets

    helper.append_op(
        type='crop',
        inputs=op_inputs,
        outputs={'Out': cropped},
        # An empty attribute dict is passed on as None.
        attrs=op_attrs if op_attrs else None)
    return cropped
python/paddle/fluid/layers/tensor.py
浏览文件 @
8567d042
...
@@ -230,7 +230,11 @@ def sums(input, out=None):
...
@@ -230,7 +230,11 @@ def sums(input, out=None):
helper
=
LayerHelper
(
'sum'
,
**
locals
())
helper
=
LayerHelper
(
'sum'
,
**
locals
())
if
out
is
None
:
if
out
is
None
:
out
=
helper
.
create_tmp_variable
(
dtype
=
helper
.
input_dtype
())
out
=
helper
.
create_tmp_variable
(
dtype
=
helper
.
input_dtype
())
helper
.
append_op
(
type
=
'sum'
,
inputs
=
{
'X'
:
input
},
outputs
=
{
'Out'
:
out
})
helper
.
append_op
(
type
=
'sum'
,
inputs
=
{
'X'
:
input
},
outputs
=
{
'Out'
:
out
},
attrs
=
{
'use_mkldnn'
:
False
})
return
out
return
out
...
@@ -380,7 +384,7 @@ def argmin(x, axis=0):
...
@@ -380,7 +384,7 @@ def argmin(x, axis=0):
"""
"""
**argmin**
**argmin**
This function computes the indices of the min elements
This function computes the indices of the min elements
of the input tensor's element along the provided axis.
of the input tensor's element along the provided axis.
Args:
Args:
...
@@ -395,7 +399,7 @@ def argmin(x, axis=0):
...
@@ -395,7 +399,7 @@ def argmin(x, axis=0):
.. code-block:: python
.. code-block:: python
out = fluid.layers.argmin(x=in, axis=0)
out = fluid.layers.argmin(x=in, axis=0)
out = fluid.layers.argmin(x=in, axis=-1)
out = fluid.layers.argmin(x=in, axis=-1)
"""
"""
helper
=
LayerHelper
(
"arg_min"
,
**
locals
())
helper
=
LayerHelper
(
"arg_min"
,
**
locals
())
out
=
helper
.
create_tmp_variable
(
VarDesc
.
VarType
.
INT64
)
out
=
helper
.
create_tmp_variable
(
VarDesc
.
VarType
.
INT64
)
...
@@ -411,7 +415,7 @@ def argmax(x, axis=0):
...
@@ -411,7 +415,7 @@ def argmax(x, axis=0):
"""
"""
**argmax**
**argmax**
This function computes the indices of the max elements
This function computes the indices of the max elements
of the input tensor's element along the provided axis.
of the input tensor's element along the provided axis.
Args:
Args:
...
@@ -426,7 +430,7 @@ def argmax(x, axis=0):
...
@@ -426,7 +430,7 @@ def argmax(x, axis=0):
.. code-block:: python
.. code-block:: python
out = fluid.layers.argmax(x=in, axis=0)
out = fluid.layers.argmax(x=in, axis=0)
out = fluid.layers.argmax(x=in, axis=-1)
out = fluid.layers.argmax(x=in, axis=-1)
"""
"""
helper
=
LayerHelper
(
"arg_max"
,
**
locals
())
helper
=
LayerHelper
(
"arg_max"
,
**
locals
())
out
=
helper
.
create_tmp_variable
(
VarDesc
.
VarType
.
INT64
)
out
=
helper
.
create_tmp_variable
(
VarDesc
.
VarType
.
INT64
)
...
@@ -495,9 +499,9 @@ def reverse(x, axis):
...
@@ -495,9 +499,9 @@ def reverse(x, axis):
Args:
Args:
x(Variable): the input to be reversed.
x(Variable): the input to be reversed.
axis(int|tuple|list): Axis that along which order of elements
axis(int|tuple|list): Axis that along which order of elements
is reversed. If it is a tuple or a list, reversing
is reversed. If it is a tuple or a list, reversing
will be apply on each axis in the tuple or list.
will be apply on each axis in the tuple or list.
Returns:
Returns:
Variable: The reversed tensor.
Variable: The reversed tensor.
...
@@ -528,9 +532,9 @@ def save(x, file_path, overwrite=True):
...
@@ -528,9 +532,9 @@ def save(x, file_path, overwrite=True):
Args:
Args:
x(variable): The Tensor/LoDTensor to be saved.
x(variable): The Tensor/LoDTensor to be saved.
file_path(str): The file path where the variable will be saved.
file_path(str): The file path where the variable will be saved.
overwrite(bool): Whether or not cover the given file when it has already
overwrite(bool): Whether or not cover the given file when it has already
existed. If it's set 'False' and the file is existed, a runtime
existed. If it's set 'False' and the file is existed, a runtime
error will be thrown.
error will be thrown.
"""
"""
helper
=
LayerHelper
(
"save"
,
**
locals
())
helper
=
LayerHelper
(
"save"
,
**
locals
())
helper
.
append_op
(
helper
.
append_op
(
...
@@ -550,8 +554,8 @@ def save_combine(x, file_path, overwrite=True):
...
@@ -550,8 +554,8 @@ def save_combine(x, file_path, overwrite=True):
a single file.
a single file.
file_path(str): The file path where variables will be saved.
file_path(str): The file path where variables will be saved.
overwrite(bool): Whether or not cover the given file when it has already
overwrite(bool): Whether or not cover the given file when it has already
existed. If it's set 'False' and the file is existed, a runtime
existed. If it's set 'False' and the file is existed, a runtime
error will be thrown.
error will be thrown.
Returns:
Returns:
There is no return value.
There is no return value.
...
...
python/paddle/fluid/optimizer.py
浏览文件 @
8567d042
...
@@ -26,10 +26,10 @@ from clip import append_gradient_clip_ops, error_clip_callback
...
@@ -26,10 +26,10 @@ from clip import append_gradient_clip_ops, error_clip_callback
from
contextlib
import
contextmanager
from
contextlib
import
contextmanager
__all__
=
[
__all__
=
[
'SGD'
,
'Momentum'
,
'Adagrad'
,
'Adam'
,
'Adamax'
,
'DecayedAdagrad'
,
'SGD'
,
'Momentum'
,
'Adagrad'
,
'Adam'
,
'Adamax'
,
'DecayedAdagrad'
,
'Ftrl'
,
'SGDOptimizer'
,
'MomentumOptimizer'
,
'AdagradOptimizer'
,
'AdamOptimizer'
,
'SGDOptimizer'
,
'MomentumOptimizer'
,
'AdagradOptimizer'
,
'AdamOptimizer'
,
'AdamaxOptimizer'
,
'DecayedAdagradOptimizer'
,
'RMSPropOptimizer'
,
'AdamaxOptimizer'
,
'DecayedAdagradOptimizer'
,
'RMSPropOptimizer'
,
'
Adadelta'
,
'ModelAverage'
,
'
Optimizer'
'
FtrlOptimizer'
,
'Adadelta'
,
'ModelAverage'
,
'Optimizer'
,
'RMSProp
Optimizer'
]
]
...
@@ -192,15 +192,15 @@ class Optimizer(object):
...
@@ -192,15 +192,15 @@ class Optimizer(object):
"""Add optimization operators to update gradients to variables.
"""Add optimization operators to update gradients to variables.
Args:
Args:
loss: the target that this optimization is for.
loss(Variable): the target that this optimization is for.
parameters_and_grads: a list of (variable, gradient) pair to update.
parameters_and_grads(list(tuple(Variable, Variable))):
a list of (variable, gradient) pair to update.
Returns:
Returns:
return_op_list: a list of operators that will complete one step of
return_op_list: a list of operators that will complete one step of
optimization. This will include parameter update ops, global step
optimization. This will include parameter update ops, global step
update ops and any other custom ops required by subclasses to manage
update ops and any other custom ops required by subclasses to manage
their internal state.
their internal state.
:param startup_program:
"""
"""
# This is a default implementation of create_optimization_pass that
# This is a default implementation of create_optimization_pass that
# can be shared by most optimizers. This implementation assumes that
# can be shared by most optimizers. This implementation assumes that
...
@@ -268,7 +268,22 @@ class Optimizer(object):
...
@@ -268,7 +268,22 @@ class Optimizer(object):
class
SGDOptimizer
(
Optimizer
):
class
SGDOptimizer
(
Optimizer
):
""" Simple SGD optimizer without any state.
"""
Optimizer of the stochastic gradient descent algorithm.
.. math::
param\_out = param - learning\_rate * grad
Args:
learning_rate (float|Variable): the learning rate used to update parameters.
\
Can be a float value or a Variable with one float value as data element.
Examples:
.. code-block:: python
sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.2)
sgd_optimizer.minimize(cost)
"""
"""
def
__init__
(
self
,
learning_rate
,
**
kwargs
):
def
__init__
(
self
,
learning_rate
,
**
kwargs
):
...
@@ -294,7 +309,37 @@ class SGDOptimizer(Optimizer):
...
@@ -294,7 +309,37 @@ class SGDOptimizer(Optimizer):
class
MomentumOptimizer
(
Optimizer
):
class
MomentumOptimizer
(
Optimizer
):
"""Simple Momentum optimizer with velocity state
"""
Simple Momentum optimizer with velocity state
This optimizer has a flag for Nesterov Momentum.
The update equations are as follows:
.. math::
& velocity = mu * velocity + gradient
& if (use\_nesterov):
&\quad param = param - gradient * learning\_rate + mu * velocity * learning\_rate
& else:
&\quad param = param - learning\_rate * velocity
Args:
learning_rate (float|Variable): the learning rate used to update parameters.
\
Can be a float value or a Variable with one float value as data element.
momentum (float): momentum factor
use_nesterov (bool): enables Nesterov momentum
Examples:
.. code-block:: python
optimizer = fluid.optimizer.Momentum(learning_rate=0.2, momentum=0.1)
optimizer.minimize(cost)
"""
"""
_velocity_acc_str
=
"velocity"
_velocity_acc_str
=
"velocity"
...
@@ -338,7 +383,32 @@ class MomentumOptimizer(Optimizer):
...
@@ -338,7 +383,32 @@ class MomentumOptimizer(Optimizer):
class
AdagradOptimizer
(
Optimizer
):
class
AdagradOptimizer
(
Optimizer
):
"""Simple Adagrad optimizer with moment state
"""
**Adaptive Gradient Algorithm (Adagrad)**
The update is done as follows:
.. math::
moment\_out &= moment + grad * grad
param\_out &= param -
\\
frac{learning\_rate * grad}{\sqrt{moment\_out} + \epsilon}
The original paper(http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
does not have the epsilon attribute. It is added here in our implementation
as also proposed here: http://cs231n.github.io/neural-networks-3/#ada
for numerical stability to avoid the division by zero error.
Args:
learning_rate (float|Variable): the learning rate used to update parameters.
\
Can be a float value or a Variable with one float value as data element.
epsilon (float): a small float value for numerical stability.
Examples:
.. code-block:: python
optimizer = fluid.optimizer.Adagrad(learning_rate=0.2)
optimizer.minimize(cost)
"""
"""
_moment_acc_str
=
"moment"
_moment_acc_str
=
"moment"
...
@@ -379,7 +449,40 @@ class AdagradOptimizer(Optimizer):
...
@@ -379,7 +449,40 @@ class AdagradOptimizer(Optimizer):
class
AdamOptimizer
(
Optimizer
):
class
AdamOptimizer
(
Optimizer
):
"""Implements the Adam Optimizer
"""
This implements the Adam optimizer from Section 2 of the Adam
paper : https://arxiv.org/abs/1412.6980.
Adam is a first-order gradient-based optimization method based on
adaptive estimates of lower-order moments.
Adam updates:
.. math::
t & = t + 1
moment\_1\_out & = {
\\
beta}_1 * moment\_1 + (1 - {
\\
beta}_1) * grad
moment\_2\_out & = {
\\
beta}_2 * moment\_2 + (1 - {
\\
beta}_2) * grad * grad
learning\_rate & = learning\_rate *
\\
\\
frac{\sqrt{1 - {
\\
beta}_2^t}}{1 - {
\\
beta}_1^t}
param\_out & = param - learning\_rate *
\\
frac{moment\_1}{\sqrt{moment\_2} + \epsilon}
Args:
learning_rate (float|Variable): the learning rate used to update parameters.
\
Can be a float value or a Variable with one float value as data element.
beta1 (float): The exponential decay rate for the 1st moment estimates.
beta2 (float): The exponential decay rate for the 2nd moment estimates.
epsilon (float): a small float value for numerical stability.
Examples:
.. code-block:: python
optimizer = fluid.optimizer.Adam(learning_rate=0.2)
optimizer.minimize(cost)
"""
"""
_moment1_acc_str
=
"moment1"
_moment1_acc_str
=
"moment1"
_moment2_acc_str
=
"moment2"
_moment2_acc_str
=
"moment2"
...
@@ -484,7 +587,42 @@ class AdamOptimizer(Optimizer):
...
@@ -484,7 +587,42 @@ class AdamOptimizer(Optimizer):
class
AdamaxOptimizer
(
Optimizer
):
class
AdamaxOptimizer
(
Optimizer
):
"""Implements the Adamax Optimizer
"""
We implement the Adamax optimizer from Section 7 of the Adam
paper: https://arxiv.org/abs/1412.6980. Adamax is a variant of the
Adam algorithm based on the infinity norm.
Adamax updates:
.. math::
t & = t + 1
moment\_out & = {
\\
beta}_1 * moment + (1 - {
\\
beta}_1) * grad
inf\_norm\_out & = max({
\\
beta}_2 * inf\_norm + \epsilon, |grad|)
learning\_rate & =
\\
frac{learning\_rate}{1 - {
\\
beta}_1^t}
param\_out & = param - learning\_rate *
\\
frac{moment\_out}{inf\_norm\_out}
The original paper does not have an epsilon attribute.
However, it is added here for numerical stability to prevent the
division by 0 error.
Args:
learning_rate (float|Variable): the learning rate used to update parameters.
\
Can be a float value or a Variable with one float value as data element.
beta1 (float): The exponential decay rate for the 1st moment estimates.
beta2 (float): The exponential decay rate for the 2nd moment estimates.
epsilon (float): a small float value for numerical stability.
Examples:
.. code-block:: python
optimizer = fluid.optimizer.Adamax(learning_rate=0.2)
optimizer.minimize(cost)
"""
"""
_moment_acc_str
=
"moment"
_moment_acc_str
=
"moment"
_inf_norm_acc_str
=
"inf_norm"
_inf_norm_acc_str
=
"inf_norm"
...
@@ -568,7 +706,34 @@ class AdamaxOptimizer(Optimizer):
...
@@ -568,7 +706,34 @@ class AdamaxOptimizer(Optimizer):
class
DecayedAdagradOptimizer
(
Optimizer
):
class
DecayedAdagradOptimizer
(
Optimizer
):
"""Simple Decayed Adagrad optimizer with moment state
"""
**Decayed Adagrad Optimizer**
The original paper(http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
The update is done as follows:
.. math::
moment\_out & = decay * moment + (1 - decay) * grad * grad
param\_out & = param -
\\
frac{learning\_rate * grad}{\sqrt{moment\_out} + \epsilon}
The original paper(http://www.jmlr.org/papers/volume12/duchi11a/duchi11a.pdf)
does not have an epsilon attribute. It is added here for numerical
stability to avoid the division by zero error.
Args:
learning_rate (float|Variable): the learning rate used to update parameters.
\
Can be a float value or a Variable with one float value as data element.
decay (float): decay rate.
epsilon (float): a small float value for numerical stability.
Examples:
.. code-block:: python
optimizer = fluid.optimizer.DecayedAdagrad(learning_rate=0.2)
optimizer.minimize(cost)
"""
"""
_moment_acc_str
=
"moment"
_moment_acc_str
=
"moment"
...
@@ -614,6 +779,7 @@ class DecayedAdagradOptimizer(Optimizer):
...
@@ -614,6 +779,7 @@ class DecayedAdagradOptimizer(Optimizer):
class
AdadeltaOptimizer
(
Optimizer
):
class
AdadeltaOptimizer
(
Optimizer
):
"""
"""
**Adadelta Optimizer**
**Adadelta Optimizer**
Simple Adadelta optimizer with average squared grad state and
Simple Adadelta optimizer with average squared grad state and
average squared update state.
average squared update state.
The details of adadelta please refer to this
The details of adadelta please refer to this
...
@@ -628,7 +794,7 @@ class AdadeltaOptimizer(Optimizer):
...
@@ -628,7 +794,7 @@ class AdadeltaOptimizer(Optimizer):
E(dx_t^2) &=
\\
rho * E(dx_{t-1}^2) + (1-
\\
rho) * (-g*learning
\\
_rate)^2
E(dx_t^2) &=
\\
rho * E(dx_{t-1}^2) + (1-
\\
rho) * (-g*learning
\\
_rate)^2
Args:
Args:
learning_rate(float): global le
ra
ning rate
learning_rate(float): global le
ar
ning rate
rho(float): rho in equation
rho(float): rho in equation
epsilon(float): epsilon in equation
epsilon(float): epsilon in equation
...
@@ -703,37 +869,37 @@ class RMSPropOptimizer(Optimizer):
...
@@ -703,37 +869,37 @@ class RMSPropOptimizer(Optimizer):
.. math::
.. math::
r(w, t) & =
\\
rho r(w, t-1) + (1 -
\\
rho)(
\\
nabla Q_{i}(w))^2
\\\\
r(w, t) & =
\\
rho r(w, t-1) + (1 -
\\
rho)(
\\
nabla Q_{i}(w))^2
w & = w -
\\
frac{
\\
eta} {
\\
sqrt{r(w,t) +
\\
epsilon}}
\\
nabla Q_{i}(w)
w & = w -
\\
frac{
\\
eta} {
\\
sqrt{r(w,t) +
\\
epsilon}}
\\
nabla Q_{i}(w)
The first equation calculates moving average of the squared gradient for
The first equation calculates moving average of the squared gradient for
each weight. Then dividing the gradient by :math:
`sqrt{v(w,t)}`.
each weight. Then dividing the gradient by :math:`sqrt{v(w,t)}`.
In some cases, adding a momentum term :math: `
\\
beta` is beneficial.
In some cases, adding a momentum term :math: `
\\
beta` is beneficial.
In our implementation, Nesterov momentum is used:
In our implementation, Nesterov momentum is used:
.. math::
.. math::
r(w, t) & =
\\
rho r(w, t-1) + (1 -
\\
rho)(
\\
nabla Q_{i}(w))^2
\\\\
r(w, t) & =
\\
rho r(w, t-1) + (1 -
\\
rho)(
\\
nabla Q_{i}(w))^2
v(w, t) & =
\\
beta v(w, t-1) +
\\
frac{
\\
eta} {
\\
sqrt{v(w,t) +
v(w, t) & =
\\
beta v(w, t-1) +
\\
frac{
\\
eta} {
\\
sqrt{v(w,t) +
\\
epsilon}}
\\
nabla Q_{i}(w)
\\
epsilon}}
\\
nabla Q_{i}(w)
w & = w - v(w, t)
w & = w - v(w, t)
where, :math:
`
\\
rho` is a hyperparameter and typical values are 0.9, 0.95
where, :math:`
\\
rho` is a hyperparameter and typical values are 0.9, 0.95
and so on. :math: `beta` is the momentum term. :math: `
\\
epsilon` is a
and so on. :math: `beta` is the momentum term. :math: `
\\
epsilon` is a
smoothing term to avoid division by zero, usually set somewhere in range
smoothing term to avoid division by zero, usually set somewhere in range
from 1e-4 to 1e-8.
from 1e-4 to 1e-8.
Args:
Args:
learning_rate(float): global le
ra
ning rate.
learning_rate(float): global le
ar
ning rate.
rho(float): rho is :math: `
\\
rho` in equation, set 0.95 by default.
rho(float): rho is :math: `
\\
rho` in equation, set 0.95 by default.
epsilon(float): :math: `
\\
epsilon` in equation is smoothing term to
epsilon(float): :math: `
\\
epsilon` in equation is smoothing term to
avoid division by zero, set 1e-6 by default.
avoid division by zero, set 1e-6 by default.
momentum(float): :math:
`
\\
beta` in equation is the momentum term,
momentum(float): :math:`
\\
beta` in equation is the momentum term,
set 0.0 by default.
set 0.0 by default.
Raises:
Raises:
...
@@ -810,6 +976,113 @@ class RMSPropOptimizer(Optimizer):
...
@@ -810,6 +976,113 @@ class RMSPropOptimizer(Optimizer):
return
rmsprop_op
return
rmsprop_op
class FtrlOptimizer(Optimizer):
    r"""
    FTRL (Follow The Regularized Leader) Optimizer.

    The paper that proposed Follow The Regularized Leader (FTRL):
    (https://www.eecs.tufts.edu/~dsculley/papers/ad-click-prediction.pdf)

    ..  math::

        &new\_accum = squared\_accum + grad^2

        &if (lr\_power == -0.5):

        &\quad linear\_accum += grad - \frac{\sqrt{new\_accum} - \sqrt{squared\_accum}}{learning\_rate * param}

        &else:

        &\quad linear\_accum += grad - \frac{new\_accum^{-lr\_power} - squared\_accum^{-lr\_power}}{learning\_rate * param}


        &x = l1 * sign(linear\_accum) - linear\_accum

        &if (lr\_power == -0.5):

        &\quad y = \frac{\sqrt{new\_accum}}{learning\_rate} + (2 * l2)

        &\quad pre\_shrink = \frac{x}{y}

        &\quad param = (abs(linear\_accum) > l1).select(pre\_shrink, 0.0)

        &else:

        &\quad y = \frac{new\_accum^{-lr\_power}}{learning\_rate} + (2 * l2)

        &\quad pre\_shrink = \frac{x}{y}

        &\quad param = (abs(linear\_accum) > l1).select(pre\_shrink, 0.0)

        &squared\_accum += grad^2

    Args:
        learning_rate (float|Variable): global learning rate.
        l1 (float): the `l1` term in the equations above, 0.0 by default.
        l2 (float): the `l2` term in the equations above, 0.0 by default.
        lr_power (float): the `lr_power` term in the equations above,
            -0.5 by default.

    Raises:
        ValueError: If learning_rate is None.

    Examples:
          .. code-block:: python

              optimizer = fluid.optimizer.Ftrl(0.0001)
              _, params_grads = optimizer.minimize(cost)
    """

    # Names under which the two per-parameter accumulators are registered.
    _squared_acc_str = "squared"
    _linear_acc_str = "linear"

    def __init__(self, learning_rate, l1=0.0, l2=0.0, lr_power=-0.5, **kwargs):
        super(FtrlOptimizer, self).__init__(
            learning_rate=learning_rate, **kwargs)
        # NOTE(review): this check runs after the base initializer has already
        # received learning_rate; confirm the base class lets None through
        # before relying on this ValueError.
        if learning_rate is None:
            raise ValueError("learning_rate is not set.")
        self.type = "ftrl"
        self._l1 = l1
        self._l2 = l2
        self._lr_power = lr_power

    def _create_accumulators(self, block, parameters):
        """Register the squared and linear accumulators for each parameter.

        Raises:
            TypeError: If block is not a framework.Block.
        """
        if not isinstance(block, framework.Block):
            raise TypeError("block is not instance of framework.Block.")

        for p in parameters:
            self._add_accumulator(self._squared_acc_str, p)
            self._add_accumulator(self._linear_acc_str, p)

    def _append_optimize_op(self, block, param_and_grad):
        """Append one `ftrl` op to `block` for a (parameter, gradient) pair.

        Args:
            block: the block the op is appended to.
            param_and_grad: pair whose element 0 is the parameter and
                element 1 is its gradient.

        Returns:
            The op returned by `block.append_op`.

        Raises:
            TypeError: If block is not a framework.Block.
        """
        if not isinstance(block, framework.Block):
            raise TypeError("block is not instance of framework.Block.")

        squared_acc = self._get_accumulator(self._squared_acc_str,
                                            param_and_grad[0])
        linear_acc = self._get_accumulator(self._linear_acc_str,
                                           param_and_grad[0])
        # The parameter and both accumulators are used as inputs and as
        # outputs, so the op writes its results back to the same variables.
        ftrl_op = block.append_op(
            type=self.type,
            inputs={
                "Param": param_and_grad[0],
                "Grad": param_and_grad[1],
                "SquaredAccumulator": squared_acc,
                "LinearAccumulator": linear_acc,
                "LearningRate": self._create_param_lr(param_and_grad),
            },
            outputs={
                "ParamOut": param_and_grad[0],
                "SquaredAccumOut": squared_acc,
                "LinearAccumOut": linear_acc
            },
            # "l2" previously received self._l1, so the l2 argument was
            # silently ignored.
            attrs={
                "l1": self._l1,
                "l2": self._l2,
                "lr_power": self._lr_power
            })

        return ftrl_op
# We short the class name, since users will use the optimizer with the package
# We short the class name, since users will use the optimizer with the package
# name. The sample code:
# name. The sample code:
#
#
...
@@ -826,6 +1099,7 @@ Adamax = AdamaxOptimizer
...
@@ -826,6 +1099,7 @@ Adamax = AdamaxOptimizer
DecayedAdagrad
=
DecayedAdagradOptimizer
DecayedAdagrad
=
DecayedAdagradOptimizer
Adadelta
=
AdadeltaOptimizer
Adadelta
=
AdadeltaOptimizer
RMSProp
=
RMSPropOptimizer
RMSProp
=
RMSPropOptimizer
Ftrl
=
FtrlOptimizer
class
ModelAverage
(
Optimizer
):
class
ModelAverage
(
Optimizer
):
...
@@ -844,7 +1118,9 @@ class ModelAverage(Optimizer):
...
@@ -844,7 +1118,9 @@ class ModelAverage(Optimizer):
max_average_window: The maximum size of average window.
max_average_window: The maximum size of average window.
Examples:
Examples:
...
.. code-block:: python
optimizer = fluid.optimizer.Momentum()
optimizer = fluid.optimizer.Momentum()
_, params_grads = optimizer.minimize(cost)
_, params_grads = optimizer.minimize(cost)
model_average = fluid.optimizer.ModelAverage(params_grads, 0.15,
model_average = fluid.optimizer.ModelAverage(params_grads, 0.15,
...
...
python/paddle/fluid/profiler.py
浏览文件 @
8567d042
...
@@ -42,6 +42,9 @@ def cuda_profiler(output_file, output_mode=None, config=None):
...
@@ -42,6 +42,9 @@ def cuda_profiler(output_file, output_mode=None, config=None):
counters/options for profiling by `config` argument. The default config
counters/options for profiling by `config` argument. The default config
is ['gpustarttimestamp', 'gpustarttimestamp', 'gridsize3d',
is ['gpustarttimestamp', 'gpustarttimestamp', 'gridsize3d',
'threadblocksize', 'streamid', 'enableonstart 0', 'conckerneltrace'].
'threadblocksize', 'streamid', 'enableonstart 0', 'conckerneltrace'].
Then users can use the NVIDIA Visual Profiler
(https://developer.nvidia.com/nvidia-visual-profiler) tool to load
this output file and visualize the results.
Args:
Args:
output_file (string) : The output file name, the result will be
output_file (string) : The output file name, the result will be
...
@@ -50,6 +53,33 @@ def cuda_profiler(output_file, output_mode=None, config=None):
...
@@ -50,6 +53,33 @@ def cuda_profiler(output_file, output_mode=None, config=None):
Comma separated values format. It should be 'kvp' or 'csv'.
Comma separated values format. It should be 'kvp' or 'csv'.
config (list of string) : The profiler options and counters can refer
config (list of string) : The profiler options and counters can refer
to "Compute Command Line Profiler User Guide".
to "Compute Command Line Profiler User Guide".
Raises:
ValueError: If `output_mode` is not in ['kvp', 'csv'].
Examples:
.. code-block:: python
import paddle.fluid as fluid
import paddle.fluid.profiler as profiler
epoc = 8
dshape = [4, 3, 28, 28]
data = fluid.layers.data(name='data', shape=[3, 28, 28], dtype='float32')
conv = fluid.layers.conv2d(data, 20, 3, stride=[1, 1], padding=[1, 1])
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
exe.run(fluid.default_startup_program())
output_file = 'cuda_profiler.txt'
with profiler.cuda_profiler(output_file, 'csv') as nvprof:
for i in range(epoc):
input = np.random.random(dshape).astype('float32')
exe.run(fluid.default_main_program(), feed={'data': input})
# then use NVIDIA Visual Profiler (nvvp) to load this output file
# to visualize results.
"""
"""
if
output_mode
is
None
:
if
output_mode
is
None
:
output_mode
=
'csv'
output_mode
=
'csv'
...
@@ -69,19 +99,52 @@ def cuda_profiler(output_file, output_mode=None, config=None):
...
@@ -69,19 +99,52 @@ def cuda_profiler(output_file, output_mode=None, config=None):
def
reset_profiler
():
def
reset_profiler
():
"""The profiler clear interface.
"""
reset_profiler will clear the previous time record.
Clear the previous time record. This interface does not work for
`fluid.profiler.cuda_profiler`, it only works for
`fluid.profiler.start_profiler`, `fluid.profiler.stop_profiler`,
and `fluid.profiler.profiler`.
Examples:
.. code-block:: python
import paddle.fluid.profiler as profiler
with profiler.profiler(state, 'total', '/tmp/profile'):
for iter in range(10):
if iter == 2:
profiler.reset_profiler()
# ...
"""
"""
core
.
reset_profiler
()
core
.
reset_profiler
()
def
start_profiler
(
state
):
def
start_profiler
(
state
):
"""Enable the profiler.
"""
Enable the profiler. Users can call `fluid.profiler.start_profiler` and
`fluid.profiler.stop_profiler` around the code to be profiled, as an
alternative to the `fluid.profiler.profiler` interface.
Args:
Args:
state (string) : The profiling state, which should be 'CPU', 'GPU'
state (string) : The profiling state, which should be 'CPU', 'GPU'
or 'All'. 'CPU' means only profile CPU. 'GPU' means profiling
or 'All'. 'CPU' means only profile CPU. 'GPU' means profiling
GPU as well. 'All' also generates timeline.
GPU as well. 'All' also generates timeline.
Raises:
ValueError: If `state` is not in ['CPU', 'GPU', 'All'].
Examples:
.. code-block:: python
import paddle.fluid.profiler as profiler
profiler.start_profiler('GPU')
for iter in range(10):
if iter == 2:
profiler.reset_profiler()
# except each iteration
profiler.stop_profiler('total', '/tmp/profile')
"""
"""
if
core
.
is_profiler_enabled
():
if
core
.
is_profiler_enabled
():
return
return
...
@@ -97,7 +160,10 @@ def start_profiler(state):
...
@@ -97,7 +160,10 @@ def start_profiler(state):
def
stop_profiler
(
sorted_key
=
None
,
profile_path
=
'/tmp/profile'
):
def
stop_profiler
(
sorted_key
=
None
,
profile_path
=
'/tmp/profile'
):
"""Stop the profiler.
"""
Stop the profiler. Users can call `fluid.profiler.start_profiler` and
`fluid.profiler.stop_profiler` around the code to be profiled, as an
alternative to the `fluid.profiler.profiler` interface.
Args:
Args:
sorted_key (string) : If None, the profiling results will be printed
sorted_key (string) : If None, the profiling results will be printed
...
@@ -111,6 +177,23 @@ def stop_profiler(sorted_key=None, profile_path='/tmp/profile'):
...
@@ -111,6 +177,23 @@ def stop_profiler(sorted_key=None, profile_path='/tmp/profile'):
The `ave` means sorting by the average execution time.
The `ave` means sorting by the average execution time.
profile_path (string) : If state == 'All', it will write a profile
profile_path (string) : If state == 'All', it will write a profile
proto output file.
proto output file.
Raises:
ValueError: If `sorted_key` is not in
['calls', 'total', 'max', 'min', 'ave'].
Examples:
.. code-block:: python
import paddle.fluid.profiler as profiler
profiler.start_profiler('GPU')
for iter in range(10):
if iter == 2:
profiler.reset_profiler()
# except each iteration
profiler.stop_profiler('total', '/tmp/profile')
"""
"""
if
not
core
.
is_profiler_enabled
():
if
not
core
.
is_profiler_enabled
():
return
return
...
@@ -137,7 +220,12 @@ def profiler(state, sorted_key=None, profile_path='/tmp/profile'):
...
@@ -137,7 +220,12 @@ def profiler(state, sorted_key=None, profile_path='/tmp/profile'):
Different from cuda_profiler, this profiler can be used to profile both CPU
Different from cuda_profiler, this profiler can be used to profile both CPU
and GPU program. By default, it records the CPU and GPU operator kernels,
and GPU program. By default, it records the CPU and GPU operator kernels,
if you want to profile other program, you can refer the profiling tutorial
if you want to profile other program, you can refer the profiling tutorial
to add more records.
to add more records in C++ code.
If the state == 'All', a profile proto file will be written to
`profile_path`. This file records timeline information during the execution.
Then users can visualize this file to see the timeline, please refer
https://github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/howto/optimization/timeline.md
Args:
Args:
state (string) : The profiling state, which should be 'CPU' or 'GPU',
state (string) : The profiling state, which should be 'CPU' or 'GPU',
...
@@ -156,6 +244,25 @@ def profiler(state, sorted_key=None, profile_path='/tmp/profile'):
...
@@ -156,6 +244,25 @@ def profiler(state, sorted_key=None, profile_path='/tmp/profile'):
The `ave` means sorting by the average execution time.
The `ave` means sorting by the average execution time.
profile_path (string) : If state == 'All', it will write a profile
profile_path (string) : If state == 'All', it will write a profile
proto output file.
proto output file.
Raises:
ValueError: If `state` is not in ['CPU', 'GPU', 'All']. If `sorted_key` is
not in ['calls', 'total', 'max', 'min', 'ave'].
Examples:
.. code-block:: python
import paddle.fluid.profiler as profiler
with profiler.profiler('All', 'total', '/tmp/profile') as prof:
for pass_id in range(pass_num):
for batch_id, data in enumerate(train_reader()):
exe.run(fluid.default_main_program(),
feed=feeder.feed(data),
fetch_list=[],
use_program_cache=True)
# ...
"""
"""
start_profiler
(
state
)
start_profiler
(
state
)
yield
yield
...
...
python/paddle/fluid/regularizer.py
浏览文件 @
8567d042
...
@@ -16,8 +16,8 @@ import framework
...
@@ -16,8 +16,8 @@ import framework
from
.
import
core
from
.
import
core
__all__
=
[
__all__
=
[
'append_regularization_ops'
,
'
WeightDecayRegularizer'
,
'L1Decay'
,
'L2Decay
'
,
'append_regularization_ops'
,
'
L1Decay'
,
'L2Decay'
,
'L1DecayRegularizer
'
,
'L
1DecayRegularizer'
,
'L
2DecayRegularizer'
'L2DecayRegularizer'
]
]
...
@@ -36,7 +36,8 @@ def append_regularization_ops(parameters_and_grads, regularization=None):
...
@@ -36,7 +36,8 @@ def append_regularization_ops(parameters_and_grads, regularization=None):
set. It will be applied with regularizer.
set. It will be applied with regularizer.
Returns:
Returns:
list of (parameters, gradients) pair with the regularized gradient
list[(Variable, Variable)]: list of (parameters, gradients)
\
pair with the regularized gradient
Raises:
Raises:
Exception: Unknown regularization type
Exception: Unknown regularization type
...
@@ -100,6 +101,24 @@ class WeightDecayRegularizer(object):
...
@@ -100,6 +101,24 @@ class WeightDecayRegularizer(object):
class
L2DecayRegularizer
(
WeightDecayRegularizer
):
class
L2DecayRegularizer
(
WeightDecayRegularizer
):
"""Implements the L2 Weight Decay Regularization
"""Implements the L2 Weight Decay Regularization
Small values of L2 can help prevent over fitting the training data.
.. math::
L2WeightDecay = reg\_coeff * parameter
Args:
regularization_coeff(float): regularization coeff
Examples:
.. code-block:: python
optimizer = fluid.optimizer.Adagrad(
learning_rate=1e-4,
regularization=fluid.regularizer.L2DecayRegularizer(
regularization_coeff=0.1))
optimizer.minimize(avg_cost)
"""
"""
def
__init__
(
self
,
regularization_coeff
=
0.0
):
def
__init__
(
self
,
regularization_coeff
=
0.0
):
...
@@ -154,6 +173,27 @@ class L2DecayRegularizer(WeightDecayRegularizer):
...
@@ -154,6 +173,27 @@ class L2DecayRegularizer(WeightDecayRegularizer):
class
L1DecayRegularizer
(
WeightDecayRegularizer
):
class
L1DecayRegularizer
(
WeightDecayRegularizer
):
"""Implements the L1 Weight Decay Regularization
"""Implements the L1 Weight Decay Regularization
L1 regularization encourages sparsity.
.. math::
L1WeightDecay = reg\_coeff * sign(parameter)
Args:
regularization_coeff(float): regularization coeff
Examples:
.. code-block:: python
program = fluid.framework.Program()
block = program.global_block()
mul_x = block.create_parameter(
dtype="float32",
shape=[5, 10],
lod_level=0,
name="mul.x",
regularizer=fluid.regularizer.L1DecayRegularizer(0.5))
"""
"""
def
__init__
(
self
,
regularization_coeff
=
0.0
):
def
__init__
(
self
,
regularization_coeff
=
0.0
):
...
...
python/paddle/fluid/tests/book/notest_understand_sentiment.py
浏览文件 @
8567d042
...
@@ -194,16 +194,16 @@ def train(word_dict,
...
@@ -194,16 +194,16 @@ def train(word_dict,
if
is_local
:
if
is_local
:
train_loop
(
fluid
.
default_main_program
())
train_loop
(
fluid
.
default_main_program
())
else
:
else
:
port
=
os
.
getenv
(
"PADDLE_
INIT
_PORT"
,
"6174"
)
port
=
os
.
getenv
(
"PADDLE_
PSERVER
_PORT"
,
"6174"
)
pserver_ips
=
os
.
getenv
(
"PADDLE_
INIT_PSERVER
S"
)
# ip,ip...
pserver_ips
=
os
.
getenv
(
"PADDLE_
PSERVER_IP
S"
)
# ip,ip...
eplist
=
[]
eplist
=
[]
for
ip
in
pserver_ips
.
split
(
","
):
for
ip
in
pserver_ips
.
split
(
","
):
eplist
.
append
(
':'
.
join
([
ip
,
port
]))
eplist
.
append
(
':'
.
join
([
ip
,
port
]))
pserver_endpoints
=
","
.
join
(
eplist
)
# ip:port,ip:port...
pserver_endpoints
=
","
.
join
(
eplist
)
# ip:port,ip:port...
trainers
=
int
(
os
.
getenv
(
"TRAINERS"
))
trainers
=
int
(
os
.
getenv
(
"
PADDLE_
TRAINERS"
))
current_endpoint
=
os
.
getenv
(
"POD_IP"
)
+
":"
+
port
current_endpoint
=
os
.
getenv
(
"POD_IP"
)
+
":"
+
port
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_
INIT_
TRAINER_ID"
))
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_TRAINER_ID"
))
training_role
=
os
.
getenv
(
"TRAINING_ROLE"
,
"TRAINER"
)
training_role
=
os
.
getenv
(
"
PADDLE_
TRAINING_ROLE"
,
"TRAINER"
)
t
=
fluid
.
DistributeTranspiler
()
t
=
fluid
.
DistributeTranspiler
()
t
.
transpile
(
trainer_id
,
pservers
=
pserver_endpoints
,
trainers
=
trainers
)
t
.
transpile
(
trainer_id
,
pservers
=
pserver_endpoints
,
trainers
=
trainers
)
if
training_role
==
"PSERVER"
:
if
training_role
==
"PSERVER"
:
...
...
python/paddle/fluid/tests/book/test_fit_a_line.py
浏览文件 @
8567d042
...
@@ -69,16 +69,16 @@ def train(use_cuda, save_dirname, is_local):
...
@@ -69,16 +69,16 @@ def train(use_cuda, save_dirname, is_local):
if
is_local
:
if
is_local
:
train_loop
(
fluid
.
default_main_program
())
train_loop
(
fluid
.
default_main_program
())
else
:
else
:
port
=
os
.
getenv
(
"PADDLE_
INIT
_PORT"
,
"6174"
)
port
=
os
.
getenv
(
"PADDLE_
PSERVER
_PORT"
,
"6174"
)
pserver_ips
=
os
.
getenv
(
"PADDLE_
INIT_PSERVER
S"
)
# ip,ip...
pserver_ips
=
os
.
getenv
(
"PADDLE_
PSERVER_IP
S"
)
# ip,ip...
eplist
=
[]
eplist
=
[]
for
ip
in
pserver_ips
.
split
(
","
):
for
ip
in
pserver_ips
.
split
(
","
):
eplist
.
append
(
':'
.
join
([
ip
,
port
]))
eplist
.
append
(
':'
.
join
([
ip
,
port
]))
pserver_endpoints
=
","
.
join
(
eplist
)
# ip:port,ip:port...
pserver_endpoints
=
","
.
join
(
eplist
)
# ip:port,ip:port...
trainers
=
int
(
os
.
getenv
(
"TRAINERS"
))
trainers
=
int
(
os
.
getenv
(
"
PADDLE_
TRAINERS"
))
current_endpoint
=
os
.
getenv
(
"POD_IP"
)
+
":"
+
port
current_endpoint
=
os
.
getenv
(
"POD_IP"
)
+
":"
+
port
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_
INIT_
TRAINER_ID"
))
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_TRAINER_ID"
))
training_role
=
os
.
getenv
(
"TRAINING_ROLE"
,
"TRAINER"
)
training_role
=
os
.
getenv
(
"
PADDLE_
TRAINING_ROLE"
,
"TRAINER"
)
t
=
fluid
.
DistributeTranspiler
()
t
=
fluid
.
DistributeTranspiler
()
t
.
transpile
(
trainer_id
,
pservers
=
pserver_endpoints
,
trainers
=
trainers
)
t
.
transpile
(
trainer_id
,
pservers
=
pserver_endpoints
,
trainers
=
trainers
)
if
training_role
==
"PSERVER"
:
if
training_role
==
"PSERVER"
:
...
...
python/paddle/fluid/tests/book/test_image_classification.py
浏览文件 @
8567d042
...
@@ -178,16 +178,16 @@ def train(net_type, use_cuda, save_dirname, is_local):
...
@@ -178,16 +178,16 @@ def train(net_type, use_cuda, save_dirname, is_local):
if
is_local
:
if
is_local
:
train_loop
(
fluid
.
default_main_program
())
train_loop
(
fluid
.
default_main_program
())
else
:
else
:
port
=
os
.
getenv
(
"PADDLE_
INIT
_PORT"
,
"6174"
)
port
=
os
.
getenv
(
"PADDLE_
PSERVER
_PORT"
,
"6174"
)
pserver_ips
=
os
.
getenv
(
"PADDLE_
INIT_PSERVER
S"
)
# ip,ip...
pserver_ips
=
os
.
getenv
(
"PADDLE_
PSERVER_IP
S"
)
# ip,ip...
eplist
=
[]
eplist
=
[]
for
ip
in
pserver_ips
.
split
(
","
):
for
ip
in
pserver_ips
.
split
(
","
):
eplist
.
append
(
':'
.
join
([
ip
,
port
]))
eplist
.
append
(
':'
.
join
([
ip
,
port
]))
pserver_endpoints
=
","
.
join
(
eplist
)
# ip:port,ip:port...
pserver_endpoints
=
","
.
join
(
eplist
)
# ip:port,ip:port...
trainers
=
int
(
os
.
getenv
(
"TRAINERS"
))
trainers
=
int
(
os
.
getenv
(
"
PADDLE_
TRAINERS"
))
current_endpoint
=
os
.
getenv
(
"POD_IP"
)
+
":"
+
port
current_endpoint
=
os
.
getenv
(
"POD_IP"
)
+
":"
+
port
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_
INIT_
TRAINER_ID"
))
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_TRAINER_ID"
))
training_role
=
os
.
getenv
(
"TRAINING_ROLE"
,
"TRAINER"
)
training_role
=
os
.
getenv
(
"
PADDLE_
TRAINING_ROLE"
,
"TRAINER"
)
t
=
fluid
.
DistributeTranspiler
()
t
=
fluid
.
DistributeTranspiler
()
t
.
transpile
(
trainer_id
,
pservers
=
pserver_endpoints
,
trainers
=
trainers
)
t
.
transpile
(
trainer_id
,
pservers
=
pserver_endpoints
,
trainers
=
trainers
)
if
training_role
==
"PSERVER"
:
if
training_role
==
"PSERVER"
:
...
...
python/paddle/fluid/tests/book/test_label_semantic_roles.py
浏览文件 @
8567d042
...
@@ -209,16 +209,16 @@ def train(use_cuda, save_dirname=None, is_local=True):
...
@@ -209,16 +209,16 @@ def train(use_cuda, save_dirname=None, is_local=True):
if
is_local
:
if
is_local
:
train_loop
(
fluid
.
default_main_program
())
train_loop
(
fluid
.
default_main_program
())
else
:
else
:
port
=
os
.
getenv
(
"PADDLE_
INIT
_PORT"
,
"6174"
)
port
=
os
.
getenv
(
"PADDLE_
PSERVER
_PORT"
,
"6174"
)
pserver_ips
=
os
.
getenv
(
"PADDLE_
INIT_PSERVER
S"
)
# ip,ip...
pserver_ips
=
os
.
getenv
(
"PADDLE_
PSERVER_IP
S"
)
# ip,ip...
eplist
=
[]
eplist
=
[]
for
ip
in
pserver_ips
.
split
(
","
):
for
ip
in
pserver_ips
.
split
(
","
):
eplist
.
append
(
':'
.
join
([
ip
,
port
]))
eplist
.
append
(
':'
.
join
([
ip
,
port
]))
pserver_endpoints
=
","
.
join
(
eplist
)
# ip:port,ip:port...
pserver_endpoints
=
","
.
join
(
eplist
)
# ip:port,ip:port...
trainers
=
int
(
os
.
getenv
(
"TRAINERS"
))
trainers
=
int
(
os
.
getenv
(
"
PADDLE_
TRAINERS"
))
current_endpoint
=
os
.
getenv
(
"POD_IP"
)
+
":"
+
port
current_endpoint
=
os
.
getenv
(
"POD_IP"
)
+
":"
+
port
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_
INIT_
TRAINER_ID"
))
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_TRAINER_ID"
))
training_role
=
os
.
getenv
(
"TRAINING_ROLE"
,
"TRAINER"
)
training_role
=
os
.
getenv
(
"
PADDLE_
TRAINING_ROLE"
,
"TRAINER"
)
t
=
fluid
.
DistributeTranspiler
()
t
=
fluid
.
DistributeTranspiler
()
t
.
transpile
(
trainer_id
,
pservers
=
pserver_endpoints
,
trainers
=
trainers
)
t
.
transpile
(
trainer_id
,
pservers
=
pserver_endpoints
,
trainers
=
trainers
)
if
training_role
==
"PSERVER"
:
if
training_role
==
"PSERVER"
:
...
...
python/paddle/fluid/tests/book/test_machine_translation.py
浏览文件 @
8567d042
...
@@ -200,16 +200,16 @@ def train_main(use_cuda, is_sparse, is_local=True):
...
@@ -200,16 +200,16 @@ def train_main(use_cuda, is_sparse, is_local=True):
if
is_local
:
if
is_local
:
train_loop
(
framework
.
default_main_program
())
train_loop
(
framework
.
default_main_program
())
else
:
else
:
port
=
os
.
getenv
(
"PADDLE_
INIT
_PORT"
,
"6174"
)
port
=
os
.
getenv
(
"PADDLE_
PSERVER
_PORT"
,
"6174"
)
pserver_ips
=
os
.
getenv
(
"PADDLE_
INIT_PSERVER
S"
)
# ip,ip...
pserver_ips
=
os
.
getenv
(
"PADDLE_
PSERVER_IP
S"
)
# ip,ip...
eplist
=
[]
eplist
=
[]
for
ip
in
pserver_ips
.
split
(
","
):
for
ip
in
pserver_ips
.
split
(
","
):
eplist
.
append
(
':'
.
join
([
ip
,
port
]))
eplist
.
append
(
':'
.
join
([
ip
,
port
]))
pserver_endpoints
=
","
.
join
(
eplist
)
# ip:port,ip:port...
pserver_endpoints
=
","
.
join
(
eplist
)
# ip:port,ip:port...
trainers
=
int
(
os
.
getenv
(
"TRAINERS"
))
trainers
=
int
(
os
.
getenv
(
"
PADDLE_
TRAINERS"
))
current_endpoint
=
os
.
getenv
(
"POD_IP"
)
+
":"
+
port
current_endpoint
=
os
.
getenv
(
"POD_IP"
)
+
":"
+
port
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_
INIT_
TRAINER_ID"
))
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_TRAINER_ID"
))
training_role
=
os
.
getenv
(
"TRAINING_ROLE"
,
"TRAINER"
)
training_role
=
os
.
getenv
(
"
PADDLE_
TRAINING_ROLE"
,
"TRAINER"
)
t
=
fluid
.
DistributeTranspiler
()
t
=
fluid
.
DistributeTranspiler
()
t
.
transpile
(
trainer_id
,
pservers
=
pserver_endpoints
,
trainers
=
trainers
)
t
.
transpile
(
trainer_id
,
pservers
=
pserver_endpoints
,
trainers
=
trainers
)
if
training_role
==
"PSERVER"
:
if
training_role
==
"PSERVER"
:
...
...
python/paddle/fluid/tests/book/test_recognize_digits.py
浏览文件 @
8567d042
...
@@ -151,16 +151,16 @@ def train(nn_type,
...
@@ -151,16 +151,16 @@ def train(nn_type,
if
is_local
:
if
is_local
:
train_loop
(
fluid
.
default_main_program
())
train_loop
(
fluid
.
default_main_program
())
else
:
else
:
port
=
os
.
getenv
(
"PADDLE_
INIT
_PORT"
,
"6174"
)
port
=
os
.
getenv
(
"PADDLE_
PSERVER
_PORT"
,
"6174"
)
pserver_ips
=
os
.
getenv
(
"PADDLE_
INIT_PSERVER
S"
)
# ip,ip...
pserver_ips
=
os
.
getenv
(
"PADDLE_
PSERVER_IP
S"
)
# ip,ip...
eplist
=
[]
eplist
=
[]
for
ip
in
pserver_ips
.
split
(
","
):
for
ip
in
pserver_ips
.
split
(
","
):
eplist
.
append
(
':'
.
join
([
ip
,
port
]))
eplist
.
append
(
':'
.
join
([
ip
,
port
]))
pserver_endpoints
=
","
.
join
(
eplist
)
# ip:port,ip:port...
pserver_endpoints
=
","
.
join
(
eplist
)
# ip:port,ip:port...
trainers
=
int
(
os
.
getenv
(
"TRAINERS"
))
trainers
=
int
(
os
.
getenv
(
"
PADDLE_
TRAINERS"
))
current_endpoint
=
os
.
getenv
(
"POD_IP"
)
+
":"
+
port
current_endpoint
=
os
.
getenv
(
"POD_IP"
)
+
":"
+
port
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_
INIT_
TRAINER_ID"
))
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_TRAINER_ID"
))
training_role
=
os
.
getenv
(
"TRAINING_ROLE"
,
"TRAINER"
)
training_role
=
os
.
getenv
(
"
PADDLE_
TRAINING_ROLE"
,
"TRAINER"
)
t
=
fluid
.
DistributeTranspiler
()
t
=
fluid
.
DistributeTranspiler
()
t
.
transpile
(
trainer_id
,
pservers
=
pserver_endpoints
,
trainers
=
trainers
)
t
.
transpile
(
trainer_id
,
pservers
=
pserver_endpoints
,
trainers
=
trainers
)
if
training_role
==
"PSERVER"
:
if
training_role
==
"PSERVER"
:
...
...
python/paddle/fluid/tests/book/test_recommender_system.py
浏览文件 @
8567d042
...
@@ -220,16 +220,16 @@ def train(use_cuda, save_dirname, is_local=True):
...
@@ -220,16 +220,16 @@ def train(use_cuda, save_dirname, is_local=True):
if
is_local
:
if
is_local
:
train_loop
(
fluid
.
default_main_program
())
train_loop
(
fluid
.
default_main_program
())
else
:
else
:
port
=
os
.
getenv
(
"PADDLE_
INIT
_PORT"
,
"6174"
)
port
=
os
.
getenv
(
"PADDLE_
PSERVER
_PORT"
,
"6174"
)
pserver_ips
=
os
.
getenv
(
"PADDLE_
INIT_PSERVER
S"
)
# ip,ip...
pserver_ips
=
os
.
getenv
(
"PADDLE_
PSERVER_IP
S"
)
# ip,ip...
eplist
=
[]
eplist
=
[]
for
ip
in
pserver_ips
.
split
(
","
):
for
ip
in
pserver_ips
.
split
(
","
):
eplist
.
append
(
':'
.
join
([
ip
,
port
]))
eplist
.
append
(
':'
.
join
([
ip
,
port
]))
pserver_endpoints
=
","
.
join
(
eplist
)
# ip:port,ip:port...
pserver_endpoints
=
","
.
join
(
eplist
)
# ip:port,ip:port...
trainers
=
int
(
os
.
getenv
(
"TRAINERS"
))
trainers
=
int
(
os
.
getenv
(
"
PADDLE_
TRAINERS"
))
current_endpoint
=
os
.
getenv
(
"POD_IP"
)
+
":"
+
port
current_endpoint
=
os
.
getenv
(
"POD_IP"
)
+
":"
+
port
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_
INIT_
TRAINER_ID"
))
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_TRAINER_ID"
))
training_role
=
os
.
getenv
(
"TRAINING_ROLE"
,
"TRAINER"
)
training_role
=
os
.
getenv
(
"
PADDLE_
TRAINING_ROLE"
,
"TRAINER"
)
t
=
fluid
.
DistributeTranspiler
()
t
=
fluid
.
DistributeTranspiler
()
t
.
transpile
(
trainer_id
,
pservers
=
pserver_endpoints
,
trainers
=
trainers
)
t
.
transpile
(
trainer_id
,
pservers
=
pserver_endpoints
,
trainers
=
trainers
)
if
training_role
==
"PSERVER"
:
if
training_role
==
"PSERVER"
:
...
...
python/paddle/fluid/tests/book/test_word2vec.py
浏览文件 @
8567d042
...
@@ -125,16 +125,16 @@ def train(use_cuda, is_sparse, is_parallel, save_dirname, is_local=True):
...
@@ -125,16 +125,16 @@ def train(use_cuda, is_sparse, is_parallel, save_dirname, is_local=True):
if
is_local
:
if
is_local
:
train_loop
(
fluid
.
default_main_program
())
train_loop
(
fluid
.
default_main_program
())
else
:
else
:
port
=
os
.
getenv
(
"PADDLE_
INIT
_PORT"
,
"6174"
)
port
=
os
.
getenv
(
"PADDLE_
PSERVER
_PORT"
,
"6174"
)
pserver_ips
=
os
.
getenv
(
"PADDLE_
INIT_PSERVER
S"
)
# ip,ip...
pserver_ips
=
os
.
getenv
(
"PADDLE_
PSERVER_IP
S"
)
# ip,ip...
eplist
=
[]
eplist
=
[]
for
ip
in
pserver_ips
.
split
(
","
):
for
ip
in
pserver_ips
.
split
(
","
):
eplist
.
append
(
':'
.
join
([
ip
,
port
]))
eplist
.
append
(
':'
.
join
([
ip
,
port
]))
pserver_endpoints
=
","
.
join
(
eplist
)
# ip:port,ip:port...
pserver_endpoints
=
","
.
join
(
eplist
)
# ip:port,ip:port...
trainers
=
int
(
os
.
getenv
(
"TRAINERS"
))
trainers
=
int
(
os
.
getenv
(
"
PADDLE_
TRAINERS"
))
current_endpoint
=
os
.
getenv
(
"POD_IP"
)
+
":"
+
port
current_endpoint
=
os
.
getenv
(
"POD_IP"
)
+
":"
+
port
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_
INIT_
TRAINER_ID"
))
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_TRAINER_ID"
))
training_role
=
os
.
getenv
(
"TRAINING_ROLE"
,
"TRAINER"
)
training_role
=
os
.
getenv
(
"
PADDLE_
TRAINING_ROLE"
,
"TRAINER"
)
t
=
fluid
.
DistributeTranspiler
()
t
=
fluid
.
DistributeTranspiler
()
t
.
transpile
(
trainer_id
,
pservers
=
pserver_endpoints
,
trainers
=
trainers
)
t
.
transpile
(
trainer_id
,
pservers
=
pserver_endpoints
,
trainers
=
trainers
)
if
training_role
==
"PSERVER"
:
if
training_role
==
"PSERVER"
:
...
...
python/paddle/fluid/tests/unittests/test_concat_op.py
浏览文件 @
8567d042
...
@@ -43,7 +43,7 @@ class TestConcatOp(OpTest):
...
@@ -43,7 +43,7 @@ class TestConcatOp(OpTest):
self
.
axis
=
1
self
.
axis
=
1
class
TestConcatOp2
(
OpTest
):
class
TestConcatOp2
(
TestConcatOp
):
def
init_test_data
(
self
):
def
init_test_data
(
self
):
self
.
x0
=
np
.
random
.
random
((
2
,
3
,
4
,
5
)).
astype
(
'float32'
)
self
.
x0
=
np
.
random
.
random
((
2
,
3
,
4
,
5
)).
astype
(
'float32'
)
self
.
x1
=
np
.
random
.
random
((
2
,
3
,
4
,
5
)).
astype
(
'float32'
)
self
.
x1
=
np
.
random
.
random
((
2
,
3
,
4
,
5
)).
astype
(
'float32'
)
...
@@ -51,5 +51,16 @@ class TestConcatOp2(OpTest):
...
@@ -51,5 +51,16 @@ class TestConcatOp2(OpTest):
self
.
axis
=
1
self
.
axis
=
1
class
TestConcatOp3
(
TestConcatOp
):
def
init_test_data
(
self
):
self
.
x0
=
np
.
random
.
random
((
1
,
256
,
170
,
256
)).
astype
(
'float32'
)
self
.
x1
=
np
.
random
.
random
((
1
,
128
,
170
,
256
)).
astype
(
'float32'
)
self
.
x2
=
np
.
random
.
random
((
1
,
128
,
170
,
256
)).
astype
(
'float32'
)
self
.
axis
=
1
def
test_check_grad
(
self
):
pass
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
unittest
.
main
()
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_gaussian_random_mkldnn_op.py
0 → 100644
浏览文件 @
8567d042
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
unittest
from
test_gaussian_random_op
import
TestGaussianRandomOp
class
TestMKLDNN
(
TestGaussianRandomOp
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_gaussian_random_op.py
浏览文件 @
8567d042
...
@@ -25,7 +25,15 @@ class TestGaussianRandomOp(unittest.TestCase):
...
@@ -25,7 +25,15 @@ class TestGaussianRandomOp(unittest.TestCase):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
op_type
=
"gaussian_random"
self
.
op_type
=
"gaussian_random"
self
.
inputs
=
{}
self
.
inputs
=
{}
self
.
attrs
=
{
"shape"
:
[
1000
,
784
],
"mean"
:
.
0
,
"std"
:
1.
,
"seed"
:
10
}
self
.
use_mkldnn
=
False
self
.
init_kernel_type
()
self
.
attrs
=
{
"shape"
:
[
1000
,
784
],
"mean"
:
.
0
,
"std"
:
1.
,
"seed"
:
10
,
"use_mkldnn"
:
self
.
use_mkldnn
}
self
.
outputs
=
[
"Out"
]
self
.
outputs
=
[
"Out"
]
...
@@ -58,6 +66,9 @@ class TestGaussianRandomOp(unittest.TestCase):
...
@@ -58,6 +66,9 @@ class TestGaussianRandomOp(unittest.TestCase):
self
.
assertAlmostEqual
(
numpy
.
mean
(
tensor
),
.
0
,
delta
=
0.1
)
self
.
assertAlmostEqual
(
numpy
.
mean
(
tensor
),
.
0
,
delta
=
0.1
)
self
.
assertAlmostEqual
(
numpy
.
std
(
tensor
),
1.
,
delta
=
0.1
)
self
.
assertAlmostEqual
(
numpy
.
std
(
tensor
),
1.
,
delta
=
0.1
)
def
init_kernel_type
(
self
):
pass
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
unittest
.
main
()
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_layers.py
浏览文件 @
8567d042
...
@@ -401,6 +401,15 @@ class TestBook(unittest.TestCase):
...
@@ -401,6 +401,15 @@ class TestBook(unittest.TestCase):
self
.
assertIsNotNone
(
output
)
self
.
assertIsNotNone
(
output
)
print
(
str
(
program
))
print
(
str
(
program
))
def
test_maxout
(
self
):
program
=
Program
()
with
program_guard
(
program
):
x
=
layers
.
data
(
name
=
'x'
,
shape
=
[
3
,
5
],
dtype
=
"float32"
)
y
=
layers
.
data
(
name
=
'y'
,
shape
=
[
2
,
3
],
dtype
=
"float32"
)
output
=
layers
.
crop
(
x
,
shape
=
y
)
self
.
assertIsNotNone
(
output
)
print
(
str
(
program
))
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
unittest
.
main
()
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_optimizer.py
浏览文件 @
8567d042
...
@@ -434,5 +434,71 @@ class TestDecayedAdagradOptimizer(unittest.TestCase):
...
@@ -434,5 +434,71 @@ class TestDecayedAdagradOptimizer(unittest.TestCase):
self
.
assertAlmostEqual
(
init_ops
[
1
].
attr
(
'value'
),
0.0
)
self
.
assertAlmostEqual
(
init_ops
[
1
].
attr
(
'value'
),
0.0
)
class
TestFtrlOptimizer
(
unittest
.
TestCase
):
class
MockFtrl
(
optimizer
.
FtrlOptimizer
):
def
get_accumulators
(
self
):
return
self
.
_accumulators
def
get_squared_str
(
self
):
return
self
.
_squared_acc_str
def
get_linear_str
(
self
):
return
self
.
_linear_acc_str
def
test_ftrl_optimizer
(
self
):
init_program
=
framework
.
Program
()
program
=
framework
.
Program
()
block
=
program
.
global_block
()
mul_x
=
block
.
create_parameter
(
dtype
=
"float32"
,
shape
=
[
5
,
10
],
lod_level
=
0
,
name
=
"mul.x"
,
optimize_attr
=
{
'learning_rate'
:
1.1
})
mul_y
=
block
.
create_var
(
dtype
=
"float32"
,
shape
=
[
10
,
8
],
lod_level
=
0
,
name
=
"mul.y"
)
mul_out
=
block
.
create_var
(
dtype
=
"float32"
,
shape
=
[
5
,
8
],
lod_level
=
0
,
name
=
"mul.out"
)
block
.
append_op
(
type
=
"mul"
,
inputs
=
{
"X"
:
mul_x
,
"Y"
:
mul_y
},
outputs
=
{
"Out"
:
mul_out
},
attrs
=
{
"x_num_col_dims"
:
1
})
mean_out
=
block
.
create_var
(
dtype
=
"float32"
,
shape
=
[
1
],
lod_level
=
0
,
name
=
"mean.out"
)
block
.
append_op
(
type
=
"mean"
,
inputs
=
{
"X"
:
mul_out
},
outputs
=
{
"Out"
:
mean_out
})
learning_rate
=
0.01
ftrl_optimizer
=
self
.
MockFtrl
(
learning_rate
=
learning_rate
,
l1
=
0.0
,
l2
=
0.0
,
lr_power
=-
0.5
)
params_grads
=
append_backward
(
mean_out
)
self
.
assertEqual
(
len
(
params_grads
),
1
)
self
.
assertEqual
(
len
(
ftrl_optimizer
.
get_accumulators
()),
0
)
opts
=
ftrl_optimizer
.
create_optimization_pass
(
params_grads
,
mul_out
,
init_program
)
self
.
assertEqual
(
len
(
opts
),
3
)
self
.
assertEqual
([
op
.
type
for
op
in
opts
],
[
"fill_constant"
,
"elementwise_mul"
,
"ftrl"
])
# Check accumulators
accumulators
=
ftrl_optimizer
.
get_accumulators
()
self
.
assertEqual
(
len
(
accumulators
),
2
)
self
.
assertTrue
(
ftrl_optimizer
.
get_squared_str
()
in
accumulators
)
self
.
assertTrue
(
ftrl_optimizer
.
get_linear_str
()
in
accumulators
)
squared_acc
=
accumulators
[
ftrl_optimizer
.
get_squared_str
()]
linear_acc
=
accumulators
[
ftrl_optimizer
.
get_linear_str
()]
self
.
assertEqual
(
len
(
squared_acc
),
1
)
self
.
assertEqual
(
len
(
linear_acc
),
1
)
self
.
assertTrue
(
mul_x
.
name
in
squared_acc
)
self
.
assertTrue
(
mul_x
.
name
in
linear_acc
)
# Check init_program
init_ops
=
init_program
.
global_block
().
ops
self
.
assertEqual
(
len
(
init_ops
),
3
)
self
.
assertEqual
(
init_ops
[
0
].
type
,
"fill_constant"
)
self
.
assertAlmostEqual
(
init_ops
[
0
].
attr
(
'value'
),
learning_rate
)
if
__name__
==
'__main__'
:
if
__name__
==
'__main__'
:
unittest
.
main
()
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_sum_mkldnn_op.py
0 → 100644
浏览文件 @
8567d042
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import
unittest
from
test_sum_op
import
TestSumOp
class
TestMKLDNN
(
TestSumOp
):
def
init_kernel_type
(
self
):
self
.
use_mkldnn
=
True
if
__name__
==
'__main__'
:
unittest
.
main
()
python/paddle/fluid/tests/unittests/test_sum_op.py
浏览文件 @
8567d042
...
@@ -20,12 +20,15 @@ from op_test import OpTest
...
@@ -20,12 +20,15 @@ from op_test import OpTest
class
TestSumOp
(
OpTest
):
class
TestSumOp
(
OpTest
):
def
setUp
(
self
):
def
setUp
(
self
):
self
.
op_type
=
"sum"
self
.
op_type
=
"sum"
self
.
use_mkldnn
=
False
self
.
init_kernel_type
()
x0
=
np
.
random
.
random
((
3
,
4
)).
astype
(
'float32'
)
x0
=
np
.
random
.
random
((
3
,
4
)).
astype
(
'float32'
)
x1
=
np
.
random
.
random
((
3
,
4
)).
astype
(
'float32'
)
x1
=
np
.
random
.
random
((
3
,
4
)).
astype
(
'float32'
)
x2
=
np
.
random
.
random
((
3
,
4
)).
astype
(
'float32'
)
x2
=
np
.
random
.
random
((
3
,
4
)).
astype
(
'float32'
)
self
.
inputs
=
{
"X"
:
[(
"x0"
,
x0
),
(
"x1"
,
x1
),
(
"x2"
,
x2
)]}
self
.
inputs
=
{
"X"
:
[(
"x0"
,
x0
),
(
"x1"
,
x1
),
(
"x2"
,
x2
)]}
y
=
x0
+
x1
+
x2
y
=
x0
+
x1
+
x2
self
.
outputs
=
{
'Out'
:
y
}
self
.
outputs
=
{
'Out'
:
y
}
self
.
attrs
=
{
'use_mkldnn'
:
self
.
use_mkldnn
}
def
test_check_output
(
self
):
def
test_check_output
(
self
):
self
.
check_output
()
self
.
check_output
()
...
@@ -33,6 +36,9 @@ class TestSumOp(OpTest):
...
@@ -33,6 +36,9 @@ class TestSumOp(OpTest):
def
test_check_grad
(
self
):
def
test_check_grad
(
self
):
self
.
check_grad
([
'x0'
],
'Out'
)
self
.
check_grad
([
'x0'
],
'Out'
)
def
init_kernel_type
(
self
):
pass
if
__name__
==
"__main__"
:
if
__name__
==
"__main__"
:
unittest
.
main
()
unittest
.
main
()
python/paddle/fluid/transpiler/distribute_transpiler.py
浏览文件 @
8567d042
...
@@ -824,7 +824,8 @@ class DistributeTranspiler:
...
@@ -824,7 +824,8 @@ class DistributeTranspiler:
table_opt_block
.
append_op
(
table_opt_block
.
append_op
(
type
=
"sum"
,
type
=
"sum"
,
inputs
=
{
"X"
:
pserver_side_table_grad_list
},
inputs
=
{
"X"
:
pserver_side_table_grad_list
},
outputs
=
{
"Out"
:
[
grad_var
]})
outputs
=
{
"Out"
:
[
grad_var
]},
attrs
=
{
"use_mkldnn"
:
False
})
else
:
else
:
# in async_mode, for table gradient, it also need to be splited to each parameter server
# in async_mode, for table gradient, it also need to be splited to each parameter server
origin_grad_name
=
grad_var
.
name
origin_grad_name
=
grad_var
.
name
...
@@ -1056,7 +1057,8 @@ class DistributeTranspiler:
...
@@ -1056,7 +1057,8 @@ class DistributeTranspiler:
optimize_block
.
append_op
(
optimize_block
.
append_op
(
type
=
"sum"
,
type
=
"sum"
,
inputs
=
{
"X"
:
vars2merge
},
inputs
=
{
"X"
:
vars2merge
},
outputs
=
{
"Out"
:
merged_var
})
outputs
=
{
"Out"
:
merged_var
},
attrs
=
{
"use_mkldnn"
:
False
})
# TODO(panyx0718): What if it's SELECTED_ROWS.
# TODO(panyx0718): What if it's SELECTED_ROWS.
if
not
merged_var
.
type
==
core
.
VarDesc
.
VarType
.
SELECTED_ROWS
:
if
not
merged_var
.
type
==
core
.
VarDesc
.
VarType
.
SELECTED_ROWS
:
optimize_block
.
append_op
(
optimize_block
.
append_op
(
...
...
python/paddle/reader/decorator.py
浏览文件 @
8567d042
...
@@ -336,7 +336,7 @@ def _buf2lines(buf, line_break="\n"):
...
@@ -336,7 +336,7 @@ def _buf2lines(buf, line_break="\n"):
class
PipeReader
:
class
PipeReader
:
"""
"""
PipeReader read data by stream from a command, take it's
PipeReader read data by stream from a command, take it's
stdout into a pipe buffer and redirect it to the parser to
stdout into a pipe buffer and redirect it to the parser to
parse, then yield data as your desired format.
parse, then yield data as your desired format.
...
@@ -352,7 +352,7 @@ class PipeReader:
...
@@ -352,7 +352,7 @@ class PipeReader:
An example:
An example:
.. code-block:: python
.. code-block:: python
def example_reader():
def example_reader():
for f in myfiles:
for f in myfiles:
pr = PipeReader("cat %s"%f)
pr = PipeReader("cat %s"%f)
...
...
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录