Skip to content
体验新版
项目
组织
正在加载...
登录
切换导航
打开侧边栏
BaiXuePrincess
Paddle
提交
e02cbf35
P
Paddle
项目概览
BaiXuePrincess
/
Paddle
与 Fork 源项目一致
Fork自
PaddlePaddle / Paddle
通知
1
Star
1
Fork
0
代码
文件
提交
分支
Tags
贡献者
分支图
Diff
Issue
0
列表
看板
标记
里程碑
合并请求
0
Wiki
0
Wiki
分析
仓库
DevOps
项目成员
Pages
P
Paddle
项目概览
项目概览
详情
发布
仓库
仓库
文件
提交
分支
标签
贡献者
分支图
比较
Issue
0
Issue
0
列表
看板
标记
里程碑
合并请求
0
合并请求
0
Pages
分析
分析
仓库分析
DevOps
Wiki
0
Wiki
成员
成员
收起侧边栏
关闭侧边栏
动态
分支图
创建新Issue
提交
Issue看板
提交
e02cbf35
编写于
6月 20, 2018
作者:
Y
Yancey1989
浏览文件
操作
浏览文件
下载
差异文件
Merge branch 'develop' of github.com:PaddlePaddle/Paddle into fix_pserver_sub_blocks
上级
5c7d6a55
6e1c48d1
变更
63
隐藏空白更改
内联
并排
Showing
63 changed file
with
2726 addition
and
757 deletion
+2726
-757
benchmark/fluid/Dockerfile
benchmark/fluid/Dockerfile
+13
-4
benchmark/fluid/fluid_benchmark.py
benchmark/fluid/fluid_benchmark.py
+10
-3
benchmark/fluid/kube_gen_job.py
benchmark/fluid/kube_gen_job.py
+13
-7
doc/fluid/api/gen_doc.sh
doc/fluid/api/gen_doc.sh
+1
-1
doc/fluid/api/transpiler.rst
doc/fluid/api/transpiler.rst
+46
-0
doc/fluid/howto/cluster/fluid_cluster_train_cn.md
doc/fluid/howto/cluster/fluid_cluster_train_cn.md
+2
-2
doc/fluid/howto/cluster/fluid_recordio.md
doc/fluid/howto/cluster/fluid_recordio.md
+2
-2
doc/fluid/howto/inference/build_and_install_lib_cn.rst
doc/fluid/howto/inference/build_and_install_lib_cn.rst
+1
-0
paddle/contrib/inference/demo/simple_on_word2vec.cc
paddle/contrib/inference/demo/simple_on_word2vec.cc
+9
-13
paddle/contrib/inference/paddle_inference_api.cc
paddle/contrib/inference/paddle_inference_api.cc
+50
-0
paddle/contrib/inference/paddle_inference_api.h
paddle/contrib/inference/paddle_inference_api.h
+33
-5
paddle/contrib/inference/paddle_inference_api_anakin_engine.cc
...e/contrib/inference/paddle_inference_api_anakin_engine.cc
+5
-2
paddle/contrib/inference/paddle_inference_api_anakin_engine_tester.cc
...ib/inference/paddle_inference_api_anakin_engine_tester.cc
+7
-9
paddle/contrib/inference/paddle_inference_api_impl.cc
paddle/contrib/inference/paddle_inference_api_impl.cc
+7
-6
paddle/contrib/inference/test_paddle_inference_api_impl.cc
paddle/contrib/inference/test_paddle_inference_api_impl.cc
+10
-16
paddle/fluid/framework/executor.cc
paddle/fluid/framework/executor.cc
+13
-4
paddle/fluid/framework/executor.h
paddle/fluid/framework/executor.h
+1
-1
paddle/fluid/inference/analysis/tensorrt_subgraph_pass.cc
paddle/fluid/inference/analysis/tensorrt_subgraph_pass.cc
+3
-3
paddle/fluid/operators/activation_op.cc
paddle/fluid/operators/activation_op.cc
+2
-2
paddle/fluid/operators/batch_norm_mkldnn_op.cc
paddle/fluid/operators/batch_norm_mkldnn_op.cc
+0
-14
paddle/fluid/operators/batch_norm_op.cc
paddle/fluid/operators/batch_norm_op.cc
+0
-16
paddle/fluid/operators/batch_norm_op.h
paddle/fluid/operators/batch_norm_op.h
+16
-0
paddle/fluid/operators/bilinear_interp_op.cc
paddle/fluid/operators/bilinear_interp_op.cc
+2
-1
paddle/fluid/operators/bilinear_interp_op.h
paddle/fluid/operators/bilinear_interp_op.h
+24
-18
paddle/fluid/operators/logical_op.cc
paddle/fluid/operators/logical_op.cc
+1
-1
paddle/fluid/operators/math/concat.cu
paddle/fluid/operators/math/concat.cu
+1
-1
paddle/fluid/operators/math/math_function.cc
paddle/fluid/operators/math/math_function.cc
+1
-0
paddle/fluid/operators/tensorrt_engine_op.cc
paddle/fluid/operators/tensorrt_engine_op.cc
+4
-1
paddle/fluid/operators/tensorrt_engine_op.h
paddle/fluid/operators/tensorrt_engine_op.h
+3
-1
paddle/fluid/operators/tensorrt_engine_op_test.cc
paddle/fluid/operators/tensorrt_engine_op_test.cc
+0
-1
paddle/fluid/pybind/pybind.cc
paddle/fluid/pybind/pybind.cc
+6
-0
paddle/fluid/pybind/tensor_py.h
paddle/fluid/pybind/tensor_py.h
+1
-1
python/paddle/fluid/__init__.py
python/paddle/fluid/__init__.py
+2
-1
python/paddle/fluid/average.py
python/paddle/fluid/average.py
+19
-0
python/paddle/fluid/backward.py
python/paddle/fluid/backward.py
+59
-12
python/paddle/fluid/data_feeder.py
python/paddle/fluid/data_feeder.py
+107
-1
python/paddle/fluid/executor.py
python/paddle/fluid/executor.py
+27
-3
python/paddle/fluid/framework.py
python/paddle/fluid/framework.py
+614
-166
python/paddle/fluid/io.py
python/paddle/fluid/io.py
+568
-100
python/paddle/fluid/layers/control_flow.py
python/paddle/fluid/layers/control_flow.py
+30
-3
python/paddle/fluid/layers/nn.py
python/paddle/fluid/layers/nn.py
+24
-7
python/paddle/fluid/lod_tensor.py
python/paddle/fluid/lod_tensor.py
+48
-33
python/paddle/fluid/metrics.py
python/paddle/fluid/metrics.py
+7
-7
python/paddle/fluid/nets.py
python/paddle/fluid/nets.py
+198
-29
python/paddle/fluid/parallel_executor.py
python/paddle/fluid/parallel_executor.py
+76
-40
python/paddle/fluid/param_attr.py
python/paddle/fluid/param_attr.py
+102
-4
python/paddle/fluid/recordio_writer.py
python/paddle/fluid/recordio_writer.py
+50
-0
python/paddle/fluid/tests/book/notest_understand_sentiment.py
...on/paddle/fluid/tests/book/notest_understand_sentiment.py
+5
-5
python/paddle/fluid/tests/book/test_fit_a_line.py
python/paddle/fluid/tests/book/test_fit_a_line.py
+5
-5
python/paddle/fluid/tests/book/test_image_classification.py
python/paddle/fluid/tests/book/test_image_classification.py
+5
-5
python/paddle/fluid/tests/book/test_label_semantic_roles.py
python/paddle/fluid/tests/book/test_label_semantic_roles.py
+5
-5
python/paddle/fluid/tests/book/test_machine_translation.py
python/paddle/fluid/tests/book/test_machine_translation.py
+5
-5
python/paddle/fluid/tests/book/test_recognize_digits.py
python/paddle/fluid/tests/book/test_recognize_digits.py
+5
-5
python/paddle/fluid/tests/book/test_recommender_system.py
python/paddle/fluid/tests/book/test_recommender_system.py
+5
-5
python/paddle/fluid/tests/book/test_word2vec.py
python/paddle/fluid/tests/book/test_word2vec.py
+5
-5
python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py
...n/paddle/fluid/tests/unittests/test_bilinear_interp_op.py
+43
-3
python/paddle/fluid/trainer.py
python/paddle/fluid/trainer.py
+133
-15
python/paddle/fluid/transpiler/distribute_transpiler.py
python/paddle/fluid/transpiler/distribute_transpiler.py
+196
-153
python/paddle/fluid/transpiler/memory_optimization_transpiler.py
...paddle/fluid/transpiler/memory_optimization_transpiler.py
+10
-0
python/paddle/fluid/transpiler/ps_dispatcher.py
python/paddle/fluid/transpiler/ps_dispatcher.py
+14
-4
python/paddle/fluid/unique_name.py
python/paddle/fluid/unique_name.py
+1
-1
tools/codestyle/docstring_checker.py
tools/codestyle/docstring_checker.py
+4
-0
tools/print_signatures.py
tools/print_signatures.py
+67
-0
未找到文件。
benchmark/fluid/Dockerfile
浏览文件 @
e02cbf35
FROM
nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04
# Use UBUNTU_MIRROR can speed up apt-get speed.
# ARG UBUNTU_MIRROR
# RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com/ubuntu#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi'
RUN
apt-get update
&&
apt-get
install
-y
python python-pip iputils-ping libgtk2.0-dev wget vim net-tools iftop python-opencv
RUN
ln
-s
/usr/lib/x86_64-linux-gnu/libcudnn.so.7 /usr/lib/libcudnn.so
&&
ln
-s
/usr/lib/x86_64-linux-gnu/libnccl.so.2 /usr/lib/libnccl.so
RUN
pip
install
-U
pip
RUN
pip
install
-U
kubernetes paddlepaddle
# IMPORTANT:
# Add "ENV http_proxy=http://ip:port" if your download is slow, and don't forget to unset it at runtime.
# example: unset http_proxy && unset https_proxy && python fluid_benchmark.py ...
RUN
pip
install
-U
pip
RUN
pip
install
-U
kubernetes paddlepaddle
RUN
sh
-c
'echo "import paddle.v2 as paddle\npaddle.dataset.cifar.train10()\npaddle.dataset.flowers.fetch()" | python'
RUN
sh
-c
'echo "import paddle.v2 as paddle\npaddle.dataset.mnist.train()\npaddle.dataset.mnist.test()\npaddle.dataset.imdb.fetch()" | python'
...
...
@@ -14,9 +21,11 @@ RUN pip uninstall -y paddlepaddle && mkdir /workspace
ADD
https://raw.githubusercontent.com/PaddlePaddle/cloud/develop/docker/paddle_k8s /usr/bin
ADD
https://raw.githubusercontent.com/PaddlePaddle/cloud/develop/docker/k8s_tools.py /root
RUN
chmod
+x /usr/bin/paddle_k8s
ADD
*.whl /
RUN
pip
install
/
*
.whl
&&
rm
-f
/
*
.whl
&&
chmod
+x /usr/bin/paddle_k8s
RUN
pip
install
/
*
.whl
&&
rm
-f
/
*
.whl
ENV
LD_LIBRARY_PATH=/usr/local/lib
ADD
fluid_benchmark.py recordio_converter.py models/ /workspace/
ADD
fluid_benchmark.py recordio_converter.py args.py recordio_converter.py run.sh run_fluid_benchmark.sh /workspace/
ADD
models/ /workspace/models/
benchmark/fluid/fluid_benchmark.py
浏览文件 @
e02cbf35
...
...
@@ -97,7 +97,7 @@ def dist_transpile(trainer_id, args):
return
train_program
,
fluid
.
default_startup_program
()
else
:
raise
ValueError
(
'TRAINING_ROLE environment variable must be either TRAINER or PSERVER'
'
PADDLE_
TRAINING_ROLE environment variable must be either TRAINER or PSERVER'
)
...
...
@@ -264,8 +264,6 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader,
break
else
:
loss
,
=
exe
.
run
([
avg_loss
.
name
],
feed
=
feeder
.
feed
(
data
))
if
args
.
update_method
==
"pserver"
:
exe
.
bcast_params
()
if
args
.
use_reader_op
:
num_samples
+=
args
.
batch_size
*
args
.
gpus
else
:
...
...
@@ -301,9 +299,18 @@ def print_train_time(start_time, end_time, num_samples):
(
num_samples
,
train_elapsed
,
examples_per_sec
))
def print_paddle_envs():
    """Print every environment variable whose name contains ``PADDLE_``.

    Each match is written to stdout as ``ENV <name>:<value>``, framed by a
    header banner and a footer banner. Returns nothing.
    """
    print('----------- Configuration envs -----------')
    for k in os.environ:
        if "PADDLE_" in k:
            # Call form of print, matching the banner lines: the statement
            # form (`print "..." % ...`) is a SyntaxError on Python 3, while
            # the single-argument call form prints the same text on Python 2.
            print("ENV %s:%s" % (k, os.environ[k]))
    print('------------------------------------------------')
def
main
():
args
=
parse_args
()
print_arguments
(
args
)
print_paddle_envs
()
# the unique trainer id, starting from 0, needed by trainer
# only
...
...
benchmark/fluid/kube_gen_job.py
浏览文件 @
e02cbf35
...
...
@@ -17,6 +17,7 @@ import copy
import
argparse
import
random
import
os
import
copy
from
kube_templates
import
pserver
,
trainer
,
envs
...
...
@@ -108,10 +109,9 @@ def gen_job():
tn_container
[
"ports"
][
0
][
"containerPort"
]
=
spreadport
envs
.
append
({
"name"
:
"PADDLE_JOB_NAME"
,
"value"
:
args
.
jobname
})
envs
.
append
({
"name"
:
"TRAINERS"
,
"value"
:
str
(
args
.
trainers
)})
envs
.
append
({
"name"
:
"PSERVERS"
,
"value"
:
str
(
args
.
pservers
)})
envs
.
append
({
"name"
:
"
PADDLE_
TRAINERS"
,
"value"
:
str
(
args
.
trainers
)})
envs
.
append
({
"name"
:
"P
ADDLE_P
SERVERS"
,
"value"
:
str
(
args
.
pservers
)})
envs
.
append
({
"name"
:
"ENTRY"
,
"value"
:
args
.
entry
})
envs
.
append
({
"name"
:
"PADDLE_INIT_PORT"
,
"value"
:
str
(
args
.
port
)})
envs
.
append
({
"name"
:
"PADDLE_PSERVER_PORT"
,
"value"
:
str
(
args
.
port
)})
# NOTE: these directories below are cluster specific, please modify
# this settings before you run on your own cluster.
...
...
@@ -166,17 +166,23 @@ def gen_job():
tn
[
"spec"
][
"template"
][
"spec"
][
"volumes"
]
=
volumes
tn_container
[
"volumeMounts"
]
=
volumeMounts
ps_container
[
"env"
]
=
envs
ps_container
[
"env"
].
append
({
"name"
:
"TRAINING_ROLE"
,
"value"
:
"PSERVER"
})
ps_container
[
"env"
]
=
copy
.
deepcopy
(
envs
)
ps_container
[
"env"
].
append
({
"name"
:
"PADDLE_TRAINING_ROLE"
,
"value"
:
"PSERVER"
})
tn_container
[
"env"
]
=
envs
if
args
.
disttype
==
"pserver"
:
tn_container
[
"env"
].
append
({
"name"
:
"TRAINING_ROLE"
,
"name"
:
"
PADDLE_
TRAINING_ROLE"
,
"value"
:
"TRAINER"
})
elif
args
.
disttype
==
"nccl2"
or
args
.
disttype
==
"local"
:
# NCCL2 have no training role, set to plain WORKER
tn_container
[
"env"
].
append
({
"name"
:
"TRAINING_ROLE"
,
"value"
:
"WORKER"
})
tn_container
[
"env"
].
append
({
"name"
:
"PADDLE_TRAINING_ROLE"
,
"value"
:
"WORKER"
})
os
.
mkdir
(
args
.
jobname
)
if
args
.
disttype
==
"pserver"
:
...
...
doc/fluid/api/gen_doc.sh
浏览文件 @
e02cbf35
#!/bin/bash
python gen_doc.py layers
--submodules
control_flow device io nn ops tensor detection learning_rate_scheduler metric
>
layers.rst
for
module
in
data_feeder clip metrics executor initializer io nets optimizer param_attr profiler regularizer
for
module
in
data_feeder clip metrics executor initializer io nets optimizer param_attr profiler regularizer
transpiler
do
python gen_doc.py
${
module
}
>
${
module
}
.rst
done
doc/fluid/api/transpiler.rst
0 → 100644
浏览文件 @
e02cbf35
.. THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
!DO NOT EDIT THIS FILE MANUALLY!
==========
transpiler
==========
DistributeTranspiler
--------------------
.. autoclass:: paddle.fluid.transpiler.DistributeTranspiler
:members:
:noindex:
InferenceTranspiler
-------------------
.. autoclass:: paddle.fluid.transpiler.InferenceTranspiler
:members:
:noindex:
memory_optimize
---------------
.. autofunction:: paddle.fluid.transpiler.memory_optimize
:noindex:
release_memory
--------------
.. autofunction:: paddle.fluid.transpiler.release_memory
:noindex:
HashName
--------
.. autoclass:: paddle.fluid.transpiler.HashName
:members:
:noindex:
RoundRobin
----------
.. autoclass:: paddle.fluid.transpiler.RoundRobin
:members:
:noindex:
doc/fluid/howto/cluster/fluid_cluster_train_cn.md
浏览文件 @
e02cbf35
...
...
@@ -168,13 +168,13 @@ cd /paddle/python/paddle/fluid/tests/book
第二步,启动Parameter Server:
```
bash
PADDLE_
INIT_PORT
=
6174
PADDLE_INIT_PSERVERS
=
192.168.1.2
TRAINERS
=
2
POD_IP
=
192.168.1.2
PADDLE_INIT_TRAINER_ID
=
1
TRAINING_ROLE
=
PSERVER python test_fit_a_line.py
PADDLE_
PSERVER_PORT
=
6174
PADDLE_PSERVER_IPS
=
192.168.1.2
PADDLE_TRAINERS
=
2
PADDLE_CURRENT_IP
=
192.168.1.2
PADDLE_TRAINER_ID
=
1
PADDLE_
TRAINING_ROLE
=
PSERVER python test_fit_a_line.py
```
执行命令后请等待出现提示:
```Server listening on 192.168.1.2:6174 ```
, 表示Parameter Server已经正常启动。
第三步,启动Trainer:
```
bash
PADDLE_
INIT_PORT
=
6174
PADDLE_INIT_PSERVERS
=
192.168.1.3
TRAINERS
=
2
POD_IP
=
192.168.1.3
PADDLE_INIT_TRAINER_ID
=
1
TRAINING_ROLE
=
TRAINER python test_fit_a_line.py
PADDLE_
PSERVER_PORT
=
6174
PADDLE_PSERVER_IPS
=
192.168.1.3
PADDLE_TRAINERS
=
2
PADDLE_CURRENT_IP
=
192.168.1.3
PADDLE_TRAINER_ID
=
1
PADDLE_
TRAINING_ROLE
=
TRAINER python test_fit_a_line.py
```
由于我们定义的Trainer的数量是2个,因此需要在另外一个计算节点上再启动一个Trainer。
...
...
doc/fluid/howto/cluster/fluid_recordio.md
浏览文件 @
e02cbf35
...
...
@@ -114,8 +114,8 @@ def gen_train_list(file_pattern, trainers, trainer_id):
ret_list
.
append
(
f
)
return
ret_list
trainers
=
int
(
os
.
getenv
(
"TRAINERS"
))
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_
INIT_
TRAINER_ID"
))
trainers
=
int
(
os
.
getenv
(
"
PADDLE_
TRAINERS"
))
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_TRAINER_ID"
))
data_file
=
fluid
.
layers
.
io
.
open_files
(
filenames
=
gen_train_list
(
"./mnist-[0-9]*.recordio"
,
2
,
0
),
thread_num
=
1
,
...
...
doc/fluid/howto/inference/build_and_install_lib_cn.rst
浏览文件 @
e02cbf35
...
...
@@ -13,6 +13,7 @@ cpu_noavx_openblas `fluid.tgz <https://guest:@paddleci.ngrok.io/repository
cuda7.5_cudnn5_avx_mkl `fluid.tgz <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda75cudnn5cp27cp27mu/.lastSuccessful/fluid.tgz>`_
cuda8.0_cudnn5_avx_mkl `fluid.tgz <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/fluid.tgz>`_
cuda8.0_cudnn7_avx_mkl `fluid.tgz <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/fluid.tgz>`_
cuda9.0_cudnn7_avx_mkl `fluid.tgz <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda90cudnn7avxMkl/.lastSuccessful/fluid.tgz>`_
====================== ========================================
从源码编译
...
...
paddle/contrib/inference/demo/simple_on_word2vec.cc
浏览文件 @
e02cbf35
...
...
@@ -40,10 +40,9 @@ void Main(bool use_gpu) {
//# 2. Prepare input.
int64_t
data
[
4
]
=
{
1
,
2
,
3
,
4
};
PaddleBuf
buf
{.
data
=
data
,
.
length
=
sizeof
(
data
)};
PaddleTensor
tensor
{.
name
=
""
,
.
shape
=
std
::
vector
<
int
>
({
4
,
1
}),
.
data
=
buf
,
.
data
=
PaddleBuf
(
data
,
sizeof
(
data
))
,
.
dtype
=
PaddleDType
::
INT64
};
// For simplicity, we set all the slots with the same data.
...
...
@@ -55,14 +54,12 @@ void Main(bool use_gpu) {
//# 4. Get output.
ASSERT_EQ
(
outputs
.
size
(),
1UL
);
LOG
(
INFO
)
<<
"output buffer size: "
<<
outputs
.
front
().
data
.
length
;
const
size_t
num_elements
=
outputs
.
front
().
data
.
length
/
sizeof
(
float
);
LOG
(
INFO
)
<<
"output buffer size: "
<<
outputs
.
front
().
data
.
length
()
;
const
size_t
num_elements
=
outputs
.
front
().
data
.
length
()
/
sizeof
(
float
);
// The outputs' buffers are in CPU memory.
for
(
size_t
i
=
0
;
i
<
std
::
min
(
5UL
,
num_elements
);
i
++
)
{
LOG
(
INFO
)
<<
static_cast
<
float
*>
(
outputs
.
front
().
data
.
data
)[
i
];
LOG
(
INFO
)
<<
static_cast
<
float
*>
(
outputs
.
front
().
data
.
data
()
)[
i
];
}
// TODO(Superjomn): this is should be free automatically
free
(
outputs
[
0
].
data
.
data
);
}
}
...
...
@@ -86,10 +83,9 @@ void MainThreads(int num_threads, bool use_gpu) {
for
(
int
batch_id
=
0
;
batch_id
<
num_batches
;
++
batch_id
)
{
// 2. Dummy Input Data
int64_t
data
[
4
]
=
{
1
,
2
,
3
,
4
};
PaddleBuf
buf
{.
data
=
data
,
.
length
=
sizeof
(
data
)};
PaddleTensor
tensor
{.
name
=
""
,
.
shape
=
std
::
vector
<
int
>
({
4
,
1
}),
.
data
=
buf
,
.
data
=
PaddleBuf
(
data
,
sizeof
(
data
))
,
.
dtype
=
PaddleDType
::
INT64
};
std
::
vector
<
PaddleTensor
>
inputs
(
4
,
tensor
);
std
::
vector
<
PaddleTensor
>
outputs
;
...
...
@@ -99,13 +95,13 @@ void MainThreads(int num_threads, bool use_gpu) {
// 4. Get output.
ASSERT_EQ
(
outputs
.
size
(),
1UL
);
LOG
(
INFO
)
<<
"TID: "
<<
tid
<<
", "
<<
"output buffer size: "
<<
outputs
.
front
().
data
.
length
;
const
size_t
num_elements
=
outputs
.
front
().
data
.
length
/
sizeof
(
float
);
<<
"output buffer size: "
<<
outputs
.
front
().
data
.
length
();
const
size_t
num_elements
=
outputs
.
front
().
data
.
length
()
/
sizeof
(
float
);
// The outputs' buffers are in CPU memory.
for
(
size_t
i
=
0
;
i
<
std
::
min
(
5UL
,
num_elements
);
i
++
)
{
LOG
(
INFO
)
<<
static_cast
<
float
*>
(
outputs
.
front
().
data
.
data
)[
i
];
LOG
(
INFO
)
<<
static_cast
<
float
*>
(
outputs
.
front
().
data
.
data
()
)[
i
];
}
free
(
outputs
[
0
].
data
.
data
);
}
});
}
...
...
paddle/contrib/inference/paddle_inference_api.cc
浏览文件 @
e02cbf35
...
...
@@ -13,3 +13,53 @@ See the License for the specific language governing permissions and
limitations under the License. */
#include "paddle/contrib/inference/paddle_inference_api.h"
namespace
paddle
{
// Move constructor: takes over `other`'s pointer, length and ownership flag,
// then leaves `other` null, zero-length and non-owning.
PaddleBuf::PaddleBuf(PaddleBuf&& other)
    : data_(other.data_),
      length_(other.length_),
      memory_owned_(other.memory_owned_) {
  // Detach the source; these three resets are independent of one another.
  other.data_ = nullptr;
  other.length_ = 0;
  other.memory_owned_ = false;
}
// Copy constructor: forwards to the copy-assignment operator, so the same
// restrictions apply as for assignment.
PaddleBuf::PaddleBuf(const PaddleBuf& other) { this->operator=(other); }
// Copy assignment. Only a buffer that refers to external (non-owned) memory
// may be copied (asserted); afterwards this buffer aliases the same memory as
// `other` and carries the same ownership flag.
PaddleBuf& PaddleBuf::operator=(const PaddleBuf& other) {
  // only the buffer with external memory can be copied
  assert(!other.memory_owned_);
  // Self-assignment must not touch the buffer at all.
  if (this == &other) return *this;
  // Release whatever this buffer currently owns; overwriting data_ without
  // doing so would leak the previously allocated storage.
  Free();
  data_ = other.data_;
  length_ = other.length_;
  memory_owned_ = other.memory_owned_;
  return *this;
}
// Resize the buffer to `length` bytes.
// Nothing happens when the current length already equals `length`. Otherwise
// the buffer is asserted to own its memory, Free() is called, and a new char
// array of `length` bytes is allocated; existing contents are not copied.
void PaddleBuf::Resize(size_t length) {
  // Only the owned memory can be reset, the external memory can't be changed.
  if (length_ != length) {
    assert(memory_owned_);
    Free();
    data_ = new char[length];
    length_ = length;
    memory_owned_ = true;
  }
}
// Point the buffer at external memory `data` of `length` bytes.
// Free() is called first, then the buffer is marked as not owning its memory.
void PaddleBuf::Reset(void* data, size_t length) {
  Free();
  data_ = data;
  length_ = length;
  memory_owned_ = false;
}
// Release the storage if, and only if, this buffer owns it and the pointer is
// non-null; the pointer and length are then cleared. Non-owned (external)
// memory is left untouched.
void PaddleBuf::Free() {
  if (memory_owned_ && data_) {
    assert(length_ > 0);
    // Owned storage is always allocated with `new char[n]`, so it must be
    // released with the array form; scalar `delete` here is undefined
    // behavior.
    delete[] static_cast<char*>(data_);
    data_ = nullptr;
    length_ = 0;
  }
}
}
// namespace paddle
\ No newline at end of file
paddle/contrib/inference/paddle_inference_api.h
浏览文件 @
e02cbf35
...
...
@@ -21,6 +21,7 @@ limitations under the License. */
#pragma once
#include <cassert>
#include <memory>
#include <string>
#include <vector>
...
...
@@ -32,12 +33,38 @@ enum PaddleDType {
INT64
,
};
struct
PaddleBuf
{
void
*
data
;
// pointer to the data memory.
size_t
length
;
// number of memory bytes.
// A byte buffer that either owns its storage (allocated with `new char[]`)
// or merely refers to memory managed by someone else. The `memory_owned_`
// flag records which of the two applies.
class PaddleBuf {
 public:
  // Default state: null pointer, zero length, flagged as owning.
  PaddleBuf() = default;
  // Move constructor (defined out of line).
  PaddleBuf(PaddleBuf&& other);
  // Copy only available when memory is managed externally.
  explicit PaddleBuf(const PaddleBuf&);
  PaddleBuf& operator=(const PaddleBuf&);
  // Do not own the memory.
  PaddleBuf(void* data, size_t length)
      : data_(data), length_(length), memory_owned_{false} {}
  // Own memory.
  // Allocates `length` bytes with `new char[length]`.
  PaddleBuf(size_t length)
      : data_(new char[length]), length_(length), memory_owned_(true) {}
  // Resize to `length` bytes.
  void Resize(size_t length);
  // Reset to external memory.
  void Reset(void* data, size_t length);
  // True when the recorded length is zero.
  bool empty() const { return length_ == 0; }
  // Raw pointer to the underlying memory.
  void* data() const { return data_; }
  // Size of the buffer in bytes.
  size_t length() const { return length_; }

  // The destructor calls Free().
  ~PaddleBuf() { Free(); }

 private:
  // Defined out of line.
  void Free();
  void* data_{nullptr};  // pointer to the data memory.
  size_t length_{0};     // number of memory bytes.
  // Whether this buffer owns the memory at data_; defaults to true.
  bool memory_owned_{true};
};
struct
PaddleTensor
{
PaddleTensor
()
=
default
;
std
::
string
name
;
// variable name.
std
::
vector
<
int
>
shape
;
// TODO(Superjomn) for LoD support, add a vector<vector<int>> field if needed.
...
...
@@ -67,8 +94,9 @@ class PaddlePredictor {
// Predict a record.
// The caller should be responsible for allocating and releasing the memory of
// `inputs`. `inputs` should be alive until Run returns. caller should be
// responsible for releasing the memory of `output_data`.
// `inputs`. `inputs` should be available until Run returns. Caller should be
// responsible for the output tensor's buffer, either allocated or passed from
// outside.
virtual
bool
Run
(
const
std
::
vector
<
PaddleTensor
>&
inputs
,
std
::
vector
<
PaddleTensor
>*
output_data
)
=
0
;
...
...
paddle/contrib/inference/paddle_inference_api_anakin_engine.cc
浏览文件 @
e02cbf35
...
...
@@ -48,7 +48,7 @@ bool PaddleInferenceAnakinPredictor::Run(
auto
d_tensor_in_p
=
executor_
.
get_in
(
input
.
name
);
float
*
d_data_p
=
d_tensor_in_p
->
mutable_data
();
if
(
cudaMemcpy
(
d_data_p
,
static_cast
<
float
*>
(
input
.
data
.
data
),
static_cast
<
float
*>
(
input
.
data
.
data
()
),
d_tensor_in_p
->
valid_size
()
*
sizeof
(
float
),
cudaMemcpyHostToDevice
)
!=
0
)
{
LOG
(
ERROR
)
<<
"copy data from CPU to GPU error"
;
...
...
@@ -65,8 +65,11 @@ bool PaddleInferenceAnakinPredictor::Run(
for
(
auto
&
output
:
*
output_data
)
{
auto
*
tensor
=
executor_
.
get_out
(
output
.
name
);
output
.
shape
=
tensor
->
shape
();
if
(
output
.
data
.
length
()
<
tensor
->
valid_size
()
*
sizeof
(
float
))
{
output
.
data
.
Resize
(
tensor
->
valid_size
()
*
sizeof
(
float
));
}
// Copy data from GPU -> CPU
if
(
cudaMemcpy
(
output
.
data
.
data
,
if
(
cudaMemcpy
(
output
.
data
.
data
()
,
tensor
->
mutable_data
(),
tensor
->
valid_size
()
*
sizeof
(
float
),
cudaMemcpyDeviceToHost
)
!=
0
)
{
...
...
paddle/contrib/inference/paddle_inference_api_anakin_engine_tester.cc
浏览文件 @
e02cbf35
...
...
@@ -37,28 +37,26 @@ TEST(inference, anakin) {
float
data
[
1
*
3
*
224
*
224
]
=
{
1.0
f
};
PaddleBuf
buf
{.
data
=
data
,
.
length
=
sizeof
(
data
)};
PaddleTensor
tensor
{.
name
=
"input_0"
,
.
shape
=
std
::
vector
<
int
>
({
1
,
3
,
224
,
224
}),
.
data
=
buf
,
.
data
=
PaddleBuf
(
data
,
sizeof
(
data
))
,
.
dtype
=
PaddleDType
::
FLOAT32
};
// For simplicity, we set all the slots with the same data.
std
::
vector
<
PaddleTensor
>
paddle_tensor_feeds
(
1
,
tensor
);
std
::
vector
<
PaddleTensor
>
paddle_tensor_feeds
;
paddle_tensor_feeds
.
emplace_back
(
std
::
move
(
tensor
));
float
data_out
[
1000
];
PaddleBuf
buf_out
{.
data
=
data_out
,
.
length
=
sizeof
(
data
)};
PaddleTensor
tensor_out
{.
name
=
"prob_out"
,
.
shape
=
std
::
vector
<
int
>
({
1000
,
1
}),
.
data
=
buf_out
,
.
data
=
PaddleBuf
()
,
.
dtype
=
PaddleDType
::
FLOAT32
};
std
::
vector
<
PaddleTensor
>
outputs
(
1
,
tensor_out
);
std
::
vector
<
PaddleTensor
>
outputs
;
outputs
.
emplace_back
(
std
::
move
(
tensor_out
));
ASSERT_TRUE
(
predictor
->
Run
(
paddle_tensor_feeds
,
&
outputs
));
float
*
data_o
=
static_cast
<
float
*>
(
outputs
[
0
].
data
.
data
);
float
*
data_o
=
static_cast
<
float
*>
(
outputs
[
0
].
data
.
data
()
);
for
(
size_t
j
=
0
;
j
<
1000
;
++
j
)
{
LOG
(
INFO
)
<<
"output["
<<
j
<<
"]: "
<<
data_o
[
j
];
}
...
...
paddle/contrib/inference/paddle_inference_api_impl.cc
浏览文件 @
e02cbf35
...
...
@@ -178,8 +178,8 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
// TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
std
::
memcpy
(
static_cast
<
void
*>
(
input_ptr
),
inputs
[
i
].
data
.
data
,
inputs
[
i
].
data
.
length
);
inputs
[
i
].
data
.
data
()
,
inputs
[
i
].
data
.
length
()
);
feeds
->
push_back
(
input
);
}
return
true
;
...
...
@@ -241,10 +241,11 @@ bool NativePaddlePredictor::GetFetch(
}
outputs
->
at
(
i
).
shape
=
shape
;
outputs
->
at
(
i
).
data
.
length
=
sizeof
(
float
)
*
data
.
size
();
outputs
->
at
(
i
).
data
.
data
=
malloc
(
outputs
->
at
(
i
).
data
.
length
);
std
::
memcpy
(
outputs
->
at
(
i
).
data
.
data
,
data
.
data
(),
outputs
->
at
(
i
).
data
.
length
);
auto
&
buffer
=
outputs
->
at
(
i
).
data
;
if
(
buffer
.
empty
()
||
buffer
.
length
()
<
sizeof
(
float
)
*
data
.
size
())
{
buffer
.
Resize
(
sizeof
(
float
)
*
data
.
size
());
}
std
::
memcpy
(
buffer
.
data
(),
data
.
data
(),
buffer
.
length
());
outputs
->
at
(
i
).
dtype
=
PaddleDType
::
FLOAT32
;
// TODO(panyx0718): support other types? fill tensor name? avoid a copy.
}
...
...
paddle/contrib/inference/test_paddle_inference_api_impl.cc
浏览文件 @
e02cbf35
...
...
@@ -27,13 +27,12 @@ namespace paddle {
PaddleTensor
LodTensorToPaddleTensor
(
framework
::
LoDTensor
*
t
)
{
PaddleTensor
pt
;
pt
.
data
.
data
=
t
->
data
<
void
>
();
if
(
t
->
type
()
==
typeid
(
int64_t
))
{
pt
.
data
.
length
=
t
->
numel
()
*
sizeof
(
int64_t
);
pt
.
data
.
Reset
(
t
->
data
<
void
>
(),
t
->
numel
()
*
sizeof
(
int64_t
)
);
pt
.
dtype
=
PaddleDType
::
INT64
;
}
else
if
(
t
->
type
()
==
typeid
(
float
))
{
pt
.
data
.
length
=
t
->
numel
()
*
sizeof
(
float
);
pt
.
data
.
Reset
(
t
->
data
<
void
>
(),
t
->
numel
()
*
sizeof
(
float
)
);
pt
.
dtype
=
PaddleDType
::
FLOAT32
;
}
else
{
LOG
(
FATAL
)
<<
"unsupported type."
;
...
...
@@ -79,8 +78,8 @@ void MainWord2Vec(bool use_gpu) {
std
::
vector
<
PaddleTensor
>
outputs
;
ASSERT_TRUE
(
predictor
->
Run
(
paddle_tensor_feeds
,
&
outputs
));
ASSERT_EQ
(
outputs
.
size
(),
1UL
);
size_t
len
=
outputs
[
0
].
data
.
length
;
float
*
data
=
static_cast
<
float
*>
(
outputs
[
0
].
data
.
data
);
size_t
len
=
outputs
[
0
].
data
.
length
()
;
float
*
data
=
static_cast
<
float
*>
(
outputs
[
0
].
data
.
data
()
);
for
(
size_t
j
=
0
;
j
<
len
/
sizeof
(
float
);
++
j
)
{
ASSERT_LT
(
data
[
j
],
1.0
);
ASSERT_GT
(
data
[
j
],
-
1.0
);
...
...
@@ -103,8 +102,6 @@ void MainWord2Vec(bool use_gpu) {
EXPECT_LT
(
lod_data
[
i
]
-
data
[
i
],
1e-3
);
EXPECT_GT
(
lod_data
[
i
]
-
data
[
i
],
-
1e-3
);
}
free
(
outputs
[
0
].
data
.
data
);
}
void
MainImageClassification
(
bool
use_gpu
)
{
...
...
@@ -143,13 +140,12 @@ void MainImageClassification(bool use_gpu) {
std
::
vector
<
PaddleTensor
>
outputs
;
ASSERT_TRUE
(
predictor
->
Run
(
paddle_tensor_feeds
,
&
outputs
));
ASSERT_EQ
(
outputs
.
size
(),
1UL
);
size_t
len
=
outputs
[
0
].
data
.
length
;
float
*
data
=
static_cast
<
float
*>
(
outputs
[
0
].
data
.
data
);
size_t
len
=
outputs
[
0
].
data
.
length
()
;
float
*
data
=
static_cast
<
float
*>
(
outputs
[
0
].
data
.
data
()
);
float
*
lod_data
=
output1
.
data
<
float
>
();
for
(
size_t
j
=
0
;
j
<
len
/
sizeof
(
float
);
++
j
)
{
EXPECT_NEAR
(
lod_data
[
j
],
data
[
j
],
1e-3
);
}
free
(
data
);
}
void
MainThreadsWord2Vec
(
bool
use_gpu
)
{
...
...
@@ -192,8 +188,8 @@ void MainThreadsWord2Vec(bool use_gpu) {
// check outputs range
ASSERT_EQ
(
local_outputs
.
size
(),
1UL
);
const
size_t
len
=
local_outputs
[
0
].
data
.
length
;
float
*
data
=
static_cast
<
float
*>
(
local_outputs
[
0
].
data
.
data
);
const
size_t
len
=
local_outputs
[
0
].
data
.
length
()
;
float
*
data
=
static_cast
<
float
*>
(
local_outputs
[
0
].
data
.
data
()
);
for
(
size_t
j
=
0
;
j
<
len
/
sizeof
(
float
);
++
j
)
{
ASSERT_LT
(
data
[
j
],
1.0
);
ASSERT_GT
(
data
[
j
],
-
1.0
);
...
...
@@ -205,7 +201,6 @@ void MainThreadsWord2Vec(bool use_gpu) {
for
(
int
i
=
0
;
i
<
refs
[
tid
].
numel
();
++
i
)
{
EXPECT_NEAR
(
ref_data
[
i
],
data
[
i
],
1e-3
);
}
free
(
data
);
});
}
for
(
int
i
=
0
;
i
<
num_jobs
;
++
i
)
{
...
...
@@ -251,14 +246,13 @@ void MainThreadsImageClassification(bool use_gpu) {
// check outputs correctness
ASSERT_EQ
(
local_outputs
.
size
(),
1UL
);
const
size_t
len
=
local_outputs
[
0
].
data
.
length
;
float
*
data
=
static_cast
<
float
*>
(
local_outputs
[
0
].
data
.
data
);
const
size_t
len
=
local_outputs
[
0
].
data
.
length
()
;
float
*
data
=
static_cast
<
float
*>
(
local_outputs
[
0
].
data
.
data
()
);
float
*
ref_data
=
refs
[
tid
].
data
<
float
>
();
EXPECT_EQ
(
refs
[
tid
].
numel
(),
len
/
sizeof
(
float
));
for
(
int
i
=
0
;
i
<
refs
[
tid
].
numel
();
++
i
)
{
EXPECT_NEAR
(
ref_data
[
i
],
data
[
i
],
1e-3
);
}
free
(
data
);
});
}
for
(
int
i
=
0
;
i
<
num_jobs
;
++
i
)
{
...
...
paddle/fluid/framework/executor.cc
浏览文件 @
e02cbf35
...
...
@@ -321,7 +321,8 @@ std::vector<std::shared_ptr<ExecutorPrepareContext>> Executor::Prepare(
}
void
Executor
::
RunPreparedContext
(
ExecutorPrepareContext
*
ctx
,
Scope
*
scope
,
bool
create_local_scope
,
bool
create_vars
)
{
bool
create_local_scope
,
bool
create_vars
,
bool
keep_kids
)
{
Scope
*
local_scope
=
scope
;
if
(
create_vars
)
{
if
(
create_local_scope
)
{
...
...
@@ -344,12 +345,20 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
}
}
platform
::
DeviceContextPool
::
Instance
().
Get
(
place_
)
->
Wait
();
if
(
create_vars
&&
create_local_
scope
)
{
if
(
local_scope
!=
scope
)
{
scope
->
DeleteScope
(
local_scope
);
}
else
{
// Delete the local scopes created in operators.
scope
->
DropKids
();
if
(
!
keep_kids
)
{
// By default, we should delete all kid scopes after run executor because
// some operators may create local scope when running, such as while_op.
// But when while_op also creates a local executor to run its sub block,
// the sub scopes it created should not be dropped immediately, because
// while_grad_op will use some variables created during while_op run, so
// we need to keep the kids and wait for the outer executor to drop them.
scope
->
DropKids
();
}
}
if
(
FLAGS_benchmark
)
{
VLOG
(
2
)
<<
"-------------------------------------------------------"
;
VLOG
(
2
)
<<
"Memory used after deleting local scope: "
...
...
paddle/fluid/framework/executor.h
浏览文件 @
e02cbf35
...
...
@@ -78,7 +78,7 @@ class Executor {
void
RunPreparedContext
(
ExecutorPrepareContext
*
ctx
,
Scope
*
scope
,
bool
create_local_scope
=
true
,
bool
create_vars
=
true
);
bool
create_vars
=
true
,
bool
keep_kids
=
false
);
void
RunPreparedContext
(
ExecutorPrepareContext
*
ctx
,
Scope
*
scope
,
std
::
map
<
std
::
string
,
const
LoDTensor
*>*
feed_targets
,
...
...
paddle/fluid/inference/analysis/tensorrt_subgraph_pass.cc
浏览文件 @
e02cbf35
...
...
@@ -27,7 +27,7 @@ void TensorRTSubGraphPass::Run(DataFlowGraph *graph) {
SubGraphFuse
(
graph
,
node_inside_subgraph_teller_
);
}
}
// analysis
}
// inference
}
//
namespace
analysis
}
//
namespace
inference
}
// paddle
}
//
namespace
paddle
paddle/fluid/operators/activation_op.cc
浏览文件 @
e02cbf35
...
...
@@ -143,7 +143,7 @@ $$out = \\frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
__attribute__
((
unused
))
constexpr
char
TanhShrinkDoc
[]
=
R"DOC(
TanhShrink Activation Operator.
$$out = x - \frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
$$out = x - \
\
frac{e^{x} - e^{-x}}{e^{x} + e^{-x}}$$
)DOC"
;
...
...
@@ -385,7 +385,7 @@ class STanhOpMaker : public framework::OpProtoAndCheckerMaker {
AddComment
(
R"DOC(
STanh Activation Operator.
$$out = b * \frac{e^{a * x} - e^{-a * x}}{e^{a * x} + e^{-a * x}}$$
$$out = b * \
\
frac{e^{a * x} - e^{-a * x}}{e^{a * x} + e^{-a * x}}$$
)DOC"
);
}
...
...
paddle/fluid/operators/batch_norm_mkldnn_op.cc
浏览文件 @
e02cbf35
...
...
@@ -21,8 +21,6 @@ namespace operators {
using
batch_norm_bwd
=
mkldnn
::
batch_normalization_backward
;
using
batch_norm_fwd
=
mkldnn
::
batch_normalization_forward
;
using
framework
::
DataLayout
;
using
framework
::
Tensor
;
using
mkldnn
::
memory
;
using
mkldnn
::
primitive
;
using
mkldnn
::
reorder
;
...
...
@@ -31,18 +29,6 @@ using paddle::platform::MKLDNNDeviceContext;
using
paddle
::
platform
::
MKLDNNMemDesc
;
using
platform
::
to_void_cast
;
template
<
typename
T
>
using
EigenArrayMap
=
Eigen
::
Map
<
Eigen
::
Array
<
T
,
Eigen
::
Dynamic
,
Eigen
::
Dynamic
>>
;
template
<
typename
T
>
using
ConstEigenArrayMap
=
Eigen
::
Map
<
const
Eigen
::
Array
<
T
,
Eigen
::
Dynamic
,
Eigen
::
Dynamic
>>
;
template
<
typename
T
>
using
EigenVectorArrayMap
=
Eigen
::
Map
<
Eigen
::
Array
<
T
,
Eigen
::
Dynamic
,
1
>>
;
template
<
typename
T
>
using
ConstEigenVectorArrayMap
=
Eigen
::
Map
<
const
Eigen
::
Array
<
T
,
Eigen
::
Dynamic
,
1
>>
;
namespace
{
template
<
typename
T
>
struct
bn_type_traits
{
...
...
paddle/fluid/operators/batch_norm_op.cc
浏览文件 @
e02cbf35
...
...
@@ -22,22 +22,6 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
using
Tensor
=
framework
::
Tensor
;
using
LoDTensor
=
framework
::
LoDTensor
;
using
DataLayout
=
framework
::
DataLayout
;
template
<
typename
T
>
using
EigenArrayMap
=
Eigen
::
Map
<
Eigen
::
Array
<
T
,
Eigen
::
Dynamic
,
Eigen
::
Dynamic
>>
;
template
<
typename
T
>
using
ConstEigenArrayMap
=
Eigen
::
Map
<
const
Eigen
::
Array
<
T
,
Eigen
::
Dynamic
,
Eigen
::
Dynamic
>>
;
template
<
typename
T
>
using
EigenVectorArrayMap
=
Eigen
::
Map
<
Eigen
::
Array
<
T
,
Eigen
::
Dynamic
,
1
>>
;
template
<
typename
T
>
using
ConstEigenVectorArrayMap
=
Eigen
::
Map
<
const
Eigen
::
Array
<
T
,
Eigen
::
Dynamic
,
1
>>
;
class
BatchNormOp
:
public
framework
::
OperatorWithKernel
{
public:
using
framework
::
OperatorWithKernel
::
OperatorWithKernel
;
...
...
paddle/fluid/operators/batch_norm_op.h
浏览文件 @
e02cbf35
...
...
@@ -19,6 +19,22 @@ limitations under the License. */
namespace
paddle
{
namespace
operators
{
using
Tensor
=
framework
::
Tensor
;
using
LoDTensor
=
framework
::
LoDTensor
;
using
DataLayout
=
framework
::
DataLayout
;
template
<
typename
T
>
using
EigenArrayMap
=
Eigen
::
Map
<
Eigen
::
Array
<
T
,
Eigen
::
Dynamic
,
Eigen
::
Dynamic
>>
;
template
<
typename
T
>
using
ConstEigenArrayMap
=
Eigen
::
Map
<
const
Eigen
::
Array
<
T
,
Eigen
::
Dynamic
,
Eigen
::
Dynamic
>>
;
template
<
typename
T
>
using
EigenVectorArrayMap
=
Eigen
::
Map
<
Eigen
::
Array
<
T
,
Eigen
::
Dynamic
,
1
>>
;
template
<
typename
T
>
using
ConstEigenVectorArrayMap
=
Eigen
::
Map
<
const
Eigen
::
Array
<
T
,
Eigen
::
Dynamic
,
1
>>
;
template
<
typename
DeviceContext
,
typename
T
>
class
BatchNormKernel
:
public
framework
::
OpKernel
<
T
>
{
public:
...
...
paddle/fluid/operators/bilinear_interp_op.cc
浏览文件 @
e02cbf35
...
...
@@ -110,6 +110,7 @@ REGISTER_OPERATOR(bilinear_interp, ops::BilinearInterpOp,
ops
::
BilinearInterpOpMaker
,
paddle
::
framework
::
DefaultGradOpDescMaker
<
true
>
);
REGISTER_OPERATOR
(
bilinear_interp_grad
,
ops
::
BilinearInterpOpGrad
);
REGISTER_OP_CPU_KERNEL
(
bilinear_interp
,
ops
::
BilinearInterpKernel
<
float
>
);
REGISTER_OP_CPU_KERNEL
(
bilinear_interp
,
ops
::
BilinearInterpKernel
<
float
>
,
ops
::
BilinearInterpKernel
<
uint8_t
>
);
REGISTER_OP_CPU_KERNEL
(
bilinear_interp_grad
,
ops
::
BilinearInterpGradKernel
<
float
>
);
paddle/fluid/operators/bilinear_interp_op.h
浏览文件 @
e02cbf35
...
...
@@ -46,8 +46,10 @@ class BilinearInterpKernel : public framework::OpKernel<T> {
int
in_chw
=
channels
*
in_hw
;
int
out_chw
=
channels
*
out_hw
;
T
ratio_h
=
(
out_h
>
1
)
?
static_cast
<
T
>
(
in_h
-
1
)
/
(
out_h
-
1
)
:
0.
f
;
T
ratio_w
=
(
out_w
>
1
)
?
static_cast
<
T
>
(
in_w
-
1
)
/
(
out_w
-
1
)
:
0.
f
;
float
ratio_h
=
(
out_h
>
1
)
?
static_cast
<
float
>
(
in_h
-
1
)
/
(
out_h
-
1
)
:
0.
f
;
float
ratio_w
=
(
out_w
>
1
)
?
static_cast
<
float
>
(
in_w
-
1
)
/
(
out_w
-
1
)
:
0.
f
;
if
(
in_h
==
out_h
&&
in_w
==
out_w
)
{
memcpy
(
output
,
input
,
input_t
->
numel
()
*
sizeof
(
T
));
...
...
@@ -56,24 +58,24 @@ class BilinearInterpKernel : public framework::OpKernel<T> {
for
(
int
i
=
0
;
i
<
out_h
;
++
i
)
{
// loop for images
int
h
=
ratio_h
*
i
;
int
hid
=
(
h
<
in_h
-
1
)
?
1
:
0
;
T
h1lambda
=
ratio_h
*
i
-
h
;
T
h2lambda
=
1
-
h1lambda
;
float
h1lambda
=
ratio_h
*
i
-
h
;
float
h2lambda
=
1.
f
-
h1lambda
;
for
(
int
j
=
0
;
j
<
out_w
;
++
j
)
{
int
w
=
ratio_w
*
j
;
int
wid
=
(
w
<
in_w
-
1
)
?
1
:
0
;
T
w1lambda
=
ratio_w
*
j
-
w
;
T
w2lambda
=
1
-
w1lambda
;
float
w1lambda
=
ratio_w
*
j
-
w
;
float
w2lambda
=
1.
f
-
w1lambda
;
// calculate four position for bilinear interpolation
const
T
*
in_pos
=
&
input
[
k
*
in_chw
+
h
*
in_w
+
w
];
T
*
out_pos
=
&
output
[
k
*
out_chw
+
i
*
out_w
+
j
];
for
(
int
c
=
0
;
c
<
channels
;
++
c
)
{
// loop for channels
// bilinear interpolation
out_pos
[
0
]
=
out_pos
[
0
]
=
static_cast
<
T
>
(
h2lambda
*
(
w2lambda
*
in_pos
[
0
]
+
w1lambda
*
in_pos
[
wid
])
+
h1lambda
*
(
w2lambda
*
in_pos
[
hid
*
in_w
]
+
w1lambda
*
in_pos
[
hid
*
in_w
+
wid
]);
w1lambda
*
in_pos
[
hid
*
in_w
+
wid
])
)
;
in_pos
+=
in_hw
;
out_pos
+=
out_hw
;
}
...
...
@@ -117,8 +119,10 @@ class BilinearInterpGradKernel : public framework::OpKernel<T> {
int
in_chw
=
channels
*
in_hw
;
int
out_chw
=
channels
*
out_hw
;
T
ratio_h
=
(
out_h
>
1
)
?
static_cast
<
T
>
(
in_h
-
1
)
/
(
out_h
-
1
)
:
0.
f
;
T
ratio_w
=
(
out_w
>
1
)
?
static_cast
<
T
>
(
in_w
-
1
)
/
(
out_w
-
1
)
:
0.
f
;
float
ratio_h
=
(
out_h
>
1
)
?
static_cast
<
float
>
(
in_h
-
1
)
/
(
out_h
-
1
)
:
0.
f
;
float
ratio_w
=
(
out_w
>
1
)
?
static_cast
<
float
>
(
in_w
-
1
)
/
(
out_w
-
1
)
:
0.
f
;
if
(
in_h
==
out_h
&&
in_w
==
out_w
)
{
memcpy
(
d_input
,
d_output
,
d_input_t
->
numel
()
*
sizeof
(
T
));
...
...
@@ -127,22 +131,24 @@ class BilinearInterpGradKernel : public framework::OpKernel<T> {
for
(
int
i
=
0
;
i
<
out_h
;
++
i
)
{
// loop for images
int
h
=
ratio_h
*
i
;
int
hid
=
(
h
<
in_h
-
1
)
?
1
:
0
;
T
h1lambda
=
ratio_h
*
i
-
h
;
T
h2lambda
=
1
-
h1lambda
;
float
h1lambda
=
ratio_h
*
i
-
h
;
float
h2lambda
=
1
-
h1lambda
;
for
(
int
j
=
0
;
j
<
out_w
;
++
j
)
{
int
w
=
ratio_w
*
j
;
int
wid
=
(
w
<
in_w
-
1
)
?
1
:
0
;
T
w1lambda
=
ratio_w
*
j
-
w
;
T
w2lambda
=
1
-
w1lambda
;
float
w1lambda
=
ratio_w
*
j
-
w
;
float
w2lambda
=
1
-
w1lambda
;
T
*
in_pos
=
&
d_input
[
k
*
in_chw
+
h
*
in_w
+
w
];
const
T
*
out_pos
=
&
d_output
[
k
*
out_chw
+
i
*
out_w
+
j
];
for
(
int
c
=
0
;
c
<
channels
;
++
c
)
{
// loop for channels
in_pos
[
0
]
+=
h2lambda
*
w2lambda
*
out_pos
[
0
];
in_pos
[
wid
]
+=
h2lambda
*
w1lambda
*
out_pos
[
0
];
in_pos
[
hid
*
in_w
]
+=
h1lambda
*
w2lambda
*
out_pos
[
0
];
in_pos
[
hid
*
in_w
+
wid
]
+=
h1lambda
*
w1lambda
*
out_pos
[
0
];
in_pos
[
0
]
+=
static_cast
<
T
>
(
h2lambda
*
w2lambda
*
out_pos
[
0
]);
in_pos
[
wid
]
+=
static_cast
<
T
>
(
h2lambda
*
w1lambda
*
out_pos
[
0
]);
in_pos
[
hid
*
in_w
]
+=
static_cast
<
T
>
(
h1lambda
*
w2lambda
*
out_pos
[
0
]);
in_pos
[
hid
*
in_w
+
wid
]
+=
static_cast
<
T
>
(
h1lambda
*
w1lambda
*
out_pos
[
0
]);
in_pos
+=
in_hw
;
out_pos
+=
out_hw
;
}
...
...
paddle/fluid/operators/logical_op.cc
浏览文件 @
e02cbf35
...
...
@@ -146,6 +146,6 @@ REGISTER_UNARY_LOGICAL_OP(logical_not, "$$Out = !X$$");
REGISTER_UNARY_LOGICAL_KERNEL
(
logical_not
,
CPU
,
paddle
::
operators
::
LogicalNotFunctor
);
REGISTER_BINARY_LOGICAL_OP
(
logical_xor
,
"$$Out = (X || Y)
\\
,
\\
&
\\
&
\\
,
!(X
\\
&
\\
& Y)$$"
);
"$$Out = (X || Y)
\\
&
\\
&
!(X
\\
&
\\
& Y)$$"
);
REGISTER_BINARY_LOGICAL_KERNEL
(
logical_xor
,
CPU
,
paddle
::
operators
::
LogicalXorFunctor
);
paddle/fluid/operators/math/concat.cu
浏览文件 @
e02cbf35
...
...
@@ -209,7 +209,7 @@ class ConcatGradFunctor<platform::CUDADeviceContext, T> {
outputs_cols
[
0
]
=
0
;
for
(
int
i
=
0
;
i
<
o_num
;
++
i
)
{
int
t_col
=
outputs
->
at
(
i
)
->
numel
()
/
out_row
;
int
t_col
=
ref_inputs
.
at
(
i
)
->
numel
()
/
out_row
;
if
(
sameShape
)
{
if
(
t_col
!=
out0_col
)
sameShape
=
false
;
}
...
...
paddle/fluid/operators/math/math_function.cc
浏览文件 @
e02cbf35
...
...
@@ -30,6 +30,7 @@ template struct SetConstant<platform::CPUDeviceContext, double>;
template
struct
SetConstant
<
platform
::
CPUDeviceContext
,
int
>;
template
struct
SetConstant
<
platform
::
CPUDeviceContext
,
int64_t
>;
template
struct
SetConstant
<
platform
::
CPUDeviceContext
,
bool
>;
template
struct
SetConstant
<
platform
::
CPUDeviceContext
,
uint8_t
>;
#define DEFINE_CPU_TRANS(RANK) \
template struct Transpose<platform::CPUDeviceContext, platform::float16, \
...
...
paddle/fluid/operators/tensorrt_engine_op.cc
浏览文件 @
e02cbf35
...
...
@@ -14,11 +14,14 @@
#ifdef PADDLE_WITH_CUDA
#include "paddle/fluid/operators/tensorrt_engine_op.h"
#include <string>
#include <vector>
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
#include "paddle/fluid/inference/tensorrt/engine.h"
#include "paddle/fluid/inference/utils/singleton.h"
#include "paddle/fluid/operators/tensorrt_engine_op.h"
namespace
paddle
{
namespace
operators
{
...
...
paddle/fluid/operators/tensorrt_engine_op.h
浏览文件 @
e02cbf35
...
...
@@ -16,10 +16,12 @@
#ifdef PADDLE_WITH_CUDA
#include <string>
#include <vector>
#include "paddle/fluid/framework/operator.h"
#include "paddle/fluid/inference/analysis/helper.h"
#include "paddle/fluid/inference/tensorrt/engine.h"
#include "paddle/fluid/inference/tensorrt/engine.h"
namespace
paddle
{
namespace
operators
{
...
...
paddle/fluid/operators/tensorrt_engine_op_test.cc
浏览文件 @
e02cbf35
...
...
@@ -179,7 +179,6 @@ void Execute(int batch_size, int input_dim, int output_dim, int nlayers = 1) {
const
std
::
string
&
z_name
,
bool
x_created
,
const
shape_t
&
x_shape
,
const
shape_t
&
y_shape
,
const
shape_t
&
z_shape
)
{
LOG
(
INFO
)
<<
"create fc op"
;
auto
*
fc
=
block_desc
.
AppendOp
();
fc
->
SetType
(
"mul"
);
...
...
paddle/fluid/pybind/pybind.cc
浏览文件 @
e02cbf35
...
...
@@ -159,6 +159,11 @@ PYBIND11_PLUGIN(core) {
new
(
&
instance
)
LoDTensor
(
new_offset_lod
);
})
.
def
(
"__init__"
,
[](
LoDTensor
&
instance
)
{
new
(
&
instance
)
LoDTensor
();
})
// We implement offset based LOD in C++ while we use length based with
// Python API. So we changed set_lod to set_recursive_sequence_lengths to
// avoid misuse.
// The discussion is here:
// https://github.com/PaddlePaddle/Paddle/issues/10855
.
def
(
"set_lod"
,
[](
LoDTensor
&
self
,
const
std
::
vector
<
std
::
vector
<
size_t
>>
&
lod
)
{
// the input lod is offset-based level-of-detail info
...
...
@@ -199,6 +204,7 @@ PYBIND11_PLUGIN(core) {
std
::
copy
(
lod
.
begin
(),
lod
.
end
(),
std
::
back_inserter
(
new_lod
));
return
new_lod
;
})
// See the comments above set_lod.
.
def
(
"recursive_sequence_lengths"
,
[](
LoDTensor
&
self
)
->
std
::
vector
<
std
::
vector
<
size_t
>>
{
// output the length-based lod info
...
...
paddle/fluid/pybind/tensor_py.h
浏览文件 @
e02cbf35
...
...
@@ -97,7 +97,7 @@ struct CastToPyBufferImpl<true, I, ARGS...> {
inline
pybind11
::
buffer_info
CastToPyBuffer
(
const
framework
::
Tensor
&
tensor
)
{
auto
buffer_info
=
details
::
CastToPyBufferImpl
<
true
,
0
,
float
,
int
,
double
,
int64_t
,
bool
,
platform
::
float16
>
()(
tensor
);
uint8_t
,
platform
::
float16
>
()(
tensor
);
return
buffer_info
;
}
...
...
python/paddle/fluid/__init__.py
浏览文件 @
e02cbf35
...
...
@@ -44,7 +44,7 @@ import metrics
import
transpiler
from
param_attr
import
ParamAttr
,
WeightNormParamAttr
from
data_feeder
import
DataFeeder
from
core
import
LoDTensor
,
CPUPlace
,
CUDAPlace
,
CUDAPinnedPlace
from
core
import
LoDTensor
,
CPUPlace
,
CUDAPlace
,
CUDAPinnedPlace
,
Scope
from
transpiler
import
DistributeTranspiler
,
InferenceTranspiler
,
\
memory_optimize
,
release_memory
from
concurrency
import
(
Go
,
make_channel
,
channel_send
,
channel_recv
,
...
...
@@ -83,6 +83,7 @@ __all__ = framework.__all__ + executor.__all__ + concurrency.__all__ + \
'profiler'
,
'unique_name'
,
'recordio_writer'
,
'Scope'
,
]
...
...
python/paddle/fluid/average.py
浏览文件 @
e02cbf35
...
...
@@ -36,6 +36,25 @@ def _is_number_or_matrix_(var):
class
WeightedAverage
(
object
):
"""
Calculate weighted average.
The average calculating is accomplished via Python totally.
They do not change Paddle's Program, nor do anything to
modify NN model's configuration. They are completely
wrappers of Python functions.
Examples:
.. code-block:: python
avg = fluid.average.WeightedAverage()
avg.add(value=2.0, weight=1)
avg.add(value=4.0, weight=2)
avg.eval()
# The result is 3.333333333.
# For (2.0 * 1 + 4.0 * 2) / (1 + 2) = 3.333333333
"""
def
__init__
(
self
):
warnings
.
warn
(
"The %s is deprecated, please use fluid.metrics.Accuracy instead."
%
...
...
python/paddle/fluid/backward.py
浏览文件 @
e02cbf35
...
...
@@ -147,7 +147,7 @@ def _addup_repetitive_outputs_(op_descs):
else
:
if
len
(
renamed_vars
[
var_name
])
==
1
:
new_name
=
var_name
+
"@RENAME@"
+
\
str
(
var_rename_count
[
var_name
])
str
(
var_rename_count
[
var_name
])
var_rename_count
[
var_name
]
+=
1
# rename original var_name
renamed_vars
[
var_name
][
0
]
=
new_name
...
...
@@ -155,7 +155,7 @@ def _addup_repetitive_outputs_(op_descs):
_rename_arg_
(
pending_sum_ops
,
var_name
,
new_name
)
new_name
=
var_name
+
"@RENAME@"
+
\
str
(
var_rename_count
[
var_name
])
str
(
var_rename_count
[
var_name
])
var_rename_count
[
var_name
]
+=
1
op_desc
.
rename_output
(
var_name
,
new_name
)
renamed_vars
[
var_name
].
append
(
new_name
)
...
...
@@ -435,18 +435,65 @@ def _get_stop_gradients_(program):
def
append_backward
(
loss
,
parameter_list
=
None
,
no_grad_set
=
None
,
callbacks
=
None
):
"""
Append backward part to main_program
Append backward part to main_program
.
Args:
loss(Variable): The variable generated by cost function.
parameter_list(list[string]): Parameters that need to be updated by
optimizer. If None, it means all parameters need to be updated.
no_grad_set(set): Variables that have no gradients in Block 0.
All variables with `step_gradient=True` from all blocks will be
automatically added.
A complete neural network training is made up of forward and backward
propagation. However, when we configure a network, we only need to
specify its forward part. The backward part is generated automatically
according to the forward part by this function.
Return:
(list[(Variable,Variable)]): list of (parameter, gradient) pair.
In most cases, users do not need to invoke this function manually. It
will be automatically invoked by the optimizer's `minimize` function.
Args:
loss(Variable): The loss variable of the network.
parameter_list(list[string]|None): Names of parameters that need
to be updated by optimizers.
If it is None, all parameters
will be updated.
Default: None
no_grad_set(set|None): Variables in the Block 0 whose gradients
should be ignored. All variables with
`step_gradient=True` from all blocks will
be automatically added into this set.
Default: None
callbacks(list[callable object]|None): The callbacks are used for
doing some custom jobs during
backward part building. All
callable objects in it will
be invoked once each time a
new gradient operator is added
into the program. The callable
object must have two input
parameters: 'block' and 'context'.
The 'block' is the block which
the new gradient operator will
be added to. The 'context' is a
map, whose keys are gradient
variable names and values are
corresponding original variables.
In addition to this, the 'context'
has another special key-value pair:
the key is string '__current_op_desc__'
and the value is the op_desc of the
gradient operator who has just
triggered the callable object.
Returns:
list[(Variable,Variable)]: Pairs of parameter and its
corresponding gradients. The key is the parameter and the
value is gradient variable.
Raises:
AssertionError: If `loss` is not an instance of Variable.
Examples:
.. code-block:: python
# network configuration code
# ...
avg_loss = fluid.layers.mean(loss)
param_grad_list = fluid.backward.append_backward(loss=avg_loss)
"""
assert
isinstance
(
loss
,
framework
.
Variable
)
...
...
python/paddle/fluid/data_feeder.py
浏览文件 @
e02cbf35
...
...
@@ -29,6 +29,13 @@ class DataToLoDTensorConverter(object):
self
.
place
=
place
self
.
lod_level
=
lod_level
self
.
shape
=
shape
negtive_count
=
0
for
s
in
self
.
shape
:
if
s
<
0
:
negtive_count
+=
1
if
negtive_count
>
1
:
self
.
shape
=
None
break
if
dtype
==
core
.
VarDesc
.
VarType
.
FP32
:
self
.
dtype
=
'float32'
elif
dtype
==
core
.
VarDesc
.
VarType
.
INT64
:
...
...
@@ -61,7 +68,9 @@ class DataToLoDTensorConverter(object):
self
.
_feed_impl_
(
each_data
,
lod
[
1
:],
lod_level
-
1
)
def
done
(
self
):
arr
=
numpy
.
array
(
self
.
data
,
dtype
=
self
.
dtype
).
reshape
(
self
.
shape
)
arr
=
numpy
.
array
(
self
.
data
,
dtype
=
self
.
dtype
)
if
self
.
shape
:
arr
=
arr
.
reshape
(
self
.
shape
)
t
=
core
.
LoDTensor
()
t
.
set
(
arr
,
self
.
place
)
if
self
.
lod_level
>
0
:
...
...
@@ -70,6 +79,61 @@ class DataToLoDTensorConverter(object):
class
DataFeeder
(
object
):
"""
DataFeeder converts the data returned by a reader into a data
structure that can be fed into Executor and ParallelExecutor. The reader
usually returns a list of mini-batch data entries. Each data entry in
the list is one sample. Each sample is a list or a tuple with one
feature or multiple features.
The simple usage shows below:
.. code-block:: python
place = fluid.CPUPlace()
img = fluid.layers.data(name='image', shape=[1, 28, 28])
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
feeder = fluid.DataFeeder([img, label], fluid.CPUPlace())
result = feeder.feed([([0] * 784, [9]), ([1] * 784, [1])])
If you want to feed data into GPU side separately in advance when you
use multi-GPU to train a model, you can use `decorate_reader` function.
.. code-block:: python
place=fluid.CUDAPlace(0)
feeder = fluid.DataFeeder(place=place, feed_list=[data, label])
reader = feeder.decorate_reader(
paddle.batch(flowers.train(), batch_size=16))
Args:
feed_list(list): The Variables, or the names of the Variables, that will
be fed into the model.
place(Place): indicates whether the data is fed into CPU or GPU. To
feed data into GPU, use `fluid.CUDAPlace(i)` (`i` represents
the GPU id); to feed data into CPU, use
`fluid.CPUPlace()`.
program(Program): The Program that will feed data into, if program
is None, it will use default_main_program(). Default None.
Raises:
ValueError: If some Variable is not in this Program.
Examples:
.. code-block:: python
# ...
place = fluid.CPUPlace()
feed_list = [
main_program.global_block().var(var_name) for var_name in feed_vars_name
] # feed_vars_name is a list of variables' name.
feeder = fluid.DataFeeder(feed_list, place)
for data in reader():
outs = exe.run(program=main_program,
feed=feeder.feed(data))
"""
def
__init__
(
self
,
feed_list
,
place
,
program
=
None
):
self
.
feed_dtypes
=
[]
self
.
feed_names
=
[]
...
...
@@ -99,6 +163,16 @@ class DataFeeder(object):
self
.
place
=
place
def
feed
(
self
,
iterable
):
"""
According to feed_list and iterable, converts the input into
a data structure that can be fed into Executor and ParallelExecutor.
Args:
iterable(list|tuple): the input data.
Returns:
dict: the result of conversion.
"""
converter
=
[]
for
lod_level
,
shape
,
dtype
in
six
.
zip
(
self
.
feed_lod_level
,
self
.
feed_shapes
,
self
.
feed_dtypes
):
...
...
@@ -121,6 +195,20 @@ class DataFeeder(object):
return
ret_dict
def
feed_parallel
(
self
,
iterable
,
num_places
=
None
):
"""
Takes multiple mini-batches. Each mini-batch will be fed to its own
device in advance.
Args:
iterable(list|tuple): the input data.
num_places(int): the number of devices. Default None.
Returns:
dict: the result of conversion.
Notes:
The number of devices and the number of mini-batches must be the same.
"""
if
isinstance
(
self
.
place
,
core
.
CUDAPlace
):
places
=
[
core
.
CUDAPlace
(
i
)
...
...
@@ -159,6 +247,24 @@ class DataFeeder(object):
multi_devices
,
num_places
=
None
,
drop_last
=
True
):
"""
Convert the data returned by the reader into
multiple mini-batches. Each mini-batch will be fed to one device.
Args:
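reader(fun): the reader function that yields the input data.
multi_devices(bool): whether to split the data across multiple devices.
num_places(int): the number of places. Default None.
drop_last(bool): whether to drop the trailing batches that cannot be
distributed across the devices. Default True.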
Returns:
dict: the result of conversion.
Raises:
ValueError: If drop_last is False and the data batches cannot
fit the devices.
"""
def
__reader_creator__
():
if
not
multi_devices
:
for
item
in
reader
():
...
...
python/paddle/fluid/executor.py
浏览文件 @
e02cbf35
...
...
@@ -25,6 +25,13 @@ g_scope = core.Scope()
def
global_scope
():
"""
Get the global/default scope instance. Many APIs use
:code:`global_scope` as their default value, e.g., :code:`Executor.run`.
Returns:
Scope: The global/default scope instance.
"""
return
g_scope
...
...
@@ -37,6 +44,19 @@ def switch_scope(scope):
@
contextlib
.
contextmanager
def
scope_guard
(
scope
):
"""
Change the global/default scope instance with a Python `with` statement. All
variables at runtime will be assigned to the new scope.
Examples:
>>> import paddle.fluid as fluid
>>> new_scope = fluid.Scope()
>>> with fluid.scope_guard(new_scope):
>>> ...
Args:
scope: The new global/default scope.
"""
ex
=
switch_scope
(
scope
)
yield
switch_scope
(
ex
)
...
...
@@ -135,14 +155,18 @@ def has_fetch_operators(block, fetch_targets, fetch_holder_name):
def
fetch_var
(
name
,
scope
=
None
,
return_numpy
=
True
):
"""
Fetch the value of the variable with the given name from the given scope
Fetch the value of the variable with the given name from the
given scope.
Args:
name(str): name of the variable. Typically, only persistable variables
can be found in the scope used for running the program.
scope(core.Scope|None): scope object. It should be the scope where
you pass to Executor.run() when running your program.
If None, global_scope() will be used.
return_numpy(bool): whether convert the tensor to numpy.ndarray
If None, global_scope() will be used. Default None.
return_numpy(bool): whether convert the tensor to numpy.ndarray.
Default True.
Returns:
LodTensor|numpy.ndarray
"""
...
...
python/paddle/fluid/framework.py
浏览文件 @
e02cbf35
...
...
@@ -30,8 +30,6 @@ __all__ = [
'default_startup_program'
,
'default_main_program'
,
'program_guard'
,
'switch_startup_program'
,
'switch_main_program'
,
'get_var'
,
]
...
...
@@ -43,7 +41,8 @@ ZERO_VAR_SUFFIX = core.kZeroVarSuffix()
def
grad_var_name
(
var_name
):
"""
return gradient name for a certain var name
Returns:
str: gradient name for a certain var name
"""
return
var_name
+
GRAD_VAR_SUFFIX
...
...
@@ -51,10 +50,12 @@ def grad_var_name(var_name):
def
convert_np_dtype_to_dtype_
(
np_dtype
):
"""
Convert the data type in numpy to the data type in Paddle
Args:
np_dtype(np.dtype): the data type in numpy
np_dtype(np.dtype): the data type in numpy
.
Returns(core.VarDesc.VarType): the data type in Paddle
Returns:
core.VarDesc.VarType: the data type in Paddle.
"""
dtype
=
np
.
dtype
(
np_dtype
)
...
...
@@ -120,37 +121,53 @@ def _debug_string_(proto, throw_on_error=True):
class
Variable
(
object
):
"""
Python variable. Every input and output of an operator is a variable. Every
variable belongs to a block. The variable has a name and two variables in
different blocks could have the same name.
In Fluid, every input and output of an operator is a variable. In most
cases, variables are used for holding different kinds of data or training
labels. A variable belongs to a block. Every variable has its own name, and
two variables in different blocks could have the same name.
There are many kinds of variables.
Please reference the framework.proto for
details.
There are many kinds of variables.
Each kind of them has its own attributes
and usages. Please reference the framework.proto for details.
Notes: The constructor of Variable should not be invoked directly. Please
use `Block.create_var` to create a variable.
>>> cur_program = Program()
>>> cur_block = cur_program.current_block()
>>> new_variable = cur_block.create_var(
>>> name="X", shape=[-1, 23, 48], dtype='float32')
Most of a Variable's member variables can be set to None. It means
it is not available or will be specified later.
Args:
block(Block): The associated block. It will be passed by
`Block.create_var` automatically.
block(Block): The block that the variable belongs to.
type(core.VarDesc.VarType): Variable type. Please reference the
framework.proto for details.
shape(tuple|list|None): The shape of variable. -1 means the batch size.
name(str|None): The name of the variable. If set to None, it will be
generated automatically. Default: None
shape(tuple|list|None): The shape of the variable. -1 means the batch size.
Some kinds of variable do not contain shape, just set it to None.
dtype(np.dtype|core.VarDesc.VarType|str): The data type of variable.
lod_level(int): The level of lod tensor. 0 means it is not a time
Default: None
dtype(np.dtype|core.VarDesc.VarType|str|None): The data type of variable.
Default: None
lod_level (int|None): The level of lod tensor. 0 means it is not a time
series data.
capacity(int): The capacity of Channel variable. Ignored
for other types.
persistable(bool): True if the variable should be saved as check point.
Defaults to False.
stop_gradient(bool): True if the variable will stop to calculate
gradients when backward. Defaults to False.
Default: None
capacity (int|None): The capacity of Channel variable. Ignored for other
types. Default: None
persistable (bool|None): True if the variable is persistable. A persistable
variable will not be deleted after an iteration ending. Defaults: None.
error_clip (BaseErrorClipAttr|None): The error clip attributes of the
corresponding gradient variable. Default: None
stop_gradient (bool): True if the variable will stop to calculate its
gradients when backward. Default: False.
is_data (bool): True if the variable is an input data. Default: False
Notes:
The constructor of Variable should not be invoked directly. Please
use `Block.create_var` to create a variable.
Examples:
.. code-block:: python
cur_program = Program()
cur_block = cur_program.current_block()
new_variable = cur_block.create_var(name="X",
shape=[-1, 23, 48],
dtype='float32')
"""
def
__init__
(
self
,
...
...
@@ -253,13 +270,14 @@ class Variable(object):
Get debug string.
Args:
throw_on_error(bool): True if raise an exception when self is
not
in
tialized.
throw_on_error(bool): True if raise an exception when self is
not ini
tialized.
with_details(bool): more details about variables and parameters
(e.g. trainable, optimize_attr, ...) will be printed when with_details is True
Returns(str): The debug string.
(e.g. trainable, optimize_attr, ...) will be printed when
with_details is True. Default False;
Returns:
str: The debug string.
"""
assert
isinstance
(
throw_on_error
,
bool
)
and
isinstance
(
with_details
,
bool
)
...
...
@@ -276,6 +294,15 @@ class Variable(object):
__repr__
=
__str__
def
set_desc
(
self
,
input
):
"""
Set the variable description.
Args:
input(core.VarDesc): The new VarDesc.
Returns:
None
"""
self
.
desc
=
input
@
property
...
...
@@ -312,6 +339,15 @@ class Variable(object):
return
self
.
desc
.
type
()
def
set_error_clip
(
self
,
error_clip
):
"""
Set the error_clip.
Args:
error_clip(BaseErrorClipAttr) : The new error_clip.
Returns:
None
"""
self
.
error_clip
=
error_clip
...
...
@@ -319,8 +355,8 @@ def get_all_op_protos():
"""
Get all registered op proto from PaddlePaddle C++ end.
Returns
(list): list of OpProto
Returns
:
list: list of OpProto.
"""
protostrs
=
core
.
get_all_op_protos
()
ret_values
=
[]
...
...
@@ -373,9 +409,45 @@ class OpProtoHolder(object):
class
Operator
(
object
):
"""
Python Operator class. The operator represents the build in instructions in a
Block. Users can use the build in instructions to describe their neural
network.
In Fluid, all operations are represented by Operator, and an Operator
is regarded as a built-in instruction of a Block. Users can use the
built-in instructions to describe their neural network.
Args:
block(Block): The block has the current operator.
desc(core.OpDesc): The protobuf description of Operator.
type(str): The type of operator. Default None.
inputs(dict): The input of this Operator. it is a dictionary, for every
element, key is the input parameter name, and value is a list of
variables. Default None.
outputs(dict): The output of this Operator. It is a dictionary; for
every element, key is the output parameter name, and value is a list
of variables. Default None.
attrs(dict): The attributes of this Operator. it is a dictionary, for
every element, key is attribute name, and value is the attribute value.
The attribute type should be as same as the type registered in C++ side.
Default None.
Returns:
Operator: The initialized Operator.
Raises:
ValueError: If the passed inputs, outputs and attrs do not match those
of the Operator registered in C++ side.
Notes:
The constructor of operator should not be invoked directly. Use
Block.append_op or Block.prepend_op instead.
Examples:
.. code-block:: python
cur_program = Program()
cur_block = cur_program.current_block()
# var1 += var2 + var3
cur_block.append_op(type="sum",
inputs={"X": [var1, var2, var3]},
outputs={"Out": [var1]})
"""
OP_WITHOUT_KERNEL_SET
=
{
'feed'
,
'fetch'
,
'save'
,
'load'
,
'recurrent'
,
'go'
,
...
...
@@ -392,31 +464,7 @@ class Operator(object):
inputs
=
None
,
outputs
=
None
,
attrs
=
None
):
"""
Constructor.
Notes: The constructor of operator should not be invoked directly. Use
Block.append_op or Block.prepend_op instead.
>>> cur_program = Program()
>>> cur_block = cur_program.current_block()
>>> # var1 += var2 + var3
>>> cur_block.append_op(type="sum",
>>> inputs={"X": [var1, var2, var3]},
>>> outputs={"Out": [var1]})
Args:
block(Block): The block has the current operator.
desc(core.OpDesc): The protobuf description.
type(str): The type of operator.
inputs(dict): The input dictionary. Key is the input parameter name.
Value is a list of variables.
outputs(dict): The output dictionary which has the same format with
inputs.
attrs(dict): The attributes dictionary. Key is attribute name. Value
is the attribute value. The attribute type should be as same as
the type registered in C++
"""
self
.
block
=
block
self
.
desc
=
desc
self
.
attrs
=
attrs
...
...
@@ -529,12 +577,14 @@ class Operator(object):
def
to_string
(
self
,
throw_on_error
):
"""
To debug string.
Get debug string.
Args:
throw_on_error(bool):
raise exception when self is not initialized
when throw_on_error is True
throw_on_error(bool):
Whether to raise exception if self is not
initialized.
Returns(str): The debug string.
Returns:
str: The debug string.
"""
protostr
=
self
.
desc
.
serialize_to_string
()
...
...
@@ -552,29 +602,45 @@ class Operator(object):
def
input
(
self
,
name
):
"""
Get input arguments by the input parameter name
Args:
name(str): The input parameter name
Get the input arguments according to the input parameter name.
Returns(list): return the list of argument names associated with the
specific
parameter name.
Args:
name(str): The input
parameter name.
Returns:
list: return the list of argument names that associated with
\
the specific parameter name.
"""
return
self
.
desc
.
input
(
name
)
def
rename_input
(
self
,
old_name
,
new_name
):
"""
Rename the `old_name` to `new_name`.
Args:
old_name(str): The old name of the Operator's input.
new_name(str): The new name of the Operator's input.
Returns:
None
"""
self
.
desc
.
rename_input
(
old_name
,
new_name
)
def
rename_output
(
self
,
old_name
,
new_name
):
"""
Rename the `old_name` to `new_name`.
Args:
old_name(str): The old name of the Operator's output.
new_name(str): The new name of the Operator's output.
Returns:
None
"""
self
.
desc
.
rename_output
(
old_name
,
new_name
)
@
property
def
input_names
(
self
):
"""
Get all input parameter names
Returns(list): return a list of input parameter names
"""
return
self
.
desc
.
input_names
()
@
property
...
...
@@ -587,33 +653,23 @@ class Operator(object):
def output(self, name):
    """
    Look up the output arguments bound to an output parameter name.

    The lookup is delegated to the underlying operator desc.

    Args:
        name(str): The output parameter name.

    Returns:
        list: The argument names associated with the given parameter
        name.
    """
    arguments = self.desc.output(name)
    return arguments
@property
def output_names(self):
    """
    All output parameter names of this Operator, as reported by its desc.

    Returns:
        list: The output parameter names.
    """
    names = self.desc.output_names()
    return names
@
property
def
idx
(
self
):
"""
Return the array index of current operator.
Returns(int): The array index in block.ops array
Raises:
ValueError: when the operator is not found.
"""
for
i
,
op
in
enumerate
(
self
.
block
.
ops
):
if
op
==
self
:
return
i
...
...
@@ -622,27 +678,40 @@ class Operator(object):
def has_attr(self, name):
    """
    Tell whether this Operator has an attribute called `name`.

    Args:
        name(str): The attribute name.

    Returns:
        bool: True if the operator desc reports this attribute.
    """
    found = self.desc.has_attr(name)
    return found
def attr_type(self, name):
    """
    Get the type of an attribute, looked up by the attribute's name.

    Args:
        name(str): The attribute name.

    Returns:
        core.AttrType: The attribute type.
    """
    kind = self.desc.attr_type(name)
    return kind
def
set_attr
(
self
,
name
,
val
):
"""
Set the value of attribute by attribute's name.
Args:
name(str): the attribute name.
val(bool|int|str|float|list): the value of the attribute.
Raises:
ValueError: If the type of value doesn't match with desc.attr_type(name).
"""
self
.
attrs
[
name
]
=
val
if
isinstance
(
val
,
Block
):
self
.
desc
.
set_block_attr
(
name
,
val
.
desc
)
...
...
@@ -654,40 +723,39 @@ class Operator(object):
@property
def attr_names(self):
    """
    All attribute names of this Operator, as reported by its desc.

    Returns:
        list: The attribute names.
    """
    names = self.desc.attr_names()
    return names
def attr(self, name):
    """
    Get the value of an attribute by name.

    Args:
        name(str): The attribute name.

    Returns:
        bool|int|str|float|list: The attribute value. The return value
        can be any valid attribute type.
    """
    value = self.desc.attr(name)
    return value
def block_attr(self, name):
    """
    Get a block attribute by name.

    Args:
        name(str): The attribute name.

    Returns:
        int: The block index.
    """
    block_index = self.desc.block_attr(name)
    return block_index
def
all_attrs
(
self
):
"""
Get the attribute dict
Returns(dict): The Operator's attribute dict
Get the attribute dict.
Returns:
dict: The Operator's attribute dict.
"""
attr_names
=
self
.
attr_names
attr_map
=
{}
...
...
@@ -700,6 +768,35 @@ class Operator(object):
class
Block
(
object
):
"""
In Fluid, a Program consists of multiple Blocks, and a Block stores
VarDesc and OpDesc. In a specific Block, a VarDesc has a unique name.
One block could have some child blocks, and child block's name scopes
should inherit the parent's so that OpDesc in child block can reference
a VarDesc that is stored in the parent block.
Please reference the framework.proto for details.
Args:
program(Program): The Program that the Block belongs to.
idx(int): The block's id in the Program.
Notes:
The constructor of Block should not be invoked directly. Please
use `Program.create_block()` to create a block.
Examples:
.. code-block:: python
cur_program = Program()
cur_block = cur_program.current_block()
var = cur_block.create_var(name="X",
shape=[-1, 23, 48],
dtype='float32')
cur_block.append_op(type="abs",
inputs={"X": [var]},
outputs={"Out": [var]})
"""
def
__init__
(
self
,
program
,
idx
):
self
.
desc
=
program
.
desc
.
block
(
idx
)
self
.
vars
=
collections
.
OrderedDict
()
# var_name --> var
...
...
@@ -712,15 +809,17 @@ class Block(object):
def
to_string
(
self
,
throw_on_error
,
with_details
=
False
):
"""
To debug string.
Get debug string.
Args:
throw_on_error(bool): raise exception when self is not initialized
when throw_on_error is True
when throw_on_error is True
.
with_details(bool): more details about variables and parameters
(e.g. trainable, optimize_attr, ...) will be printed when with_details is True
Returns(str): The debug string.
(e.g. trainable, optimize_attr, ...) will be printed when
with_details is True. Default False.
Returns:
str: The debug string.
"""
assert
isinstance
(
throw_on_error
,
bool
)
and
isinstance
(
with_details
,
bool
)
...
...
@@ -752,6 +851,15 @@ class Block(object):
return
self
.
desc
.
get_forward_block_idx
()
def set_forward_block_idx(self, idx):
    """
    Set the forward block index on the underlying block desc.

    Args:
        idx(int): The block index.

    Returns:
        None
    """
    block_desc = self.desc
    block_desc.set_forward_block_idx(idx)
@
property
...
...
@@ -759,6 +867,19 @@ class Block(object):
return
self
.
desc
.
id
def
var
(
self
,
name
):
"""
Get a Variable by name from this block.
Args:
name(str): the Variable's name.
Raises:
TypeError: If the input's type is not str.
ValueError: If this block doesn't have a Variable with the given name.
Returns:
Variable: the Variable with the giving name.
"""
if
not
isinstance
(
name
,
basestring
):
raise
TypeError
(
"var require string as parameter, but get %s instead."
%
...
...
@@ -769,6 +890,19 @@ class Block(object):
return
v
def
var_recursive
(
self
,
name
):
"""
Get a Variable by name from this block recursively.
Args:
name(str): the Variable's name.
Raises:
ValueError: If neither this block nor its parent blocks
have a Variable with the given name.
Returns:
Variable: the Variable with the giving name.
"""
frontier
=
list
()
visited
=
set
()
...
...
@@ -815,6 +949,18 @@ class Block(object):
def
rename_var
(
self
,
name
,
new_name
):
"""
Rename variable in vars and ops' inputs and outputs
Args:
name(str): the name that need to be renamed.
new_name(str): the name that need to rename to.
Raises:
ValueError: If this block doesn't have a var with the given name,
or the type of the var with the given name is not Parameter
or Variable.
Returns:
Variable: the Variable with the giving name.
"""
if
not
self
.
has_var
(
name
):
raise
ValueError
(
"var %s is not in current block"
%
name
)
...
...
@@ -878,12 +1024,27 @@ class Block(object):
return
param
def append_op(self, *args, **kwargs):
    """
    Append a new Operator, built from the given arguments, to this block.

    A fresh op desc is first appended to the block desc; the Python
    Operator wrapping it is then recorded in `self.ops`.

    Args:
        *args: Extra positional arguments forwarded to `Operator`.
        **kwargs: Extra keyword arguments forwarded to `Operator`.

    Returns:
        Operator: The appended Operator.
    """
    new_desc = self.desc.append_op()
    new_op = Operator(block=self, desc=new_desc, *args, **kwargs)
    self.ops.append(new_op)
    return new_op
def
insert_op
(
self
,
index
,
*
args
,
**
kwargs
):
"""
Insert an Operator according to the given arguments.
Args:
index(int): the position at which to insert the operator.
Returns:
Operator: the insert Operator.
"""
self
.
sync_with_cpp
()
op_desc
=
self
.
desc
.
insert_op
(
index
)
op
=
Operator
(
block
=
self
,
desc
=
op_desc
,
*
args
,
**
kwargs
)
...
...
@@ -891,11 +1052,30 @@ class Block(object):
return
op
def remove_op(self, index):
    """
    Remove the operator at the given position from this block.

    Args:
        index(int): The position of the operator to remove.

    Returns:
        None
    """
    # Sync first, as the original does, before touching desc and self.ops.
    self.sync_with_cpp()
    # The desc API takes a (start, end) pair; index + 1 presumably makes it a
    # half-open range covering exactly one operator -- TODO confirm.
    self.desc.remove_op(index, index + 1)
    del self.ops[index]
def slice_ops(self, start, end):
    """
    Return the Operators located between `start` and `end`.

    The bounds follow ordinary Python slicing on `self.ops`.

    Args:
        start(int): The start position.
        end(int): The end position.

    Returns:
        list: The Operators between start and end.
    """
    window = slice(start, end)
    return self.ops[window]
def
prepend_op
(
self
,
*
args
,
**
kwargs
):
...
...
@@ -906,9 +1086,8 @@ class Block(object):
def
sync_with_cpp
(
self
):
"""
Sync from the desc on the c++ end.
This method is used to synchronize the c++ desc instance generated by backward.
Sync from the desc on the c++ end. This method is used to synchronize
the c++ desc instance generated by backward.
"""
# sync variables from cpp
for
var
in
self
.
desc
.
all_vars
():
...
...
@@ -973,9 +1152,14 @@ class Block(object):
def
copy_param_info_from
(
self
,
other
):
"""
Copy the information of parameters from the other block
Copy the information of parameters from the other block.
Args:
other(Block): the other block
other(Block): the other block.
Raises:
ValueError: If the type of input is not Block, or the `other` and this
block are not in the same topology.
Returns:
None
...
...
@@ -1007,11 +1191,12 @@ class Block(object):
def
clone_variable
(
self
,
var
):
"""
Clone a variable into current block.
Args:
var: the variable to be cloned.
Returns:
T
he new variable cloned from 'var' in current block.
Variable: t
he new variable cloned from 'var' in current block.
"""
assert
isinstance
(
var
,
Variable
)
ret_var
=
None
...
...
@@ -1051,23 +1236,18 @@ class Program(object):
Notes: we have default_startup_program and default_main_program
by default, a pair of them will shared the parameters.
The default_startup_program only run once to initialize parameters,
default_main_program run in every minibatch and adjust the weights.
Args:
None
default_main_program run in every mini batch and adjust the weights.
Returns:
Python Program
An empty program.
Examples:
.. code-block:: python
main_program = Program()
startup_program = Program()
with fluid.program_guard(main_program=main_program, startup_program=startup_program):
fluid.layers.data(name="x", shape=[-1, 784], dtype='float32')
fluid.layers.data(name="y", shape=[-1, 1], dtype='int32')
fluid.layers.fc(name="fc", shape=[10], dtype='float32', act="relu")
>>> main_program = fluid.Program()
>>> startup_program = fluid.Program()
>>> with fluid.program_guard(main_program=main_program, startup_program=startup_program):
>>> fluid.layers.data(name="x", shape=[-1, 784], dtype='float32')
>>> fluid.layers.data(name="y", shape=[-1, 1], dtype='int32')
>>> fluid.layers.fc(name="fc", shape=[10], dtype='float32', act="relu")
"""
...
...
@@ -1081,6 +1261,19 @@ class Program(object):
@property
def op_role(self):
    """
    The operator role: one of the enum values {Forward, Backward, Optimize}.

    Notes: this is a low level API. It is used only for ParallelExecutor to
    duplicate or schedule operators to devices.

    For example, the forward operator should be executed on every device.
    The backward operator should be executed on every device, and the
    parameter gradient of the backward operator (use :code:`op_role_var` to
    get this variable) should be merged to one device. The optimization
    operators should be executed on only one device and broadcast the
    optimization result, i.e., the new parameter, to every other device.
    """
    current = self._current_role
    return current
@
op_role
.
setter
...
...
@@ -1089,6 +1282,13 @@ class Program(object):
@property
def op_role_var(self):
    """
    The auxiliary variables for the :code:`op_role` property.

    See Also: :code:`Program.op_role`'s documentation for details.

    Notes: This is a very low-level API. Users should not use it directly.
    """
    role_vars = self._op_role_var
    return role_vars
@
op_role_var
.
setter
...
...
@@ -1097,6 +1297,21 @@ class Program(object):
@
contextlib
.
contextmanager
def
optimized_guard
(
self
,
var
):
"""
A with guard to set :code:`Optimization` :code:`OpRole` and
:code:`OpRoleVar` automatically.
Notes: This is a very low level API. Users should not use it directly.
Args:
var(Variable|str): The variable (name) to be optimized.
Examples:
>>> p, g = backward(...)
>>> with program.optimized_guard(p):
>>> p = p - 0.001 * g
"""
OpRole
=
core
.
op_proto_and_checker_maker
.
OpRole
self
.
_current_role
=
OpRole
.
Optimize
self
.
_op_role_var
=
[
var
.
name
if
isinstance
(
var
,
Variable
)
else
var
]
...
...
@@ -1105,18 +1320,35 @@ class Program(object):
self
.
_current_role
=
OpRole
.
Forward
def __str__(self):
    """
    Get the protobuf debug string of this Program.

    Equivalent to calling `to_string` with `True` as its first argument.

    Returns:
        (str): The protobuf debug string.

    Raises:
        ValueError: If any of required fields is not set.
    """
    debug_string = self.to_string(True)
    return debug_string
def
to_string
(
self
,
throw_on_error
,
with_details
=
False
):
"""
To debug string.
Args:
throw_on_error(bool): raise exception when self is not initialized
when throw_on_error is True
with_details(bool): more details about variables and parameters
(e.g. trainable, optimize_attr, ...) will be printed when with_details is True
throw_on_error(bool): raise Value error when any of required fields
is not set.
Returns(str): The debug string.
with_details(bool): True if more details about variables and
parameters, e.g., :code:`trainable`, :code:`optimize_attr`, need
to print.
Returns
(str): The debug string.
Raises:
ValueError: If any of required fields is not set and throw_on_error is
True.
"""
assert
isinstance
(
throw_on_error
,
bool
)
and
isinstance
(
with_details
,
...
...
@@ -1132,25 +1364,89 @@ class Program(object):
return
res_str
def get_desc(self):
    """
    Return the C++ side `ProgramDesc` object held by this Program. The C++
    object is exposed by :code:`pybind`.

    Notes: This is a very low level API. Users should not use this API
    directly.
    """
    program_desc = self.desc
    return program_desc
def
clone
(
self
,
for_test
=
False
):
"""
Clone the Program object
Args:
for_test(bool): indicate whether clone for test.
"""
Create a new, duplicated program.
Set for_test to False when we want to clone the program for training.
Set for_test to True when we want to clone the program for testing.
Some operators, e.g., :code:`batch_norm`, behave differently between
training and testing. They have an attribute, :code:`is_test`, to
control this behaviour. This method will change the :code:`is_test`
attribute of them to :code:`True` when :code:`for_test=True`.
* Set for_test to False when we want to clone the program for training.
* Set for_test to True when we want to clone the program for testing.
Notes: This API DOES NOT prune any operator. Use
:code:`clone(for_test=True)` before backward and optimization please.
Args:
for_test(bool): Some operators, such as batch_norm and drop_out ops,
behave differently in training and testing. If for_test is True,
the is_test attributes in these operators will be set to True for
testing purposes, otherwise, they remain unchanged.
for_test(bool): True if change the :code:`is_test` attribute of
operators to :code:`True`.
Returns:
Program: The cloned Program object.
Program: The new, duplicated Program object.
Examples:
1. To clone a test program, the sample code is:
>>> import paddle.fluid as fluid
>>> train_program = fluid.Program()
>>> startup_program = fluid.Program()
>>> with fluid.program_guard(train_program, startup_program):
>>> img = fluid.layers.data(name='image', shape=[784])
>>> hidden = fluid.layers.fc(input=img, size=200, act='relu')
>>> hidden = fluid.layers.dropout(hidden, dropout_prob=0.5)
>>> loss = fluid.layers.cross_entropy(
>>> input=fluid.layers.fc(hidden, size=10, act='softmax'),
>>> label=fluid.layers.data(name='label', shape=[1], dtype='int64'))
>>>
>>> test_program = train_program.clone(for_test=True)
>>>
>>> sgd = fluid.optimizer.SGD(learning_rate=1e-3)
>>> with fluid.program_guard(train_program, startup_program):
>>> sgd.minimize(loss)
2. The :code:`clone` method can be avoided if you create the program for
training and the program for testing individually.
>>> import paddle.fluid as fluid
>>>
>>> def network(is_test):
>>> img = fluid.layers.data(name='image', shape=[784])
>>> hidden = fluid.layers.fc(input=img, size=200, act='relu')
>>> hidden = fluid.layers.dropout(hidden, dropout_prob=0.5, is_test=is_test)
>>> loss = fluid.layers.cross_entropy(
>>> input=fluid.layers.fc(hidden, size=10, act='softmax'),
>>> label=fluid.layers.data(name='label', shape=[1], dtype='int64'))
>>> return loss
>>>
>>> train_program = fluid.Program()
>>> startup_program = fluid.Program()
>>> test_program = fluid.Program()
>>>
>>> with fluid.program_guard(train_program, startup_program):
>>> with fluid.unique_name.guard():
>>> loss = network(is_test=False)
>>> sgd = fluid.optimizer.SGD(learning_rate=1e-3)
>>> sgd.minimize(loss)
>>>
>>> # the test startup program is not used.
>>> with fluid.program_guard(test_program, fluid.Program()):
>>> with fluid.unique_name.guard():
>>> loss = network(is_test=True)
The two code snippets above will generate same programs.
"""
if
for_test
:
p
=
self
.
inference_optimize
()
...
...
@@ -1165,6 +1461,21 @@ class Program(object):
return
p
def
prune
(
self
,
targets
):
"""
Prune operators and variables which are not needed to generate
:code:`targets`.
Notes: This is a very low level API. Users should not use this API
directly. This API is in flux and not stable.
Args:
targets(list|Variable|Operator): A list of variables or operators
that the pruned program must still be able to generate.
Returns:
Program: A new, pruned program.
"""
if
not
isinstance
(
targets
,
list
):
targets
=
[
targets
]
targets_idx
=
[]
...
...
@@ -1199,6 +1510,17 @@ class Program(object):
return
res
def
inference_optimize
(
self
):
"""
This method will create a new program and change the :code:`is_test`
attribute of operators to :code:`True`. All the :code:`Parameter`
information will be lost.
Notes: This API is a very low level API. Use
:code:`Program.clone(for_test=True)` instead.
Returns:
Program: The new program.
"""
# this is an alternative implement before
# core.inference_optimize being fixed.
res
=
Program
()
...
...
@@ -1215,6 +1537,18 @@ class Program(object):
@
staticmethod
def
parse_from_string
(
binary_str
):
"""
Deserialize a program desc from protobuf binary string.
Notes: All information about parameters will be lost after serialization
and deserialization.
Args:
binary_str(str): The binary protobuf string.
Returns:
Program: A deserialized program desc.
"""
p
=
Program
()
p
.
desc
=
core
.
ProgramDesc
(
binary_str
)
p
.
blocks
=
[
Block
(
p
,
i
)
for
i
in
xrange
(
p
.
desc
.
num_blocks
())]
...
...
@@ -1223,10 +1557,19 @@ class Program(object):
@property
def random_seed(self):
    """
    The default random seed for random operators in this Program. Zero
    means the random seed is taken from the random device.

    Notes: It must be set before the operators have been added.
    """
    seed = self._seed
    return seed
@property
def num_blocks(self):
    """
    The number of blocks in this program, as reported by the program desc.
    """
    count = self.desc.num_blocks()
    return count
@
random_seed
.
setter
...
...
@@ -1239,15 +1582,40 @@ class Program(object):
return
str
(
self
)
def global_block(self):
    """
    Get the first block of this program.
    """
    first_block = self.blocks[0]
    return first_block
def block(self, index):
    """
    Get the block at position :code:`index` in this program.

    Args:
        index(int): The index of the block to get.

    Returns:
        Block: The block stored at :code:`index`.
    """
    selected = self.blocks[index]
    return selected
def current_block(self):
    """
    Get the current block, i.e. the block at `self.current_block_idx`.
    The :code:`current` block is the block to append operators to.
    """
    position = self.current_block_idx
    return self.blocks[position]
def
create_block
(
self
,
parent_idx
=
None
):
"""
Create a new block with the :code:`parent_idx` and change the current block
to new block.
Args:
parent_idx(int): The parent block index.
Returns:
Block: The new block.
"""
new_block_idx
=
len
(
self
.
blocks
)
parent
=
self
.
current_block
()
if
parent_idx
is
None
else
self
.
block
(
parent_idx
)
...
...
@@ -1257,9 +1625,24 @@ class Program(object):
return
self
.
current_block
()
def rollback(self):
    """
    Exit a code block, i.e., roll back to the parent block.

    The current block index is replaced by the `parent_idx` of the
    current block.

    Returns:
        None
    """
    leaving = self.current_block()
    self.current_block_idx = leaving.parent_idx
def
sync_with_cpp
(
self
):
"""
Synchronize Python instance to its binding C++ object instance.
If the program is modified in C++ space, this method should be invoked.
Notes: This is a very low level API. Users should not invoke it
directly.
Returns:
None
"""
for
block_idx
in
range
(
len
(
self
.
blocks
),
self
.
desc
.
num_blocks
()):
self
.
blocks
.
append
(
Block
(
self
,
block_idx
))
for
block
in
self
.
blocks
:
...
...
@@ -1269,6 +1652,9 @@ class Program(object):
"""
Copy the information of parameters from other program.
Notes: This is a very low level API. Users should not invoke it
directly.
Args:
other(Program): Other program
...
...
@@ -1288,6 +1674,9 @@ class Program(object):
"""
Copy the information of data variables from other program.
Notes: This is a very low level API. Users should not invoke it
directly.
Args:
other(Program): Other program
...
...
@@ -1306,12 +1695,41 @@ class Program(object):
self
.
global_block
().
var
(
var
.
name
).
is_data
=
True
def list_vars(self):
    """
    Get all variables from this Program. An iterable object is returned.

    Blocks are visited in the order of `self.blocks`; within a block the
    variables come in the iteration order of that block's `vars` mapping.

    Returns:
        iterable: The generator will yield every variable in this program.
    """
    for each_block in self.blocks:
        # dict.itervalues() exists only on Python 2. Iterating a snapshot of
        # values() works on both Python 2 and 3.
        for each_var in list(each_block.vars.values()):
            yield each_var
class
Parameter
(
Variable
):
"""
Parameter is derived from Variable. A parameter is a persistable
Variable, and will be updated by optimizers after each iteration.
The training of a neural network is essentially the updating of
its parameters.
Relative to a general Variable, a Parameter has several its own
member variables:
Args:
trainable(bool): True if the parameter need to be updated after
iterations.
optimize_attr(map): Parameter attributes related with optimizing.
Currently, it only contains 'learning_rate'.
Default: {'learning_rate': 1.0}
regularizer(WeightDecayRegularizer): The Regularizer which will
be applied on the parameter. Default: None
gradient_clip_attr(BaseGradientClipAttr): The gradient clip strategy
which will be applied on the parameter. Default: None
do_model_average(bool): True if the model average strategy will
be applied on this parameter.
"""
def
__init__
(
self
,
block
,
shape
,
dtype
,
**
kwargs
):
if
shape
is
None
or
dtype
is
None
:
raise
ValueError
(
"Parameter must set shape and dtype"
)
...
...
@@ -1374,8 +1792,15 @@ _startup_program_ = Program()
def
default_startup_program
():
"""
Get default startup program. In startup program, Paddle will initialize
parameters, initialize nccl handle, etc.
Get default/global startup program.
The layer function in :code:`fluid.layers` will create parameters, readers,
NCCL handles as global variables. The :code:`startup_program` will
initialize them by the operators in startup program. The layer function will
append these initialization operators into startup program.
This method will return the :code:`default` or the :code:`current` startup
program. Users can use :code:`fluid.program_guard` to switch program.
Returns:
Program: startup program
...
...
@@ -1385,7 +1810,15 @@ def default_startup_program():
def
default_main_program
():
"""
Get default main program. The main program is used for training or testing.
Get default/global main program. The main program is used for training or
testing.
All layer functions in :code:`fluid.layers` will append operators and
variables to the :code:`default_main_program`.
The :code:`default_main_program` is the default program in a lot of APIs.
For example, the :code:`Executor.run()` will execute the
:code:`default_main_program` when the program is not specified.
Returns:
Program: main program
...
...
@@ -1427,20 +1860,34 @@ def switch_startup_program(program):
@
contextlib
.
contextmanager
def
program_guard
(
main_program
,
startup_program
=
None
):
"""
Switch program with `with` statement
Change the global main program and startup program with `with` statement.
Layer functions in the Python `with` block will append operators and
variables to the new main programs.
Examples:
>>> with program_guard(Program()):
>>> data = fluid.layers.data(...)
>>> hidden = fluid.layers.fc(...)
>>> import paddle.fluid as fluid
>>> main_program = fluid.Program()
>>> startup_program = fluid.Program()
>>> with fluid.program_guard(main_program, startup_program):
>>> data = fluid.layers.data(...)
>>> hidden = fluid.layers.fc(...)
Notes: The temporary :code:`Program` can be used if the user does not need
to construct either of startup program or main program.
Examples:
>>> import paddle.fluid as fluid
>>> main_program = fluid.Program()
>>> # does not care about startup program. Just pass a temporary value.
>>> with fluid.program_guard(main_program, fluid.Program()):
>>> data = ...
Args:
main_program(Program): New main program inside `with` statement
main_program(Program): New main program inside `with` statement
.
startup_program(Program): New startup program inside `with` statement.
None means do not change startup program.
Returns:
None
"""
if
not
isinstance
(
main_program
,
Program
):
raise
TypeError
(
"main_program should be Program"
)
...
...
@@ -1457,7 +1904,8 @@ def program_guard(main_program, startup_program=None):
def
get_var
(
name
,
program
=
None
):
"""
Get a variable by name from the global block of a program
Get a variable by name from the global block of a program.
Args:
name(str): name of the variable
program(Program|None): program object.
...
...
python/paddle/fluid/io.py
浏览文件 @
e02cbf35
...
...
@@ -30,20 +30,42 @@ __all__ = [
def is_parameter(var):
    """
    Check whether the given variable is an instance of Parameter.

    Args:
        var(Variable): The variable to be checked.

    Returns:
        bool: True if the given `var` is an instance of Parameter,
        False if not.

    Examples:
        .. code-block:: python

            param = fluid.default_main_program().global_block().var('fc.w')
            res = fluid.io.is_parameter(param)
    """
    result = isinstance(var, Parameter)
    return result
def
is_persistable
(
var
):
"""
Check whether the given variable is persistable.
Args:
var(Variable): The variable to be checked.
Returns:
bool: True if the given `var` is persistable
False if not.
Examples:
.. code-block:: python
param = fluid.default_main_program().global_block().var('fc.w')
res = fluid.io.is_persistable(param)
"""
if
var
.
desc
.
type
()
==
core
.
VarDesc
.
VarType
.
FEED_MINIBATCH
or
\
var
.
desc
.
type
()
==
core
.
VarDesc
.
VarType
.
FETCH_LIST
:
return
False
...
...
@@ -68,20 +90,69 @@ def save_vars(executor,
predicate
=
None
,
filename
=
None
):
"""
Save variables to directory by executor.
Save variables to the given directory by executor.
There are two ways to specify variables to be saved: The first way, list
variables in a list and assign it to the `vars`. The second way, assign the
`main_program` with an existing program, then all variables in the program
will be saved. The first way has a higher priority. In other words, if `vars`
are assigned, the `main_program` and the `predicate` will be ignored.
:param executor: executor that save variable
:param dirname: directory path
:param main_program: program. If vars is None, then filter all variables in this
program which fit `predicate`. Default default_main_program.
:param predicate: The Predicate describes a callable that returns a variable
as a bool. If it returns true, the corresponding input variable will be saved.
:param vars: variables need to be saved. If vars is specified, program & predicate
will be ignored
:param filename: The name of a single file that all vars are saved to.
If it is None, save variables to separate files.
The `dirname` are used to specify the folder where to save variables.
If you prefer to save variables in separate files in the folder `dirname`,
set `filename` None; if you prefer to save all variables in a single file,
use `filename` to specify it.
:return: None
Args:
executor(Executor): The executor to run for saving variables.
dirname(str): The directory path.
main_program(Program|None): The program whose variables will be saved.
If it is None, the default main program will
be used automatically.
Default: None
vars(list[Variable]|None): The list that contains all variables to save.
It has a higher priority than the `main_program`.
Default: None
predicate(function|None): If it is not None, only variables in the
`main_program` that makes predicate(variable)==True
will be saved. It only works when we are using the
`main_program` to specify variables (In other words
`vars` is None).
Default: None
filename(str|None): The file which to save all variables. If you prefer to save
variables separately, set it to None.
Default: None
Returns:
None
Raises:
TypeError: If `main_program` is not an instance of Program nor None.
Examples:
.. code-block:: python
exe = fluid.Executor(fluid.CPUPlace())
param_path = "./my_paddle_model"
# The first usage: using `main_program` to specify variables
def name_has_fc(var):
res = "fc" in var.name
return res
prog = fluid.default_main_program()
fluid.io.save_vars(executor=exe, dirname=param_path, main_program=prog,
vars=None, predicate=name_has_fc)
# All variables in `main_program` whose name includes "fc" will be saved.
# And variables are going to be saved separately.
# The second usage: using `vars` to specify variables
var_list = [var_a, var_b, var_c]
fluid.io.save_vars(executor=exe, dirname=param_path, vars=var_list,
filename="vars_file")
# var_a, var_b and var_c will be saved. And they are going to be
# saved in the same file named 'vars_file' in the path "./my_paddle_model".
"""
if
vars
is
None
:
if
main_program
is
None
:
...
...
@@ -129,7 +200,42 @@ def save_vars(executor,
def
save_params
(
executor
,
dirname
,
main_program
=
None
,
filename
=
None
):
"""
Save all parameters to directory with executor.
This function filters out all parameters from the given `main_program`
and then save them to the folder `dirname` or the file `filename`.
Use the `dirname` to specify the saving folder. If you would like to
save parameters in separate files, set `filename` None; if you would
like to save all parameters in a single file, use `filename` to specify
the file name.
NOTICE: Some variables are not Parameter while they are necessary for
training. So you can NOT save and continue your training just by
`save_params()` and `load_params()`. Please use `save_persistables()`
and `load_persistables()` instead.
Args:
executor(Executor): The executor to run for saving parameters.
dirname(str): The saving directory path.
main_program(Program|None): The program whose parameters will be
saved. If it is None, the default
main program will be used automatically.
Default: None
filename(str|None): The file to save all parameters. If you prefer
to save parameters in different files, set it
to None.
Default: None
Returns:
None
Examples:
.. code-block:: python
exe = fluid.Executor(fluid.CPUPlace())
param_path = "./my_paddle_model"
prog = fluid.default_main_program()
fluid.io.save_params(executor=exe, dirname=param_path,
main_program=None)
"""
save_vars
(
executor
,
...
...
@@ -142,7 +248,37 @@ def save_params(executor, dirname, main_program=None, filename=None):
def
save_persistables
(
executor
,
dirname
,
main_program
=
None
,
filename
=
None
):
"""
Save all persistables to directory with executor.
This function filters out all variables with `persistable==True` from the
given `main_program` and then saves these variables to the folder `dirname`
or file `filename`.
The `dirname` is used to specify the folder where persistable variables
are going to be saved. If you would like to save variables in separate
files, set `filename` None; if you would like to save all variables in a
single file, use `filename` to specify the file name.
Args:
executor(Executor): The executor to run for saving persistable variables.
dirname(str): The directory path.
main_program(Program|None): The program whose persistable variables will
be saved. If it is None, the default main
program will be used automatically.
Default: None
filename(str|None): The file to save all variables. If you prefer to
save variables in different files, set it to None.
Default: None
Returns:
None
Examples:
.. code-block:: python
exe = fluid.Executor(fluid.CPUPlace())
param_path = "./my_paddle_model"
prog = fluid.default_main_program()
fluid.io.save_persistables(executor=exe, dirname=param_path,
main_program=None)
"""
save_vars
(
executor
,
...
...
@@ -160,20 +296,69 @@ def load_vars(executor,
predicate
=
None
,
filename
=
None
):
"""
Load variables from directory by executor.
Load variables from the given directory by executor.
There are two ways to specify variables to be loaded: The first way, list
variables in a list and assign it to the `vars`. The second way, assign the
`main_program` with an existing program, then all variables in the program
will be loaded. The first way has a higher priority. In other words if `vars`
are assigned, the `main_program` and the `predicate` will be ignored.
The `dirname` are used to specify the folder where to load variables.
If variables were saved in separate files in the folder `dirname`,
set `filename` None; if all variables were saved in a single file,
use `filename` to specify it.
:param executor: executor that load variable
:param dirname: directory path
:param main_program: program. If vars is None, then filter all variables in this
program which fit `predicate`. Default default_main_program().
:param predicate: The Predicate describes a callable that returns a variable
as a bool. If it returns true, the corresponding input variable will be loaded.
:param vars: variables need to be loaded. If vars is specified, program &
predicate will be ignored
:param filename: The name of the single file that all vars are loaded from.
If it is None, load variables from separate files.
Args:
executor(Executor): The executor to run for loading variables.
dirname(str): The directory path.
main_program(Program|None): The program whose variables will be loaded.
If it is None, the default main program will
be used automatically.
Default: None
vars(list[Variable]|None): The list that contains all variables to load.
It has a higher priority than the `main_program`.
Default: None
predicate(function|None): If it is not None, only variables in the
`main_program` that makes predicate(variable)==True
will be loaded. It only works when we are using the
`main_program` to specify variables (In other words
`vars` is None).
Default: None
filename(str|None): The file which saved all required variables. If variables
were saved in different files, set it to None.
Default: None
Returns:
None
Raises:
TypeError: If `main_program` is not an instance of Program nor None.
Examples:
.. code-block:: python
exe = fluid.Executor(fluid.CPUPlace())
param_path = "./my_paddle_model"
# The first usage: using `main_program` to specify variables
def name_has_fc(var):
res = "fc" in var.name
return res
:return: None
prog = fluid.default_main_program()
fluid.io.load_vars(executor=exe, dirname=param_path, main_program=prog,
vars=None, predicate=name_has_fc)
# All variables in `main_program` whose name includes "fc" will be loaded.
# And all the variables are supposed to have been saved in different files.
# The second usage: using `vars` to specify variables
var_list = [var_a, var_b, var_c]
fluid.io.load_vars(executor=exe, dirname=param_path, vars=var_list,
filename="vars_file")
# var_a, var_b and var_c will be loaded. And they are supposed to have
# been saved in the same file named 'vars_file' in the path "./my_paddle_model".
"""
if
vars
is
None
:
if
main_program
is
None
:
...
...
@@ -221,7 +406,42 @@ def load_vars(executor,
def
load_params
(
executor
,
dirname
,
main_program
=
None
,
filename
=
None
):
"""
load all parameters from directory by executor.
This function filters out all parameters from the given `main_program`
and then tries to load these parameters from the folder `dirname` or
the file `filename`.
Use the `dirname` to specify the folder where parameters were saved. If
parameters were saved in separate files in the folder `dirname`, set
`filename` None; if all parameters were saved in a single file, use
`filename` to specify the file name.
NOTICE: Some variables are not Parameter while they are necessary for
training. So you can NOT save and continue your training just by
`save_params()` and `load_params()`. Please use `save_persistables()`
and `load_persistables()` instead.
Args:
executor(Executor): The executor to run for loading parameters.
dirname(str): The directory path.
main_program(Program|None): The program whose parameters will be
loaded. If it is None, the default
main program will be used automatically.
Default: None
filename(str|None): The file which saved all parameters. If parameters
were saved in different files, set it to None.
Default: None
Returns:
None
Examples:
.. code-block:: python
exe = fluid.Executor(fluid.CPUPlace())
param_path = "./my_paddle_model"
prog = fluid.default_main_program()
fluid.io.load_params(executor=exe, dirname=param_path,
main_program=None)
"""
load_vars
(
executor
,
...
...
@@ -233,7 +453,37 @@ def load_params(executor, dirname, main_program=None, filename=None):
def
load_persistables
(
executor
,
dirname
,
main_program
=
None
,
filename
=
None
):
"""
load all persistables from directory by executor.
This function filters out all variables with `persistable==True` from the
given `main_program` and then tries to load these variables from the folder
`dirname` or the file `filename`.
Use the `dirname` to specify the folder where persistable variables were
saved. If variables were saved in separate files, set `filename` None;
if all variables were saved in a single file, use `filename` to specify
the file name.
Args:
executor(Executor): The executor to run for loading persistable variables.
dirname(str): The directory path.
main_program(Program|None): The program whose persistable variables will
be loaded. If it is None, the default main
program will be used automatically.
Default: None
filename(str|None): The file which saved all variables. If variables were
saved in different files, set it to None.
Default: None
Returns:
None
Examples:
.. code-block:: python
exe = fluid.Executor(fluid.CPUPlace())
param_path = "./my_paddle_model"
prog = fluid.default_main_program()
fluid.io.load_persistables(executor=exe, dirname=param_path,
main_program=None)
"""
load_vars
(
executor
,
...
...
@@ -306,22 +556,48 @@ def save_inference_model(dirname,
model_filename
=
None
,
params_filename
=
None
):
"""
Build a model especially for inference,
and save it to directory by the executor.
Prune the given `main_program` to build a new program especially for inference,
and then save it and all related parameters to given `dirname` by the `executor`.
Args:
dirname(str): The directory path to save the inference model.
feeded_var_names(list[str]): Names of variables that need to be fed data
during inference.
target_vars(list[Variable]): Variables from which we can get inference
results.
executor(Executor): The executor that saves the inference model.
main_program(Program|None): The original program, which will be pruned to
build the inference model. If it is set to None,
the default main program will be used.
Default: None.
model_filename(str|None): The name of file to save the inference program
itself. If it is set to None, the default filename
`__model__` will be used.
params_filename(str|None): The name of file to save all related parameters.
If it is set to None, parameters will be saved
in separate files.
:param dirname: directory path
:param feeded_var_names: Names of variables that need to be feeded data during inference
:param target_vars: Variables from which we can get inference results.
:param executor: executor that save inference model
:param main_program: original program, which will be pruned to build the inference model.
Default default_main_program().
:param model_filename: The name of file to save inference program.
If not specified, default filename `__model__` will be used.
:param params_filename: The name of file to save parameters.
It is used for the case that all parameters are saved in a single binary file.
If not specified, parameters are considered saved in separate files.
Returns:
None
Raises:
ValueError: If `feeded_var_names` is not a list of basestring.
ValueError: If `target_vars` is not a list of Variable.
Examples:
.. code-block:: python
exe = fluid.Executor(fluid.CPUPlace())
path = "./infer_model"
fluid.io.save_inference_model(dirname=path, feeded_var_names=['img'],
target_vars=[predict_var], executor=exe)
# In this example, the function will prune the default main program
# to make it suitable for inferring the `predict_var`. The pruned
# inference program is going to be saved in the "./infer_model/__model__"
# and parameters are going to be saved in separate files under folder
# "./infer_model".
:return: None
"""
if
isinstance
(
feeded_var_names
,
basestring
):
feeded_var_names
=
[
feeded_var_names
]
...
...
@@ -382,18 +658,49 @@ def load_inference_model(dirname,
"""
Load inference model from a directory
:param dirname: directory path
:param executor: executor that load inference model
:param model_filename: The name of file to load inference program.
If not specified, default filename `__model__` will be used.
:param params_filename: The name of file to load parameters.
It is used for the case that all parameters are saved in a single binary file.
If not specified, parameters are considered saved in separate files.
Args:
dirname(str): The directory path
executor(Executor): The executor to run for loading inference model.
model_filename(str|None): The name of file to load inference program.
If it is None, the default filename
'__model__' will be used.
Default: None
params_filename(str|None): The name of file to load all parameters.
It is only used for the case that all
parameters were saved in a single binary
file. If parameters were saved in separate
files, set it as 'None'.
Returns:
tuple: The return of this function is a tuple with three elements:
(program, feed_target_names, fetch_targets). The `program` is a
Program, it's the program for inference. The `feed_target_names` is
a list of str, it contains Names of variables that need to feed
data in the inference program. The `fetch_targets` is a list of
Variable. It contains variables from which we can get inference
results.
Raises:
ValueError: If `dirname` is not an existing directory.
Examples:
.. code-block:: python
exe = fluid.Executor(fluid.CPUPlace())
path = "./infer_model"
[inference_program, feed_target_names, fetch_targets] = (
fluid.io.load_inference_model(dirname=path, executor=exe))
results = exe.run(inference_program,
feed={feed_target_names[0]: tensor_img},
fetch_list=fetch_targets)
# In this example, the inference program was saved in the
# "./infer_model/__model__" and parameters were saved in
# separate files in "./infer_model".
# After getting inference program, feed target names and
# fetch targets, we can use an Executor to run the inference
# program to get the inference result.
:return: [program, feed_target_names, fetch_targets]
program: program especially for inference.
feed_target_names: Names of variables that need to feed data
fetch_targets: Variables from which we can get inference results.
"""
if
not
os
.
path
.
isdir
(
dirname
):
raise
ValueError
(
"There is no directory named '%s'"
,
dirname
)
...
...
@@ -424,12 +731,25 @@ def load_inference_model(dirname,
def
get_parameter_value
(
para
,
executor
):
"""
Get the LoDTensor for the parameter
Get the LoDTensor value of the given parameter.
Args:
para(Parameter): The parameter to get value from.
executor(Executor): The executor to run for retrieving the value.
Returns:
numpy.array: The given parameter's values.
Raises:
AssertionError: If the `para` is not an instance of Parameter.
:param executor: executor for retrieving the value
:param para: the given parameter
Examples:
.. code-block:: python
exe = fluid.Executor(fluid.CPUPlace())
param = fluid.default_main_program().global_block().var('fc.w')
p = fluid.io.get_parameter_value(param, exe)
:return: the LoDTensor for the parameter
"""
assert
is_parameter
(
para
)
...
...
@@ -441,14 +761,30 @@ def get_parameter_value(para, executor):
def
get_parameter_value_by_name
(
name
,
executor
,
program
=
None
):
"""
Get the LoDTensor for the parameter with the given name
Get the LoDTensor value of a certain parameter by its name.
Args:
name(str): The parameter's name.
executor(Executor): The executor to run for retrieving the value.
program(Program | None): The program where to find the parameter.
If it's set to be None, the function will
try to find the parameter in the default
main program.
:param executor: executor for retrieving the value
:param name: the name of the parameter
:param program: the program where the variable is found
Default default_main_program().
Returns:
numpy.array: The parameter's values.
:return: the LoDTensor for the variable
Raises:
TypeError: If given `name` is not an instance of basestring.
TypeError: If the parameter with the given name doesn't exist.
AssertionError: If there is a variable named `name` in the
given program but it is not a Parameter.
Examples:
.. code-block:: python
exe = fluid.Executor(fluid.CPUPlace())
p = fluid.io.get_parameter_value_by_name('fc.w', exe)
"""
if
program
is
None
:
program
=
default_main_program
()
...
...
@@ -470,16 +806,58 @@ def save_checkpoint(executor,
main_program
=
None
,
max_num_checkpoints
=
3
):
"""
Save Checkpoint will save persistable LodTensor variables from main_program in checkpoint directory,
the directory named by serial number from 0 to (n -1), save_checkpoint use LRU strategy
to keep numbers of checkpoint directory, the numbers of checkpoint directory are max_num_checkpoints at most,
The interval between two saved checkpoints must greater than save_interval_secs.
This function filters out all checkpoint variables from the given
main_program and then saves these variables to the `checkpoint_dir`
directory.
In the training process, we generally save a checkpoint in each
iteration. So there might be a lot of checkpoints in the
`checkpoint_dir`. To avoid them taking too much disk space, the
`max_num_checkpoints` are introduced to limit the total number of
checkpoints. If the number of existing checkpoints is greater than
the `max_num_checkpoints`, the oldest ones will be deleted on a rolling basis.
A variable is a checkpoint variable and will be saved if it meets
all following conditions:
1. It's persistable.
2. Its type is not FEED_MINIBATCH, FETCH_LIST or RAW.
3. Its name contains none of "@GRAD", ".trainer_" or ".block".
:param executor executor for save the value
:param checkpoint_dir the checkpoint directory
:param trainer_id current trainer id, if id is equal to 0, the trainer is chief
:param main_program will save all variables in program
:param max_num_checkpoints will keep numbers of checkpoint serials not bigger than max_num_checkpoints
Args:
executor(Executor): The executor to run for save checkpoint.
checkpoint_dir(str): The folder where to save checkpoints.
trainer_id(int): current trainer id, if id is equal to 0, the trainer
is chief.
trainer_args(dict|None): Current training arguments. Such as 'epoch_id'
and 'step_id'.
Default: None
main_program(Program|None): The program whose checkpoint variables will
be saved. If it is None, the default main program will be used.
max_num_checkpoints(int): The maximum number of existing checkpoints
to keep.
Default: 3
Returns:
None
Raises:
ValueError: If `checkpoint_dir` is None.
AssertionError: If `trainer_args` is not a dict.
Examples:
.. code-block:: python
exe = fluid.Executor(fluid.CPUPlace())
path = "./checkpoints"
prog = fluid.default_main_program()
trainer_args = {"epoch_id": 200,
"step_id": 20} # just an example
fluid.io.save_checkpoint(executor=exe,
checkpoint_dir=path,
trainer_id=0,
trainer_args=trainer_args,
main_program=prog,
max_num_checkpoints=3)
"""
if
checkpoint_dir
is
None
:
raise
ValueError
(
"'checkpoint_dir' should not be None"
)
...
...
@@ -503,13 +881,50 @@ def save_checkpoint(executor,
def
load_checkpoint
(
executor
,
checkpoint_dir
,
serial
,
main_program
):
"""
Load checkpoint from a directory by executor,
it will find the most recent saved checkpoint file and load it auto.
This function filters out all checkpoint variables from the given
main_program and then tries to load these variables from the
`checkpoint_dir` directory.
In the training process, we generally save a checkpoint in each
iteration. So there are more than one checkpoint in the
`checkpoint_dir` (each checkpoint has its own sub folder), use
`serial` to specify which serial of checkpoint you would like to
load.
A variable is a checkpoint variable and will be loaded if it meets
all following conditions:
1. It's persistable.
2. Its type is not FEED_MINIBATCH, FETCH_LIST or RAW.
3. Its name contains none of "@GRAD", ".trainer_" or ".block".
Args:
executor(Executor): The executor to run for loading checkpoint.
checkpoint_dir(str): The folder where all checkpoints are.
serial(int): The serial of checkpoint you would like to load.
main_program(Program): The program whose checkpoint variables will
be loaded.
:param executor executor for load the value
:param checkpoint_dir the checkpoint directory
:param serial the serial folder in checkpoint directory will be load
:param main_program will load all variables in program
Returns:
None
Raises:
ValueError: If `checkpoint_dir` is None.
ValueError: If `serial` is None or `serial` is less than 0.
ValueError: If `main_program` is None.
Examples:
.. code-block:: python
exe = fluid.Executor(fluid.CPUPlace())
path = "./checkpoints"
prog = fluid.default_main_program()
fluid.io.load_checkpoint(executor=exe, checkpoint_dir=path,
serial=9, main_program=prog)
# In this example, the `load_checkpoint` function
# will first filter out all checkpoint variables in the default
# main program, and then try to load these variables from the
# folder "./checkpoints/checkpoint_9/__model__".
"""
if
checkpoint_dir
is
None
:
...
...
@@ -528,10 +943,10 @@ def load_checkpoint(executor, checkpoint_dir, serial, main_program):
def
clean_checkpoint
(
checkpoint_dir
,
delete_dir
=
False
):
"""
clean the checkpoint dir, when the train exits normally, the trainer will call clean_checkpoint to delete checkpoint directory saved before.
delete_dir only works when the directory is empty, otherwise, OSError is raised.
delete_dir only works when the directory is empty, otherwise, OSError is raised.
:param checkpoint_dir
:param delete_dir
:
param checkpoint_dir
:
param delete_dir
"""
if
checkpoint_dir
is
None
:
...
...
@@ -547,13 +962,40 @@ def load_persist_vars_without_grad(executor,
program
,
has_model_dir
=
False
):
"""
load_persist_vars_without_grad will load variables from a directory by an executor,
the variable named end with "@GRAD" will not be loaded.
This function filters out all checkpoint variables from the given
program and then tries to load these variables from the given directory.
A variable is a checkpoint variable if it meets all following
conditions:
1. It's persistable.
2. Its type is not FEED_MINIBATCH, FETCH_LIST or RAW.
3. Its name contains none of "@GRAD", ".trainer_" or ".block".
:param executor executor for load the value
:param dirname the checkpoint directory
:param program will load all variables in program
:param has_model_dir if has_model_dir is True, will load variables from sub directory named __model__
Args:
executor(Executor): The executor to run for loading variables.
dirname(str): The directory path.
program(Program): The program whose checkpoint variables will
be loaded.
has_model_dir(bool): if True, the function loads variables
from a sub directory named '__model__'.
Default: False
Returns:
None
Examples:
.. code-block:: python
exe = fluid.Executor(fluid.CPUPlace())
param_path = "./my_paddle_model"
prog = fluid.default_main_program()
fluid.io.load_persist_vars_without_grad(executor=exe,
dirname=param_path, program=prog, has_model_dir=True)
# In this example, the `load_persist_vars_without_grad` function
# will first filter out all checkpoint variables in the default
# main program, and then try to load these variables from the
# folder "./my_paddle_model/__model__".
"""
if
has_model_dir
:
...
...
@@ -569,12 +1011,38 @@ def load_persist_vars_without_grad(executor,
def
save_persist_vars_without_grad
(
executor
,
dirname
,
program
):
"""
save_persist_vars_without_grad will save variables to a directory by an executor,
the variable named end with "@GRAD" will not be saved.
This function filters out all checkpoint variables from the given
program and then saves these variables to a sub-folder '__model__' of
the given directory.
A variable is a checkpoint variable if it meets all following
conditions:
1. It's persistable.
2. Its type is not FEED_MINIBATCH, FETCH_LIST or RAW.
3. Its name contains none of "@GRAD", ".trainer_" or ".block".
Args:
executor(Executor): The executor to run for saving variables.
dirname(str): The directory path.
program(Program): The program whose checkpoint variables will
be saved.
Returns:
None
Examples:
.. code-block:: python
exe = fluid.Executor(fluid.CPUPlace())
param_path = "./my_paddle_model"
prog = fluid.default_main_program()
fluid.io.save_persist_vars_without_grad(executor=exe,
dirname=param_path, program=prog)
:param executor executor for load the value
:param dirname the checkpoint directory
:param program will load all variables in program
# In this example, `save_persist_vars_without_grad` function
# will first filter out all checkpoint variables in the default
# main program, and then saves these variables to the folder
# "./my_paddle_model/__model__".
"""
cur_dir
=
_get_model_dir
(
dirname
)
save_vars
(
...
...
@@ -620,7 +1088,7 @@ def _is_checkpoint_var(var):
the checkpoint will not save or load all the variables.
var type is FEED_MINIBATCH/FETCH_LIST/RAW or var name ends with @GRAD are discarded.
:param var
:
param var
"""
if
var
.
desc
.
type
()
==
core
.
VarDesc
.
VarType
.
FEED_MINIBATCH
or
\
var
.
desc
.
type
()
==
core
.
VarDesc
.
VarType
.
FETCH_LIST
or
\
...
...
@@ -701,7 +1169,7 @@ def _write_success(dirname):
"""
write an empty file named "_SUCCESS" in checkpoint dir, indicate this checkpoint is correct.
:param dirname
:
param dirname
"""
success_file
=
os
.
path
.
join
(
dirname
,
SUCCESS_MARK_FILENAME
)
with
open
(
success_file
,
'a'
)
as
f
:
...
...
@@ -713,7 +1181,7 @@ def get_latest_checkpoint_serial(checkpoint_dir):
"""
get the latest file in checkpoint directory, the _SUCCESS file must exist in the directory
:param checkpoint_dir
:
param checkpoint_dir
"""
if
not
checkpoint_dir
:
return
-
1
...
...
python/paddle/fluid/layers/control_flow.py
浏览文件 @
e02cbf35
...
...
@@ -185,12 +185,14 @@ def Print(input,
Returns:
Variable: Output tensor, same data with input tensor.
Examples:
.. code-block:: python
value = some_layer(...)
Print(value, summarize=10,
message="The content of some_layer: ")
value = some_layer(...)
Print(value, summarize=10,
message="The content of some_layer: ")
'''
helper
=
LayerHelper
(
'print'
,
**
locals
())
out
=
helper
.
create_tmp_variable
(
dtype
=
helper
.
input_dtype
())
...
...
@@ -1201,6 +1203,31 @@ class ConditionalBlockGuard(BlockGuard):
class
ConditionalBlock
(
object
):
'''
**ConditionalBlock**
ConditionalBlock is an operator that bind a block to a specific condition,
if the condition matches, the corresponding block will be executed.
Args:
inputs (Variable): bool conditions.
is_scalar_condition (bool): whether the branch is controlled by a scalar.
name(str): name of this ConditionalBlock.
Examples:
.. code-block:: python
cond = layers.less_than(x=label, y=limit)
true_image, false_image = layers.split_lod_tensor(
input=image, mask=cond)
true_cond = layers.ConditionalBlock([true_image])
with true_cond.block():
...
false_cond = layers.ConditionalBlock([false_image])
with false_cond.block():
...
'''
def
__init__
(
self
,
inputs
,
is_scalar_condition
=
False
,
name
=
None
):
for
each_input
in
inputs
:
if
not
isinstance
(
each_input
,
Variable
):
...
...
python/paddle/fluid/layers/nn.py
浏览文件 @
e02cbf35
...
...
@@ -2678,18 +2678,35 @@ def sequence_expand(x, y, ref_level=-1, name=None):
def
beam_search
(
pre_ids
,
ids
,
scores
,
beam_size
,
end_id
,
level
=
0
):
'''
**beam search**
This function implements the beam search algorithm.
Beam search is a classical algorithm for selecting candidate words
in a machine translation task.
Refer to `Beam search <https://en.wikipedia.org/wiki/Beam_search>`_
for more details.
Args:
pre_ids (Variable):
${pre_ids_comment}
ids (Variable):
${ids_comment}
scores (Variable):
${scores_comment}
beam_size (int):
${beam_size_comment}
end_id (int):
${end_id_comment}
level (int):
${level_comment}
pre_ids (Variable):
ids in previous step.
ids (Variable):
a LoDTensor of shape of [None,k]
scores (Variable):
a LoDTensor that has the same shape and LoD with `ids`
beam_size (int):
beam size for beam search
end_id (int):
the token id which indicates the end of a sequence
level (int):
the level of LoDTensor
Returns:
tuple: a tuple of beam_search output variables: selected_ids, selected_scores
tuple: a tuple of beam_search output variables: `selected_ids`, `selected_scores`
Examples:
.. code-block:: python
# current_score is a Tensor of shape (num_batch_size, embed_size), which
# contains the score of each candidate word.
topk_scores, topk_indices = pd.topk(current_score, k=50)
selected_ids, selected_scores = pd.beam_search(
pre_ids, topk_indices, topk_scores, beam_size, end_id=10, level=0)
'''
helper
=
LayerHelper
(
'beam_search'
,
**
locals
())
score_type
=
scores
.
dtype
...
...
python/paddle/fluid/lod_tensor.py
浏览文件 @
e02cbf35
...
...
@@ -19,33 +19,41 @@ __all__ = ['create_lod_tensor', 'create_random_int_lodtensor']
def
create_lod_tensor
(
data
,
lod
,
place
):
"""Create a lod tensor from a numpy array, a list, or an existing lod tensor.
"""
Create a lod tensor from a numpy array, a list, or an existing lod tensor.
Create a lod tensor by doing the following:
1. Check that the length-based input lod is valid.
2. Convert the length-based lod to an offset-based LoD.
3. Copy the data from a numpy array, a list or an existing lod tensor to
3. Copy the data from a numpy array, a list or an existing lod tensor to
CPU or GPU device (based on input place).
4. Set the level of detail (LoD) using the offset-based LoD.
Use example:
Suppose we want LoDTensor to hold data for sequences of word, where each word is
represented by an integer. If we want to create a LoDTensor to represent two
sentences, one of 2 words, and one of 3 words.
Examples:
Then 'data' can be a numpy array of integers with shape (5, 1).
'lod' will be [[2, 3]], indicating the length(# of words) in each sentence.
This length-based input lod [[2, 3]] will be converted to offset-based lod [[0, 2, 5]]
inside the function call.
Suppose we want LoDTensor to hold data for sequences of word, where each
word is represented by an integer. If we want to create a LoDTensor to
represent two sentences, one of 2 words, and one of 3 words.
Please refer to
github.com/PaddlePaddle/Paddle/blob/develop/doc/fluid/design/concepts/lod_tensor.md
for more details regarding LoD.
Then :code:`data` can be a numpy array of integers with shape (5, 1).
:code:`lod` will be [[2, 3]], indicating the length(# of words) in each
sentence. This length-based input lod [[2, 3]] will be converted to
offset-based lod [[0, 2, 5]] inside the function call.
Please refer to :ref:`api_guide_low_level_lod_tensor` for more details
regarding LoD.
Args:
data: a numpy array or a LoDTensor or a list holding the data to be copied.
lod: a list of lists indicating the length-based LoD info specified by the user.
place: CPU or GPU place indicating where the data in the new LoDTensor will be stored.
data(numpy.ndarray|list|LoDTensor): a numpy array or a LoDTensor or a
list holding the data to be copied.
lod(list): a list of lists indicating the length-based LoD info
specified by the user.
place(Place): CPU or GPU place indicating where the data in the new
LoDTensor will be stored.
Returns:
A fluid LoDTensor object with tensor data and lod info.
...
...
@@ -77,31 +85,38 @@ def create_lod_tensor(data, lod, place):
def
create_random_int_lodtensor
(
lod
,
base_shape
,
place
,
low
,
high
):
"""Create a LoDTensor containing random integers.
"""
Create a LoDTensor containing random integers.
This function is frequently used in the book examples. So we revised it
based on
the new create_lod_tensor API and put it here in the lod_tensor module to simplify
the code.
This function is frequently used in the book examples. So we revised it
based on the new create_lod_tensor API and put it here in the lod_tensor
module to simplify the code.
The function does the following:
1. Calculate the overall shape of the LoDTensor based on the length-based 'lod' input
and the shape of the basic element in 'base_shape'.
1. Calculate the overall shape of the LoDTensor based on the length-based
:code:`lod` input and the shape of the basic element in
:code:`base_shape`.
2. Create a numpy array of this shape.
3. Create the LoDTensor using create_lod_tensor API.
Suppose we want LoDTensor to hold data for sequences of word, where each
word is
represented by an integer. If we want to create a LoDTensor to represent two
sentences, one of 2 words, and one of 3 words. Then 'base_shape' is [1], input
length-based 'lod' is [[2, 3]]. Then the overall shape of the LoDTensor would be
[5, 1], holding 5 words for two sentences.
Suppose we want LoDTensor to hold data for sequences of word, where each
word is represented by an integer. If we want to create a LoDTensor to
represent two sentences, one of 2 words, and one of 3 words. Then
'base_shape' is [1], input length-based 'lod' is [[2, 3]]. Then the overall
shape of the LoDTensor would be [5, 1], holding 5 words for two sentences.
Args:
data: a numpy array or a LoDTensor holding the data to be copied.
lod: a list of lists indicating the length-based LoD info specified by the user.
base_shape: the shape of the basic element to be held by the LoDTensor.
place: CPU or GPU place indicating where the data in the new LoDTensor will be stored.
low: the lower bound of the random integers.
high: the upper bound of the random integers.
lod(list): a list of lists indicating the length-based LoD info
specified by the user.
base_shape(list): the shape of the basic element to be held by the
LoDTensor.
place(Place): CPU or GPU place indicating where the data in the new
LoDTensor will be stored.
low(int): the lower bound of the random integers.
high(int): the upper bound of the random integers.
Returns:
A fluid LoDTensor object with tensor data and lod info.
...
...
python/paddle/fluid/metrics.py
浏览文件 @
e02cbf35
...
...
@@ -325,14 +325,14 @@ class Auc(MetricBase):
"""
def
__init__
(
self
,
name
,
curve
=
'ROC'
,
num_thresholds
=
200
):
super
(
MetricBase
,
self
).
__init__
(
name
,
curve
,
num_thresholds
)
super
(
Auc
,
self
).
__init__
(
name
=
name
)
self
.
_curve
=
curve
self
.
_num_thresholds
=
num_thresholds
self
.
_epsilon
=
1e-6
self
.
tp_list
=
np
.
ndarray
((
num_thresholds
,
))
self
.
fn_list
=
np
.
ndarray
((
num_thresholds
,
))
self
.
tn_list
=
np
.
ndarray
((
num_thresholds
,
))
self
.
fp_list
=
np
.
ndarray
((
num_thresholds
,
))
self
.
tp_list
=
np
.
zeros
((
num_thresholds
,
))
self
.
fn_list
=
np
.
zeros
((
num_thresholds
,
))
self
.
tn_list
=
np
.
zeros
((
num_thresholds
,
))
self
.
fp_list
=
np
.
zeros
((
num_thresholds
,
))
def
update
(
self
,
labels
,
predictions
,
axis
=
1
):
if
not
_is_numpy_
(
labels
):
...
...
@@ -350,12 +350,12 @@ class Auc(MetricBase):
tp
,
fn
,
tn
,
fp
=
0
,
0
,
0
,
0
for
i
,
lbl
in
enumerate
(
labels
):
if
lbl
:
if
predictions
[
i
,
0
]
>=
thresh
:
if
predictions
[
i
,
1
]
>=
thresh
:
tp
+=
1
else
:
fn
+=
1
else
:
if
predictions
[
i
,
0
]
>=
thresh
:
if
predictions
[
i
,
1
]
>=
thresh
:
fp
+=
1
else
:
tn
+=
1
...
...
python/paddle/fluid/nets.py
浏览文件 @
e02cbf35
...
...
@@ -26,16 +26,87 @@ def simple_img_conv_pool(input,
filter_size
,
pool_size
,
pool_stride
,
act
,
param_attr
=
None
,
pool_padding
=
0
,
pool_type
=
'max'
,
global_pooling
=
False
,
conv_stride
=
1
,
conv_padding
=
0
,
conv_dilation
=
1
,
conv_groups
=
1
,
param_attr
=
None
,
bias_attr
=
None
,
act
=
None
,
use_cudnn
=
True
,
use_mkldnn
=
False
):
"""
The simple_img_conv_pool is composed with one Convolution2d and one Pool2d.
Args:
input (Variable): The input image with [N, C, H, W] format.
num_filters(int): The number of filters. It is the same as the output
feature channel.
filter_size (int|list|tuple): The filter size. If filter_size is a list or
tuple, it must contain two integers, (filter_size_H, filter_size_W). Otherwise,
the filter_size_H = filter_size_W = filter_size.
pool_size (int|list|tuple): The pooling size of Pool2d layer. If pool_size
is a list or tuple, it must contain two integers, (pool_size_H, pool_size_W).
Otherwise, the pool_size_H = pool_size_W = pool_size.
pool_stride (int|list|tuple): The pooling stride of Pool2d layer. If pool_stride
is a list or tuple, it must contain two integers, (pooling_stride_H, pooling_stride_W).
Otherwise, the pooling_stride_H = pooling_stride_W = pool_stride.
pool_padding (int|list|tuple): The padding of Pool2d layer. If pool_padding is a list or
tuple, it must contain two integers, (pool_padding_H, pool_padding_W).
Otherwise, the pool_padding_H = pool_padding_W = pool_padding. Default 0.
pool_type (str): Pooling type can be :math:`max` for max-pooling and :math:`avg` for
average-pooling. Default :math:`max`.
global_pooling (bool): Whether to use the global pooling. If global_pooling = true,
pool_size and pool_padding will be ignored. Default False.
conv_stride (int|list|tuple): The stride size of the Conv2d Layer. If stride is a
list or tuple, it must contain two integers, (conv_stride_H, conv_stride_W). Otherwise,
the conv_stride_H = conv_stride_W = conv_stride. Default: conv_stride = 1.
conv_padding (int|list|tuple): The padding size of the Conv2d Layer. If padding is
a list or tuple, it must contain two integers, (conv_padding_H, conv_padding_W).
Otherwise, the conv_padding_H = conv_padding_W = conv_padding. Default: conv_padding = 0.
conv_dilation (int|list|tuple): The dilation size of the Conv2d Layer. If dilation is
a list or tuple, it must contain two integers, (conv_dilation_H, conv_dilation_W).
Otherwise, the conv_dilation_H = conv_dilation_W = conv_dilation. Default: conv_dilation = 1.
conv_groups (int): The groups number of the Conv2d Layer. According to grouped
convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
the first half of the filters is only connected to the first half
of the input channels, while the second half of the filters is only
connected to the second half of the input channels. Default: groups=1
param_attr (ParamAttr): The parameters to the Conv2d Layer. Default: None
bias_attr (ParamAttr): Bias parameter for the Conv2d layer. Default: None
act (str): Activation type for Conv2d. Default: None
use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn
library is installed. Default: True
use_mkldnn (bool): Use mkldnn kernels or not, it is valid only when compiled
with mkldnn library. Default: False
Return:
Variable: The result of input after Convolution2d and Pool2d.
Examples:
.. code-block:: python
img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
conv_pool = fluid.nets.simple_img_conv_pool(input=img,
filter_size=5,
num_filters=20,
pool_size=2,
pool_stride=2,
act="relu")
"""
conv_out
=
layers
.
conv2d
(
input
=
input
,
num_filters
=
num_filters
,
filter_size
=
filter_size
,
stride
=
conv_stride
,
padding
=
conv_padding
,
dilation
=
conv_dilation
,
groups
=
conv_groups
,
param_attr
=
param_attr
,
bias_attr
=
bias_attr
,
act
=
act
,
use_cudnn
=
use_cudnn
,
use_mkldnn
=
use_mkldnn
)
...
...
@@ -45,6 +116,8 @@ def simple_img_conv_pool(input,
pool_size
=
pool_size
,
pool_type
=
pool_type
,
pool_stride
=
pool_stride
,
pool_padding
=
pool_padding
,
global_pooling
=
global_pooling
,
use_cudnn
=
use_cudnn
,
use_mkldnn
=
use_mkldnn
)
return
pool_out
...
...
@@ -60,11 +133,65 @@ def img_conv_group(input,
conv_with_batchnorm
=
False
,
conv_batchnorm_drop_rate
=
0.0
,
pool_stride
=
1
,
pool_type
=
None
,
pool_type
=
"max"
,
use_cudnn
=
True
,
use_mkldnn
=
False
):
"""
Image Convolution Group, Used for vgg net.
The Image Convolution Group is composed of Convolution2d, BatchNorm, DropOut,
and Pool2d. According to the input arguments, img_conv_group will do a series of
computations for Input using Convolution2d, BatchNorm, DropOut, and pass the last
result to Pool2d.
Args:
input (Variable): The input image with [N, C, H, W] format.
conv_num_filter(list|tuple): Indicates the numbers of filter of this group.
pool_size (int|list|tuple): The pooling size of Pool2d Layer. If pool_size
is a list or tuple, it must contain two integers, (pool_size_H, pool_size_W).
Otherwise, the pool_size_H = pool_size_W = pool_size.
conv_padding (int|list|tuple): The padding size of the Conv2d Layer. If padding is
a list or tuple, its length must be equal to the length of conv_num_filter.
Otherwise the conv_padding of all Conv2d Layers are the same. Default 1.
conv_filter_size (int|list|tuple): The filter size. If filter_size is a list or
tuple, its length must be equal to the length of conv_num_filter.
Otherwise the conv_filter_size of all Conv2d Layers are the same. Default 3.
conv_act (str): Activation type for Conv2d Layer that is not followed by BatchNorm.
Default: None.
param_attr (ParamAttr): The parameters to the Conv2d Layer. Default: None
conv_with_batchnorm (bool|list): Indicates whether to use BatchNorm after Conv2d Layer.
If conv_with_batchnorm is a list, its length must be equal to the length of
conv_num_filter. Otherwise, conv_with_batchnorm indicates whether all the
Conv2d Layer follows a BatchNorm. Default False.
conv_batchnorm_drop_rate (float|list): Indicates the drop_rate of Dropout Layer
after BatchNorm. If conv_batchnorm_drop_rate is a list, its length must be
equal to the length of conv_num_filter. Otherwise, drop_rate of all Dropout
Layers is conv_batchnorm_drop_rate. Default 0.0.
pool_stride (int|list|tuple): The pooling stride of Pool2d layer. If pool_stride
is a list or tuple, it must contain two integers, (pooling_stride_H,
pooling_stride_W). Otherwise, the pooling_stride_H = pooling_stride_W = pool_stride.
Default 1.
pool_type (str): Pooling type can be :math:`max` for max-pooling and :math:`avg` for
average-pooling. Default :math:`max`.
use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn
library is installed. Default: True
use_mkldnn (bool): Use mkldnn kernels or not, it is valid only when compiled
with mkldnn library. Default: False
Return:
Variable: The final result after serial computation using Convolution2d,
BatchNorm, DropOut, and Pool2d.
Examples:
.. code-block:: python
img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
conv_pool = fluid.nets.img_conv_group(input=img,
num_channels=3,
conv_padding=1,
conv_num_filter=[3, 3],
conv_filter_size=3,
conv_act="relu",
pool_size=2,
pool_stride=2)
"""
tmp
=
input
assert
isinstance
(
conv_num_filter
,
list
)
or
\
...
...
@@ -74,6 +201,7 @@ def img_conv_group(input,
if
not
hasattr
(
obj
,
'__len__'
):
return
[
obj
]
*
len
(
conv_num_filter
)
else
:
assert
len
(
obj
)
==
len
(
conv_num_filter
)
return
obj
conv_padding
=
__extend_list__
(
conv_padding
)
...
...
@@ -119,6 +247,39 @@ def sequence_conv_pool(input,
param_attr
=
None
,
act
=
"sigmoid"
,
pool_type
=
"max"
):
"""
The sequence_conv_pool is composed with Sequence Convolution and Pooling.
Args:
input (Variable): The input of sequence_conv, which supports variable-time
length input sequence. The underlying of input is a matrix with shape
(T, N), where T is the total time steps in this mini-batch and N is
the input_hidden_size
num_filters(int): The number of filter.
filter_size (int): The filter size.
param_attr (ParamAttr): The parameters to the Sequence_conv Layer. Default: None.
act (str): Activation type for Sequence_conv Layer. Default: "sigmoid".
pool_type (str): Pooling type can be :math:`max` for max-pooling, :math:`average` for
average-pooling, :math:`sum` for sum-pooling, :math:`sqrt` for sqrt-pooling.
Default :math:`max`.
Return:
Variable: The final result after Sequence Convolution and Pooling.
Examples:
.. code-block:: python
input_dim = len(word_dict)
emb_dim = 128
hid_dim = 512
data = fluid.layers.data(name="words", shape=[1], dtype="int64", lod_level=1)
emb = fluid.layers.embedding(input=data, size=[input_dim, emb_dim], is_sparse=True)
seq_conv = fluid.nets.sequence_conv_pool(input=emb,
num_filters=hid_dim,
filter_size=3,
act="tanh",
pool_type="sqrt")
"""
conv_out
=
layers
.
sequence_conv
(
input
=
input
,
num_filters
=
num_filters
,
...
...
@@ -132,9 +293,9 @@ def sequence_conv_pool(input,
def
glu
(
input
,
dim
=-
1
):
"""
The
gated linear unit composed by split, sigmoid activation and element
wise
multiplication. Specifically, Split the input into two equal sized parts
:math:`a` and :math:`b` along the given dimension and then compute as
The
Gated Linear Units(GLU) composed by split, sigmoid activation and element-
wise
multiplication. Specifically, Split the input into two equal sized parts
,
:math:`a` and :math:`b`
,
along the given dimension and then compute as
following:
.. math::
...
...
@@ -147,16 +308,16 @@ def glu(input, dim=-1):
Args:
input (Variable): The input variable which is a Tensor or LoDTensor.
dim (int): The dimension along which to split. If :math:`dim < 0`, the
dimension to split along is :math:`rank(input) + dim`.
dimension to split along is :math:`rank(input) + dim`.
Default -1.
Returns:
Variable:
The Tensor v
ariable with half the size of input.
Variable:
V
ariable with half the size of input.
Examples:
.. code-block:: python
# x is a Tensor variable with shape [3, 6, 9]
fluid.nets.glu(input=x
, dim=1) # shape of output: [3, 3, 9]
data = fluid.layers.data(name="words", shape=[3, 6, 9], dtype="float32")
output = fluid.nets.glu(input=data
, dim=1) # shape of output: [3, 3, 9]
"""
a
,
b
=
layers
.
split
(
input
,
num_or_sections
=
2
,
dim
=
dim
)
...
...
@@ -189,40 +350,48 @@ def scaled_dot_product_attention(queries,
<https://arxiv.org/pdf/1706.03762.pdf>`_.
Args:
queries (Variable): The input variable which should be a 3-D Tensor.
keys (Variable): The input variable which should be a 3-D Tensor.
values (Variable): The input variable which should be a 3-D Tensor.
num_heads (int): Head number to compute the scaled dot product
attention. Default value is
1.
attention. Default:
1.
dropout_rate (float): The dropout rate to drop the attention weight.
Default value is
0.
Default: 0.
0.
Returns:
Variable: A 3-D Tensor computed by multi-head scaled dot product
\
attention.
Variable: A 3-D Tensor computed by multi-head scaled dot product
\
attention.
Raises:
ValueError: If input queries, keys, values are not 3-D Tensors.
NOTE:
NOTE
S
:
1. When num_heads > 1, three linear projections are learned respectively
to map input queries, keys and values into queries', keys' and values'.
queries', keys' and values' have the same shapes with queries, keys
and values.
1. When num_heads == 1, scaled_dot_product_attention has no learnable
parameters.
to map input queries, keys and values into queries', keys' and values'.
queries', keys' and values' have the same shapes with queries, keys
and values.
2. When num_heads == 1, scaled_dot_product_attention has no learnable
parameters.
Examples:
.. code-block:: python
# Suppose q, k, v are Tensors with the following shape:
# q: [3, 5, 9], k: [3, 6, 9], v: [3, 6, 10]
contexts = fluid.nets.scaled_dot_product_attention(q, k, v)
queries = fluid.layers.data(name="queries",
shape=[3, 5, 9],
dtype="float32",
append_batch_size=False)
queries.stop_gradient = False
keys = fluid.layers.data(name="keys",
shape=[3, 6, 9],
dtype="float32",
append_batch_size=False)
keys.stop_gradient = False
values = fluid.layers.data(name="values",
shape=[3, 6, 10],
dtype="float32",
append_batch_size=False)
values.stop_gradient = False
contexts = fluid.nets.scaled_dot_product_attention(queries, keys, values)
contexts.shape # [3, 5, 10]
"""
if
not
(
len
(
queries
.
shape
)
==
len
(
keys
.
shape
)
==
len
(
values
.
shape
)
==
3
):
...
...
python/paddle/fluid/parallel_executor.py
浏览文件 @
e02cbf35
...
...
@@ -27,6 +27,40 @@ BuildStrategy = core.ParallelExecutor.BuildStrategy
class
ParallelExecutor
(
object
):
"""
ParallelExecutor can run program in parallel.
Args:
use_cuda (bool): Whether to use CUDA or not.
loss_name (str): The loss name must set in training. Default None.
main_program (Program): The program that need to run, if not provided,
then default_main_program will be used. Default None.
share_vars_from(ParallelExecutor): If provided, it will share variables
from the specified ParallelExecutor. Default None.
num_trainers(int): If greater than 1, NCCL will be initialized with
multiple rank of nodes, each node should have same number of GPUs.
Distributed training will be enabled then. Default 1.
trainer_id(int): Must use together with num_trainers. trainer_id is the
"rank" of current node starts from 0. Default 0.
Returns:
ParallelExecutor: The initialized ParallelExecutor object.
Raises:
TypeError: If share_vars_from is provided, but not ParallelExecutor object.
Examples:
.. code-block:: python
train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=loss.name)
test_exe = fluid.ParallelExecutor(use_cuda=True,
main_program=test_program,
share_vars_from=train_exe)
train_loss, = train_exe.run([loss.name], feed=feed_dict)
test_loss, = test_exe.run([loss.name], feed=feed_dict)
"""
def
__init__
(
self
,
use_cuda
,
loss_name
=
None
,
...
...
@@ -37,42 +71,6 @@ class ParallelExecutor(object):
num_trainers
=
1
,
trainer_id
=
0
,
**
kwargs
):
"""
ParallelExecutor can run program in parallel.
Args:
use_cuda(bool): Whether to use CUDA or not.
loss_name(str, default None): The loss name must set in training.
main_program(Program, default None): The program that need to run,
if not provided, then default_main_program will be used.
share_vars_from(ParallelExecutor, default None): If provied,
it will share variables from the specified ParallelExecutor.
num_trainers(int, default 1): If greater than 1, NCCL will be
initialized with multpile rank of nodes, each node should have
same number of GPUs. Distributed training will be enabled then.
trainer_id(int, default 0): Must use together with num_trainers.
trainer_id is the "rank" of current node starts from 0.
Returns:
A ParallelExecutor object.
Raises:
TypeError: If share_vars_from is provided, but not ParallelExecutor
object.
Examples:
.. code-block:: python
train_exe = fluid.ParallelExecutor(
use_cuda=True, loss_name=loss.name)
test_exe = fluid.ParallelExecutor(
use_cuda=True,
main_program=test_program,
share_vars_from=train_exe)
train_loss, = train_exe.run([loss.name], feed=feed_dict)
test_loss, = test_exe.run([loss.name], feed=feed_dict)
"""
if
len
(
kwargs
)
!=
0
:
err_msg
=
""
for
key
in
kwargs
:
...
...
@@ -131,10 +129,16 @@ class ParallelExecutor(object):
main
=
main_program
main
=
main
if
main
else
framework
.
default_main_program
()
scope
=
executor
.
global_scope
()
# FIXME(Yancey1989): it's a temporary approach to determine whether this is a
# distributed train program; if so, call self.bcast_params() at the end of each mini-batch.
self
.
is_dist
=
True
if
"recv"
in
[
op
.
type
for
op
in
main
.
global_block
().
ops
]
else
False
if
share_vars_from
and
not
isinstance
(
share_vars_from
,
ParallelExecutor
):
raise
TypeError
(
"share_vars_from must be ParallelExecutor."
)
local_scopes
=
share_vars_from
.
executor
.
local_scopes
(
)
if
share_vars_from
else
[]
...
...
@@ -166,12 +170,14 @@ class ParallelExecutor(object):
element in the list will be copied to each device directly.
For example, if the feed is a dict:
>>> exe = ParallelExecutor()
>>> # the image will be splitted into devices. If there is two devices
>>> # each device will process an image with shape (24, 1, 28, 28)
>>> exe.run(feed={'image': numpy.random.random(size=(48, 1, 28, 28))})
For example, if the feed is a list:
>>> exe = ParallelExecutor()
>>> # each device will process each element in the list.
>>> # the 1st device will process an image with shape (48, 1, 28, 28)
...
...
@@ -182,18 +188,40 @@ class ParallelExecutor(object):
>>> {"image": numpy.random.random(size=(32, 1, 28, 28))},
>>> ])
Args:
fetch_list(list): The fetched variable names
feed(list|dict|None): The feed variables. If the feed is a dict,
tensors in that dict will be splitted into each devices. If
the feed is a list, each element of the list will be copied
to each device.
to each device.
Default None.
feed_dict: Alias for feed parameter, for backward compatibility.
This parameter is deprecated.
This parameter has been deprecated. Default None.
Returns:
List: The fetched result list.
Raises:
ValueError: If the feed is a list, but its length is not equal to the
length of active places, or its elements are not dicts.
NOTES:
1. If the feed's type is dict, the number of data that feeds to
ParallelExecutor must be bigger than active places. Otherwise,
it will throw exception from C++ side. Special attention should be
paid to check whether the last batch of the dataset is bigger
than active places.
2. If active places are more than one, the fetch results for each
variable is a list, and each element of this list is the variable of
respective active place.
Returns: fetched result list.
Examples:
.. code-block:: python
pe = fluid.ParallelExecutor(use_cuda=use_cuda,
loss_name=avg_cost.name,
main_program=fluid.default_main_program())
loss = pe.run(feed=feeder.feed(cur_batch),
fetch_list=[avg_cost.name])
"""
if
feed
is
None
and
feed_dict
is
not
None
:
feed
=
feed_dict
...
...
@@ -238,9 +266,17 @@ class ParallelExecutor(object):
fetch_var_name
=
'@FETCHED_VAR_NAME@'
self
.
executor
.
run
(
fetch_list
,
fetch_var_name
)
arr
=
self
.
scope
.
find_var
(
fetch_var_name
).
get_lod_tensor_array
()
if
self
.
is_dist
:
self
.
bcast_params
()
return
[
arr
[
i
]
for
i
in
range
(
len
(
arr
))]
def
bcast_params
(
self
):
"""
Broadcast the parameters to other devices. It is used during
distributed training.
"""
self
.
executor
.
bcast_params
(
set
(
self
.
persistable_vars
))
@
property
...
...
python/paddle/fluid/param_attr.py
浏览文件 @
e02cbf35
...
...
@@ -22,6 +22,35 @@ __all__ = [
class
ParamAttr
(
object
):
"""
Parameter attributes object. To fine-tune the network training process, user
can set parameter's attributes to control training details. Such as learning rate,
regularization, trainable, do_model_average and the method to initialize param.
Args:
name(str): The parameter's name. Default None.
initializer(Initializer): The method to initialize this parameter. Default None.
learning_rate(float): The parameter's learning rate. The learning rate when
optimize is :math:`global\_lr * parameter\_lr * scheduler\_factor`.
Default 1.0.
regularizer(WeightDecayRegularizer): Regularization factor. Default None.
trainable(bool): Whether this parameter is trainable. Default True.
gradient_clip(BaseGradientClipAttr): The method to clip this parameter's
gradient. Default None.
do_model_average(bool): Whether this parameter should do model average.
Default False.
Examples:
.. code-block:: python
w_param_attrs = fluid.ParamAttr(name="fc_weight",
learning_rate=0.5,
regularizer=fluid.L2Decay(1.0),
trainable=True)
y_predict = fluid.layers.fc(input=x, size=10, param_attr=w_param_attrs)
"""
def
__init__
(
self
,
name
=
None
,
initializer
=
None
,
...
...
@@ -29,7 +58,7 @@ class ParamAttr(object):
regularizer
=
None
,
trainable
=
True
,
gradient_clip
=
None
,
do_model_average
=
Non
e
):
do_model_average
=
Fals
e
):
self
.
name
=
name
self
.
initializer
=
initializer
self
.
learning_rate
=
learning_rate
...
...
@@ -39,6 +68,16 @@ class ParamAttr(object):
self
.
model_average
=
do_model_average
def
set_default_initializer
(
self
,
initializer
):
"""
Set the default initializer, the initializer should be Constant,
Uniform, Normal, Xavier, MSRA.
Args:
initializer(Initializer): the initializer to set.
Returns:
None
"""
if
initializer
is
None
:
if
self
.
initializer
is
None
:
raise
ValueError
(
"ParamAttr.initializer is not set"
)
...
...
@@ -50,13 +89,45 @@ class ParamAttr(object):
self
.
initializer
=
initializer
def
set_default_param_initializer
(
self
):
"""
Set the default initializer for the parameter with Xavier.
Args:
None.
Returns:
None.
"""
self
.
set_default_initializer
(
Xavier
())
def
set_default_bias_initializer
(
self
):
"""
Set the default initializer for the bias with Constant(0.0).
Args:
None.
Returns:
None.
"""
self
.
set_default_initializer
(
Constant
(
0.0
))
@
staticmethod
def
to_attr
(
arg
):
"""
Create ParamAttr[s].
Args:
arg: Arguments to initialize ParamAttr[s]. arg's type can be
str, Initializer, float, WeightDecayRegularizer, BaseGradientClipAttr,
bool, ParamAttr, or a list of above type.
Returns:
ParamAttr[s]: ParamAttr[s] initialized with arg.
Raises:
TypeError: If arg can not initialize a ParamAttr.
"""
if
arg
is
None
:
return
ParamAttr
()
elif
isinstance
(
arg
,
list
)
or
isinstance
(
arg
,
tuple
):
...
...
@@ -75,6 +146,15 @@ class ParamAttr(object):
raise
TypeError
(
"{0} cast to ParamAttr"
.
format
(
type
(
arg
)))
def
to_kwargs
(
self
,
with_initializer
=
False
):
"""
Returns the attributes of this parameter.
Args:
with_initializer(bool): Whether to add initializer attr.
Returns:
Parameter attributes(map): The attributes of this parameter.
"""
kwargs
=
{
'name'
:
self
.
name
,
'optimize_attr'
:
{
...
...
@@ -92,9 +172,27 @@ class ParamAttr(object):
class
WeightNormParamAttr
(
ParamAttr
):
"""
Used for weight normalization. Any field in ParamAttr can also be set here.
Besides, an extra field dim can be set to indicate the dimension except
which to normalize.
Used for weight Norm. Weight Norm is a reparameterization of the weight vectors
in a neural network that decouples the length of those weight vectors from
their direction. Weight Norm has been implemented as discussed in this
paper: `Weight Normalization: A Simple Reparameterization to Accelerate
Training of Deep Neural Networks
<https://arxiv.org/pdf/1602.07868.pdf>`_.
Args:
dim(list): The dimension except which to normalize. Default None.
kwargs: Any field in ParamAttr. Default None.
Examples:
.. code-block:: python
data = fluid.layers.data(name="data", shape=[3, 32, 32], dtype="float32")
fc = fluid.layers.fc(input=data,
size=1000,
param_attr=WeightNormParamAttr(
dim=None,
name='weight_norm_param'))
"""
# List to record the parameters reparameterized by weight normalization.
# If these parameters are treated as Variable rather than Parameter,
...
...
python/paddle/fluid/recordio_writer.py
浏览文件 @
e02cbf35
...
...
@@ -36,6 +36,45 @@ def convert_reader_to_recordio_file(
compressor
=
core
.
RecordIOWriter
.
Compressor
.
Snappy
,
max_num_records
=
1000
,
feed_order
=
None
):
"""
Convert a Python Reader to a recordio file.
Please see :ref:`api_guide_python_reader` and :ref:`api_guide_reader_op` for
details.
Examples:
>>> import paddle.fluid as fluid
>>> import paddle.dataset.mnist as mnist
>>> import paddle
>>>
>>> tmp_program = fluid.Program()
>>> with fluid.program_guard(tmp_program):
>>> img = fluid.layers.data(name='img', shape=[784])
>>> label = fluid.layers.data(name='label', shape=[1], dtype='int64')
>>> feeder = fluid.DataFeeder(feed_list=[img, label], place=fluid.CPUPlace())
>>> # mnist.recordio will be generated in current directory
>>> fluid.recordio_writer.convert_reader_to_recordio_file(
>>> filename="mnist.recordio",
>>> reader_creator=paddle.batch(mnist.train(), batch_size=32),
>>> feeder=feeder)
Args:
filename(str): The recordio filename.
reader_creator(callable): The Python Reader Creator. See
:ref:`api_guide_python_reader`.
feeder(DataFeeder): The DataFeeder instance. Used to convert
:code:`reader_creator` to :code:`lod_tensor`.
compressor: Must in fluid.core.RecordIOWriter.Compressor.Snappy or
fluid.core.RecordIOWriter.Compressor.NoCompress. Use :code:`Snappy`
by default.
max_num_records(int): Maximum number of records in one chunk. Each record
is each return value from reader function
feed_order(list): The order of variable names that the reader returns
Returns:
int: the number of record that saved.
"""
if
feed_order
is
None
:
feed_order
=
feeder
.
feed_names
counter
=
0
...
...
@@ -58,6 +97,17 @@ def convert_reader_to_recordio_files(
compressor
=
core
.
RecordIOWriter
.
Compressor
.
Snappy
,
max_num_records
=
1000
,
feed_order
=
None
):
"""
convert a python reader to many recordio files.
This API is basically same as :code:`convert_reader_to_recordio_file`,
except that it will create many recordio files. Each file contains at
most :code:`batch_per_file` records.
Please reference
:ref:`api_fluid_recordio_writer_convert_reader_to_recordio_file` for more
details.
"""
if
feed_order
is
None
:
feed_order
=
feeder
.
feed_names
f_name
,
f_ext
=
os
.
path
.
splitext
(
filename
)
...
...
python/paddle/fluid/tests/book/notest_understand_sentiment.py
浏览文件 @
e02cbf35
...
...
@@ -194,16 +194,16 @@ def train(word_dict,
if
is_local
:
train_loop
(
fluid
.
default_main_program
())
else
:
port
=
os
.
getenv
(
"PADDLE_
INIT
_PORT"
,
"6174"
)
pserver_ips
=
os
.
getenv
(
"PADDLE_
INIT_PSERVER
S"
)
# ip,ip...
port
=
os
.
getenv
(
"PADDLE_
PSERVER
_PORT"
,
"6174"
)
pserver_ips
=
os
.
getenv
(
"PADDLE_
PSERVER_IP
S"
)
# ip,ip...
eplist
=
[]
for
ip
in
pserver_ips
.
split
(
","
):
eplist
.
append
(
':'
.
join
([
ip
,
port
]))
pserver_endpoints
=
","
.
join
(
eplist
)
# ip:port,ip:port...
trainers
=
int
(
os
.
getenv
(
"TRAINERS"
))
trainers
=
int
(
os
.
getenv
(
"
PADDLE_
TRAINERS"
))
current_endpoint
=
os
.
getenv
(
"POD_IP"
)
+
":"
+
port
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_
INIT_
TRAINER_ID"
))
training_role
=
os
.
getenv
(
"TRAINING_ROLE"
,
"TRAINER"
)
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_TRAINER_ID"
))
training_role
=
os
.
getenv
(
"
PADDLE_
TRAINING_ROLE"
,
"TRAINER"
)
t
=
fluid
.
DistributeTranspiler
()
t
.
transpile
(
trainer_id
,
pservers
=
pserver_endpoints
,
trainers
=
trainers
)
if
training_role
==
"PSERVER"
:
...
...
python/paddle/fluid/tests/book/test_fit_a_line.py
浏览文件 @
e02cbf35
...
...
@@ -69,16 +69,16 @@ def train(use_cuda, save_dirname, is_local):
if
is_local
:
train_loop
(
fluid
.
default_main_program
())
else
:
port
=
os
.
getenv
(
"PADDLE_
INIT
_PORT"
,
"6174"
)
pserver_ips
=
os
.
getenv
(
"PADDLE_
INIT_PSERVER
S"
)
# ip,ip...
port
=
os
.
getenv
(
"PADDLE_
PSERVER
_PORT"
,
"6174"
)
pserver_ips
=
os
.
getenv
(
"PADDLE_
PSERVER_IP
S"
)
# ip,ip...
eplist
=
[]
for
ip
in
pserver_ips
.
split
(
","
):
eplist
.
append
(
':'
.
join
([
ip
,
port
]))
pserver_endpoints
=
","
.
join
(
eplist
)
# ip:port,ip:port...
trainers
=
int
(
os
.
getenv
(
"TRAINERS"
))
trainers
=
int
(
os
.
getenv
(
"
PADDLE_
TRAINERS"
))
current_endpoint
=
os
.
getenv
(
"POD_IP"
)
+
":"
+
port
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_
INIT_
TRAINER_ID"
))
training_role
=
os
.
getenv
(
"TRAINING_ROLE"
,
"TRAINER"
)
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_TRAINER_ID"
))
training_role
=
os
.
getenv
(
"
PADDLE_
TRAINING_ROLE"
,
"TRAINER"
)
t
=
fluid
.
DistributeTranspiler
()
t
.
transpile
(
trainer_id
,
pservers
=
pserver_endpoints
,
trainers
=
trainers
)
if
training_role
==
"PSERVER"
:
...
...
python/paddle/fluid/tests/book/test_image_classification.py
浏览文件 @
e02cbf35
...
...
@@ -178,16 +178,16 @@ def train(net_type, use_cuda, save_dirname, is_local):
if
is_local
:
train_loop
(
fluid
.
default_main_program
())
else
:
port
=
os
.
getenv
(
"PADDLE_
INIT
_PORT"
,
"6174"
)
pserver_ips
=
os
.
getenv
(
"PADDLE_
INIT_PSERVER
S"
)
# ip,ip...
port
=
os
.
getenv
(
"PADDLE_
PSERVER
_PORT"
,
"6174"
)
pserver_ips
=
os
.
getenv
(
"PADDLE_
PSERVER_IP
S"
)
# ip,ip...
eplist
=
[]
for
ip
in
pserver_ips
.
split
(
","
):
eplist
.
append
(
':'
.
join
([
ip
,
port
]))
pserver_endpoints
=
","
.
join
(
eplist
)
# ip:port,ip:port...
trainers
=
int
(
os
.
getenv
(
"TRAINERS"
))
trainers
=
int
(
os
.
getenv
(
"
PADDLE_
TRAINERS"
))
current_endpoint
=
os
.
getenv
(
"POD_IP"
)
+
":"
+
port
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_
INIT_
TRAINER_ID"
))
training_role
=
os
.
getenv
(
"TRAINING_ROLE"
,
"TRAINER"
)
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_TRAINER_ID"
))
training_role
=
os
.
getenv
(
"
PADDLE_
TRAINING_ROLE"
,
"TRAINER"
)
t
=
fluid
.
DistributeTranspiler
()
t
.
transpile
(
trainer_id
,
pservers
=
pserver_endpoints
,
trainers
=
trainers
)
if
training_role
==
"PSERVER"
:
...
...
python/paddle/fluid/tests/book/test_label_semantic_roles.py
浏览文件 @
e02cbf35
...
...
@@ -209,16 +209,16 @@ def train(use_cuda, save_dirname=None, is_local=True):
if
is_local
:
train_loop
(
fluid
.
default_main_program
())
else
:
port
=
os
.
getenv
(
"PADDLE_
INIT
_PORT"
,
"6174"
)
pserver_ips
=
os
.
getenv
(
"PADDLE_
INIT_PSERVER
S"
)
# ip,ip...
port
=
os
.
getenv
(
"PADDLE_
PSERVER
_PORT"
,
"6174"
)
pserver_ips
=
os
.
getenv
(
"PADDLE_
PSERVER_IP
S"
)
# ip,ip...
eplist
=
[]
for
ip
in
pserver_ips
.
split
(
","
):
eplist
.
append
(
':'
.
join
([
ip
,
port
]))
pserver_endpoints
=
","
.
join
(
eplist
)
# ip:port,ip:port...
trainers
=
int
(
os
.
getenv
(
"TRAINERS"
))
trainers
=
int
(
os
.
getenv
(
"
PADDLE_
TRAINERS"
))
current_endpoint
=
os
.
getenv
(
"POD_IP"
)
+
":"
+
port
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_
INIT_
TRAINER_ID"
))
training_role
=
os
.
getenv
(
"TRAINING_ROLE"
,
"TRAINER"
)
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_TRAINER_ID"
))
training_role
=
os
.
getenv
(
"
PADDLE_
TRAINING_ROLE"
,
"TRAINER"
)
t
=
fluid
.
DistributeTranspiler
()
t
.
transpile
(
trainer_id
,
pservers
=
pserver_endpoints
,
trainers
=
trainers
)
if
training_role
==
"PSERVER"
:
...
...
python/paddle/fluid/tests/book/test_machine_translation.py
浏览文件 @
e02cbf35
...
...
@@ -200,16 +200,16 @@ def train_main(use_cuda, is_sparse, is_local=True):
if
is_local
:
train_loop
(
framework
.
default_main_program
())
else
:
port
=
os
.
getenv
(
"PADDLE_
INIT
_PORT"
,
"6174"
)
pserver_ips
=
os
.
getenv
(
"PADDLE_
INIT_PSERVER
S"
)
# ip,ip...
port
=
os
.
getenv
(
"PADDLE_
PSERVER
_PORT"
,
"6174"
)
pserver_ips
=
os
.
getenv
(
"PADDLE_
PSERVER_IP
S"
)
# ip,ip...
eplist
=
[]
for
ip
in
pserver_ips
.
split
(
","
):
eplist
.
append
(
':'
.
join
([
ip
,
port
]))
pserver_endpoints
=
","
.
join
(
eplist
)
# ip:port,ip:port...
trainers
=
int
(
os
.
getenv
(
"TRAINERS"
))
trainers
=
int
(
os
.
getenv
(
"
PADDLE_
TRAINERS"
))
current_endpoint
=
os
.
getenv
(
"POD_IP"
)
+
":"
+
port
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_
INIT_
TRAINER_ID"
))
training_role
=
os
.
getenv
(
"TRAINING_ROLE"
,
"TRAINER"
)
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_TRAINER_ID"
))
training_role
=
os
.
getenv
(
"
PADDLE_
TRAINING_ROLE"
,
"TRAINER"
)
t
=
fluid
.
DistributeTranspiler
()
t
.
transpile
(
trainer_id
,
pservers
=
pserver_endpoints
,
trainers
=
trainers
)
if
training_role
==
"PSERVER"
:
...
...
python/paddle/fluid/tests/book/test_recognize_digits.py
浏览文件 @
e02cbf35
...
...
@@ -151,16 +151,16 @@ def train(nn_type,
if
is_local
:
train_loop
(
fluid
.
default_main_program
())
else
:
port
=
os
.
getenv
(
"PADDLE_
INIT
_PORT"
,
"6174"
)
pserver_ips
=
os
.
getenv
(
"PADDLE_
INIT_PSERVER
S"
)
# ip,ip...
port
=
os
.
getenv
(
"PADDLE_
PSERVER
_PORT"
,
"6174"
)
pserver_ips
=
os
.
getenv
(
"PADDLE_
PSERVER_IP
S"
)
# ip,ip...
eplist
=
[]
for
ip
in
pserver_ips
.
split
(
","
):
eplist
.
append
(
':'
.
join
([
ip
,
port
]))
pserver_endpoints
=
","
.
join
(
eplist
)
# ip:port,ip:port...
trainers
=
int
(
os
.
getenv
(
"TRAINERS"
))
trainers
=
int
(
os
.
getenv
(
"
PADDLE_
TRAINERS"
))
current_endpoint
=
os
.
getenv
(
"POD_IP"
)
+
":"
+
port
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_
INIT_
TRAINER_ID"
))
training_role
=
os
.
getenv
(
"TRAINING_ROLE"
,
"TRAINER"
)
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_TRAINER_ID"
))
training_role
=
os
.
getenv
(
"
PADDLE_
TRAINING_ROLE"
,
"TRAINER"
)
t
=
fluid
.
DistributeTranspiler
()
t
.
transpile
(
trainer_id
,
pservers
=
pserver_endpoints
,
trainers
=
trainers
)
if
training_role
==
"PSERVER"
:
...
...
python/paddle/fluid/tests/book/test_recommender_system.py
浏览文件 @
e02cbf35
...
...
@@ -220,16 +220,16 @@ def train(use_cuda, save_dirname, is_local=True):
if
is_local
:
train_loop
(
fluid
.
default_main_program
())
else
:
port
=
os
.
getenv
(
"PADDLE_
INIT
_PORT"
,
"6174"
)
pserver_ips
=
os
.
getenv
(
"PADDLE_
INIT_PSERVER
S"
)
# ip,ip...
port
=
os
.
getenv
(
"PADDLE_
PSERVER
_PORT"
,
"6174"
)
pserver_ips
=
os
.
getenv
(
"PADDLE_
PSERVER_IP
S"
)
# ip,ip...
eplist
=
[]
for
ip
in
pserver_ips
.
split
(
","
):
eplist
.
append
(
':'
.
join
([
ip
,
port
]))
pserver_endpoints
=
","
.
join
(
eplist
)
# ip:port,ip:port...
trainers
=
int
(
os
.
getenv
(
"TRAINERS"
))
trainers
=
int
(
os
.
getenv
(
"
PADDLE_
TRAINERS"
))
current_endpoint
=
os
.
getenv
(
"POD_IP"
)
+
":"
+
port
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_
INIT_
TRAINER_ID"
))
training_role
=
os
.
getenv
(
"TRAINING_ROLE"
,
"TRAINER"
)
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_TRAINER_ID"
))
training_role
=
os
.
getenv
(
"
PADDLE_
TRAINING_ROLE"
,
"TRAINER"
)
t
=
fluid
.
DistributeTranspiler
()
t
.
transpile
(
trainer_id
,
pservers
=
pserver_endpoints
,
trainers
=
trainers
)
if
training_role
==
"PSERVER"
:
...
...
python/paddle/fluid/tests/book/test_word2vec.py
浏览文件 @
e02cbf35
...
...
@@ -125,16 +125,16 @@ def train(use_cuda, is_sparse, is_parallel, save_dirname, is_local=True):
if
is_local
:
train_loop
(
fluid
.
default_main_program
())
else
:
port
=
os
.
getenv
(
"PADDLE_
INIT
_PORT"
,
"6174"
)
pserver_ips
=
os
.
getenv
(
"PADDLE_
INIT_PSERVER
S"
)
# ip,ip...
port
=
os
.
getenv
(
"PADDLE_
PSERVER
_PORT"
,
"6174"
)
pserver_ips
=
os
.
getenv
(
"PADDLE_
PSERVER_IP
S"
)
# ip,ip...
eplist
=
[]
for
ip
in
pserver_ips
.
split
(
","
):
eplist
.
append
(
':'
.
join
([
ip
,
port
]))
pserver_endpoints
=
","
.
join
(
eplist
)
# ip:port,ip:port...
trainers
=
int
(
os
.
getenv
(
"TRAINERS"
))
trainers
=
int
(
os
.
getenv
(
"
PADDLE_
TRAINERS"
))
current_endpoint
=
os
.
getenv
(
"POD_IP"
)
+
":"
+
port
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_
INIT_
TRAINER_ID"
))
training_role
=
os
.
getenv
(
"TRAINING_ROLE"
,
"TRAINER"
)
trainer_id
=
int
(
os
.
getenv
(
"PADDLE_TRAINER_ID"
))
training_role
=
os
.
getenv
(
"
PADDLE_
TRAINING_ROLE"
,
"TRAINER"
)
t
=
fluid
.
DistributeTranspiler
()
t
.
transpile
(
trainer_id
,
pservers
=
pserver_endpoints
,
trainers
=
trainers
)
if
training_role
==
"PSERVER"
:
...
...
python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py
浏览文件 @
e02cbf35
...
...
@@ -15,6 +15,7 @@
import
unittest
import
numpy
as
np
from
op_test
import
OpTest
import
paddle.fluid.core
as
core
def
bilinear_interp_np
(
input
,
out_h
,
out_w
,
out_size
):
...
...
@@ -45,9 +46,9 @@ def bilinear_interp_np(input, out_h, out_w, out_size):
out
[:,
:,
i
,
j
]
=
h2lambda
*
(
w2lambda
*
input
[:,
:,
h
,
w
]
+
w1lambda
*
input
[:,
:,
h
,
w
+
wid
])
+
\
h1lambda
*
(
w2lambda
*
input
[:,
:,
h
+
hid
,
w
]
+
w1lambda
*
input
[:,
:,
h
+
hid
,
w
+
wid
])
return
out
.
astype
(
"float32"
)
h1lambda
*
(
w2lambda
*
input
[:,
:,
h
+
hid
,
w
]
+
w1lambda
*
input
[:,
:,
h
+
hid
,
w
+
wid
])
return
out
.
astype
(
input
.
dtype
)
class
TestBilinearInterpOp
(
OpTest
):
...
...
@@ -122,5 +123,44 @@ class TestCase6(TestBilinearInterpOp):
self
.
out_size
=
np
.
array
([
65
,
129
]).
astype
(
"int32"
)
class TestBilinearInterpOpUint8(OpTest):
    """Check the bilinear_interp operator on uint8 input against the NumPy
    reference implementation (bilinear_interp_np)."""

    def setUp(self):
        """Generate a random uint8 input and the expected reference output."""
        self.out_size = None
        self.init_test_case()
        self.op_type = "bilinear_interp"

        # randint's upper bound is exclusive, so values cover 0..255.
        source = np.random.randint(
            low=0, high=256, size=self.input_shape).astype("uint8")
        expected = bilinear_interp_np(source, self.out_h, self.out_w,
                                      self.out_size)

        feed = {'X': source}
        # OutSize is only fed when a sub-case sets it explicitly.
        if self.out_size is not None:
            feed['OutSize'] = self.out_size

        self.inputs = feed
        self.attrs = {'out_h': self.out_h, 'out_w': self.out_w}
        self.outputs = {'Out': expected}

    def test_check_output(self):
        # Checked on CPU only, with an absolute tolerance of 1
        # (presumably to absorb integer rounding differences -- TODO confirm).
        cpu = core.CPUPlace()
        self.check_output_with_place(place=cpu, atol=1)

    def init_test_case(self):
        """Default case: input shape [1, 3, 9, 6], out_h=10, out_w=9."""
        self.input_shape = [1, 3, 9, 6]
        self.out_h, self.out_w = 10, 9
class TestCase1Uint8(TestBilinearInterpOpUint8):
    """uint8 case: input shape [2, 3, 128, 64], out_h=120, out_w=50."""

    def init_test_case(self):
        """Override the default shape and output size."""
        self.input_shape = [2, 3, 128, 64]
        self.out_h, self.out_w = 120, 50
class TestCase2Uint8(TestBilinearInterpOpUint8):
    """uint8 case that also supplies an explicit OutSize tensor."""

    def init_test_case(self):
        """Set the input shape, the out_h/out_w attributes and out_size."""
        self.input_shape = [4, 1, 7, 8]
        self.out_h, self.out_w = 5, 13
        # out_size ([6, 15]) deliberately differs from out_h/out_w above.
        requested_size = np.array([6, 15])
        self.out_size = requested_size.astype("int32")
# Run the unittest test runner only when this file is executed as a script.
if
__name__
==
"__main__"
:
unittest
.
main
()
python/paddle/fluid/trainer.py
浏览文件 @
e02cbf35
...
...
@@ -33,23 +33,59 @@ __all__ = [
class BeginEpochEvent(object):
    """
    Marks the beginning of a training epoch.

    Args:
        epoch_id(int): ID of the epoch that is starting; stored as
            :code:`self.epoch`.
    """

    def __init__(self, epoch_id):
        self.epoch = epoch_id
class EndEpochEvent(object):
    """
    Marks the end of a training epoch.

    Args:
        epoch_id(int): ID of the epoch that has finished; stored as
            :code:`self.epoch`.
    """

    def __init__(self, epoch_id):
        self.epoch = epoch_id
class BeginStepEvent(object):
    """
    The begin of a training step.

    Args:
        epoch_id(int): The current epoch ID.
        step_id(int): The current step ID.

    Attributes:
        epoch(int): The epoch ID passed to the constructor.
        step(int): The step ID passed to the constructor.
        fetch_metrics(bool): If fetch_metrics is true, the metrics will be
            fetched at the EndStepEvent. Default is True.
    """

    def __init__(self, epoch_id, step_id):
        self.epoch = epoch_id
        self.step = step_id
        # Documented in the class docstring; a bare string placed after the
        # assignment is an expression statement, not documentation.
        self.fetch_metrics = True
class
EndStepEvent
(
object
):
"""
The end of a training step.
Args:
epoch_id(int): The current epoch ID.
step_id(int): The current step ID.
metrics(list): A list of fetched tensor. The order of this list is same
as the :code:`train_func` returns.
"""
def
__init__
(
self
,
epoch_id
,
step_id
,
metrics
):
self
.
epoch
=
epoch_id
self
.
step
=
step_id
...
...
@@ -57,6 +93,27 @@ class EndStepEvent(object):
class
CheckpointConfig
(
object
):
"""
Parameter object for :code:`fluid.io.save_checkpoint` and
:code:`fluid.Trainer`. Used to configure how to save checkpoints.
Args:
checkpoint_dir(str): Directory path to save check point. Default is the
current directory.
max_num_checkpoints(int): The max number of local check points.
epoch_interval(int): Every number of epoch to save check point.
step_interval(int): Every number of step to save check point.
Examples:
>>> config = fluid.CheckpointConfig("./checkpoints")
>>> trainer = fluid.Trainer(train_func=train_program,
>>> place=place,
>>> optimizer_func=optimizer_func,
>>> checkpoint_config=config)
>>> trainer.train(...)
"""
def
__init__
(
self
,
checkpoint_dir
=
None
,
max_num_checkpoints
=
3
,
...
...
@@ -113,11 +170,62 @@ def check_and_get_place(place):
class
Trainer
(
object
):
"""
A trainer wraps MultiGPU/MultiNode training loops and can be used to train a
simple neural network easily.
This API takes a :code:`train_func`. A :code:`train_func` is a function that
returns the loss as its first return value. The rest of the return values can
be fetched via EndStepEvent.metrics.
This API also takes a :code:`optimizer_func` that will return an optimizer
instance.
For example, to train a MLP for MNIST dataset, the sample program is
>>> import paddle.fluid as fluid
>>>
>>> def mlp(image, layer_sizes=[200, 100], activation="relu", num_classes=10):
>>> hidden = image
>>> for layer_size in layer_sizes:
>>> hidden = fluid.layers.fc(input=hidden, size=layer_size, act=activation)
>>> return fluid.layers.fc(input=hidden, size=num_classes, act="softmax")
>>>
>>> def train_mnist_mlp():
>>> img = fluid.layers.data(name='image', shape=[784])
>>> label = fluid.layers.data(name='label', shape=[1], dtype='int64')
>>> prediction = mlp(img)
>>> return fluid.layers.mean(fluid.layers.cross_entropy(prediction, label))
>>>
>>> def optimizer():
>>> return fluid.optimizer.Adam()
>>>
>>> trainer = Trainer(train_func=train_mnist_mlp,
>>> optimizer_func=optimizer,
>>> place=fluid.CUDAPlace(0),
>>> parallel=True)
>>>
>>> def train_callback(event):
>>> if isinstance(event, fluid.EndStepEvent):
>>> print "Epoch ID", event.epoch, "Step ID",
\
>>> event.step, "AvgLoss", event.metrics[0]
>>> elif isinstance(event, fluid.EndEpochEvent):
>>> trainer.save_params("./model_{0}".format(event.epoch))
>>>
>>> trainer.train(num_epochs=100, event_handler=train_callback)
For more examples, please see :ref:`api_guide_high_level_api`.
Args:
train_func(callable): A function which will return loss. The loss must be a scalar.
train_func(callable): A function which will return loss. The loss must be
a scalar tensor.
optimizer_func(callable): A function that returns an Optimizer object.
place: The device place of this trainer.
place(CUDAPlace|CPUPlace): The device place of this trainer. If
:code:`parallel=True,` all CUDA Places will be used if :code:`place`
is a :code:`CUDAPlace`.
parallel(bool): True if use multiple devices.
checkpoint_config(CheckpointConfig): Configuration about how to save
checkpoints.
"""
def
__init__
(
self
,
...
...
@@ -129,9 +237,6 @@ class Trainer(object):
checkpoint_config
=
None
):
self
.
__stop
=
False
self
.
parallel
=
parallel
# 1. we need to generate a framework.Program by calling
# program_func. Reference: fluid.program_guard in
# test_word2vec.py
# config for checkpoint
# only chief worker will save variables
...
...
@@ -145,6 +250,10 @@ class Trainer(object):
self
.
scope
=
core
.
Scope
()
# 1. we need to generate a framework.Program by calling
# program_func. Reference: fluid.program_guard in
# test_word2vec.py
self
.
startup_program
=
framework
.
Program
()
self
.
train_program
=
framework
.
Program
()
...
...
@@ -277,17 +386,18 @@ class Trainer(object):
def
train
(
self
,
num_epochs
,
event_handler
,
reader
=
None
,
feed_order
=
None
):
"""
T
rain the model.
Start the train loop to t
rain the model.
Args:
num_epochs: The number of epoch. An epoch will process all data in reader
event_handler: The event handler. A function with type (ev:Event)->void
reader:
feed_order: Feeding order of reader. None will following the defining
num_epochs(int): The number of epoch. An epoch will process all data in reader
event_handler(callable): The event handler. A function with type (ev:Event)->void
reader(callable): A reader creator object. See also
:ref:`api_guide_python_reader` .
feed_order(list): Feeding order of reader. None will follow the defining
order in the program.
Returns:
None
"""
training_role
=
os
.
getenv
(
"PADDLE_TRAINING_ROLE"
,
""
)
if
training_role
==
"PSERVER"
:
...
...
@@ -307,16 +417,24 @@ class Trainer(object):
Test the model on given test data
Args:
reader: The reader that yields test data.
feed_order
: Feeding order of reader. None will following the defining
order in program
reader
(callable)
: The reader that yields test data.
feed_order
(list): Feeding order of reader. None will following the
defining
order in program
"""
return
self
.
_test_by_executor
(
reader
,
feed_order
,
self
.
train_func_outputs
)
def
save_params
(
self
,
param_path
):
# reference: save_persistables in io.py
"""
Save all parameters into :code:`param_path`.
Args:
param_path(str): The path to save parameters.
Returns:
None
"""
with
self
.
_prog_and_scope_guard
():
exe
=
executor
.
Executor
(
self
.
place
)
io
.
save_persistables
(
exe
,
dirname
=
param_path
)
...
...
python/paddle/fluid/transpiler/distribute_transpiler.py
浏览文件 @
e02cbf35
...
...
@@ -12,14 +12,6 @@
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Transpile the program to distributed data-parallelism programs.
The main_program will be transformed to use a remote parameter server
to do parameter optimization. And the optimization graph will be put
into a parameter server program.
Use different methods to split trainable variables to different
parameter servers.
Steps to transpile trainer:
1. split variable to multiple blocks, aligned by product(dim[1:]) (width).
2. rename splited grad variables to add trainer_id suffix ".trainer_%d".
...
...
@@ -117,129 +109,41 @@ def slice_variable(var_list, slice_count, min_block_size=8192):
return
blocks
class
DistributeTranspiler
:
def
_has_distributed_lookup_table
(
self
):
# process lookup_table_op
# 1. check all lookup_table_op is distributed
# 2. check all lookup_table_op share the same table.
distributed_lookup_table_ops
=
[]
# support only one distributed_lookup_table now
self
.
table_name
=
None
for
op
in
self
.
origin_program
.
global_block
().
ops
:
if
op
.
type
==
LOOKUP_TABLE_TYPE
:
if
op
.
attrs
[
'is_distributed'
]
is
True
:
if
self
.
table_name
is
None
:
self
.
table_name
=
op
.
input
(
"W"
)[
0
]
if
self
.
table_name
!=
op
.
input
(
"W"
)[
0
]:
raise
RuntimeError
(
"all distributed lookup_table_ops"
" should have only one table"
)
distributed_lookup_table_ops
.
append
(
op
)
else
:
if
self
.
table_name
is
not
None
:
assert
op
.
input
(
"W"
)[
0
]
!=
self
.
table_name
return
len
(
distributed_lookup_table_ops
)
>
0
def
_update_dist_lookup_table_vars
(
self
,
param_list
,
grad_list
,
params_grads
):
# TODO(wuyi): put find a way to put dist lookup table stuff all together.
# update self.table_param_grad and self.trainer_side_table_grad_list
program
=
self
.
origin_program
if
self
.
has_distributed_lookup_table
:
param_list
=
[
param
for
param
in
param_list
if
param
.
name
!=
self
.
table_name
]
grad_list
=
[
grad
for
grad
in
grad_list
if
grad
.
name
!=
grad_var_name
(
self
.
table_name
)
]
self
.
table_param_grad
=
[
param_grad
for
param_grad
in
params_grads
if
param_grad
[
0
].
name
==
self
.
table_name
][
0
]
table_grad_var
=
self
.
table_param_grad
[
1
]
if
self
.
sync_mode
:
self
.
trainer_side_table_grad_list
=
[
program
.
global_block
().
create_var
(
name
=
"%s.trainer_%d.pserver_%d"
%
(
table_grad_var
.
name
,
self
.
trainer_id
,
index
),
type
=
table_grad_var
.
type
,
shape
=
table_grad_var
.
shape
,
dtype
=
table_grad_var
.
dtype
)
for
index
in
range
(
len
(
self
.
pserver_endpoints
))
]
else
:
self
.
trainer_side_table_grad_list
=
[
program
.
global_block
().
create_var
(
name
=
"%s.pserver_%d"
%
(
table_grad_var
.
name
,
index
),
type
=
table_grad_var
.
type
,
shape
=
table_grad_var
.
shape
,
dtype
=
table_grad_var
.
dtype
)
for
index
in
range
(
len
(
self
.
pserver_endpoints
))
]
return
param_list
,
grad_list
def
_init_splited_vars
(
self
,
slice_var_up
):
# update these mappings for further transpile:
# 1. param_var_mapping: param var name -> [splited params vars]
# 2. grad_var_mapping: grad var name -> [splited grads vars]
# 3. grad_param_mapping: grad.blockx -> param.blockx
# 4. param_grad_ep_mapping: ep -> {"params": [], "grads": []}
param_list
=
[]
grad_list
=
[]
param_grad_set
=
set
()
for
p
,
g
in
self
.
params_grads
:
# skip parameter marked not trainable
if
type
(
p
)
==
Parameter
and
p
.
trainable
==
False
:
continue
if
p
.
name
not
in
param_grad_set
:
param_list
.
append
(
p
)
param_grad_set
.
add
(
p
.
name
)
if
g
.
name
not
in
param_grad_set
:
grad_list
.
append
(
g
)
param_grad_set
.
add
(
g
.
name
)
param_list
,
grad_list
=
self
.
_update_dist_lookup_table_vars
(
param_list
,
grad_list
,
self
.
params_grads
)
if
slice_var_up
:
# when we slice var up into blocks, we will slice the var according to
# pserver services' count. A pserver may have two or more listening ports.
grad_blocks
=
slice_variable
(
grad_list
,
len
(
self
.
pserver_endpoints
))
param_blocks
=
slice_variable
(
param_list
,
len
(
self
.
pserver_endpoints
))
else
:
# when we do NOT slice var up into blocks, we will always slice params
# grads into one block.
grad_blocks
=
slice_variable
(
grad_list
,
1
)
param_blocks
=
slice_variable
(
param_list
,
1
)
assert
(
len
(
grad_blocks
)
==
len
(
param_blocks
))
# origin_varname -> [splited_var]
self
.
param_var_mapping
=
self
.
_create_vars_from_blocklist
(
self
.
origin_program
,
param_blocks
)
self
.
grad_var_mapping
=
self
.
_create_vars_from_blocklist
(
self
.
origin_program
,
grad_blocks
,
add_trainer_suffix
=
self
.
trainer_num
>
1
)
self
.
grad_param_mapping
=
dict
()
for
g
,
p
in
zip
(
grad_blocks
,
param_blocks
):
g_name
,
g_bid
,
_
=
g
.
split
(
":"
)
p_name
,
p_bid
,
_
=
p
.
split
(
":"
)
self
.
grad_param_mapping
[
self
.
grad_var_mapping
[
g_name
][
int
(
g_bid
)]]
=
\
self
.
param_var_mapping
[
p_name
][
int
(
p_bid
)]
# create mapping of endpoint -> split var to create pserver side program
self
.
param_grad_ep_mapping
=
dict
()
[
self
.
param_grad_ep_mapping
.
update
({
ep
:
{
"params"
:
[],
"grads"
:
[]
}
})
for
ep
in
self
.
pserver_endpoints
]
class
DistributeTranspiler
(
object
):
"""
**DistributeTranspiler**
Convert the fluid program to distributed data-parallelism programs.
The main_program will be transformed to use a remote parameter server
to do parameter optimization. And the optimization graph will be put
into a parameter server program.
Examples:
.. code-block:: python
# Define your model before these codes.
port = os.getenv("PADDLE_PSERVER_PORT", "6174")
pserver_ips = os.getenv("PADDLE_PSERVER_IPS", "")
eplist = []
for ip in pserver_ips.split(","):
eplist.append(':'.join([ip, port]))
pserver_endpoints = ",".join(eplist)
trainers = int(os.getenv("PADDLE_TRAINERS"))
current_endpoint = os.getenv("PADDLE_CURRENT_IP", "") + ":" + port
trainer_id = int(os.getenv("PADDLE_TRAINER_ID", "0"))
role = os.getenv("PADDLE_TRAINING_ROLE")
t = distribute_transpiler.DistributeTranspiler()
t.transpile(
trainer_id, pservers=pserver_endpoints, trainers=trainers)
if role == "PSERVER":
pserver_program = t.get_pserver_program(current_endpoint)
pserver_startup_program = t.get_startup_program(current_endpoint,
pserver_program)
elif role == "TRAINER":
trainer_program = t.get_trainer_program()
"""
def
transpile
(
self
,
trainer_id
,
...
...
@@ -250,15 +154,20 @@ class DistributeTranspiler:
split_method
=
RoundRobin
,
sync_mode
=
True
):
"""
Run the transpiler.
Args:
trainer_id(int): one unique id for each trainer in a job.
program(Program): program to transpile, default is default_main_program
pservers(string): parameter server endpoints like "m1:6174,m2:6174"
trainers(int): total number of workers/trainers in the job
split_method(PSDispatcher): A function to determin how to split variables
to different servers equally.
sync_mode(boolean): if sync_mode is set True, it means that dist transpiler
will transpile the program into sync_mode pserver and trainer program.
trainer_id (int): id for current trainer worker, if you have
n workers, the id may range from 0 ~ n-1
program (Program|None): program to transpile,
default is fluid.default_main_program().
pservers (str): comma separated ip:port string for the pserver
list.
trainers (int): number of trainers in the distributed job.
slice_var_up (bool): Do Tensor slice for pservers, default is True.
split_method (PSDispatcher): RoundRobin or HashName can be used
try to choose the best method to balance loads for pservers.
sync_mode (bool): Do sync training or not, default is True.
"""
assert
(
split_method
.
__bases__
[
0
]
==
PSDispatcher
)
if
program
is
None
:
...
...
@@ -385,6 +294,12 @@ class DistributeTranspiler:
self
.
_split_table_grad_and_add_send_vars
(
program
,
pserver_endpoints
)
def
get_trainer_program
(
self
):
"""
Get transpiled trainer side program.
Returns:
Program: trainer side program.
"""
# remove optimize ops and add a send op to main_program
delete_ops
(
self
.
origin_program
.
global_block
(),
self
.
optimize_ops
)
# FIXME(typhoonzero): serialize once will fix error occurs when clone.
...
...
@@ -393,17 +308,19 @@ class DistributeTranspiler:
def
get_pserver_program
(
self
,
endpoint
):
"""
Get pserver side program using the endpoint.
TODO(panyx0718): Revisit this assumption. what if #blocks > #pservers.
NOTE: assume blocks of the same variable is not distributed
on the same pserver, only change param/grad varnames for
trainers to fetch.
Get parameter server side program.
Args:
endpoint(string): the endpoint for the current pserver instance
.
Returns
(Program): the pserver program
endpoint (str): current parameter server endpoint
.
Returns
:
Program: the program for current parameter server to run.
"""
# TODO(panyx0718): Revisit this assumption. what if #blocks > #pservers.
# NOTE: assume blocks of the same variable is not distributed
# on the same pserver, only change param/grad varnames for
# trainers to fetch.
# step1
pserver_program
=
Program
()
# step2: Create vars to receive vars at parameter servers.
...
...
@@ -481,7 +398,7 @@ class DistributeTranspiler:
def
__clone_lr_op_sub_block__
(
op
,
program
,
new_block
,
skip_sub_blks
):
if
not
op
.
has_attr
(
'sub_block'
):
return
-
1
return
origin_block_desc
=
op
.
attr
(
'sub_block'
)
origin_block
=
self
.
origin_program
.
block
(
origin_block_desc
.
id
)
...
...
@@ -587,11 +504,14 @@ class DistributeTranspiler:
Get startup program for current parameter server.
Modify operator input variables if there are variables that
were split to several blocks.
Args:
endpoint(string): the endpoint for the current pserver instance.
pserver_program(Program): the program for pserver to execute.
Returns(Program): the startup program for pserver
Args:
endpoint (str): current pserver endpoint.
pserver_program (Program): call get_pserver_program first and
pass the result here.
Returns:
Program: parameter server side startup program.
"""
s_prog
=
Program
()
orig_s_prog
=
default_startup_program
()
...
...
@@ -643,6 +563,129 @@ class DistributeTranspiler:
# ====================== private transpiler functions =====================
def _has_distributed_lookup_table(self):
    """
    Scan the global block of the origin program for lookup_table ops.

    Sets :code:`self.table_name` to the "W" input of the distributed
    lookup_table ops (or None when there is none). Raises RuntimeError if
    distributed lookup_table ops use more than one table, and asserts that
    a non-distributed lookup_table op seen afterwards does not use the
    distributed table.

    Returns:
        bool: True if at least one distributed lookup_table op was found.
    """
    # Only a single distributed lookup table is supported for now.
    self.table_name = None
    found_distributed = False

    for op in self.origin_program.global_block().ops:
        if op.type != LOOKUP_TABLE_TYPE:
            continue

        if op.attrs['is_distributed'] is True:
            weight_name = op.input("W")[0]
            if self.table_name is None:
                self.table_name = weight_name
            if self.table_name != weight_name:
                raise RuntimeError("all distributed lookup_table_ops"
                                   " should have only one table")
            found_distributed = True
        elif self.table_name is not None:
            # A local lookup_table must not reuse the distributed table.
            assert op.input("W")[0] != self.table_name

    return found_distributed
def _update_dist_lookup_table_vars(self, param_list, grad_list,
                                   params_grads):
    """
    Remove the distributed lookup table from the param/grad lists and
    create the trainer-side gradient variables for it.

    When :code:`self.has_distributed_lookup_table` is set, this updates
    :code:`self.table_param_grad` and
    :code:`self.trainer_side_table_grad_list` (one variable per pserver
    endpoint). Otherwise the lists are returned unchanged.

    Args:
        param_list(list): parameter variables.
        grad_list(list): gradient variables.
        params_grads(list): (param, grad) pairs used to locate the table.

    Returns:
        tuple: (param_list, grad_list) without the table param/grad.
    """
    # TODO(wuyi): find a way to put dist lookup table stuff all together.
    program = self.origin_program
    if not self.has_distributed_lookup_table:
        return param_list, grad_list

    table_grad_name = grad_var_name(self.table_name)
    param_list = [p for p in param_list if p.name != self.table_name]
    grad_list = [g for g in grad_list if g.name != table_grad_name]

    matches = [
        pair for pair in params_grads if pair[0].name == self.table_name
    ]
    self.table_param_grad = matches[0]
    table_grad_var = self.table_param_grad[1]

    created = []
    for index in range(len(self.pserver_endpoints)):
        # In sync mode the variable name also carries the trainer id.
        if self.sync_mode:
            var_name = "%s.trainer_%d.pserver_%d" % (
                table_grad_var.name, self.trainer_id, index)
        else:
            var_name = "%s.pserver_%d" % (table_grad_var.name, index)
        created.append(program.global_block().create_var(
            name=var_name,
            type=table_grad_var.type,
            shape=table_grad_var.shape,
            dtype=table_grad_var.dtype))
    self.trainer_side_table_grad_list = created

    return param_list, grad_list
def _init_splited_vars(self, slice_var_up):
    """
    Build the mappings used by the rest of the transpile steps.

    Populates:
        1. param_var_mapping: param var name -> [splited params vars]
        2. grad_var_mapping: grad var name -> [splited grads vars]
        3. grad_param_mapping: grad.blockx -> param.blockx
        4. param_grad_ep_mapping: ep -> {"params": [], "grads": []}

    Args:
        slice_var_up(bool): if true, slice each variable into as many
            blocks as there are pserver endpoints; otherwise use a slice
            count of 1.
    """
    seen_names = set()
    params = []
    grads = []
    for param, grad in self.params_grads:
        # skip parameter marked not trainable
        if type(param) == Parameter and param.trainable == False:
            continue
        if param.name not in seen_names:
            params.append(param)
            seen_names.add(param.name)
        if grad.name not in seen_names:
            grads.append(grad)
            seen_names.add(grad.name)

    params, grads = self._update_dist_lookup_table_vars(
        params, grads, self.params_grads)

    # When slicing, the slice count follows the number of pserver services
    # (a pserver may listen on two or more ports); otherwise every
    # param/grad is sliced with a count of 1.
    slice_count = len(self.pserver_endpoints) if slice_var_up else 1
    grad_blocks = slice_variable(grads, slice_count)
    param_blocks = slice_variable(params, slice_count)
    assert (len(grad_blocks) == len(param_blocks))

    # origin_varname -> [splited_var]
    self.param_var_mapping = self._create_vars_from_blocklist(
        self.origin_program, param_blocks)
    self.grad_var_mapping = self._create_vars_from_blocklist(
        self.origin_program,
        grad_blocks,
        add_trainer_suffix=self.trainer_num > 1)

    # Block descriptors are "name:block_id:<third field>" strings.
    self.grad_param_mapping = dict()
    for grad_block, param_block in zip(grad_blocks, param_blocks):
        g_name, g_bid, _ = grad_block.split(":")
        p_name, p_bid, _ = param_block.split(":")
        grad_piece = self.grad_var_mapping[g_name][int(g_bid)]
        param_piece = self.param_var_mapping[p_name][int(p_bid)]
        self.grad_param_mapping[grad_piece] = param_piece

    # create mapping of endpoint -> split var to create pserver side program
    self.param_grad_ep_mapping = dict()
    for ep in self.pserver_endpoints:
        self.param_grad_ep_mapping[ep] = {"params": [], "grads": []}
# transpiler function for dis lookup_table
def
_replace_lookup_table_op_with_prefetch
(
self
,
program
,
pserver_endpoints
):
...
...
python/paddle/fluid/transpiler/memory_optimization_transpiler.py
浏览文件 @
e02cbf35
...
...
@@ -383,6 +383,16 @@ def memory_optimize(input_program, skip_opt_set=None, print_log=False, level=0):
def release_memory(input_program, skip_opt_set=None):
    """
    Insert :code:`delete_op` into the input program so that variables which
    are no longer used are dropped early. The program is modified inplace.

    Notes: This is an experimental API and could be removed in next few
    releases. Users should not use this API.

    Args:
        input_program(Program): The program will be inserted :code:`delete_op`.
        skip_opt_set: Forwarded unchanged to each control flow graph's
            :code:`release_memory`. Default is None.
    """
    for graph in _get_cfgs(input_program):
        graph.release_memory(skip_opt_set=skip_opt_set)
python/paddle/fluid/transpiler/ps_dispatcher.py
浏览文件 @
e02cbf35
...
...
@@ -33,15 +33,21 @@ class PSDispatcher(object):
def
dispatch
(
self
,
varlist
):
"""
:param varlist: a list of Variables
:return: a map of pserver endpoint -> varname
Args:
varlist(list): a list of Variables
Returns:
a map of pserver endpoint -> varname
"""
AssertionError
(
"Interface has not been implemented."
)
class
HashName
(
PSDispatcher
):
"""
Hash variable names to several endpoints
Hash variable names to several endpoints using python
"hash()" function.
Args:
pserver_endpoints (list): list of endpoint(ip:port).
"""
def
__init__
(
self
,
pserver_endpoints
):
...
...
@@ -61,7 +67,11 @@ class HashName(PSDispatcher):
class
RoundRobin
(
PSDispatcher
):
"""
Distribute variables to serveral endpoints.
Distribute variables to several endpoints using the
`RoundRobin <https://en.wikipedia.org/wiki/Round-robin_scheduling>`_ method.
Args:
pserver_endpoints (list): list of endpoint(ip:port).
"""
def
__init__
(
self
,
pserver_endpoints
):
...
...
python/paddle/fluid/unique_name.py
浏览文件 @
e02cbf35
...
...
@@ -16,7 +16,7 @@ import collections
import
contextlib
import
sys
__all__
=
[
'generate'
,
'switch'
,
'guard'
,
'UniqueNameGenerator'
]
__all__
=
[
'generate'
,
'switch'
,
'guard'
]
class
UniqueNameGenerator
(
object
):
...
...
tools/codestyle/docstring_checker.py
浏览文件 @
e02cbf35
...
...
@@ -291,6 +291,8 @@ class DocstringChecker(BaseChecker):
True if successful otherwise False.
"""
if
node
.
name
.
startswith
(
"__"
)
or
node
.
name
.
startswith
(
"_"
):
return
True
find
=
False
for
t
in
node
.
body
:
if
not
isinstance
(
t
,
astroid
.
Return
):
...
...
@@ -316,6 +318,8 @@ class DocstringChecker(BaseChecker):
Returns:
True if successful otherwise False.
"""
if
node
.
name
.
startswith
(
"__"
)
or
node
.
name
.
startswith
(
"_"
):
return
True
args
=
[]
for
arg
in
node
.
args
.
get_children
():
if
(
not
isinstance
(
arg
,
astroid
.
AssignName
))
\
...
...
tools/print_signatures.py
0 → 100644
浏览文件 @
e02cbf35
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
"""
Print all signatures of a Python module in alphabetical order.
Usage:
python print_signatures.py "paddle.fluid" > signature.txt
"""
import
importlib
import
inspect
import
collections
import
sys
import
pydoc
member_dict
=
collections
.
OrderedDict
()
def visit_member(parent_name, member):
    """
    Record the signature of `member` (and, for classes, of its members)
    in the module-level `member_dict`.

    Args:
        parent_name(str): dotted name of the object that owns `member`.
        member: a class or a callable; its `__name__` is appended to
            `parent_name` to form the key.

    Raises:
        RuntimeError: if `member` is neither a class nor callable.
    """
    qualified = ".".join((parent_name, member.__name__))

    if inspect.isclass(member):
        for attr_name, attr in inspect.getmembers(member):
            if not hasattr(attr, '__name__'):
                continue
            # Keep public members, plus the constructor.
            if attr_name == "__init__" or not attr_name.startswith("_"):
                visit_member(qualified, attr)
        return

    if not callable(member):
        raise RuntimeError(
            "Unsupported generate signature of member, type {0}".format(
                str(type(member))))

    try:
        member_dict[qualified] = inspect.getargspec(member)
    except TypeError:
        # special for PyBind method: getargspec cannot inspect it, so fall
        # back to the rendered-doc lines that contain "->".
        arrow_lines = []
        for raw_line in pydoc.render_doc(member).split('\n'):
            if "->" in raw_line:
                arrow_lines.append(raw_line.strip())
        member_dict[qualified] = " ".join(arrow_lines)
def visit_all_module(mod):
    """
    Walk the public names of `mod` and record their signatures.

    Names come from `mod.__all__` when it exists, otherwise from
    `dir(mod)`; names starting with "_" and attributes that are missing or
    None are skipped. Sub-modules are visited recursively, everything else
    is handed to `visit_member`.

    Args:
        mod(module): the module to traverse.
    """
    candidates = mod.__all__ if hasattr(mod, "__all__") else dir(mod)
    for member_name in candidates:
        if member_name.startswith("_"):
            continue
        instance = getattr(mod, member_name, None)
        if instance is None:
            continue
        if inspect.ismodule(instance):
            visit_all_module(instance)
        else:
            visit_member(mod.__name__, instance)
# Import the module named by the first command-line argument and collect
# the signatures of everything reachable from it into member_dict.
visit_all_module
(
importlib
.
import_module
(
sys
.
argv
[
1
]))
# Emit one "name signature" line per entry. member_dict is an OrderedDict,
# so entries come out in the order they were recorded.
# NOTE: this is the Python 2 print statement.
for
name
in
member_dict
:
print
name
,
member_dict
[
name
]
编辑
预览
Markdown
is supported
0%
请重试
或
添加新附件
.
添加附件
取消
You are about to add
0
people
to the discussion. Proceed with caution.
先完成此消息的编辑!
取消
想要评论请
注册
或
登录