BaiXuePrincess / Paddle (forked from PaddlePaddle / Paddle; in sync with the upstream project)
Commit e5890052
Authored Jun 21, 2018 by tangwei12

Commit message: merge

Parents: 05bd9db8, 0151e4eb

Showing 95 changed files with 2,297 additions and 856 deletions (+2297, -856)
Changed files:

.pre-commit-config.yaml  +2 -2
benchmark/fluid/Dockerfile  +13 -4
benchmark/fluid/fluid_benchmark.py  +9 -2
benchmark/fluid/kube_gen_job.py  +3 -3
doc/fluid/api/gen_doc.sh  +1 -1
doc/fluid/api/transpiler.rst  +46 -0
doc/fluid/howto/inference/build_and_install_lib_cn.rst  +1 -0
paddle/contrib/inference/demo/simple_on_word2vec.cc  +9 -13
paddle/contrib/inference/paddle_inference_api.cc  +50 -0
paddle/contrib/inference/paddle_inference_api.h  +33 -5
paddle/contrib/inference/paddle_inference_api_anakin_engine.cc  +5 -2
paddle/contrib/inference/paddle_inference_api_anakin_engine_tester.cc  +7 -9
paddle/contrib/inference/paddle_inference_api_impl.cc  +7 -6
paddle/contrib/inference/test_paddle_inference_api_impl.cc  +10 -16
paddle/fluid/framework/details/multi_devices_graph_builder.cc  +106 -51
paddle/fluid/framework/details/multi_devices_graph_builder.h  +10 -7
paddle/fluid/framework/details/ssa_graph_builder.h  +1 -0
paddle/fluid/framework/details/threaded_ssa_graph_executor.cc  +2 -0
paddle/fluid/framework/details/threaded_ssa_graph_executor.h  +1 -0
paddle/fluid/framework/executor.cc  +16 -7
paddle/fluid/framework/executor.h  +1 -1
paddle/fluid/framework/parallel_executor.cc  +18 -5
paddle/fluid/framework/parallel_executor.h  +3 -0
paddle/fluid/inference/analysis/tensorrt_subgraph_pass.cc  +3 -3
paddle/fluid/operators/CMakeLists.txt  +7 -16
paddle/fluid/operators/batch_norm_mkldnn_op.cc  +0 -14
paddle/fluid/operators/batch_norm_op.cc  +0 -16
paddle/fluid/operators/batch_norm_op.h  +16 -0
paddle/fluid/operators/bilinear_interp_op.cc  +2 -1
paddle/fluid/operators/bilinear_interp_op.h  +24 -18
paddle/fluid/operators/detail/macros.h  +8 -8
paddle/fluid/operators/distributed/CMakeLists.txt  +0 -5
paddle/fluid/operators/distributed/brpc_client.cc  +3 -3
paddle/fluid/operators/distributed/brpc_client.h  +4 -4
paddle/fluid/operators/distributed/brpc_server.cc  +11 -11
paddle/fluid/operators/distributed/brpc_server.h  +4 -4
paddle/fluid/operators/distributed/bytebuffer_stream.cc  +3 -3
paddle/fluid/operators/distributed/bytebuffer_stream.h  +2 -2
paddle/fluid/operators/distributed/grpc_client.cc  +4 -4
paddle/fluid/operators/distributed/grpc_client.h  +4 -4
paddle/fluid/operators/distributed/grpc_serde_test.cc  +7 -7
paddle/fluid/operators/distributed/grpc_server.cc  +7 -6
paddle/fluid/operators/distributed/grpc_server.h  +8 -8
paddle/fluid/operators/distributed/grpc_service.h  +10 -9
paddle/fluid/operators/distributed/proto_encoder_helper.h  +2 -2
paddle/fluid/operators/distributed/request_handler.h  +2 -2
paddle/fluid/operators/distributed/request_handler_impl.cc  +4 -4
paddle/fluid/operators/distributed/request_handler_impl.h  +3 -3
paddle/fluid/operators/distributed/rpc_client.cc  +3 -3
paddle/fluid/operators/distributed/rpc_client.h  +2 -2
paddle/fluid/operators/distributed/rpc_server.cc  +3 -3
paddle/fluid/operators/distributed/rpc_server.h  +3 -3
paddle/fluid/operators/distributed/rpc_server_test.cc  +12 -10
paddle/fluid/operators/distributed/send_recv.proto  +0 -0
paddle/fluid/operators/distributed/sendrecvop_utils.cc  +7 -7
paddle/fluid/operators/distributed/sendrecvop_utils.h  +4 -4
paddle/fluid/operators/distributed/variable_response.cc  +10 -10
paddle/fluid/operators/distributed/variable_response.h  +5 -5
paddle/fluid/operators/fetch_barrier_op.cc  +2 -2
paddle/fluid/operators/gen_nccl_id_op.cc  +9 -8
paddle/fluid/operators/listen_and_serv_op.cc  +21 -19
paddle/fluid/operators/listen_and_serv_op.h  +10 -8
paddle/fluid/operators/logical_op.cc  +1 -1
paddle/fluid/operators/math/concat.cc  +2 -2
paddle/fluid/operators/math/math_function.cc  +1 -0
paddle/fluid/operators/prefetch_op.cc  +2 -2
paddle/fluid/operators/recv_op.cc  +2 -2
paddle/fluid/operators/send_barrier_op.cc  +2 -2
paddle/fluid/operators/send_op.cc  +2 -2
paddle/fluid/operators/tensorrt_engine_op.cc  +4 -1
paddle/fluid/operators/tensorrt_engine_op.h  +3 -1
paddle/fluid/operators/tensorrt_engine_op_test.cc  +0 -1
paddle/fluid/operators/test_send_nccl_id.cc  +11 -10
paddle/fluid/pybind/pybind.cc  +6 -0
paddle/fluid/pybind/tensor_py.h  +1 -1
python/paddle/fluid/__init__.py  +2 -1
python/paddle/fluid/data_feeder.py  +97 -0
python/paddle/fluid/executor.py  +27 -3
python/paddle/fluid/framework.py  +614 -166
python/paddle/fluid/lod_tensor.py  +48 -33
python/paddle/fluid/metrics.py  +7 -7
python/paddle/fluid/nets.py  +198 -29
python/paddle/fluid/parallel_executor.py  +76 -40
python/paddle/fluid/param_attr.py  +102 -4
python/paddle/fluid/recordio_writer.py  +50 -0
python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py  +43 -3
python/paddle/fluid/trainer.py  +133 -15
python/paddle/fluid/transpiler/distribute_transpiler.py  +198 -150
python/paddle/fluid/transpiler/memory_optimization_transpiler.py  +10 -0
python/paddle/fluid/transpiler/ps_dispatcher.py  +14 -4
python/paddle/fluid/unique_name.py  +1 -1
tools/check_ctest_hung.py  +53 -0
tools/codestyle/clang_format.hook  +0 -0
tools/codestyle/copyright.hook  +0 -0
tools/codestyle/docstring_checker.py  +4 -0
.pre-commit-config.yaml (view file @ e5890052)

@@ -23,7 +23,7 @@ repos:
 -   id: clang-format-with-version-check
     name: clang-format
     description: Format files with ClangFormat.
-    entry: bash ./.clang_format.hook -i
+    entry: bash ./tools/codestyle/clang_format.hook -i
     language: system
     files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto)$
 -   repo: local
@@ -52,7 +52,7 @@ repos:
     hooks:
     -   id: copyright_checker
         name: copyright_checker
-        entry: python ./.copyright.hook
+        entry: python ./tools/codestyle/copyright.hook
        language: system
        files: \.(c|cc|cxx|cpp|cu|h|hpp|hxx|proto|py)$
        exclude: (?!.*third_party)^.*$ | (?!.*book)^.*$
benchmark/fluid/Dockerfile (view file @ e5890052)

 FROM nvidia/cuda:9.0-cudnn7-devel-ubuntu16.04
 # Use UBUNTU_MIRROR can speed up apt-get speed.
 # ARG UBUNTU_MIRROR
 # RUN /bin/bash -c 'if [[ -n ${UBUNTU_MIRROR} ]]; then sed -i 's#http://archive.ubuntu.com/ubuntu#${UBUNTU_MIRROR}#g' /etc/apt/sources.list; fi'
+RUN apt-get update && apt-get install -y python python-pip iputils-ping libgtk2.0-dev wget vim net-tools iftop python-opencv
+RUN ln -s /usr/lib/x86_64-linux-gnu/libcudnn.so.7 /usr/lib/libcudnn.so && ln -s /usr/lib/x86_64-linux-gnu/libnccl.so.2 /usr/lib/libnccl.so
+RUN pip install -U pip
+RUN pip install -U kubernetes paddlepaddle
 # IMPORTANT:
 # Add "ENV http_proxy=http://ip:port" if your download is slow, and don't forget to unset it at runtime.
 # exmaple: unset http_proxy && unset https_proxy && python fluid_benchmark.py ...
-RUN pip install -U pip
-RUN pip install -U kubernetes paddlepaddle
 RUN sh -c 'echo "import paddle.v2 as paddle\npaddle.dataset.cifar.train10()\npaddle.dataset.flowers.fetch()" | python'
 RUN sh -c 'echo "import paddle.v2 as paddle\npaddle.dataset.mnist.train()\npaddle.dataset.mnist.test()\npaddle.dataset.imdb.fetch()" | python'
@@ -14,9 +21,11 @@ RUN pip uninstall -y paddlepaddle && mkdir /workspace
 ADD https://raw.githubusercontent.com/PaddlePaddle/cloud/develop/docker/paddle_k8s /usr/bin
 ADD https://raw.githubusercontent.com/PaddlePaddle/cloud/develop/docker/k8s_tools.py /root
+RUN chmod +x /usr/bin/paddle_k8s
 ADD *.whl /
-RUN pip install /*.whl && rm -f /*.whl && chmod +x /usr/bin/paddle_k8s
+RUN pip install /*.whl && rm -f /*.whl
 ENV LD_LIBRARY_PATH=/usr/local/lib
-ADD fluid_benchmark.py recordio_converter.py models/ /workspace/
+ADD fluid_benchmark.py recordio_converter.py args.py recordio_converter.py run.sh run_fluid_benchmark.sh /workspace/
+ADD models/ /workspace/models/
benchmark/fluid/fluid_benchmark.py (view file @ e5890052)

@@ -264,8 +264,6 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader,
                 break
             else:
                 loss, = exe.run([avg_loss.name], feed=feeder.feed(data))
-            if args.update_method == "pserver":
-                exe.bcast_params()
             if args.use_reader_op:
                 num_samples += args.batch_size * args.gpus
             else:
@@ -301,9 +299,18 @@ def print_train_time(start_time, end_time, num_samples):
           (num_samples, train_elapsed, examples_per_sec))


+def print_paddle_envs():
+    print('----------- Configuration envs -----------')
+    for k in os.environ:
+        if "PADDLE_" in k:
+            print "ENV %s:%s" % (k, os.environ[k])
+    print('------------------------------------------------')
+
+
 def main():
     args = parse_args()
     print_arguments(args)
+    print_paddle_envs()

     # the unique trainer id, starting from 0, needed by trainer
     # only
benchmark/fluid/kube_gen_job.py (view file @ e5890052)

@@ -17,6 +17,7 @@ import copy
 import argparse
 import random
 import os
+import copy

 from kube_templates import pserver, trainer, envs
@@ -109,10 +110,9 @@ def gen_job():
     envs.append({"name": "PADDLE_JOB_NAME", "value": args.jobname})
     envs.append({"name": "PADDLE_TRAINERS", "value": str(args.trainers)})
-    envs.append({"name": "PSERVERS", "value": str(args.pservers)})
+    envs.append({"name": "PADDLE_PSERVERS", "value": str(args.pservers)})
     envs.append({"name": "ENTRY", "value": args.entry})
     envs.append({"name": "PADDLE_PSERVER_PORT", "value": str(args.port)})
-    envs.append({"name": "PADDLE_PSERVER_PORT", "value": str(args.port)})
     # NOTE: these directories below are cluster specific, please modify
     # this settings before you run on your own cluster.
     envs.append({
@@ -166,7 +166,7 @@ def gen_job():
     tn["spec"]["template"]["spec"]["volumes"] = volumes
     tn_container["volumeMounts"] = volumeMounts

-    ps_container["env"] = envs
+    ps_container["env"] = copy.deepcopy(envs)
     ps_container["env"].append({
         "name": "PADDLE_TRAINING_ROLE",
         "value": "PSERVER"
doc/fluid/api/gen_doc.sh (view file @ e5890052)

 #!/bin/bash
 python gen_doc.py layers --submodules control_flow device io nn ops tensor detection learning_rate_scheduler metric > layers.rst

-for module in data_feeder clip metrics executor initializer io nets optimizer param_attr profiler regularizer
+for module in data_feeder clip metrics executor initializer io nets optimizer param_attr profiler regularizer transpiler
 do
   python gen_doc.py ${module} > ${module}.rst
 done
doc/fluid/api/transpiler.rst (new file, 0 → 100644, view file @ e5890052)

+..  THIS FILE IS GENERATED BY `gen_doc.{py|sh}`
+    !DO NOT EDIT THIS FILE MANUALLY!
+
+==========
+transpiler
+==========
+
+DistributeTranspiler
+--------------------
+
+.. autoclass:: paddle.fluid.transpiler.DistributeTranspiler
+    :members:
+    :noindex:
+
+InferenceTranspiler
+-------------------
+
+.. autoclass:: paddle.fluid.transpiler.InferenceTranspiler
+    :members:
+    :noindex:
+
+memory_optimize
+---------------
+
+.. autofunction:: paddle.fluid.transpiler.memory_optimize
+    :noindex:
+
+release_memory
+--------------
+
+.. autofunction:: paddle.fluid.transpiler.release_memory
+    :noindex:
+
+HashName
+--------
+
+.. autoclass:: paddle.fluid.transpiler.HashName
+    :members:
+    :noindex:
+
+RoundRobin
+----------
+
+.. autoclass:: paddle.fluid.transpiler.RoundRobin
+    :members:
+    :noindex:
doc/fluid/howto/inference/build_and_install_lib_cn.rst (view file @ e5890052)

@@ -13,6 +13,7 @@ cpu_noavx_openblas `fluid.tgz <https://guest:@paddleci.ngrok.io/repository
 cuda7.5_cudnn5_avx_mkl  `fluid.tgz <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda75cudnn5cp27cp27mu/.lastSuccessful/fluid.tgz>`_
 cuda8.0_cudnn5_avx_mkl  `fluid.tgz <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda80cudnn5cp27cp27mu/.lastSuccessful/fluid.tgz>`_
 cuda8.0_cudnn7_avx_mkl  `fluid.tgz <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda8cudnn7cp27cp27mu/.lastSuccessful/fluid.tgz>`_
+cuda9.0_cudnn7_avx_mkl  `fluid.tgz <https://guest:@paddleci.ngrok.io/repository/download/Manylinux1_Cuda90cudnn7avxMkl/.lastSuccessful/fluid.tgz>`_
 ====================== ========================================

 从源码编译
paddle/contrib/inference/demo/simple_on_word2vec.cc (view file @ e5890052)

@@ -40,10 +40,9 @@ void Main(bool use_gpu) {
     //# 2. Prepare input.
     int64_t data[4] = {1, 2, 3, 4};
-    PaddleBuf buf{.data = data, .length = sizeof(data)};
     PaddleTensor tensor{.name = "",
                         .shape = std::vector<int>({4, 1}),
-                        .data = buf,
+                        .data = PaddleBuf(data, sizeof(data)),
                         .dtype = PaddleDType::INT64};

     // For simplicity, we set all the slots with the same data.
@@ -55,14 +54,12 @@ void Main(bool use_gpu) {
     //# 4. Get output.
     ASSERT_EQ(outputs.size(), 1UL);
-    LOG(INFO) << "output buffer size: " << outputs.front().data.length;
-    const size_t num_elements = outputs.front().data.length / sizeof(float);
+    LOG(INFO) << "output buffer size: " << outputs.front().data.length();
+    const size_t num_elements = outputs.front().data.length() / sizeof(float);
     // The outputs' buffers are in CPU memory.
     for (size_t i = 0; i < std::min(5UL, num_elements); i++) {
-      LOG(INFO) << static_cast<float*>(outputs.front().data.data)[i];
+      LOG(INFO) << static_cast<float*>(outputs.front().data.data())[i];
     }
-    // TODO(Superjomn): this is should be free automatically
-    free(outputs[0].data.data);
   }
 }
@@ -86,10 +83,9 @@ void MainThreads(int num_threads, bool use_gpu) {
       for (int batch_id = 0; batch_id < num_batches; ++batch_id) {
         // 2. Dummy Input Data
         int64_t data[4] = {1, 2, 3, 4};
-        PaddleBuf buf{.data = data, .length = sizeof(data)};
         PaddleTensor tensor{.name = "",
                             .shape = std::vector<int>({4, 1}),
-                            .data = buf,
+                            .data = PaddleBuf(data, sizeof(data)),
                             .dtype = PaddleDType::INT64};
         std::vector<PaddleTensor> inputs(4, tensor);
         std::vector<PaddleTensor> outputs;
@@ -99,13 +95,13 @@ void MainThreads(int num_threads, bool use_gpu) {
         // 4. Get output.
         ASSERT_EQ(outputs.size(), 1UL);
         LOG(INFO) << "TID: " << tid << ", "
-                  << "output buffer size: " << outputs.front().data.length;
-        const size_t num_elements = outputs.front().data.length / sizeof(float);
+                  << "output buffer size: " << outputs.front().data.length();
+        const size_t num_elements =
+            outputs.front().data.length() / sizeof(float);
         // The outputs' buffers are in CPU memory.
         for (size_t i = 0; i < std::min(5UL, num_elements); i++) {
-          LOG(INFO) << static_cast<float*>(outputs.front().data.data)[i];
+          LOG(INFO) << static_cast<float*>(outputs.front().data.data())[i];
         }
-        free(outputs[0].data.data);
       }
     });
   }
paddle/contrib/inference/paddle_inference_api.cc (view file @ e5890052)

@@ -13,3 +13,53 @@ See the License for the specific language governing permissions and
 limitations under the License. */

 #include "paddle/contrib/inference/paddle_inference_api.h"
+
+namespace paddle {
+
+PaddleBuf::PaddleBuf(PaddleBuf&& other)
+    : data_(other.data_),
+      length_(other.length_),
+      memory_owned_(other.memory_owned_) {
+  other.memory_owned_ = false;
+  other.data_ = nullptr;
+  other.length_ = 0;
+}
+
+PaddleBuf::PaddleBuf(const PaddleBuf& other) { *this = other; }
+
+PaddleBuf& PaddleBuf::operator=(const PaddleBuf& other) {
+  // only the buffer with external memory can be copied
+  assert(!other.memory_owned_);
+  data_ = other.data_;
+  length_ = other.length_;
+  memory_owned_ = other.memory_owned_;
+  return *this;
+}
+
+void PaddleBuf::Resize(size_t length) {
+  // Only the owned memory can be reset, the external memory can't be changed.
+  if (length_ == length) return;
+  assert(memory_owned_);
+  Free();
+  data_ = new char[length];
+  length_ = length;
+  memory_owned_ = true;
+}
+
+void PaddleBuf::Reset(void* data, size_t length) {
+  Free();
+  memory_owned_ = false;
+  data_ = data;
+  length_ = length;
+}
+
+void PaddleBuf::Free() {
+  if (memory_owned_ && data_) {
+    assert(length_ > 0);
+    delete static_cast<char*>(data_);
+    data_ = nullptr;
+    length_ = 0;
+  }
+}
+
+}  // namespace paddle
\ No newline at end of file
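The ownership rules implemented above (owned buffers can move, only externally backed buffers can be copied) are easiest to see in a short usage sketch. This block is illustrative only, not part of the commit, and assumes a build that compiles against the header changed below:

#include <cassert>
#include <utility>
#include "paddle/contrib/inference/paddle_inference_api.h"

int main() {
  float host[8] = {0};
  paddle::PaddleBuf external(host, sizeof(host));  // wraps `host`, does not own it
  paddle::PaddleBuf copy(external);                // copying is allowed only for external memory
  assert(copy.data() == host && copy.length() == sizeof(host));

  paddle::PaddleBuf owned(sizeof(host));           // allocates and owns its buffer
  paddle::PaddleBuf moved(std::move(owned));       // ownership transfers; `owned` is left empty
  assert(owned.empty());
  return 0;
}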
paddle/contrib/inference/paddle_inference_api.h (view file @ e5890052)

@@ -21,6 +21,7 @@ limitations under the License. */
 #pragma once

 #include <cassert>
 #include <memory>
+#include <string>
 #include <vector>
@@ -32,12 +33,38 @@ enum PaddleDType {
   INT64,
 };

-struct PaddleBuf {
-  void* data;     // pointer to the data memory.
-  size_t length;  // number of memory bytes.
+class PaddleBuf {
+ public:
+  PaddleBuf() = default;
+  PaddleBuf(PaddleBuf&& other);
+  // Copy only available when memory is managed externally.
+  explicit PaddleBuf(const PaddleBuf&);
+  PaddleBuf& operator=(const PaddleBuf&);
+  // Do not own the memory.
+  PaddleBuf(void* data, size_t length)
+      : data_(data), length_(length), memory_owned_{false} {}
+  // Own memory.
+  PaddleBuf(size_t length)
+      : data_(new char[length]), length_(length), memory_owned_(true) {}
+  // Resize to `length` bytes.
+  void Resize(size_t length);
+  // Reset to external memory.
+  void Reset(void* data, size_t length);
+  bool empty() const { return length_ == 0; }
+  void* data() const { return data_; }
+  size_t length() const { return length_; }
+
+  ~PaddleBuf() { Free(); }
+
+ private:
+  void Free();
+  void* data_{nullptr};  // pointer to the data memory.
+  size_t length_{0};     // number of memory bytes.
+  bool memory_owned_{true};
 };

 struct PaddleTensor {
+  PaddleTensor() = default;
   std::string name;  // variable name.
   std::vector<int> shape;
   // TODO(Superjomn) for LoD support, add a vector<vector<int>> field if needed.
@@ -67,8 +94,9 @@ class PaddlePredictor {
   // Predict an record.
   // The caller should be responsible for allocating and releasing the memory of
-  // `inputs`. `inputs` should be alive until Run returns. caller should be
-  // responsible for releasing the memory of `output_data`.
+  // `inputs`. `inputs` should be available until Run returns. Caller should be
+  // responsible for the output tensor's buffer, either allocated or passed from
+  // outside.
   virtual bool Run(const std::vector<PaddleTensor>& inputs,
                    std::vector<PaddleTensor>* output_data) = 0;
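For orientation, here is a hedged caller-side sketch of the reworked API. It is not code from this commit; the function and variable names are placeholders. Inputs wrap caller-owned memory with PaddleBuf(data, size), and output buffers may start empty because the predictor resizes them, as the implementation diffs below show.

#include <vector>
#include "paddle/contrib/inference/paddle_inference_api.h"

void RunOnce(paddle::PaddlePredictor* predictor) {
  int64_t data[4] = {1, 2, 3, 4};
  paddle::PaddleTensor input;
  input.name = "";
  input.shape = std::vector<int>({4, 1});
  input.data = paddle::PaddleBuf(data, sizeof(data));  // external, not owned
  input.dtype = paddle::PaddleDType::INT64;

  std::vector<paddle::PaddleTensor> inputs(1, input), outputs;
  if (!predictor->Run(inputs, &outputs)) return;
  // The predictor resizes outputs[0].data as needed; no manual free() any more.
  const float* probs = static_cast<const float*>(outputs.front().data.data());
  const size_t n = outputs.front().data.length() / sizeof(float);
  (void)probs;
  (void)n;
}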
paddle/contrib/inference/paddle_inference_api_anakin_engine.cc (view file @ e5890052)

@@ -48,7 +48,7 @@ bool PaddleInferenceAnakinPredictor::Run(
     auto d_tensor_in_p = executor_.get_in(input.name);
     float *d_data_p = d_tensor_in_p->mutable_data();
     if (cudaMemcpy(d_data_p,
-                   static_cast<float *>(input.data.data),
+                   static_cast<float *>(input.data.data()),
                    d_tensor_in_p->valid_size() * sizeof(float),
                    cudaMemcpyHostToDevice) != 0) {
       LOG(ERROR) << "copy data from CPU to GPU error";
@@ -65,8 +65,11 @@ bool PaddleInferenceAnakinPredictor::Run(
   for (auto &output : *output_data) {
     auto *tensor = executor_.get_out(output.name);
     output.shape = tensor->shape();
+    if (output.data.length() < tensor->valid_size() * sizeof(float)) {
+      output.data.Resize(tensor->valid_size() * sizeof(float));
+    }
     // Copy data from GPU -> CPU
-    if (cudaMemcpy(output.data.data,
+    if (cudaMemcpy(output.data.data(),
                    tensor->mutable_data(),
                    tensor->valid_size() * sizeof(float),
                    cudaMemcpyDeviceToHost) != 0) {
paddle/contrib/inference/paddle_inference_api_anakin_engine_tester.cc (view file @ e5890052)

@@ -37,28 +37,26 @@ TEST(inference, anakin) {
   float data[1 * 3 * 224 * 224] = {1.0f};
-  PaddleBuf buf{.data = data, .length = sizeof(data)};
   PaddleTensor tensor{.name = "input_0",
                       .shape = std::vector<int>({1, 3, 224, 224}),
-                      .data = buf,
+                      .data = PaddleBuf(data, sizeof(data)),
                       .dtype = PaddleDType::FLOAT32};

   // For simplicity, we set all the slots with the same data.
-  std::vector<PaddleTensor> paddle_tensor_feeds(1, tensor);
+  std::vector<PaddleTensor> paddle_tensor_feeds;
+  paddle_tensor_feeds.emplace_back(std::move(tensor));

   float data_out[1000];
-  PaddleBuf buf_out{.data = data_out, .length = sizeof(data)};
   PaddleTensor tensor_out{.name = "prob_out",
                           .shape = std::vector<int>({1000, 1}),
-                          .data = buf_out,
+                          .data = PaddleBuf(),
                           .dtype = PaddleDType::FLOAT32};

-  std::vector<PaddleTensor> outputs(1, tensor_out);
+  std::vector<PaddleTensor> outputs;
+  outputs.emplace_back(std::move(tensor_out));

   ASSERT_TRUE(predictor->Run(paddle_tensor_feeds, &outputs));

-  float *data_o = static_cast<float *>(outputs[0].data.data);
+  float *data_o = static_cast<float *>(outputs[0].data.data());
   for (size_t j = 0; j < 1000; ++j) {
     LOG(INFO) << "output[" << j << "]: " << data_o[j];
   }
paddle/contrib/inference/paddle_inference_api_impl.cc (view file @ e5890052)

@@ -178,8 +178,8 @@ bool NativePaddlePredictor::SetFeed(const std::vector<PaddleTensor> &inputs,
     // TODO(panyx0718): Init LoDTensor from existing memcpy to save a copy.
     std::memcpy(static_cast<void *>(input_ptr),
-                inputs[i].data.data,
-                inputs[i].data.length);
+                inputs[i].data.data(),
+                inputs[i].data.length());
     feeds->push_back(input);
   }
   return true;
@@ -241,10 +241,11 @@ bool NativePaddlePredictor::GetFetch(
     }
     outputs->at(i).shape = shape;
-    outputs->at(i).data.length = sizeof(float) * data.size();
-    outputs->at(i).data.data = malloc(outputs->at(i).data.length);
-    std::memcpy(
-        outputs->at(i).data.data, data.data(), outputs->at(i).data.length);
+    auto &buffer = outputs->at(i).data;
+    if (buffer.empty() || buffer.length() < sizeof(float) * data.size()) {
+      buffer.Resize(sizeof(float) * data.size());
+    }
+    std::memcpy(buffer.data(), data.data(), buffer.length());
     outputs->at(i).dtype = PaddleDType::FLOAT32;
     // TODO(panyx0718): support other types? fill tensor name? avoid a copy.
   }
paddle/contrib/inference/test_paddle_inference_api_impl.cc (view file @ e5890052)

@@ -27,13 +27,12 @@ namespace paddle {
 PaddleTensor LodTensorToPaddleTensor(framework::LoDTensor* t) {
   PaddleTensor pt;
-  pt.data.data = t->data<void>();

   if (t->type() == typeid(int64_t)) {
-    pt.data.length = t->numel() * sizeof(int64_t);
+    pt.data.Reset(t->data<void>(), t->numel() * sizeof(int64_t));
     pt.dtype = PaddleDType::INT64;
   } else if (t->type() == typeid(float)) {
-    pt.data.length = t->numel() * sizeof(float);
+    pt.data.Reset(t->data<void>(), t->numel() * sizeof(float));
     pt.dtype = PaddleDType::FLOAT32;
   } else {
     LOG(FATAL) << "unsupported type.";
@@ -79,8 +78,8 @@ void MainWord2Vec(bool use_gpu) {
   std::vector<PaddleTensor> outputs;
   ASSERT_TRUE(predictor->Run(paddle_tensor_feeds, &outputs));
   ASSERT_EQ(outputs.size(), 1UL);
-  size_t len = outputs[0].data.length;
-  float* data = static_cast<float*>(outputs[0].data.data);
+  size_t len = outputs[0].data.length();
+  float* data = static_cast<float*>(outputs[0].data.data());
   for (size_t j = 0; j < len / sizeof(float); ++j) {
     ASSERT_LT(data[j], 1.0);
     ASSERT_GT(data[j], -1.0);
@@ -103,8 +102,6 @@ void MainWord2Vec(bool use_gpu) {
     EXPECT_LT(lod_data[i] - data[i], 1e-3);
     EXPECT_GT(lod_data[i] - data[i], -1e-3);
   }
-
-  free(outputs[0].data.data);
 }

 void MainImageClassification(bool use_gpu) {
@@ -143,13 +140,12 @@ void MainImageClassification(bool use_gpu) {
   std::vector<PaddleTensor> outputs;
   ASSERT_TRUE(predictor->Run(paddle_tensor_feeds, &outputs));
   ASSERT_EQ(outputs.size(), 1UL);
-  size_t len = outputs[0].data.length;
-  float* data = static_cast<float*>(outputs[0].data.data);
+  size_t len = outputs[0].data.length();
+  float* data = static_cast<float*>(outputs[0].data.data());
   float* lod_data = output1.data<float>();
   for (size_t j = 0; j < len / sizeof(float); ++j) {
     EXPECT_NEAR(lod_data[j], data[j], 1e-3);
   }
-  free(data);
 }

 void MainThreadsWord2Vec(bool use_gpu) {
@@ -192,8 +188,8 @@ void MainThreadsWord2Vec(bool use_gpu) {
       // check outputs range
       ASSERT_EQ(local_outputs.size(), 1UL);
-      const size_t len = local_outputs[0].data.length;
-      float* data = static_cast<float*>(local_outputs[0].data.data);
+      const size_t len = local_outputs[0].data.length();
+      float* data = static_cast<float*>(local_outputs[0].data.data());
       for (size_t j = 0; j < len / sizeof(float); ++j) {
         ASSERT_LT(data[j], 1.0);
         ASSERT_GT(data[j], -1.0);
@@ -205,7 +201,6 @@ void MainThreadsWord2Vec(bool use_gpu) {
       for (int i = 0; i < refs[tid].numel(); ++i) {
         EXPECT_NEAR(ref_data[i], data[i], 1e-3);
       }
-      free(data);
     });
   }
   for (int i = 0; i < num_jobs; ++i) {
@@ -251,14 +246,13 @@ void MainThreadsImageClassification(bool use_gpu) {
       // check outputs correctness
       ASSERT_EQ(local_outputs.size(), 1UL);
-      const size_t len = local_outputs[0].data.length;
-      float* data = static_cast<float*>(local_outputs[0].data.data);
+      const size_t len = local_outputs[0].data.length();
+      float* data = static_cast<float*>(local_outputs[0].data.data());
       float* ref_data = refs[tid].data<float>();
       EXPECT_EQ(refs[tid].numel(), len / sizeof(float));
       for (int i = 0; i < refs[tid].numel(); ++i) {
         EXPECT_NEAR(ref_data[i], data[i], 1e-3);
       }
-      free(data);
     });
   }
   for (int i = 0; i < num_jobs; ++i) {
paddle/fluid/framework/details/multi_devices_graph_builder.cc (view file @ e5890052)

@@ -57,6 +57,7 @@ MultiDevSSAGraphBuilder::MultiDevSSAGraphBuilder(
   for (auto &p : params) {
     grad_names_.insert(GradVarName(p));
   }
+  balance_vars_.resize(places_.size(), 0);
 }

 void MultiDevSSAGraphBuilder::CreateOpHandleIOs(SSAGraph *result,
@@ -140,11 +141,30 @@ bool MultiDevSSAGraphBuilder::IsDistTrainOp(
          checker(op.InputArgumentNames(), recv_vars);
 }

+size_t MultiDevSSAGraphBuilder::GetAppropriateDeviceID(
+    const std::vector<std::string> &var_names) const {
+  int64_t numel_sum = 0;
+  for (auto var_name : var_names) {
+    auto var_desc = all_vars_.at(var_name);
+    PADDLE_ENFORCE_NOT_NULL(var_desc);
+    auto dim = framework::make_ddim(var_desc->GetShape());
+    int64_t numel = framework::product(dim);
+    PADDLE_ENFORCE_GT(numel, 0);
+    numel_sum += numel;
+  }
+
+  auto smallest =
+      std::min_element(std::begin(balance_vars_), std::end(balance_vars_));
+  size_t dev_id =
+      static_cast<size_t>(std::distance(std::begin(balance_vars_), smallest));
+  balance_vars_[dev_id] += numel_sum;
+  return dev_id;
+}
+
 std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build(
     const ProgramDesc &program) const {
-  std::unordered_map<std::string, VarDesc *> all_vars;
   for (auto *var : program.Block(0).AllVars()) {
-    all_vars[var->Name()] = var;
+    all_vars_.emplace(var->Name(), var);
   }

   auto graph = new SSAGraph();
@@ -161,35 +181,16 @@ std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build(
   auto send_vars = FindDistTrainSendVars(program);
   auto recv_vars = FindDistTrainRecvVars(program);

-  std::vector<std::unordered_set<std::string>> var_name_on_devices;
   std::vector<std::unordered_set<std::string>> bcast_var_name_set;
-  var_name_on_devices.resize(places_.size());
   bcast_var_name_set.resize(places_.size());

   size_t cur_device_id = 0;
-  std::vector<int64_t> balance_grads(places_.size(), 0);
-
-  auto get_appropriate_dev = [&](std::string &g_name) -> size_t {
-    auto var_desc = all_vars.at(g_name);
-    PADDLE_ENFORCE_NOT_NULL(var_desc);
-    auto dim = framework::make_ddim(var_desc->GetShape());
-    int64_t numel = framework::product(dim);
-    PADDLE_ENFORCE_GE(numel, 0);
-    auto smallest =
-        std::min_element(std::begin(balance_grads), std::end(balance_grads));
-    size_t dev_id =
-        static_cast<size_t>(std::distance(std::begin(balance_grads), smallest));
-    balance_grads[dev_id] += numel;
-    return dev_id;
-  };
-
   bool is_forwarding = true;
+
   for (auto *op : program.Block(0).AllOps()) {
     if (boost::get<int>(
             op->GetAttr(OpProtoAndCheckerMaker::OpRoleAttrName())) ==
         static_cast<int>(OpRole::kRPC)) {
-      // append rpc op if program is distributed trainer main program.
-      // always use the first device
       CreateRPCOp(&result, *op);
     } else if (IsDistTrainOp(*op, send_vars, recv_vars)) {
       CreateDistTrainOp(&result, *op);
@@ -199,15 +200,19 @@ std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build(
                  BuildStrategy::GradientScaleStrategy::kCustomized) {
         CreateScaleLossGradOp(&result);
       }
+      // This assumes the backward generating code will ensure IsScaleLossOp
+      // is true only for the op that scale the final scalar loss.
+      // It also assumes backward op will always follow the forward op in
+      // the block.
       is_forwarding = false;
     } else {
-      int op_dev_id = GetOpDeviceID(var_name_on_devices, *op);
+      int op_dev_id = GetOpDeviceID(*op);
       if (op_dev_id == -1) {  // var on all device
         CreateComputationalOps(&result, *op, places_.size());
       } else {
         CreateComputationalOp(&result, *op, op_dev_id);
         for (auto &var_name : op->OutputArgumentNames()) {
-          var_name_on_devices[op_dev_id].emplace(var_name);
+          var_name_on_devices_.emplace(var_name, op_dev_id);
         }
       }
       if (!is_forwarding && places_.size() > 1) {
@@ -230,19 +235,22 @@ std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build(
             switch (strategy_.reduce_) {
               case BuildStrategy::ReduceStrategy::kReduce:
-                cur_device_id = get_appropriate_dev(g_name);
+                cur_device_id = GetAppropriateDeviceID({g_name});
                 CreateReduceOp(&result, g_name, cur_device_id);
-                var_name_on_devices[cur_device_id].emplace(g_name);
+                var_name_on_devices_.emplace(g_name, cur_device_id);
                 bcast_var_name_set[cur_device_id].emplace(p_name);
                 break;
               case BuildStrategy::ReduceStrategy::kAllReduce:
-                if (IsSparseGradient(all_vars, g_name)) {
+                if (IsSparseGradient(g_name)) {
                   CreateReduceOp(&result, g_name, 0);
                   CreateBroadcastOp(&result, g_name, 0);
                 } else {
                   InsertAllReduceOp(&result, g_name);
                 }
                 break;
+              default:
+                LOG(FATAL) << "Unknown reduce strategy ";
+                break;
             }
           }
         } catch (boost::bad_get e) {
@@ -261,7 +269,7 @@ std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build(
   }
   /*
   Dependency graph has been constructed. However, there are still data
-  harzaeds need to be handled.
+  hazards need to be handled.
  */
   PolishGraphToSupportDataHazards(&result);
@@ -273,11 +281,9 @@ std::unique_ptr<SSAGraph> MultiDevSSAGraphBuilder::Build(
   return std::unique_ptr<SSAGraph>(graph);
 }

-bool MultiDevSSAGraphBuilder::IsSparseGradient(
-    const std::unordered_map<std::string, VarDesc *> &all_vars,
-    const std::string &og) const {
-  PADDLE_ENFORCE(all_vars.count(og) != 0);
-  if (all_vars.at(og)->GetType() == proto::VarType::SELECTED_ROWS) {
+bool MultiDevSSAGraphBuilder::IsSparseGradient(const std::string &og) const {
+  PADDLE_ENFORCE(all_vars_.count(og) != 0);
+  if (all_vars_.at(og)->GetType() == proto::VarType::SELECTED_ROWS) {
     return true;
   }
   return false;
@@ -363,24 +369,23 @@ bool MultiDevSSAGraphBuilder::IsParameterGradientOnce(
   return is_pg_once;
 }

-int MultiDevSSAGraphBuilder::GetOpDeviceID(
-    const std::vector<std::unordered_set<std::string>> &var_name_on_devices,
-    const OpDesc &op) const {
+int MultiDevSSAGraphBuilder::GetOpDeviceID(const OpDesc &op) const {
   if (strategy_.reduce_ != BuildStrategy::ReduceStrategy::kReduce) {
     return -1;
   }

-  int var_dev_id = -1;
-  for (auto &var_name : op.InputArgumentNames()) {
-    if (var_dev_id != -1) break;
-    for (size_t i = 0; i < var_name_on_devices.size(); ++i) {
-      if (var_name_on_devices[i].count(var_name)) {
-        var_dev_id = static_cast<int>(i);
-        break;
-      }
+  for (auto &varname : op.InputArgumentNames()) {
+    int dev_id = GetVarDeviceID(varname);
+    if (dev_id != -1) {
+      return dev_id;
     }
   }
-  return var_dev_id;
+  return -1;
+}
+
+int MultiDevSSAGraphBuilder::GetVarDeviceID(const std::string &varname) const {
+  auto got = var_name_on_devices_.find(varname);
+  return got == var_name_on_devices_.end() ? -1 : got->second;
 }

 void MultiDevSSAGraphBuilder::CreateScaleLossGradOp(SSAGraph *result) const {
@@ -449,6 +454,8 @@ VarHandle *MultiDevSSAGraphBuilder::CreateReduceOp(SSAGraph *result,
   return var;
 }

+// Find the first occurence of `prev_op_name` and make current `op` depend
+// on it.
 void MultiDevSSAGraphBuilder::ConnectOp(SSAGraph *result, OpHandleBase *op,
                                         const std::string &prev_op_name) const {
   for (auto &prev_op : result->ops_) {
@@ -463,16 +470,66 @@ void MultiDevSSAGraphBuilder::ConnectOp(SSAGraph *result, OpHandleBase *op,
 void MultiDevSSAGraphBuilder::CreateDistTrainOp(SSAGraph *result,
                                                 const OpDesc &op) const {
-  CreateComputationalOp(result, op, 0);
+  int op_dev_id = -1;
+  if (op.Type() == "split_byref") {
+    op_dev_id = GetVarDeviceID(op.InputArgumentNames()[0]);
+    if (strategy_.reduce_ == BuildStrategy::ReduceStrategy::kAllReduce) {
+      op_dev_id = GetAppropriateDeviceID(op.InputArgumentNames());
+      for (auto &varname : op.InputArgumentNames()) {
+        var_name_on_devices_.emplace(varname, op_dev_id);
+      }
+    }
+    for (auto &varname : op.OutputArgumentNames()) {
+      var_name_on_devices_.emplace(varname, op_dev_id);
+    }
+  } else if (op.Type() == "concat") {
+    op_dev_id = GetVarDeviceID(op.InputArgumentNames()[0]);
+  } else {
+    PADDLE_ENFORCE(
+        "the distribute training related op should be in [split_byref, "
+        "concat].");
+  }
+
+  PADDLE_ENFORCE(op_dev_id != -1,
+                 "can not find right place for distributed op: %s", op.Type());
+
+  CreateComputationalOp(result, op, op_dev_id);
   if (op.Type() == "concat") {
     ConnectOp(result, result->ops_.back().get(), "fetch_barrier");
   }
 }

+// Create RPC related op handles that connects its in ops and out ops.
 void MultiDevSSAGraphBuilder::CreateRPCOp(SSAGraph *result,
                                           const OpDesc &op) const {
-  result->ops_.emplace_back(
-      new RPCOpHandle(op, local_scopes_[0], op.Type(), places_[0]));
+  int op_dev_id = -1;
+  if (op.Type() == "send") {
+    op_dev_id = GetVarDeviceID(op.InputArgumentNames()[0]);
+    // the variable name which contains .block means it was splited by
+    // split_byref op
+    // so that we can balance the variable blocks to all the pserver instances.
+    if (strategy_.reduce_ == BuildStrategy::ReduceStrategy::kAllReduce &&
+        op.InputArgumentNames()[0].find(".block") == std::string::npos) {
+      op_dev_id = GetAppropriateDeviceID(op.InputArgumentNames());
+      for (auto &varname : op.InputArgumentNames()) {
+        var_name_on_devices_.emplace(varname, op_dev_id);
+      }
+    }
+  } else if (op.Type() == "recv") {
+    op_dev_id = GetAppropriateDeviceID(op.OutputArgumentNames());
+    for (auto &varname : op.OutputArgumentNames()) {
+      var_name_on_devices_.emplace(varname, op_dev_id);
+    }
+  } else {
+    // send_barrier and fetch_barrier op can be scheduled on device 0
+    op_dev_id = 0;
+  }
+
+  PADDLE_ENFORCE(op_dev_id != -1, "can not find the right place for rpc op: %s",
+                 op.Type());
+
+  result->ops_.emplace_back(new RPCOpHandle(op, local_scopes_[op_dev_id],
+                                            op.Type(), places_[op_dev_id]));

   if (op.Type() == "send_barrier") {
     ConnectOp(result, result->ops_.back().get(), "send");
@@ -488,9 +545,7 @@ void MultiDevSSAGraphBuilder::CreateRPCOp(SSAGraph *result,
                    "send, send_barrier. recv, fetch_barrier]");
   }

-  // TODO(Yancey1989): schedule rpc op on different place may
-  // increate throughput
-  CreateOpHandleIOs(result, op, 0);
+  CreateOpHandleIOs(result, op, op_dev_id);
 }

 bool MultiDevSSAGraphBuilder::IsScaleLossOp(const OpDesc &op) const {
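The new GetAppropriateDeviceID above is a greedy balancer: charge each new variable's element count to the currently least-loaded device. A standalone sketch of that idea follows; it is illustrative only, the helper name is made up, and it is not a Paddle API.

#include <algorithm>
#include <cstdint>
#include <iterator>
#include <vector>

// Pick the device whose accumulated load is smallest, then charge the new
// variable's element count to it (the same greedy rule as GetAppropriateDeviceID).
size_t PickDevice(std::vector<int64_t>* balance, int64_t numel) {
  auto smallest = std::min_element(balance->begin(), balance->end());
  size_t dev_id =
      static_cast<size_t>(std::distance(balance->begin(), smallest));
  (*balance)[dev_id] += numel;
  return dev_id;
}

// Example with three devices: PickDevice(&load, 100) -> 0,
// PickDevice(&load, 10) -> 1, PickDevice(&load, 10) -> 2,
// PickDevice(&load, 10) -> 1 (device 1 is now the lightest again).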
paddle/fluid/framework/details/multi_devices_graph_builder.h (view file @ e5890052)

@@ -47,10 +47,11 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
 #endif

   std::unique_ptr<SSAGraph> Build(const ProgramDesc &program) const override;
+  int GetVarDeviceID(const std::string &varname) const;

  private:
   void CreateOpHandleIOs(SSAGraph *result, const OpDesc &op,
-                         size_t place_id) const;
+                         size_t device_id) const;

  private:
   std::string loss_var_name_;
@@ -96,21 +97,23 @@ class MultiDevSSAGraphBuilder : public SSAGraphBuilder {
                                 const std::string &og,
                                 std::unordered_set<std::string> *og_has_been_broadcast) const;

-  int GetOpDeviceID(
-      const std::vector<std::unordered_set<std::string>> &var_name_on_devices,
-      const OpDesc &op) const;
+  int GetOpDeviceID(const OpDesc &op) const;

   void InsertAllReduceOp(SSAGraph *result, const std::string &og) const;

   void CreateBroadcastOp(SSAGraph *result, const std::string &p_name,
                          size_t src_dev_id) const;

-  bool IsSparseGradient(
-      const std::unordered_map<std::string, VarDesc *> &all_vars,
-      const std::string &og) const;
+  bool IsSparseGradient(const std::string &og) const;
+
+  size_t GetAppropriateDeviceID(
+      const std::vector<std::string> &var_names) const;

  private:
   BuildStrategy strategy_;
+  mutable std::unordered_map<std::string, VarDesc *> all_vars_;
+  mutable std::unordered_map<std::string, int> var_name_on_devices_;
+  mutable std::vector<int64_t> balance_vars_;

   void SetCommunicationContext(OpHandleBase *op_handle,
                                const platform::Place &p) const;
paddle/fluid/framework/details/ssa_graph_builder.h (view file @ e5890052)

@@ -30,6 +30,7 @@ class SSAGraphBuilder {
   SSAGraphBuilder() {}
   virtual ~SSAGraphBuilder() {}
   virtual std::unique_ptr<SSAGraph> Build(const ProgramDesc &program) const = 0;
+  virtual int GetVarDeviceID(const std::string &var_name) const { return -1; }

   DISABLE_COPY_AND_ASSIGN(SSAGraphBuilder);
paddle/fluid/framework/details/threaded_ssa_graph_executor.cc (view file @ e5890052)

@@ -96,6 +96,7 @@ FeedFetchList ThreadedSSAGraphExecutor::Run(
     auto cur_ready_vars = ready_vars.PopAll(1, &timeout);
     if (timeout) {
+      std::lock_guard<std::mutex> l(exception_mu_);
       if (exception_) {
         auto exp = *exception_;
         exception_.reset();
@@ -199,6 +200,7 @@ void ThreadedSSAGraphExecutor::RunOp(
       ready_var_q->Extend(op->Outputs());
       VLOG(10) << op << " " << op->Name() << "Signal posted";
     } catch (platform::EnforceNotMet ex) {
+      std::lock_guard<std::mutex> l(exception_mu_);
       exception_.reset(new platform::EnforceNotMet(ex));
     } catch (...) {
       LOG(FATAL) << "Unknown exception catched";
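The two added lock_guard lines make the worker-thread write and the Run-loop read of exception_ go through the same mutex. A minimal standalone sketch of that pattern, not Paddle code:

#include <exception>
#include <mutex>
#include <stdexcept>
#include <thread>

std::mutex exception_mu;              // protects stored_exception
std::exception_ptr stored_exception;  // set by workers, consumed by the scheduler

void Worker() {
  try {
    throw std::runtime_error("op failed");  // pretend an op raised
  } catch (...) {
    std::lock_guard<std::mutex> l(exception_mu);
    stored_exception = std::current_exception();  // record for the scheduler
  }
}

void Scheduler() {
  std::thread t(Worker);
  t.join();
  std::lock_guard<std::mutex> l(exception_mu);
  if (stored_exception) std::rethrow_exception(stored_exception);  // surfaces to the caller
}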
paddle/fluid/framework/details/threaded_ssa_graph_executor.h (view file @ e5890052)

@@ -56,6 +56,7 @@ class ThreadedSSAGraphExecutor : public SSAGraphExecutor {
   std::vector<Scope *> local_scopes_;
   std::vector<platform::Place> places_;
   platform::DeviceContextPool fetch_ctxs_;
+  std::mutex exception_mu_;
   std::unique_ptr<platform::EnforceNotMet> exception_;
   std::atomic<int> running_ops_;
paddle/fluid/framework/executor.cc (view file @ e5890052)

@@ -21,7 +21,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/reader.h"
 #ifdef PADDLE_WITH_DISTRIBUTE
-#include "paddle/fluid/operators/detail/grpc_client.h"
+#include "paddle/fluid/operators/distributed/grpc_client.h"
 #endif
 #include "paddle/fluid/platform/place.h"
 #include "paddle/fluid/platform/profiler.h"
@@ -49,8 +49,8 @@ Executor::Executor(const platform::Place& place) : place_(place) {}
 #ifdef PADDLE_WITH_DISTRIBUTE
 void Executor::Complete() {
-  ::paddle::operators::detail::RPCClient::GetInstance<
-      ::paddle::operators::detail::GRPCClient>()
+  ::paddle::operators::distributed::RPCClient::GetInstance<
+      ::paddle::operators::distributed::GRPCClient>()
       ->SendComplete();
 }
 #endif
@@ -321,7 +321,8 @@ std::vector<std::shared_ptr<ExecutorPrepareContext>> Executor::Prepare(
 }

 void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
-                                  bool create_local_scope, bool create_vars) {
+                                  bool create_local_scope, bool create_vars,
+                                  bool keep_kids) {
   Scope* local_scope = scope;
   if (create_vars) {
     if (create_local_scope) {
@@ -344,12 +345,20 @@ void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
     }
   }
   platform::DeviceContextPool::Instance().Get(place_)->Wait();
-  if (create_vars && create_local_scope) {
+  if (local_scope != scope) {
     scope->DeleteScope(local_scope);
   } else {
-    // Delete the local scopes created in operators.
-    scope->DropKids();
+    if (!keep_kids) {
+      // By default, we should delete all kid scopes after run executor because
+      // some operators may create local scope when running, such as while_op.
+      // But when while_op also create a local executor to run it's sub block,
+      // the sub scopes it created should not be dropped immediately, because
+      // while_grad_op will use some variables created during while_op run, so
+      // we need to keep the kids and wait for the outer executor to drop them.
+      scope->DropKids();
+    }
   }

   if (FLAGS_benchmark) {
     VLOG(2) << "-------------------------------------------------------";
     VLOG(2) << "Memory used after deleting local scope: "
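A hedged sketch of a call site that benefits from the new flag (illustrative only; the wrapper function name is made up): an operator that runs a sub-block through a nested executor can ask the inner run to keep its child scopes alive so the later backward pass can still read them.

#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/scope.h"

// keep_kids = true: child scopes created while running the sub-block are not
// dropped here, so while_grad_op can still find the variables they hold; the
// outer executor drops them later.
void RunSubBlockKeepingKids(paddle::framework::Executor* exe,
                            paddle::framework::ExecutorPrepareContext* ctx,
                            paddle::framework::Scope* scope) {
  exe->RunPreparedContext(ctx, scope,
                          /*create_local_scope=*/false,
                          /*create_vars=*/true,
                          /*keep_kids=*/true);
}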
paddle/fluid/framework/executor.h (view file @ e5890052)

@@ -78,7 +78,7 @@ class Executor {

   void RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
                           bool create_local_scope = true,
-                          bool create_vars = true);
+                          bool create_vars = true, bool keep_kids = false);

   void RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
                           std::map<std::string, const LoDTensor*>* feed_targets,
paddle/fluid/framework/parallel_executor.cc (view file @ e5890052)

@@ -110,7 +110,6 @@ ParallelExecutor::ParallelExecutor(
   // Step 3. Convert main_program to SSA form and dependency graph. Also, insert
   // ncclOp
-
   details::SSAGraphBuilderFactory builder_factory(
       member_->places_, loss_var_name, params, member_->local_scopes_,
       build_strategy);
@@ -122,9 +121,10 @@ ParallelExecutor::ParallelExecutor(
 #endif
   }

+  builder_ = std::move(builder_factory.Create());
   member_->executor_.reset(new details::ThreadedSSAGraphExecutor(
       exec_strategy, member_->local_scopes_, places,
-      builder_factory.Create()->Build(main_program)));
+      builder_->Build(main_program)));

   member_->executor_.reset(new details::ScopeBufferedSSAGraphExecutor(
       exec_strategy, member_->local_scopes_, std::move(var_infos),
@@ -133,10 +133,22 @@ ParallelExecutor::ParallelExecutor(
 void ParallelExecutor::BCastParamsToGPUs(
     const std::unordered_set<std::string> &vars) const {
-  auto *main_scope = member_->local_scopes_[0];
+  // the the initialize bcast, all vars would be bcast from device(0), otherwise
+  // bcast from the specified device.
+  bool initialize = builder_.get() == nullptr ? true : false;

   for (auto &var : vars) {
-    auto *main_var = main_scope->FindVar(var);
+    int var_dev_id =
+        builder_.get() == nullptr ? -1 : builder_->GetVarDeviceID(var);
+    if (!initialize && var_dev_id == -1) continue;
+
+    framework::Variable *main_var = nullptr;
+    if (initialize) {
+      main_var = member_->local_scopes_[0]->FindVar(var);
+    } else {
+      main_var = member_->local_scopes_[var_dev_id]->FindVar(var);
+    }
+
     if (main_var == nullptr || !main_var->IsType<LoDTensor>()) {
       continue;
     }
@@ -151,7 +163,8 @@ void ParallelExecutor::BCastParamsToGPUs(
       for (size_t i = 0; i < member_->places_.size(); ++i) {
         auto place = member_->places_[i];
         void *buffer;
-        if (i == 0) {
+
+        if ((initialize && i == 0) || (!initialize && i == var_dev_id)) {
           buffer = const_cast<void *>(main_tensor.data<void>());
         } else {
           auto local_scope = member_->local_scopes_[i];
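The added condition boils down to: on the first, initializing broadcast every variable is sourced from device 0; afterwards it is sourced from the device that owns it, and a variable with no recorded owner is skipped. A tiny restatement of that rule (illustrative only, not Paddle code):

#include <cstddef>

// True when device `i` should contribute its own tensor data as the broadcast
// source; every other device receives freshly allocated memory instead
// (mirrors the condition inside BCastParamsToGPUs above).
bool UsesOwnData(bool initialize, size_t i, int var_dev_id) {
  return (initialize && i == 0) ||
         (!initialize && static_cast<int>(i) == var_dev_id);
}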
paddle/fluid/framework/parallel_executor.h (view file @ e5890052)

@@ -19,12 +19,14 @@ limitations under the License. */
 #include <unordered_set>
 #include <vector>
 #include "paddle/fluid/framework/details/execution_strategy.h"
+#include "paddle/fluid/framework/details/multi_devices_graph_builder.h"
 #include "paddle/fluid/framework/executor.h"
 #include "paddle/fluid/framework/op_info.h"
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/framework/tensor.h"
 #include "paddle/fluid/platform/device_context.h"
+
 namespace paddle {
 namespace framework {
@@ -68,6 +70,7 @@ class ParallelExecutor {

  private:
   ParallelExecutorPrivate *member_;
+  std::unique_ptr<details::SSAGraphBuilder> builder_;
 };

 }  // namespace framework
paddle/fluid/inference/analysis/tensorrt_subgraph_pass.cc (view file @ e5890052)

@@ -27,7 +27,7 @@ void TensorRTSubGraphPass::Run(DataFlowGraph *graph) {
   SubGraphFuse(graph, node_inside_subgraph_teller_);
 }

-}  // analysis
-}  // inference
-}  // paddle
+}  // namespace analysis
+}  // namespace inference
+}  // namespace paddle
paddle/fluid/operators/CMakeLists.txt (view file @ e5890052)

@@ -184,9 +184,9 @@ else()
     set(DEPS_OPS ${DEPS_OPS} nccl_op)
 endif()

-add_subdirectory(detail)
 if(WITH_DISTRIBUTE)
+    add_subdirectory(distributed)
+
     set(DISTRIBUTE_DEPS "")
     if(WITH_GRPC)
         set(DISTRIBUTE_DEPS sendrecvop_grpc grpc++_unsecure grpc_unsecure gpr cares zlib protobuf)
@@ -195,20 +195,11 @@ if(WITH_DISTRIBUTE)
     endif()

     set(DISTRIBUTE_COMPILE_FLAGS "-Wno-non-virtual-dtor -Wno-error=non-virtual-dtor -Wno-error=delete-non-virtual-dtor")
-    op_library(prefetch_op DEPS ${DISTRIBUTE_DEPS})
-    set_source_files_properties(prefetch_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
-    op_library(checkpoint_notify_op DEPS ${DISTRIBUTE_DEPS})
-    set_source_files_properties(checkpoint_notify_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
-    op_library(recv_op DEPS ${DISTRIBUTE_DEPS})
-    set_source_files_properties(recv_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
-    op_library(listen_and_serv_op DEPS ${DISTRIBUTE_DEPS})
-    set_source_files_properties(listen_and_serv_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
-    op_library(send_op DEPS ${DISTRIBUTE_DEPS})
-    set_source_files_properties(send_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
-    op_library(send_barrier_op DEPS ${DISTRIBUTE_DEPS})
-    op_library(fetch_barrier_op DEPS ${DISTRIBUTE_DEPS})
-    set_source_files_properties(send_barrier_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
-    set_source_files_properties(fetch_barrier_op.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
+    foreach(dist_op "prefetch_op" "checkpoint_notify_op" "listen_and_serv_op" "send_op" "recv_op" "send_barrier_op" "fetch_barrier_op")
+        op_library(${dist_op} DEPS ${DISTRIBUTE_DEPS})
+        set_source_files_properties(${dist_op}.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
+    endforeach()
+
     #set_source_files_properties(send_recv_op_test.cc PROPERTIES COMPILE_FLAGS ${DISTRIBUTE_COMPILE_FLAGS})
     #cc_test(test_send_recv SRCS send_recv_op_test.cc DEPS prefetch_op send_op
     #        listen_and_serv_op sum_op executor SERIAL)
paddle/fluid/operators/batch_norm_mkldnn_op.cc (view file @ e5890052)

@@ -21,8 +21,6 @@ namespace operators {
 using batch_norm_bwd = mkldnn::batch_normalization_backward;
 using batch_norm_fwd = mkldnn::batch_normalization_forward;
-using framework::DataLayout;
-using framework::Tensor;
 using mkldnn::memory;
 using mkldnn::primitive;
 using mkldnn::reorder;
@@ -31,18 +29,6 @@ using paddle::platform::MKLDNNDeviceContext;
 using paddle::platform::MKLDNNMemDesc;
 using platform::to_void_cast;

-template <typename T>
-using EigenArrayMap =
-    Eigen::Map<Eigen::Array<T, Eigen::Dynamic, Eigen::Dynamic>>;
-template <typename T>
-using ConstEigenArrayMap =
-    Eigen::Map<const Eigen::Array<T, Eigen::Dynamic, Eigen::Dynamic>>;
-template <typename T>
-using EigenVectorArrayMap = Eigen::Map<Eigen::Array<T, Eigen::Dynamic, 1>>;
-template <typename T>
-using ConstEigenVectorArrayMap =
-    Eigen::Map<const Eigen::Array<T, Eigen::Dynamic, 1>>;
-
 namespace {
 template <typename T>
 struct bn_type_traits {
paddle/fluid/operators/batch_norm_op.cc (view file @ e5890052)

@@ -22,22 +22,6 @@ limitations under the License. */
 namespace paddle {
 namespace operators {

-using Tensor = framework::Tensor;
-using LoDTensor = framework::LoDTensor;
-using DataLayout = framework::DataLayout;
-
-template <typename T>
-using EigenArrayMap =
-    Eigen::Map<Eigen::Array<T, Eigen::Dynamic, Eigen::Dynamic>>;
-template <typename T>
-using ConstEigenArrayMap =
-    Eigen::Map<const Eigen::Array<T, Eigen::Dynamic, Eigen::Dynamic>>;
-template <typename T>
-using EigenVectorArrayMap = Eigen::Map<Eigen::Array<T, Eigen::Dynamic, 1>>;
-template <typename T>
-using ConstEigenVectorArrayMap =
-    Eigen::Map<const Eigen::Array<T, Eigen::Dynamic, 1>>;
-
 class BatchNormOp : public framework::OperatorWithKernel {
  public:
   using framework::OperatorWithKernel::OperatorWithKernel;
paddle/fluid/operators/batch_norm_op.h (view file @ e5890052)

@@ -19,6 +19,22 @@ limitations under the License. */
 namespace paddle {
 namespace operators {

+using Tensor = framework::Tensor;
+using LoDTensor = framework::LoDTensor;
+using DataLayout = framework::DataLayout;
+
+template <typename T>
+using EigenArrayMap =
+    Eigen::Map<Eigen::Array<T, Eigen::Dynamic, Eigen::Dynamic>>;
+template <typename T>
+using ConstEigenArrayMap =
+    Eigen::Map<const Eigen::Array<T, Eigen::Dynamic, Eigen::Dynamic>>;
+template <typename T>
+using EigenVectorArrayMap = Eigen::Map<Eigen::Array<T, Eigen::Dynamic, 1>>;
+template <typename T>
+using ConstEigenVectorArrayMap =
+    Eigen::Map<const Eigen::Array<T, Eigen::Dynamic, 1>>;
+
 template <typename DeviceContext, typename T>
 class BatchNormKernel : public framework::OpKernel<T> {
  public:
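These aliases view raw buffers as Eigen arrays without copying, which is why they can move out of the .cc files into this shared header. A small standalone illustration (not from the commit; it assumes only Eigen is available):

#include <Eigen/Core>

template <typename T>
using EigenVectorArrayMap = Eigen::Map<Eigen::Array<T, Eigen::Dynamic, 1>>;

int main() {
  float buf[4] = {1.f, 2.f, 3.f, 4.f};
  EigenVectorArrayMap<float> v(buf, 4);  // no copy: `v` aliases `buf`
  v *= 2.f;                              // buf is now {2, 4, 6, 8}
  return 0;
}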
paddle/fluid/operators/bilinear_interp_op.cc (view file @ e5890052)

@@ -110,6 +110,7 @@ REGISTER_OPERATOR(bilinear_interp, ops::BilinearInterpOp,
                   ops::BilinearInterpOpMaker,
                   paddle::framework::DefaultGradOpDescMaker<true>);
 REGISTER_OPERATOR(bilinear_interp_grad, ops::BilinearInterpOpGrad);
-REGISTER_OP_CPU_KERNEL(bilinear_interp, ops::BilinearInterpKernel<float>);
+REGISTER_OP_CPU_KERNEL(bilinear_interp, ops::BilinearInterpKernel<float>,
+                       ops::BilinearInterpKernel<uint8_t>);
 REGISTER_OP_CPU_KERNEL(bilinear_interp_grad,
                        ops::BilinearInterpGradKernel<float>);
paddle/fluid/operators/bilinear_interp_op.h

...
@@ -46,8 +46,10 @@ class BilinearInterpKernel : public framework::OpKernel<T> {
     int in_chw = channels * in_hw;
     int out_chw = channels * out_hw;

-    T ratio_h = (out_h > 1) ? static_cast<T>(in_h - 1) / (out_h - 1) : 0.f;
-    T ratio_w = (out_w > 1) ? static_cast<T>(in_w - 1) / (out_w - 1) : 0.f;
+    float ratio_h =
+        (out_h > 1) ? static_cast<float>(in_h - 1) / (out_h - 1) : 0.f;
+    float ratio_w =
+        (out_w > 1) ? static_cast<float>(in_w - 1) / (out_w - 1) : 0.f;

     if (in_h == out_h && in_w == out_w) {
       memcpy(output, input, input_t->numel() * sizeof(T));
...
@@ -56,24 +58,24 @@ class BilinearInterpKernel : public framework::OpKernel<T> {
       for (int i = 0; i < out_h; ++i) {  // loop for images
         int h = ratio_h * i;
         int hid = (h < in_h - 1) ? 1 : 0;
-        T h1lambda = ratio_h * i - h;
-        T h2lambda = 1 - h1lambda;
+        float h1lambda = ratio_h * i - h;
+        float h2lambda = 1.f - h1lambda;

         for (int j = 0; j < out_w; ++j) {
           int w = ratio_w * j;
           int wid = (w < in_w - 1) ? 1 : 0;
-          T w1lambda = ratio_w * j - w;
-          T w2lambda = 1 - w1lambda;
+          float w1lambda = ratio_w * j - w;
+          float w2lambda = 1.f - w1lambda;
           // calculate four position for bilinear interpolation
           const T* in_pos = &input[k * in_chw + h * in_w + w];
           T* out_pos = &output[k * out_chw + i * out_w + j];

           for (int c = 0; c < channels; ++c) {  // loop for channels
             // bilinear interpolation
-            out_pos[0] =
+            out_pos[0] = static_cast<T>(
                 h2lambda * (w2lambda * in_pos[0] + w1lambda * in_pos[wid]) +
-                h1lambda * (w2lambda * in_pos[hid * in_w] + w1lambda * in_pos[hid * in_w + wid]);
+                h1lambda * (w2lambda * in_pos[hid * in_w] + w1lambda * in_pos[hid * in_w + wid]));
             in_pos += in_hw;
             out_pos += out_hw;
           }
...
@@ -117,8 +119,10 @@ class BilinearInterpGradKernel : public framework::OpKernel<T> {
     int in_chw = channels * in_hw;
     int out_chw = channels * out_hw;

-    T ratio_h = (out_h > 1) ? static_cast<T>(in_h - 1) / (out_h - 1) : 0.f;
-    T ratio_w = (out_w > 1) ? static_cast<T>(in_w - 1) / (out_w - 1) : 0.f;
+    float ratio_h =
+        (out_h > 1) ? static_cast<float>(in_h - 1) / (out_h - 1) : 0.f;
+    float ratio_w =
+        (out_w > 1) ? static_cast<float>(in_w - 1) / (out_w - 1) : 0.f;

     if (in_h == out_h && in_w == out_w) {
       memcpy(d_input, d_output, d_input_t->numel() * sizeof(T));
...
@@ -127,22 +131,24 @@ class BilinearInterpGradKernel : public framework::OpKernel<T> {
       for (int i = 0; i < out_h; ++i) {  // loop for images
         int h = ratio_h * i;
         int hid = (h < in_h - 1) ? 1 : 0;
-        T h1lambda = ratio_h * i - h;
-        T h2lambda = 1 - h1lambda;
+        float h1lambda = ratio_h * i - h;
+        float h2lambda = 1 - h1lambda;

         for (int j = 0; j < out_w; ++j) {
           int w = ratio_w * j;
           int wid = (w < in_w - 1) ? 1 : 0;
-          T w1lambda = ratio_w * j - w;
-          T w2lambda = 1 - w1lambda;
+          float w1lambda = ratio_w * j - w;
+          float w2lambda = 1 - w1lambda;
           T* in_pos = &d_input[k * in_chw + h * in_w + w];
           const T* out_pos = &d_output[k * out_chw + i * out_w + j];

           for (int c = 0; c < channels; ++c) {  // loop for channels
-            in_pos[0] += h2lambda * w2lambda * out_pos[0];
-            in_pos[wid] += h2lambda * w1lambda * out_pos[0];
-            in_pos[hid * in_w] += h1lambda * w2lambda * out_pos[0];
-            in_pos[hid * in_w + wid] += h1lambda * w1lambda * out_pos[0];
+            in_pos[0] += static_cast<T>(h2lambda * w2lambda * out_pos[0]);
+            in_pos[wid] += static_cast<T>(h2lambda * w1lambda * out_pos[0]);
+            in_pos[hid * in_w] += static_cast<T>(h1lambda * w2lambda * out_pos[0]);
+            in_pos[hid * in_w + wid] += static_cast<T>(h1lambda * w1lambda * out_pos[0]);
             in_pos += in_hw;
             out_pos += out_hw;
           }
...
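The change above keeps the interpolation ratios and lambda weights in float so the kernel can also be instantiated for integer element types such as uint8_t (matching the new kernel registration in bilinear_interp_op.cc), casting back to T only when a value is stored. The numpy sketch below is illustrative only, not the kernel itself; it mirrors the same arithmetic for a single-channel image.

    # Hedged sketch of the bilinear arithmetic above; names are ours.
    import numpy as np

    def bilinear_resize(img, out_h, out_w):
        in_h, in_w = img.shape
        ratio_h = (in_h - 1.0) / (out_h - 1.0) if out_h > 1 else 0.0
        ratio_w = (in_w - 1.0) / (out_w - 1.0) if out_w > 1 else 0.0
        out = np.empty((out_h, out_w), dtype=img.dtype)
        for i in range(out_h):
            h = int(ratio_h * i)
            hid = 1 if h < in_h - 1 else 0
            h1 = ratio_h * i - h        # h1lambda, kept in float
            h2 = 1.0 - h1
            for j in range(out_w):
                w = int(ratio_w * j)
                wid = 1 if w < in_w - 1 else 0
                w1 = ratio_w * j - w    # w1lambda
                w2 = 1.0 - w1
                val = (h2 * (w2 * img[h, w] + w1 * img[h, w + wid]) +
                       h1 * (w2 * img[h + hid, w] + w1 * img[h + hid, w + wid]))
                out[i, j] = img.dtype.type(val)  # cast back, like static_cast<T>
        return out

    print(bilinear_resize(np.arange(16, dtype=np.uint8).reshape(4, 4), 8, 8).dtype)  # uint8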
paddle/fluid/operators/detail/macros.h

...
@@ -15,13 +15,13 @@
 #pragma once

 #ifdef PADDLE_WITH_GRPC
-#include "paddle/fluid/operators/detail/grpc_client.h"
-#include "paddle/fluid/operators/detail/grpc_server.h"
-#define RPCSERVER_T detail::AsyncGRPCServer
-#define RPCCLIENT_T detail::GRPCClient
+#include "paddle/fluid/operators/distributed/grpc_client.h"
+#include "paddle/fluid/operators/distributed/grpc_server.h"
+#define RPCSERVER_T distributed::AsyncGRPCServer
+#define RPCCLIENT_T distributed::GRPCClient
 #else
-#include "paddle/fluid/operators/detail/brpc_client.h"
-#include "paddle/fluid/operators/detail/brpc_server.h"
-#define RPCSERVER_T detail::AsyncBRPCServer
-#define RPCCLIENT_T detail::BRPCClient
+#include "paddle/fluid/operators/distributed/brpc_client.h"
+#include "paddle/fluid/operators/distributed/brpc_server.h"
+#define RPCSERVER_T distributed::AsyncBRPCServer
+#define RPCCLIENT_T distributed::BRPCClient
 #endif
paddle/fluid/operators/detail/CMakeLists.txt → paddle/fluid/operators/distributed/CMakeLists.txt

if(NOT WITH_DISTRIBUTE)
  return()
endif()

if(WITH_GRPC)
  grpc_library(sendrecvop_grpc SRCS bytebuffer_stream.cc sendrecvop_utils.cc grpc_client.cc
    request_handler_impl.cc rpc_client.cc rpc_server.cc grpc_server.cc variable_response.cc PROTO send_recv.proto DEPS lod_tensor
...
paddle/fluid/operators/detail/brpc_client.cc → paddle/fluid/operators/distributed/brpc_client.cc

...
@@ -12,12 +12,12 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#include "paddle/fluid/operators/detail/brpc_client.h"
+#include "paddle/fluid/operators/distributed/brpc_client.h"
 #include "paddle/fluid/framework/threadpool.h"

 namespace paddle {
 namespace operators {
-namespace detail {
+namespace distributed {

 DEFINE_int32(brpc_channel_num, 24,
              "Number of channels to send requests connected to one server");
...
@@ -175,6 +175,6 @@ ChannelQueuePtr BRPCClient::GetChannel(const std::string& ep) {
   return q;
 }

-}  // namespace detail
+}  // namespace distributed
 }  // namespace operators
 }  // namespace paddle
paddle/fluid/operators/detail/brpc_client.h → paddle/fluid/operators/distributed/brpc_client.h

...
@@ -31,13 +31,13 @@ limitations under the License. */
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/framework/selected_rows.h"
-#include "paddle/fluid/operators/detail/rpc_client.h"
-#include "paddle/fluid/operators/detail/send_recv.pb.h"
+#include "paddle/fluid/operators/distributed/rpc_client.h"
+#include "paddle/fluid/operators/distributed/send_recv.pb.h"
 #include "paddle/fluid/platform/macros.h"  // for DISABLE_COPY_AND_ASSIGN

 namespace paddle {
 namespace operators {
-namespace detail {
+namespace distributed {

 struct ChannelContext {
   brpc::Channel channel;
...
@@ -95,6 +95,6 @@ class BRPCClient : public RPCClient {
   DISABLE_COPY_AND_ASSIGN(BRPCClient);
 };

-}  // namespace detail
+}  // namespace distributed
 }  // namespace operators
 }  // namespace paddle
paddle/fluid/operators/detail/brpc_server.cc → paddle/fluid/operators/distributed/brpc_server.cc

...
@@ -12,13 +12,13 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#include "paddle/fluid/operators/detail/brpc_server.h"
-#include "paddle/fluid/operators/detail/request_handler.h"
+#include "paddle/fluid/operators/distributed/brpc_server.h"
+#include "paddle/fluid/operators/distributed/request_handler.h"

 namespace sendrecv {

 typedef std::unordered_map<std::string,
-                           paddle::operators::detail::RequestHandler*>
+                           paddle::operators::distributed::RequestHandler*>
     HandlerMap;

 class BRPCServiceImpl : public SendRecvService {
...
@@ -27,17 +27,17 @@ class BRPCServiceImpl : public SendRecvService {
       : request_send_h_(nullptr),
        request_get_h_(nullptr),
        request_prefetch_h_(nullptr) {
-    auto it = rpc_call_map.find(paddle::operators::detail::kRequestSend);
+    auto it = rpc_call_map.find(paddle::operators::distributed::kRequestSend);
     if (it != rpc_call_map.end()) {
       request_send_h_ = it->second;
     }

-    it = rpc_call_map.find(paddle::operators::detail::kRequestSend);
+    it = rpc_call_map.find(paddle::operators::distributed::kRequestSend);
     if (it != rpc_call_map.end()) {
       request_get_h_ = it->second;
     }

-    it = rpc_call_map.find(paddle::operators::detail::kRequestPrefetch);
+    it = rpc_call_map.find(paddle::operators::distributed::kRequestPrefetch);
     if (it != rpc_call_map.end()) {
       request_prefetch_h_ = it->second;
     }
...
@@ -88,15 +88,15 @@ class BRPCServiceImpl : public SendRecvService {
   }

  private:
-  paddle::operators::detail::RequestHandler* request_send_h_;
-  paddle::operators::detail::RequestHandler* request_get_h_;
-  paddle::operators::detail::RequestHandler* request_prefetch_h_;
+  paddle::operators::distributed::RequestHandler* request_send_h_;
+  paddle::operators::distributed::RequestHandler* request_get_h_;
+  paddle::operators::distributed::RequestHandler* request_prefetch_h_;
 };
 }  // namespace sendrecv

 namespace paddle {
 namespace operators {
-namespace detail {
+namespace distributed {

 void AsyncBRPCServer::StartServer() {
   // Instance of your service.
...
@@ -139,6 +139,6 @@ void AsyncBRPCServer::WaitServerReady() {
   VLOG(3) << "AsyncGRPCServer WaitSeverReady";
 }

-};  // namespace detail
+};  // namespace distributed
 };  // namespace operators
 };  // namespace paddle
paddle/fluid/operators/detail/brpc_server.h → paddle/fluid/operators/distributed/brpc_server.h

...
@@ -19,12 +19,12 @@ limitations under the License. */
 #include <string>

 #include "brpc/server.h"
-#include "paddle/fluid/operators/detail/rpc_server.h"
-#include "paddle/fluid/operators/detail/send_recv.pb.h"
+#include "paddle/fluid/operators/distributed/rpc_server.h"
+#include "paddle/fluid/operators/distributed/send_recv.pb.h"

 namespace paddle {
 namespace operators {
-namespace detail {
+namespace distributed {

 class AsyncBRPCServer final : public RPCServer {
  public:
...
@@ -48,6 +48,6 @@ class AsyncBRPCServer final : public RPCServer {
   int ready_;
 };

-};  // namespace detail
+};  // namespace distributed
 };  // namespace operators
 };  // namespace paddle
paddle/fluid/operators/detail/bytebuffer_stream.cc → paddle/fluid/operators/distributed/bytebuffer_stream.cc

...
@@ -17,11 +17,11 @@ limitations under the License. */
 // file and did some modifications so that we can send gRPC
 // requests without too much copying of the tensor data.

-#include "paddle/fluid/operators/detail/bytebuffer_stream.h"
+#include "paddle/fluid/operators/distributed/bytebuffer_stream.h"

 namespace paddle {
 namespace operators {
-namespace detail {
+namespace distributed {

 GrpcByteBufferSource::GrpcByteBufferSource() {}
...
@@ -83,6 +83,6 @@ google::protobuf::int64 GrpcByteBufferSource::ByteCount() const {
   return byte_count_;
 }

-}  // namespace detail
+}  // namespace distributed
 }  // namespace operators
 }  // namespace paddle
paddle/fluid/operators/detail/bytebuffer_stream.h → paddle/fluid/operators/distributed/bytebuffer_stream.h

...
@@ -106,7 +106,7 @@ class GrpcBufferReader final
 namespace paddle {
 namespace operators {
-namespace detail {
+namespace distributed {
 // Source provides a way for a particular RPC implementation to provide
 // received data to ParseFrom.
 class Source {
...
@@ -183,6 +183,6 @@ class GrpcByteSource : public Source {
   char space_[sizeof(Reader)];
 };

-}  // namespace detail
+}  // namespace distributed
 }  // namespace operators
 }  // namespace paddle
paddle/fluid/operators/detail/grpc_client.cc → paddle/fluid/operators/distributed/grpc_client.cc

...
@@ -12,19 +12,19 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "paddle/fluid/operators/detail/grpc_client.h"
+#include "paddle/fluid/operators/distributed/grpc_client.h"

 #include <sys/time.h>
 #include <limits>

 #include "paddle/fluid/framework/threadpool.h"
-#include "paddle/fluid/operators/detail/request_handler.h"
+#include "paddle/fluid/operators/distributed/request_handler.h"
 #include "paddle/fluid/platform/profiler.h"

 namespace paddle {
 namespace operators {
-namespace detail {
+namespace distributed {

 void GRPCClient::InitImpl() { InitEventLoop(); }
...
@@ -293,6 +293,6 @@ std::shared_ptr<grpc::Channel> GRPCClient::GetChannel(const std::string& ep) {
   return ch;
 }

-}  // namespace detail
+}  // namespace distributed
 }  // namespace operators
 }  // namespace paddle
paddle/fluid/operators/detail/grpc_client.h → paddle/fluid/operators/distributed/grpc_client.h

...
@@ -38,13 +38,13 @@ limitations under the License. */
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/framework/selected_rows.h"
-#include "paddle/fluid/operators/detail/rpc_client.h"
-#include "paddle/fluid/operators/detail/sendrecvop_utils.h"
+#include "paddle/fluid/operators/distributed/rpc_client.h"
+#include "paddle/fluid/operators/distributed/sendrecvop_utils.h"
 #include "paddle/fluid/platform/macros.h"  // for DISABLE_COPY_AND_ASSIGN

 namespace paddle {
 namespace operators {
-namespace detail {
+namespace distributed {

 struct VarHandle {
   std::string ep;
...
@@ -244,6 +244,6 @@ class GRPCClient : public RPCClient {
   DISABLE_COPY_AND_ASSIGN(GRPCClient);
 };

-}  // namespace detail
+}  // namespace distributed
 }  // namespace operators
 }  // namespace paddle
paddle/fluid/operators/detail/grpc_serde_test.cc → paddle/fluid/operators/distributed/grpc_serde_test.cc

...
@@ -21,8 +21,8 @@ limitations under the License. */
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/fluid/framework/variable.h"
-#include "paddle/fluid/operators/detail/sendrecvop_utils.h"
-#include "paddle/fluid/operators/detail/variable_response.h"
+#include "paddle/fluid/operators/distributed/sendrecvop_utils.h"
+#include "paddle/fluid/operators/distributed/variable_response.h"
 #include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/platform/place.h"
 #include "paddle/fluid/string/printf.h"
...
@@ -50,7 +50,7 @@ void RunSerdeTestSelectedRows(platform::Place place) {
   for (int i = 0; i < 564; ++i) rows->push_back(i);

   ::grpc::ByteBuffer msg;
-  operators::detail::SerializeToByteBuffer("myvar", &var, ctx, &msg);
+  operators::distributed::SerializeToByteBuffer("myvar", &var, ctx, &msg);
   EXPECT_GT(msg.Length(), static_cast<size_t>(0));

   // deserialize
...
@@ -81,10 +81,10 @@ void RunSerdeTestSelectedRows(platform::Place place) {
   // deserialize zero-copy
   // framework::Variable var2;
-  // operators::detail::DeserializeFromByteBuffer(msg, ctx, &var2);
+  // operators::distributed::DeserializeFromByteBuffer(msg, ctx, &var2);
   framework::Scope scope;
   scope.Var("myvar");
-  operators::detail::VariableResponse resp(&scope, &ctx);
+  operators::distributed::VariableResponse resp(&scope, &ctx);
   EXPECT_EQ(resp.Parse(msg), 0);

   framework::Variable* var2 = resp.GetVar();
...
@@ -128,7 +128,7 @@ void RunTestLodTensor(platform::Place place, int from_type = 0) {
   math::set_constant(ctx, tensor, 31.9);

   ::grpc::ByteBuffer msg;
-  operators::detail::SerializeToByteBuffer("myvar", &var, ctx, &msg);
+  operators::distributed::SerializeToByteBuffer("myvar", &var, ctx, &msg);
   EXPECT_GT(msg.Length(), static_cast<size_t>(0));

   // deserialize
...
@@ -171,7 +171,7 @@ void RunTestLodTensor(platform::Place place, int from_type = 0) {
   // deserialize zero-copy
   framework::Scope scope;
   scope.Var("myvar");
-  operators::detail::VariableResponse resp(&scope, &ctx);
+  operators::distributed::VariableResponse resp(&scope, &ctx);
   if (from_type == 0) {
     EXPECT_EQ(resp.Parse(msg), 0);
   } else {
...
paddle/fluid/operators/detail/grpc_server.cc → paddle/fluid/operators/distributed/grpc_server.cc

...
@@ -15,13 +15,13 @@ limitations under the License. */
 #include <limits>
 #include <string>

-#include "paddle/fluid/operators/detail/grpc_server.h"
+#include "paddle/fluid/operators/distributed/grpc_server.h"

 using ::grpc::ServerAsyncResponseWriter;

 namespace paddle {
 namespace operators {
-namespace detail {
+namespace distributed {
 enum CallStatus { PROCESS = 0, FINISH };

 // reference:
...
@@ -74,7 +74,7 @@ class RequestSend final : public RequestBase {
     request_.reset(new VariableResponse(request_handler->scope(),
                                         request_handler->dev_ctx(),
                                         !request_handler->sync_mode()));
-    int method_id = static_cast<int>(detail::GrpcMethod::kSendVariable);
+    int method_id = static_cast<int>(distributed::GrpcMethod::kSendVariable);
     service_->RequestAsyncUnary(
         method_id, &ctx_, request_.get(), &responder_, cq_, cq_,
         reinterpret_cast<void*>(static_cast<intptr_t>(req_id)));
...
@@ -106,7 +106,7 @@ class RequestGet final : public RequestBase {
              ::grpc::ServerCompletionQueue* cq,
              RequestHandler* request_handler, int req_id)
       : RequestBase(service, cq, request_handler, req_id), responder_(&ctx_) {
-    auto method_id = static_cast<int>(detail::GrpcMethod::kGetVariable);
+    auto method_id = static_cast<int>(distributed::GrpcMethod::kGetVariable);
     service_->RequestAsyncUnary(
         method_id, &ctx_, &request_, &responder_, cq_, cq_,
         reinterpret_cast<void*>(static_cast<intptr_t>(req_id)));
...
@@ -150,7 +150,8 @@ class RequestPrefetch final : public RequestBase {
         local_scope_(nullptr) {
     request_.reset(new VariableResponse(request_handler->scope(),
                                         request_handler->dev_ctx(), true));
-    int method_id = static_cast<int>(detail::GrpcMethod::kPrefetchVariable);
+    int method_id =
+        static_cast<int>(distributed::GrpcMethod::kPrefetchVariable);
     service_->RequestAsyncUnary(
         method_id, &ctx_, request_.get(), &responder_, cq_, cq_,
         reinterpret_cast<void*>(static_cast<intptr_t>(req_id)));
...
@@ -399,6 +400,6 @@ void AsyncGRPCServer::HandleRequest(
   }
 }

-}  // namespace detail
+}  // namespace distributed
 }  // namespace operators
 }  // namespace paddle
paddle/fluid/operators/detail/grpc_server.h → paddle/fluid/operators/distributed/grpc_server.h

...
@@ -29,17 +29,17 @@ limitations under the License. */
 #include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/framework/selected_rows.h"
 #include "paddle/fluid/framework/var_type.h"
-#include "paddle/fluid/operators/detail/grpc_service.h"
-#include "paddle/fluid/operators/detail/request_handler.h"
-#include "paddle/fluid/operators/detail/rpc_server.h"
-#include "paddle/fluid/operators/detail/send_recv.grpc.pb.h"
-#include "paddle/fluid/operators/detail/send_recv.pb.h"
-#include "paddle/fluid/operators/detail/sendrecvop_utils.h"
+#include "paddle/fluid/operators/distributed/grpc_service.h"
+#include "paddle/fluid/operators/distributed/request_handler.h"
+#include "paddle/fluid/operators/distributed/rpc_server.h"
+#include "paddle/fluid/operators/distributed/send_recv.grpc.pb.h"
+#include "paddle/fluid/operators/distributed/send_recv.pb.h"
+#include "paddle/fluid/operators/distributed/sendrecvop_utils.h"
 #include "paddle/fluid/platform/profiler.h"

 namespace paddle {
 namespace operators {
-namespace detail {
+namespace distributed {

 class RequestBase;
...
@@ -84,6 +84,6 @@ class AsyncGRPCServer final : public RPCServer {
   std::map<std::string, std::vector<RequestBase*>> rpc_reqs_;
 };

-};  // namespace detail
+};  // namespace distributed
 };  // namespace operators
 };  // namespace paddle
paddle/fluid/operators/detail/grpc_service.h → paddle/fluid/operators/distributed/grpc_service.h

...
@@ -23,7 +23,7 @@
 #include <grpc++/impl/codegen/stub_options.h>
 #include <grpc++/impl/codegen/sync_stream.h>
 #include <grpc++/support/byte_buffer.h>
-#include "paddle/fluid/operators/detail/variable_response.h"
+#include "paddle/fluid/operators/distributed/variable_response.h"
 #include "paddle/fluid/platform/profiler.h"
...
@@ -42,24 +42,25 @@ class ServerContext;
 // Support parsing/unparsing of tensorflow::VariableResponse.
 // Wire-format is identical to RecvVariableResponse.
 template <>
-class SerializationTraits<paddle::operators::detail::VariableResponse> {
+class SerializationTraits<paddle::operators::distributed::VariableResponse> {
  public:
   static Status Serialize(
-      const paddle::operators::detail::VariableResponse& msg,
+      const paddle::operators::distributed::VariableResponse& msg,
       grpc_byte_buffer** bp, bool* own_buffer) {
     PADDLE_ENFORCE(false, "SerializationTraits::Serialize not implemented!");
     return Status();
   }
-  static Status Deserialize(grpc_byte_buffer* buffer, paddle::operators::detail::VariableResponse* msg, int max_message_size = INT_MAX) {
+  static Status Deserialize(grpc_byte_buffer* buffer, paddle::operators::distributed::VariableResponse* msg, int max_message_size = INT_MAX) {
     if (buffer == nullptr) {
       return Status(StatusCode::INTERNAL, "No payload");
     }
     Status result = g_core_codegen_interface->ok();
     if (result.ok()) {
-      paddle::operators::detail::GrpcByteSource source(buffer);
+      paddle::operators::distributed::GrpcByteSource source(buffer);
       int ret = msg->Parse(&source);
       if (ret != 0) {
         result = Status(StatusCode::INTERNAL, "VariableResponse parse error");
...
@@ -73,7 +74,7 @@ class SerializationTraits<paddle::operators::detail::VariableResponse> {
 namespace paddle {
 namespace operators {
-namespace detail {
+namespace distributed {

 enum class GrpcMethod {
   kSendVariable,
...
@@ -121,6 +122,6 @@ class GrpcService final {
   };
 };

-}  // namespace detail
+}  // namespace distributed
 }  // namespace operators
 }  // namespace paddle
paddle/fluid/operators/detail/proto_encoder_helper.h → paddle/fluid/operators/distributed/proto_encoder_helper.h

...
@@ -26,7 +26,7 @@ limitations under the License. */
 namespace paddle {
 namespace operators {
-namespace detail {
+namespace distributed {

 char* EncodeVarint32(char* dst, uint32_t v) {
   // Operate on characters as unsigneds
...
@@ -144,6 +144,6 @@ class ProtoEncodeHelper {
   char* limit_;  // Just for CHECKs
 };

-}  // namespace detail
+}  // namespace distributed
 }  // namespace operators
 }  // namespace paddle
paddle/fluid/operators/detail/request_handler.h → paddle/fluid/operators/distributed/request_handler.h

...
@@ -31,7 +31,7 @@
 namespace paddle {
 namespace operators {
-namespace detail {
+namespace distributed {

 constexpr char kRequestSend[] = "RequestSend";
 constexpr char kRequestGet[] = "RequestGet";
...
@@ -135,6 +135,6 @@ class RequestHandler {
   RPCServer* rpc_server_;
 };

-}  // namespace detail
+}  // namespace distributed
 }  // namespace operators
 }  // namespace paddle
paddle/fluid/operators/detail/request_handler_impl.cc → paddle/fluid/operators/distributed/request_handler_impl.cc

...
@@ -20,13 +20,13 @@
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/framework/selected_rows.h"
-#include "paddle/fluid/operators/detail/request_handler_impl.h"
-#include "paddle/fluid/operators/detail/rpc_server.h"
+#include "paddle/fluid/operators/distributed/request_handler_impl.h"
+#include "paddle/fluid/operators/distributed/rpc_server.h"
 #include "paddle/fluid/string/printf.h"

 namespace paddle {
 namespace operators {
-namespace detail {
+namespace distributed {

 bool RequestSendHandler::Handle(const std::string& varname,
                                 framework::Scope* scope,
...
@@ -138,6 +138,6 @@ bool RequestCheckpointHandler::Handle(const std::string& varname,
   return true;
 }

-}  // namespace detail
+}  // namespace distributed
 }  // namespace operators
 }  // namespace paddle
paddle/fluid/operators/detail/request_handler_impl.h → paddle/fluid/operators/distributed/request_handler_impl.h

...
@@ -28,11 +28,11 @@
 #include "paddle/fluid/framework/scope.h"
 #include "paddle/fluid/framework/selected_rows.h"
 #include "paddle/fluid/framework/var_type.h"
-#include "paddle/fluid/operators/detail/request_handler.h"
+#include "paddle/fluid/operators/distributed/request_handler.h"

 namespace paddle {
 namespace operators {
-namespace detail {
+namespace distributed {

 class RequestSendHandler final : public RequestHandler {
  public:
...
@@ -81,6 +81,6 @@ class RequestCheckpointHandler final : public RequestHandler {
   int checkpoint_notify_id;
 };

-}  // namespace detail
+}  // namespace distributed
 }  // namespace operators
 }  // namespace paddle
paddle/fluid/operators/detail/rpc_client.cc → paddle/fluid/operators/distributed/rpc_client.cc

...
@@ -12,15 +12,15 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#include "paddle/fluid/operators/detail/rpc_client.h"
+#include "paddle/fluid/operators/distributed/rpc_client.h"

 namespace paddle {
 namespace operators {
-namespace detail {
+namespace distributed {

 std::once_flag RPCClient::init_flag_;
 std::unique_ptr<RPCClient> RPCClient::rpc_client_(nullptr);

-}  // namespace detail
+}  // namespace distributed
 }  // namespace operators
 }  // namespace paddle
paddle/fluid/operators/detail/rpc_client.h → paddle/fluid/operators/distributed/rpc_client.h

...
@@ -22,7 +22,7 @@
 namespace paddle {
 namespace operators {
-namespace detail {
+namespace distributed {

 class RPCClient {
  public:
...
@@ -88,6 +88,6 @@ class RPCClient {
   static std::once_flag init_flag_;
   static std::unique_ptr<RPCClient> rpc_client_;
 };

-}  // namespace detail
+}  // namespace distributed
 }  // namespace operators
 }  // namespace paddle
paddle/fluid/operators/detail/rpc_server.cc → paddle/fluid/operators/distributed/rpc_server.cc

...
@@ -17,11 +17,11 @@
 #include <limits>
 #include <string>

-#include "paddle/fluid/operators/detail/rpc_server.h"
+#include "paddle/fluid/operators/distributed/rpc_server.h"

 namespace paddle {
 namespace operators {
-namespace detail {
+namespace distributed {

 void RPCServer::ShutDown() {
   LOG(INFO) << "RPCServer ShutDown ";
...
@@ -112,6 +112,6 @@ void RPCServer::WaitCond(const std::string& rpc_name) {
       lock, [=] { return (cur_cond_.load() == cond || exit_flag_.load()); });
 }

-}  // namespace detail
+}  // namespace distributed
 }  // namespace operators
 }  // namespace paddle
paddle/fluid/operators/detail/rpc_server.h → paddle/fluid/operators/distributed/rpc_server.h

...
@@ -19,11 +19,11 @@
 #include <thread>  // NOLINT
 #include <utility>
 #include <vector>

-#include "paddle/fluid/operators/detail/request_handler.h"
+#include "paddle/fluid/operators/distributed/request_handler.h"

 namespace paddle {
 namespace operators {
-namespace detail {
+namespace distributed {

 class RPCServer {
  public:
...
@@ -86,6 +86,6 @@ class RPCServer {
   friend class RequestHandler;
 };

-};  // namespace detail
+};  // namespace distributed
 };  // namespace operators
 };  // namespace paddle
paddle/fluid/operators/detail/rpc_server_test.cc → paddle/fluid/operators/distributed/rpc_server_test.cc

...
@@ -22,18 +22,18 @@ limitations under the License. */
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/operators/detail/macros.h"
-#include "paddle/fluid/operators/detail/request_handler_impl.h"
-#include "paddle/fluid/operators/detail/rpc_client.h"
-#include "paddle/fluid/operators/detail/rpc_server.h"
+#include "paddle/fluid/operators/distributed/request_handler_impl.h"
+#include "paddle/fluid/operators/distributed/rpc_client.h"
+#include "paddle/fluid/operators/distributed/rpc_server.h"

 namespace framework = paddle::framework;
 namespace platform = paddle::platform;
-namespace detail = paddle::operators::detail;
+namespace distributed = paddle::operators::distributed;

 USE_OP(lookup_table);

-std::unique_ptr<detail::RPCServer> g_rpc_service;
-std::unique_ptr<detail::RequestHandler> g_req_handler;
+std::unique_ptr<distributed::RPCServer> g_rpc_service;
+std::unique_ptr<distributed::RequestHandler> g_req_handler;

 framework::BlockDesc* AppendPrefetchBlcok(framework::ProgramDesc* program) {
   auto root_block = program->MutableBlock(0);
...
@@ -113,19 +113,21 @@ void StartServer() {
   g_req_handler->SetScope(&scope);
   g_req_handler->SetExecutor(&exe);

-  g_rpc_service->RegisterRPC(detail::kRequestPrefetch, g_req_handler.get());
+  g_rpc_service->RegisterRPC(distributed::kRequestPrefetch, g_req_handler.get());
   g_req_handler->SetRPCServer(g_rpc_service.get());

   std::thread server_thread(
-      std::bind(&detail::RPCServer::StartServer, g_rpc_service.get()));
+      std::bind(&distributed::RPCServer::StartServer, g_rpc_service.get()));

   server_thread.join();
 }

 TEST(PREFETCH, CPU) {
-  g_req_handler.reset(new detail::RequestPrefetchHandler(true));
+  g_req_handler.reset(new distributed::RequestPrefetchHandler(true));
   g_rpc_service.reset(new RPCSERVER_T("127.0.0.1:0", 1));
-  detail::RPCClient* client = detail::RPCClient::GetInstance<RPCCLIENT_T>();
+  distributed::RPCClient* client =
+      distributed::RPCClient::GetInstance<RPCCLIENT_T>();

   std::thread server_thread(StartServer);
   g_rpc_service->WaitServerReady();
...
paddle/fluid/operators/detail/send_recv.proto → paddle/fluid/operators/distributed/send_recv.proto
(file moved, no content changes)
paddle/fluid/operators/detail/sendrecvop_utils.cc → paddle/fluid/operators/distributed/sendrecvop_utils.cc

...
@@ -12,7 +12,7 @@ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 See the License for the specific language governing permissions and
 limitations under the License. */

-#include "paddle/fluid/operators/detail/sendrecvop_utils.h"
+#include "paddle/fluid/operators/distributed/sendrecvop_utils.h"

 #ifdef PADDLE_WITH_CUDA
 #include <nccl.h>
...
@@ -23,14 +23,14 @@ limitations under the License. */
 #include "google/protobuf/io/coded_stream.h"
 #include "google/protobuf/io/zero_copy_stream.h"
 #include "paddle/fluid/framework/data_type.h"
-#include "paddle/fluid/operators/detail/bytebuffer_stream.h"
-#include "paddle/fluid/operators/detail/proto_encoder_helper.h"
-#include "paddle/fluid/operators/detail/variable_response.h"
+#include "paddle/fluid/operators/distributed/bytebuffer_stream.h"
+#include "paddle/fluid/operators/distributed/proto_encoder_helper.h"
+#include "paddle/fluid/operators/distributed/variable_response.h"
 #include "paddle/fluid/platform/profiler.h"

 namespace paddle {
 namespace operators {
-namespace detail {
+namespace distributed {

 using VarMsg = sendrecv::VariableMessage;
...
@@ -222,11 +222,11 @@ void DeserializeFromByteBuffer(const ::grpc::ByteBuffer& msg,
                                const platform::DeviceContext& ctx,
                                const framework::Scope* scope,
                                framework::Variable** var) {
-  operators::detail::VariableResponse resp(scope, &ctx);
+  operators::distributed::VariableResponse resp(scope, &ctx);
   PADDLE_ENFORCE(resp.Parse(msg) == 0, "parse bytebuffer to tensor error!");
   *var = resp.GetVar();
 }

-}  // namespace detail
+}  // namespace distributed
 }  // namespace operators
 }  // namespace paddle
paddle/fluid/operators/detail/sendrecvop_utils.h → paddle/fluid/operators/distributed/sendrecvop_utils.h

...
@@ -25,12 +25,12 @@ limitations under the License. */
 #include "paddle/fluid/framework/tensor_util.h"
 #include "paddle/fluid/framework/var_type.h"
-#include "paddle/fluid/operators/detail/send_recv.grpc.pb.h"
-#include "paddle/fluid/operators/detail/send_recv.pb.h"
+#include "paddle/fluid/operators/distributed/send_recv.grpc.pb.h"
+#include "paddle/fluid/operators/distributed/send_recv.pb.h"

 namespace paddle {
 namespace operators {
-namespace detail {
+namespace distributed {

 typedef void (*DestroyCallback)(void*);
...
@@ -61,6 +61,6 @@ inline std::type_index ToTypeIndex(sendrecv::VariableMessage::Type type) {
   }
 }

-}  // namespace detail
+}  // namespace distributed
 }  // namespace operators
 }  // namespace paddle
paddle/fluid/operators/detail/variable_response.cc → paddle/fluid/operators/distributed/variable_response.cc

...
@@ -12,7 +12,7 @@
 // See the License for the specific language governing permissions and
 // limitations under the License.

-#include "paddle/fluid/operators/detail/variable_response.h"
+#include "paddle/fluid/operators/distributed/variable_response.h"

 #include <string>
 #include <utility>
...
@@ -22,12 +22,12 @@
 #endif
 #include "paddle/fluid/platform/profiler.h"

-#include "paddle/fluid/operators/detail/send_recv.pb.h"
-#include "paddle/fluid/operators/detail/sendrecvop_utils.h"
+#include "paddle/fluid/operators/distributed/send_recv.pb.h"
+#include "paddle/fluid/operators/distributed/sendrecvop_utils.h"

 namespace paddle {
 namespace operators {
-namespace detail {
+namespace distributed {

 enum WireType {
   WIRETYPE_VARINT = 0,
...
@@ -158,13 +158,13 @@ bool VariableResponse::CopySelectRowsTensorData(
   slr->set_height(meta_.slr_height());
   auto* tensor = slr->mutable_value();
   tensor->Resize(dims);
-  PADDLE_ENFORCE_EQ(static_cast<size_t>(tensor->numel()),
-                    length / framework::SizeOfType(paddle::operators::detail::ToTypeIndex(meta_.data_type())));
+  PADDLE_ENFORCE_EQ(static_cast<size_t>(tensor->numel()),
+                    length / framework::SizeOfType(paddle::operators::distributed::ToTypeIndex(meta_.data_type())));
   void* tensor_data = tensor->mutable_data(
       ctx.GetPlace(),
-      paddle::operators::detail::ToTypeIndex(meta_.data_type()));
+      paddle::operators::distributed::ToTypeIndex(meta_.data_type()));
   if (!ReadRaw(input, ctx, tensor->place(), tensor_data, length)) {
     return false;
...
@@ -480,6 +480,6 @@ int VariableResponse::Parse(Source* source) {
   return 0;
 }

-};  // namespace detail
+};  // namespace distributed
 };  // namespace operators
 };  // namespace paddle
paddle/fluid/operators/detail/variable_response.h → paddle/fluid/operators/distributed/variable_response.h

...
@@ -22,17 +22,17 @@
 #include "paddle/fluid/framework/selected_rows.h"
 #include "paddle/fluid/framework/var_type.h"
-#include "paddle/fluid/operators/detail/send_recv.grpc.pb.h"
-#include "paddle/fluid/operators/detail/send_recv.pb.h"
+#include "paddle/fluid/operators/distributed/send_recv.grpc.pb.h"
+#include "paddle/fluid/operators/distributed/send_recv.pb.h"

 #include "google/protobuf/io/coded_stream.h"
 #include "google/protobuf/io/zero_copy_stream.h"
 #include "paddle/fluid/framework/tensor.h"
-#include "paddle/fluid/operators/detail/bytebuffer_stream.h"
+#include "paddle/fluid/operators/distributed/bytebuffer_stream.h"

 namespace paddle {
 namespace operators {
-namespace detail {
+namespace distributed {

 class VariableResponse {
  public:
...
@@ -99,6 +99,6 @@ class VariableResponse {
   sendrecv::VariableMessage meta_;
 };

-};  // namespace detail
+};  // namespace distributed
 };  // namespace operators
 };  // namespace paddle
paddle/fluid/operators/fetch_barrier_op.cc

...
@@ -42,8 +42,8 @@ class FetchBarrierOp : public framework::OperatorBase {
     // For profiling
     platform::RecordEvent record_event(Type(), &ctx);

-    detail::RPCClient* rpc_client =
-        detail::RPCClient::GetInstance<RPCCLIENT_T>();
+    distributed::RPCClient* rpc_client =
+        distributed::RPCClient::GetInstance<RPCCLIENT_T>();

     rpc_client->Wait();
...
paddle/fluid/operators/gen_nccl_id_op.cc

...
@@ -22,7 +22,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/threadpool.h"
 #include "paddle/fluid/operators/detail/macros.h"
-#include "paddle/fluid/operators/detail/request_handler_impl.h"
+#include "paddle/fluid/operators/distributed/request_handler_impl.h"
 #include "paddle/fluid/platform/nccl_helper.h"

 namespace paddle {
...
@@ -60,7 +60,8 @@ class GenNCCLIdOp : public framework::OperatorBase {
     std::vector<std::string> endpoint_list =
         Attr<std::vector<std::string>>("endpoint_list");
-    detail::RPCClient* client = detail::RPCClient::GetInstance<RPCCLIENT_T>();
+    distributed::RPCClient* client =
+        distributed::RPCClient::GetInstance<RPCCLIENT_T>();

     for (auto& ep : endpoint_list) {
       VLOG(3) << "sending nccl id to " << ep;
...
@@ -80,11 +81,11 @@ class GenNCCLIdOp : public framework::OperatorBase {
     // NOTE: Can not use unique_ptr here because the default
     // deleter will call GRPC Server's base class's dtor and
     // that will cause a wired crash.
-    detail::RequestSendHandler rpc_h(true);
-    std::unique_ptr<detail::RPCServer> rpc_service(
+    distributed::RequestSendHandler rpc_h(true);
+    std::unique_ptr<distributed::RPCServer> rpc_service(
         new RPCSERVER_T(endpoint, 1));

-    rpc_service->RegisterRPC(detail::kRequestSend, &rpc_h);
+    rpc_service->RegisterRPC(distributed::kRequestSend, &rpc_h);
     rpc_h.SetRPCServer(rpc_service.get());

     framework::ProgramDesc empty_program;
...
@@ -95,11 +96,11 @@ class GenNCCLIdOp : public framework::OperatorBase {
     rpc_h.SetExecutor(&executor);

     std::thread server_thread(
-        std::bind(&detail::RPCServer::StartServer, rpc_service.get()));
+        std::bind(&distributed::RPCServer::StartServer, rpc_service.get()));

-    rpc_service->SetCond(detail::kRequestSend);
+    rpc_service->SetCond(distributed::kRequestSend);
     VLOG(3) << "start getting nccl id from trainer 0...";
-    rpc_service->WaitBarrier(detail::kRequestSend);
+    rpc_service->WaitBarrier(distributed::kRequestSend);
     VLOG(3) << "got nccl id and stop server...";
     rpc_service->ShutDown();
     VLOG(3) << "rpc server stopped";
...
paddle/fluid/operators/listen_and_serv_op.cc

...
@@ -21,14 +21,14 @@ limitations under the License. */
 #include "paddle/fluid/operators/detail/macros.h"
-#include "paddle/fluid/operators/detail/request_handler_impl.h"
+#include "paddle/fluid/operators/distributed/request_handler_impl.h"
 #include "paddle/fluid/operators/listen_and_serv_op.h"
 #include "paddle/fluid/platform/profiler.h"

 namespace paddle {
 namespace operators {

-void RunServer(std::shared_ptr<detail::RPCServer> service) {
+void RunServer(std::shared_ptr<distributed::RPCServer> service) {
   service->StartServer();
   VLOG(4) << "RunServer thread end";
 }
...
@@ -123,12 +123,12 @@ void ListenAndServOp::RunSyncLoop(
   while (true) {
     // Get from multiple trainers, we don't care about the order in which
     // the gradients arrives, just add suffix 0~n and merge the gradient.
-    rpc_service_->SetCond(detail::kRequestSend);
-    rpc_service_->WaitBarrier(detail::kRequestSend);
+    rpc_service_->SetCond(distributed::kRequestSend);
+    rpc_service_->WaitBarrier(distributed::kRequestSend);

     if (rpc_service_->IsExit()) {
       LOG(WARNING) << "get exit!rpc_processor break!";
-      rpc_service_->SetCond(detail::kRequestGet);
+      rpc_service_->SetCond(distributed::kRequestGet);
       break;
     }
...
@@ -156,11 +156,11 @@ void ListenAndServOp::RunSyncLoop(
                                    recv_scope);
     VLOG(2) << "run all blocks spent " << GetTimestamp() - ts << "(ms)";

-    rpc_service_->SetCond(detail::kRequestGet);
-    rpc_service_->WaitBarrier(detail::kRequestGet);
+    rpc_service_->SetCond(distributed::kRequestGet);
+    rpc_service_->WaitBarrier(distributed::kRequestGet);
     rpc_service_->ResetBarrierCounter();
     // reset received sparse vars to avoid reuse it in the next mini-batch
-    dynamic_cast<detail::RequestSendHandler*>(request_send_handler_.get())
+    dynamic_cast<distributed::RequestSendHandler*>(request_send_handler_.get())
         ->ResetSparseVarRecorder();
   }  // while(true)
 }
...
@@ -217,14 +217,14 @@ void ListenAndServOp::RunAsyncLoop(framework::Executor *executor,
 }

 static void FillRequestCtx(
-    detail::RequestHandler *h, framework::Scope *scope,
+    distributed::RequestHandler *h, framework::Scope *scope,
     platform::DeviceContext *dev_ctx, framework::Executor *executor,
     framework::ProgramDesc *program,
     std::unordered_map<std::string,
                        std::shared_ptr<framework::ExecutorPrepareContext>>
        *prefetch_ctx,
     std::shared_ptr<framework::ExecutorPrepareContext> checkpoint_ctx,
-    detail::RPCServer *rpc_server) {
+    distributed::RPCServer *rpc_server) {
   h->SetScope(scope);
   h->SetDevCtx(dev_ctx);
   h->SetExecutor(executor);
...
@@ -255,16 +255,18 @@ void ListenAndServOp::RunImpl(const framework::Scope &scope,
   rpc_service_.reset(new RPCSERVER_T(endpoint, fan_in));

-  request_send_handler_.reset(new detail::RequestSendHandler(sync_mode));
-  request_get_handler_.reset(new detail::RequestGetHandler(sync_mode));
+  request_send_handler_.reset(new distributed::RequestSendHandler(sync_mode));
+  request_get_handler_.reset(new distributed::RequestGetHandler(sync_mode));
-  request_prefetch_handler_.reset(new detail::RequestPrefetchHandler(sync_mode));
-  request_checkpoint_handler_.reset(new detail::RequestCheckpointHandler(sync_mode, checkpoint_notify_id));
-  rpc_service_->RegisterRPC(detail::kRequestSend, request_send_handler_.get());
-  rpc_service_->RegisterRPC(detail::kRequestGet, request_get_handler_.get());
-  rpc_service_->RegisterRPC(detail::kRequestPrefetch,
+  request_prefetch_handler_.reset(
+      new distributed::RequestPrefetchHandler(sync_mode));
+  request_checkpoint_handler_.reset(new distributed::RequestCheckpointHandler(sync_mode, checkpoint_notify_id));
+  rpc_service_->RegisterRPC(distributed::kRequestSend, request_send_handler_.get());
+  rpc_service_->RegisterRPC(distributed::kRequestGet, request_get_handler_.get());
+  rpc_service_->RegisterRPC(distributed::kRequestPrefetch,
                             request_prefetch_handler_.get());
   rpc_service_->RegisterRPC(detail::kRequestCheckpoint,
                             request_checkpoint_handler_.get());
...
paddle/fluid/operators/listen_and_serv_op.h

...
@@ -24,8 +24,8 @@ limitations under the License. */
 #include "paddle/fluid/framework/lod_tensor.h"
 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/framework/threadpool.h"
-#include "paddle/fluid/operators/detail/request_handler.h"
-#include "paddle/fluid/operators/detail/rpc_server.h"
+#include "paddle/fluid/operators/distributed/request_handler.h"
+#include "paddle/fluid/operators/distributed/rpc_server.h"

 namespace paddle {
 namespace operators {
...
@@ -34,7 +34,7 @@ constexpr char kOptimizeBlock[] = "OptimizeBlock";
 constexpr char kPrefetchVarNameToBlockId[] = "prefetch_var_name_to_block_id";
 constexpr char kCheckpointBlockId[] = "checkpint_block_id";

-void RunServer(std::shared_ptr<detail::RPCServer> service);
+void RunServer(std::shared_ptr<distributed::RPCServer> service);

 class ListenAndServOp : public framework::OperatorBase {
  public:
...
@@ -64,11 +64,13 @@ class ListenAndServOp : public framework::OperatorBase {
               const platform::Place& dev_place) const override;

 protected:
-  mutable std::shared_ptr<detail::RPCServer> rpc_service_;
-  mutable std::shared_ptr<detail::RequestHandler> request_send_handler_;
-  mutable std::shared_ptr<detail::RequestHandler> request_get_handler_;
-  mutable std::shared_ptr<detail::RequestHandler> request_prefetch_handler_;
-  mutable std::shared_ptr<detail::RequestHandler> request_checkpoint_handler_;
+  mutable std::shared_ptr<distributed::RPCServer> rpc_service_;
+  mutable std::shared_ptr<distributed::RequestHandler> request_send_handler_;
+  mutable std::shared_ptr<distributed::RequestHandler> request_get_handler_;
+  mutable std::shared_ptr<distributed::RequestHandler> request_prefetch_handler_;
+  mutable std::shared_ptr<distributed::RequestHandler> request_checkpoint_handler_;

   mutable std::shared_ptr<std::thread> server_thread_;
 };
...
paddle/fluid/operators/logical_op.cc

...
@@ -146,6 +146,6 @@ REGISTER_UNARY_LOGICAL_OP(logical_not, "$$Out = !X$$");
 REGISTER_UNARY_LOGICAL_KERNEL(logical_not, CPU,
                               paddle::operators::LogicalNotFunctor);
 REGISTER_BINARY_LOGICAL_OP(logical_xor,
-                           "$$Out = (X || Y)\\,\\&\\&\\,!(X \\&\\& Y)$$");
+                           "$$Out = (X || Y)\\&\\& !(X \\&\\& Y)$$");
 REGISTER_BINARY_LOGICAL_KERNEL(logical_xor, CPU,
                                paddle::operators::LogicalXorFunctor);
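The docstring fix above only changes the LaTeX escaping; the documented expression is ordinary exclusive-or. A quick illustrative check of the identity (not part of the commit):

    # (X || Y) && !(X && Y) is exactly X XOR Y.
    for x in (False, True):
        for y in (False, True):
            assert ((x or y) and not (x and y)) == (x != y)
    print("logical_xor identity holds")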
paddle/fluid/operators/math/concat.cc

...
@@ -93,10 +93,10 @@ class ConcatGradFunctor<platform::CPUDeviceContext, T> {
     auto cpu_place = boost::get<platform::CPUPlace>(context.GetPlace());

     // computation
-    for (size_t k = 0; k < input_rows; ++k) {
+    for (int k = 0; k < input_rows; ++k) {
       const T* src_ptr = input.data<T>() + k * input_cols;
       int col_idx = 0;
-      for (int j = 0; j < num; ++j) {
+      for (size_t j = 0; j < num; ++j) {
         int col_len = output_cols[j];
         auto* out_tensor = outputs->at(j);
         if (out_tensor != nullptr) {
...
paddle/fluid/operators/math/math_function.cc

...
@@ -30,6 +30,7 @@ template struct SetConstant<platform::CPUDeviceContext, double>;
 template struct SetConstant<platform::CPUDeviceContext, int>;
 template struct SetConstant<platform::CPUDeviceContext, int64_t>;
 template struct SetConstant<platform::CPUDeviceContext, bool>;
+template struct SetConstant<platform::CPUDeviceContext, uint8_t>;

 #define DEFINE_CPU_TRANS(RANK)                                             \
   template struct Transpose<platform::CPUDeviceContext, platform::float16, \
...
paddle/fluid/operators/prefetch_op.cc

...
@@ -41,8 +41,8 @@ class PrefetchOp : public framework::OperatorBase {
     platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
     auto& ctx = *pool.Get(place);

-    detail::RPCClient* rpc_client =
-        detail::RPCClient::GetInstance<RPCCLIENT_T>();
+    distributed::RPCClient* rpc_client =
+        distributed::RPCClient::GetInstance<RPCCLIENT_T>();

     for (size_t i = 0; i < ins.size(); i++) {
       if (NeedSend(scope, ins[i])) {
...
paddle/fluid/operators/recv_op.cc

...
@@ -43,8 +43,8 @@ class RecvOp : public framework::OperatorBase {
     // For profiling
     platform::RecordEvent record_event(Type(), &ctx);

-    detail::RPCClient* rpc_client =
-        detail::RPCClient::GetInstance<RPCCLIENT_T>();
+    distributed::RPCClient* rpc_client =
+        distributed::RPCClient::GetInstance<RPCCLIENT_T>();

     for (size_t i = 0; i < outs.size(); i++) {
       VLOG(3) << "getting " << outs[i] << " from " << epmap[i];
...
paddle/fluid/operators/send_barrier_op.cc

...
@@ -44,8 +44,8 @@ class SendBarrierOp : public framework::OperatorBase {
     // For profiling
     platform::RecordEvent record_event(Type(), &ctx);

-    detail::RPCClient* rpc_client =
-        detail::RPCClient::GetInstance<RPCCLIENT_T>();
+    distributed::RPCClient* rpc_client =
+        distributed::RPCClient::GetInstance<RPCCLIENT_T>();

     VLOG(3) << "SendBarrierOp sync_mode:" << sync_mode;
...
paddle/fluid/operators/send_op.cc

...
@@ -45,8 +45,8 @@ class SendOp : public framework::OperatorBase {
     // For profiling
     platform::RecordEvent record_event(Type(), &ctx);

-    detail::RPCClient* rpc_client =
-        detail::RPCClient::GetInstance<RPCCLIENT_T>();
+    distributed::RPCClient* rpc_client =
+        distributed::RPCClient::GetInstance<RPCCLIENT_T>();

     for (size_t i = 0; i < ins.size(); i++) {
       if (NeedSend(scope, ins[i])) {
...
paddle/fluid/operators/tensorrt_engine_op.cc

...
@@ -14,11 +14,14 @@
 #ifdef PADDLE_WITH_CUDA

-#include "paddle/fluid/operators/tensorrt_engine_op.h"
+#include <string>
+#include <vector>

 #include "paddle/fluid/framework/op_registry.h"
 #include "paddle/fluid/inference/tensorrt/convert/op_converter.h"
 #include "paddle/fluid/inference/tensorrt/engine.h"
 #include "paddle/fluid/inference/utils/singleton.h"
+#include "paddle/fluid/operators/tensorrt_engine_op.h"

 namespace paddle {
 namespace operators {
...
paddle/fluid/operators/tensorrt_engine_op.h

...
@@ -16,10 +16,12 @@
 #ifdef PADDLE_WITH_CUDA

+#include <string>
+#include <vector>

 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/inference/analysis/helper.h"
 #include "paddle/fluid/inference/tensorrt/engine.h"
 #include "paddle/fluid/inference/tensorrt/engine.h"

 namespace paddle {
 namespace operators {
...
paddle/fluid/operators/tensorrt_engine_op_test.cc

...
@@ -179,7 +179,6 @@ void Execute(int batch_size, int input_dim, int output_dim, int nlayers = 1) {
                      const std::string& z_name, bool x_created,
                      const shape_t& x_shape, const shape_t& y_shape,
                      const shape_t& z_shape) {
-  LOG(INFO) << "create fc op";
   auto* fc = block_desc.AppendOp();
   fc->SetType("mul");
...
paddle/fluid/operators/test_send_nccl_id.cc

...
@@ -21,7 +21,7 @@ limitations under the License. */
 #include "paddle/fluid/framework/operator.h"
 #include "paddle/fluid/framework/program_desc.h"
 #include "paddle/fluid/operators/detail/macros.h"
-#include "paddle/fluid/operators/detail/request_handler_impl.h"
+#include "paddle/fluid/operators/distributed/request_handler_impl.h"
 #include "paddle/fluid/operators/listen_and_serv_op.h"
 #include "paddle/fluid/operators/math/math_function.h"
 #include "paddle/fluid/operators/math/selected_rows_functor.h"
...
@@ -37,11 +37,11 @@ USE_NO_KERNEL_OP(listen_and_serv);
 namespace f = paddle::framework;
 namespace p = paddle::platform;
 namespace m = paddle::operators::math;
-namespace detail = paddle::operators::detail;
+namespace distributed = paddle::operators::distributed;
 namespace string = paddle::string;

-std::unique_ptr<detail::RPCServer> g_rpc_service;
-std::unique_ptr<detail::RequestHandler> g_req_handler;
+std::unique_ptr<distributed::RPCServer> g_rpc_service;
+std::unique_ptr<distributed::RequestHandler> g_req_handler;

 void StartServer() {
   f::Scope scope;
...
@@ -57,14 +57,14 @@ void StartServer() {
   g_req_handler->SetProgram(&empty_program);
   g_req_handler->SetExecutor(&executor);

-  g_rpc_service->RegisterRPC(detail::kRequestSend, g_req_handler.get());
+  g_rpc_service->RegisterRPC(distributed::kRequestSend, g_req_handler.get());
   g_req_handler->SetRPCServer(g_rpc_service.get());

   std::thread server_thread(
-      std::bind(&detail::RPCServer::StartServer, g_rpc_service.get()));
+      std::bind(&distributed::RPCServer::StartServer, g_rpc_service.get()));

-  g_rpc_service->SetCond(detail::kRequestSend);
-  g_rpc_service->WaitBarrier(detail::kRequestSend);
+  g_rpc_service->SetCond(distributed::kRequestSend);
+  g_rpc_service->WaitBarrier(distributed::kRequestSend);

   LOG(INFO) << "got nccl id and stop server...";
   g_rpc_service->ShutDown();
...
@@ -72,7 +72,7 @@ void StartServer() {
 }

 TEST(SendNcclId, RPCServer) {
-  g_req_handler.reset(new detail::RequestSendHandler(true));
+  g_req_handler.reset(new distributed::RequestSendHandler(true));
   g_rpc_service.reset(new RPCSERVER_T("127.0.0.1:0", 1));

   std::thread server_thread(StartServer);
...
@@ -91,7 +91,8 @@ TEST(SendNcclId, RPCServer) {
   std::string ep = string::Sprintf("127.0.0.1:%d", port);

-  detail::RPCClient* client = detail::RPCClient::GetInstance<RPCCLIENT_T>();
+  distributed::RPCClient* client =
+      distributed::RPCClient::GetInstance<RPCCLIENT_T>();

   LOG(INFO) << "connect to server" << ep;
   client->AsyncSendVar(ep, dev_ctx, scope, NCCL_ID_VARNAME);
...
paddle/fluid/pybind/pybind.cc

...
@@ -159,6 +159,11 @@ PYBIND11_PLUGIN(core) {
             new (&instance) LoDTensor(new_offset_lod);
           })
       .def("__init__", [](LoDTensor &instance) { new (&instance) LoDTensor(); })
+      // We implement offset based LOD in C++ while we use length based with
+      // Python API. So we changed set_lod to set_recursive_sequence_lengths to
+      // avoid misuse.
+      // The discussion is here:
+      // https://github.com/PaddlePaddle/Paddle/issues/10855
       .def("set_lod",
            [](LoDTensor &self, const std::vector<std::vector<size_t>> &lod) {
              // the input lod is offset-based level-of-detail info
...
@@ -199,6 +204,7 @@ PYBIND11_PLUGIN(core) {
             std::copy(lod.begin(), lod.end(), std::back_inserter(new_lod));
             return new_lod;
           })
+      // Set above comments of set_lod.
       .def("recursive_sequence_lengths",
           [](LoDTensor &self) -> std::vector<std::vector<size_t>> {
             // output the length-based lod info
...
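The new comments explain that the Python API is length-based while C++ stores offsets. For illustration only (the helper name below is ours, not from the commit), the conversion the binding performs amounts to a running sum:

    def lengths_to_offsets(lengths):
        # length-based [2, 3] -> offset-based [0, 2, 5]
        offsets = [0]
        for n in lengths:
            offsets.append(offsets[-1] + n)
        return offsets

    print(lengths_to_offsets([2, 3]))  # [0, 2, 5]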
paddle/fluid/pybind/tensor_py.h

...
@@ -97,7 +97,7 @@ struct CastToPyBufferImpl<true, I, ARGS...> {
 inline pybind11::buffer_info CastToPyBuffer(const framework::Tensor &tensor) {
   auto buffer_info =
       details::CastToPyBufferImpl<true, 0, float, int, double, int64_t, bool,
-                                  platform::float16>()(tensor);
+                                  uint8_t, platform::float16>()(tensor);
   return buffer_info;
 }
...
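Adding uint8_t to the cast list lets uint8 tensors be exposed to numpy through the buffer protocol. The sketch below is a hedged usage example, assuming the usual LoDTensor.set / numpy.array round trip also accepts uint8 data; it is not taken from the commit.

    import numpy as np
    import paddle.fluid as fluid

    t = fluid.LoDTensor()
    t.set(np.arange(6, dtype=np.uint8).reshape(2, 3), fluid.CPUPlace())
    print(np.array(t).dtype)  # expected: uint8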
python/paddle/fluid/__init__.py

...
@@ -44,7 +44,7 @@ import metrics
 import transpiler
 from param_attr import ParamAttr, WeightNormParamAttr
 from data_feeder import DataFeeder
-from core import LoDTensor, CPUPlace, CUDAPlace, CUDAPinnedPlace
+from core import LoDTensor, CPUPlace, CUDAPlace, CUDAPinnedPlace, Scope
 from transpiler import DistributeTranspiler, InferenceTranspiler, \
     memory_optimize, release_memory
 from concurrency import (Go, make_channel, channel_send, channel_recv,
...
@@ -83,6 +83,7 @@ __all__ = framework.__all__ + executor.__all__ + concurrency.__all__ + \
     'profiler',
     'unique_name',
     'recordio_writer',
+    'Scope',
 ]
...
python/paddle/fluid/data_feeder.py
浏览文件 @
e5890052
...
...
@@ -79,6 +79,61 @@ class DataToLoDTensorConverter(object):
class
DataFeeder
(
object
):
"""
DataFeeder converts the data that returned by a reader into a data
structure that can feed into Executor and ParallelExecutor. The reader
usually returns a list of mini-batch data entries. Each data entry in
the list is one sample. Each sample is a list or a tuple with one
feature or multiple features.
The simple usage shows below:
.. code-block:: python
place = fluid.CPUPlace()
img = fluid.layers.data(name='image', shape=[1, 28, 28])
label = fluid.layers.data(name='label', shape=[1], dtype='int64')
feeder = fluid.DataFeeder([img, label], fluid.CPUPlace())
result = feeder.feed([([0] * 784, [9]), ([1] * 784, [1])])
If you want to feed data into GPU side separately in advance when you
use multi-GPU to train a model, you can use `decorate_reader` function.
.. code-block:: python
place=fluid.CUDAPlace(0)
feeder = fluid.DataFeeder(place=place, feed_list=[data, label])
reader = feeder.decorate_reader(
paddle.batch(flowers.train(), batch_size=16))
Args:
feed_list(list): The Variables or Variables'name that will
feed into model.
place(Place): place indicates feed data into CPU or GPU, if you want to
feed data into GPU, please using `fluid.CUDAPlace(i)` (`i` represents
the GPU id), or if you want to feed data into CPU, please using
`fluid.CPUPlace()`.
program(Program): The Program that will feed data into, if program
is None, it will use default_main_program(). Default None.
Raises:
ValueError: If some Variable is not in this Program.
Examples:
.. code-block:: python
# ...
place = fluid.CPUPlace()
feed_list = [
main_program.global_block().var(var_name) for var_name in feed_vars_name
] # feed_vars_name is a list of variables' name.
feeder = fluid.DataFeeder(feed_list, place)
for data in reader():
outs = exe.run(program=main_program,
feed=feeder.feed(data))
"""
def
__init__
(
self
,
feed_list
,
place
,
program
=
None
):
self
.
feed_dtypes
=
[]
self
.
feed_names
=
[]
...
...
@@ -108,6 +163,16 @@ class DataFeeder(object):
self
.
place
=
place
def
feed
(
self
,
iterable
):
"""
According to feed_list and iterable, converters the input into
a data structure that can feed into Executor and ParallelExecutor.
Args:
iterable(list|tuple): the input data.
Returns:
dict: the result of conversion.
"""
converter
=
[]
for
lod_level
,
shape
,
dtype
in
six
.
zip
(
self
.
feed_lod_level
,
self
.
feed_shapes
,
self
.
feed_dtypes
):
...
...
@@ -130,6 +195,20 @@ class DataFeeder(object):
return
ret_dict
def
feed_parallel
(
self
,
iterable
,
num_places
=
None
):
"""
Takes multiple mini-batches. Each mini-batch will be feed on each
device in advance.
Args:
iterable(list|tuple): the input data.
num_places(int): the number of devices. Default None.
Returns:
dict: the result of conversion.
Notes:
The number of devices and number of mini-batches must be same.
"""
if
isinstance
(
self
.
place
,
core
.
CUDAPlace
):
places
=
[
core
.
CUDAPlace
(
i
)
...
...
@@ -168,6 +247,24 @@ class DataFeeder(object):
                         multi_devices,
                         num_places=None,
                         drop_last=True):
        """
        Converts the data returned by the reader into multiple mini-batches.
        Each mini-batch will be fed to a different device.

        Args:
            reader(fun): the input data reader.
            multi_devices(bool): whether to feed data onto multiple devices.
            num_places(int): the number of devices. Default None.
            drop_last(bool): whether to drop the last batch if it cannot fit
                all devices evenly. Default True.

        Returns:
            dict: the result of conversion.

        Raises:
            ValueError: If drop_last is False and the data batch cannot
                fit for devices.
        """

        def __reader_creator__():
            if not multi_devices:
                for item in reader():
...
...
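As a quick illustration of the `feed` and `decorate_reader` entry points documented in the hunks above, here is a minimal sketch; the random reader, batch size and layer names are illustrative assumptions, not part of this patch.

.. code-block:: python

    import numpy
    import paddle
    import paddle.fluid as fluid

    img = fluid.layers.data(name='image', shape=[784])
    label = fluid.layers.data(name='label', shape=[1], dtype='int64')

    place = fluid.CPUPlace()
    feeder = fluid.DataFeeder(feed_list=[img, label], place=place)

    # feed(): one mini-batch (a list of samples) -> a feed dict of LoDTensors.
    minibatch = [(numpy.random.rand(784), [3]), (numpy.random.rand(784), [7])]
    feed_dict = feeder.feed(minibatch)

    # decorate_reader(): wrap a batched reader; with multi_devices=True each
    # device would receive its own mini-batch (hypothetical reader below).
    def random_reader():
        for _ in range(4):
            yield numpy.random.rand(784), numpy.random.randint(10, size=(1, ))

    single_dev_reader = feeder.decorate_reader(
        paddle.batch(random_reader, batch_size=8), multi_devices=False)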
python/paddle/fluid/executor.py
...
...
@@ -25,6 +25,13 @@ g_scope = core.Scope()
def global_scope():
    """
    Get the global/default scope instance. Many APIs use
    :code:`global_scope` as their default value, e.g., :code:`Executor.run`.

    Returns:
        Scope: The global/default scope instance.
    """
    return g_scope
...
...
@@ -37,6 +44,19 @@ def switch_scope(scope):
@contextlib.contextmanager
def scope_guard(scope):
    """
    Change the global/default scope instance by Python `with` statement. All
    variables in runtime will be assigned to the new scope.

    Examples:
        >>> import paddle.fluid as fluid
        >>> new_scope = fluid.Scope()
        >>> with fluid.scope_guard(new_scope):
        >>>     ...

    Args:
        scope: The new global/default scope.
    """
    ex = switch_scope(scope)
    yield
    switch_scope(ex)
...
...
@@ -135,14 +155,18 @@ def has_fetch_operators(block, fetch_targets, fetch_holder_name):
def fetch_var(name, scope=None, return_numpy=True):
    """
    Fetch the value of the variable with the given name from the
    given scope.

    Args:
        name(str): name of the variable. Typically, only persistable variables
            can be found in the scope used for running the program.
        scope(core.Scope|None): scope object. It should be the scope where
            you pass to Executor.run() when running your program.
            If None, global_scope() will be used. Default None.
        return_numpy(bool): whether to convert the tensor to numpy.ndarray.
            Default True.

    Returns:
        LodTensor|numpy.ndarray
    """
...
...
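The two helpers above are easy to misuse together, so a short sketch of the intended pattern follows; the parameter name `'fc_0.w_0'` is a hypothetical example, and the fetch call is shown commented out because it only succeeds after a program has actually been run in that scope.

.. code-block:: python

    import paddle.fluid as fluid

    new_scope = fluid.Scope()
    with fluid.scope_guard(new_scope):
        # Any program executed here creates its variables in new_scope
        # instead of the global/default scope returned by global_scope().
        pass

    # After running a program, a persistable variable can be read back as a
    # numpy.ndarray (return_numpy=True is the default):
    # weights = fluid.executor.fetch_var('fc_0.w_0', scope=new_scope)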
python/paddle/fluid/framework.py
(diff collapsed)
python/paddle/fluid/lod_tensor.py
...
...
@@ -19,33 +19,41 @@ __all__ = ['create_lod_tensor', 'create_random_int_lodtensor']
def create_lod_tensor(data, lod, place):
    """
    Create a lod tensor from a numpy array, a list, or an existing lod tensor.

    Create a lod tensor by doing the following:

    1. Check that the length-based input lod is valid.

    2. Convert the length-based lod to an offset-based LoD.

    3. Copy the data from a numpy array, a list or an existing lod tensor to
       CPU or GPU device (based on input place).

    4. Set the level of detail (LoD) using the offset-based LoD.

    Examples:

        Suppose we want LoDTensor to hold data for sequences of word, where each
        word is represented by an integer. If we want to create a LoDTensor to
        represent two sentences, one of 2 words, and one of 3 words.

        Then :code:`data` can be a numpy array of integers with shape (5, 1).
        :code:`lod` will be [[2, 3]], indicating the length (# of words) in each
        sentence. This length-based input lod [[2, 3]] will be converted to
        offset-based lod [[0, 2, 5]] inside the function call.

    Please reference :ref:`api_guide_low_level_lod_tensor` for more details
    regarding LoD.

    Args:
        data(numpy.ndarray|list|LoDTensor): a numpy array or a LoDTensor or a
            list holding the data to be copied.
        lod(list): a list of lists indicating the length-based LoD info
            specified by the user.
        place(Place): CPU or GPU place indicating where the data in the new
            LoDTensor will be stored.

    Returns:
        A fluid LoDTensor object with tensor data and lod info.
...
...
@@ -77,31 +85,38 @@ def create_lod_tensor(data, lod, place):
def create_random_int_lodtensor(lod, base_shape, place, low, high):
    """
    Create a LoDTensor containing random integers.

    This function is frequently used in the book examples. So we revised it
    based on the new create_lod_tensor API and put it here in the lod_tensor
    module to simplify the code.

    The function does the following:

    1. Calculate the overall shape of the LoDTensor based on the length-based
       :code:`lod` input and the shape of the basic element in
       :code:`base_shape`.

    2. Create a numpy array of this shape.

    3. Create the LoDTensor using the create_lod_tensor API.

    Suppose we want LoDTensor to hold data for sequences of word, where each
    word is represented by an integer. If we want to create a LoDTensor to
    represent two sentences, one of 2 words, and one of 3 words. Then
    'base_shape' is [1], input length-based 'lod' is [[2, 3]]. Then the overall
    shape of the LoDTensor would be [5, 1], holding 5 words for two sentences.

    Args:
        lod(list): a list of lists indicating the length-based LoD info
            specified by the user.
        base_shape(list): the shape of the basic element to be held by the
            LoDTensor.
        place(Place): CPU or GPU place indicating where the data in the new
            LoDTensor will be stored.
        low(int): the lower bound of the random integers.
        high(int): the upper bound of the random integers.

    Returns:
        A fluid LoDTensor object with tensor data and lod info.
...
...
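A short sketch of the two-sentence example in the docstrings above (one sentence of 2 words, one of 3), assuming both helpers are exported at the `fluid` top level as in the book examples:

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid

    # Five words in total, one integer id per word.
    data = np.arange(5).reshape(5, 1).astype("int64")
    # Length-based LoD: the first sentence has 2 words, the second has 3.
    tensor = fluid.create_lod_tensor(data, [[2, 3]], fluid.CPUPlace())

    # The same structure filled with random integer ids instead of real data.
    rand_tensor = fluid.create_random_int_lodtensor(
        lod=[[2, 3]], base_shape=[1], place=fluid.CPUPlace(), low=0, high=9)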
python/paddle/fluid/metrics.py
...
...
@@ -325,14 +325,14 @@ class Auc(MetricBase):
"""
def
__init__
(
self
,
name
,
curve
=
'ROC'
,
num_thresholds
=
200
):
super
(
MetricBase
,
self
).
__init__
(
name
,
curve
,
num_thresholds
)
super
(
Auc
,
self
).
__init__
(
name
=
name
)
self
.
_curve
=
curve
self
.
_num_thresholds
=
num_thresholds
self
.
_epsilon
=
1e-6
self
.
tp_list
=
np
.
ndarray
((
num_thresholds
,
))
self
.
fn_list
=
np
.
ndarray
((
num_thresholds
,
))
self
.
tn_list
=
np
.
ndarray
((
num_thresholds
,
))
self
.
fp_list
=
np
.
ndarray
((
num_thresholds
,
))
self
.
tp_list
=
np
.
zeros
((
num_thresholds
,
))
self
.
fn_list
=
np
.
zeros
((
num_thresholds
,
))
self
.
tn_list
=
np
.
zeros
((
num_thresholds
,
))
self
.
fp_list
=
np
.
zeros
((
num_thresholds
,
))
def
update
(
self
,
labels
,
predictions
,
axis
=
1
):
if
not
_is_numpy_
(
labels
):
...
...
@@ -350,12 +350,12 @@ class Auc(MetricBase):
            tp, fn, tn, fp = 0, 0, 0, 0
            for i, lbl in enumerate(labels):
                if lbl:
-                   if predictions[i, 0] >= thresh:
+                   if predictions[i, 1] >= thresh:
                        tp += 1
                    else:
                        fn += 1
                else:
-                   if predictions[i, 0] >= thresh:
+                   if predictions[i, 1] >= thresh:
                        fp += 1
                    else:
                        tn += 1
...
...
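For context, a sketch of how the patched `Auc` metric is driven from Python; the arrays are made up, and `predictions` is assumed to hold per-class probabilities with the positive class in column 1, which is exactly the column the corrected `predictions[i, 1]` comparison reads.

.. code-block:: python

    import numpy as np
    import paddle.fluid as fluid

    auc = fluid.metrics.Auc(name="auc")

    labels = np.array([[0], [1], [1], [0]])
    # Column 0: probability of class 0, column 1: probability of class 1.
    predictions = np.array([[0.8, 0.2],
                            [0.3, 0.7],
                            [0.4, 0.6],
                            [0.9, 0.1]])

    auc.update(labels=labels, predictions=predictions)
    print(auc.eval())  # accumulated AUC so far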
python/paddle/fluid/nets.py
...
...
@@ -26,16 +26,87 @@ def simple_img_conv_pool(input,
                         filter_size,
                         pool_size,
                         pool_stride,
-                        act,
-                        param_attr=None,
                         pool_padding=0,
                         pool_type='max',
                         global_pooling=False,
+                        conv_stride=1,
+                        conv_padding=0,
+                        conv_dilation=1,
+                        conv_groups=1,
+                        param_attr=None,
+                        bias_attr=None,
+                        act=None,
                         use_cudnn=True,
                         use_mkldnn=False):
"""
The simple_img_conv_pool is composed of one Convolution2d and one Pool2d.
Args:
input (Variable): The input image with [N, C, H, W] format.
num_filters(int): The number of filter. It is as same as the output
feature channel.
filter_size (int|list|tuple): The filter size. If filter_size is a list or
tuple, it must contain two integers, (filter_size_H, filter_size_W). Otherwise,
the filter_size_H = filter_size_W = filter_size.
pool_size (int|list|tuple): The pooling size of Pool2d layer. If pool_size
is a list or tuple, it must contain two integers, (pool_size_H, pool_size_W).
Otherwise, the pool_size_H = pool_size_W = pool_size.
pool_stride (int|list|tuple): The pooling stride of Pool2d layer. If pool_stride
is a list or tuple, it must contain two integers, (pooling_stride_H, pooling_stride_W).
Otherwise, the pooling_stride_H = pooling_stride_W = pool_stride.
pool_padding (int|list|tuple): The padding of Pool2d layer. If pool_padding is a list or
tuple, it must contain two integers, (pool_padding_H, pool_padding_W).
Otherwise, the pool_padding_H = pool_padding_W = pool_padding. Default 0.
pool_type (str): Pooling type can be :math:`max` for max-pooling and :math:`avg` for
average-pooling. Default :math:`max`.
global_pooling (bool): Whether to use the global pooling. If global_pooling = true,
pool_size and pool_padding will be ignored. Default False
conv_stride (int|list|tuple): The stride size of the Conv2d Layer. If stride is a
list or tuple, it must contain two integers, (conv_stride_H, conv_stride_W). Otherwise,
the conv_stride_H = conv_stride_W = conv_stride. Default: conv_stride = 1.
conv_padding (int|list|tuple): The padding size of the Conv2d Layer. If padding is
a list or tuple, it must contain two integers, (conv_padding_H, conv_padding_W).
Otherwise, the conv_padding_H = conv_padding_W = conv_padding. Default: conv_padding = 0.
conv_dilation (int|list|tuple): The dilation size of the Conv2d Layer. If dilation is
a list or tuple, it must contain two integers, (conv_dilation_H, conv_dilation_W).
Otherwise, the conv_dilation_H = conv_dilation_W = conv_dilation. Default: conv_dilation = 1.
conv_groups (int): The groups number of the Conv2d Layer. According to grouped
convolution in Alex Krizhevsky's Deep CNN paper: when group=2,
the first half of the filters is only connected to the first half
of the input channels, while the second half of the filters is only
connected to the second half of the input channels. Default: groups=1
param_attr (ParamAttr): The parameters to the Conv2d Layer. Default: None
bias_attr (ParamAttr): Bias parameter for the Conv2d layer. Default: None
act (str): Activation type for Conv2d. Default: None
use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn
library is installed. Default: True
use_mkldnn (bool): Use mkldnn kernels or not, it is valid only when compiled
with mkldnn library. Default: False
Return:
Variable: The result of input after Convolution2d and Pool2d.
Examples:
.. code-block:: python
img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
conv_pool = fluid.nets.simple_img_conv_pool(input=img,
filter_size=5,
num_filters=20,
pool_size=2,
pool_stride=2,
act="relu")
"""
    conv_out = layers.conv2d(
        input=input,
        num_filters=num_filters,
        filter_size=filter_size,
        stride=conv_stride,
        padding=conv_padding,
        dilation=conv_dilation,
        groups=conv_groups,
        param_attr=param_attr,
        bias_attr=bias_attr,
        act=act,
        use_cudnn=use_cudnn,
        use_mkldnn=use_mkldnn)
...
...
@@ -45,6 +116,8 @@ def simple_img_conv_pool(input,
        pool_size=pool_size,
        pool_type=pool_type,
        pool_stride=pool_stride,
        pool_padding=pool_padding,
        global_pooling=global_pooling,
        use_cudnn=use_cudnn,
        use_mkldnn=use_mkldnn)
    return pool_out
...
...
@@ -60,11 +133,65 @@ def img_conv_group(input,
                   conv_with_batchnorm=False,
                   conv_batchnorm_drop_rate=0.0,
                   pool_stride=1,
-                  pool_type=None,
+                  pool_type="max",
                   use_cudnn=True,
                   use_mkldnn=False):
"""
Image Convolution Group, used for vgg net.
The Image Convolution Group is composed of Convolution2d, BatchNorm, DropOut,
and Pool2d. According to the input arguments, img_conv_group will do a series
of computations for Input using Convolution2d, BatchNorm, DropOut, and pass
the last result to Pool2d.
Args:
input (Variable): The input image with [N, C, H, W] format.
conv_num_filter(list|tuple): Indicates the numbers of filter of this group.
pool_size (int|list|tuple): The pooling size of Pool2d Layer. If pool_size
is a list or tuple, it must contain two integers, (pool_size_H, pool_size_W).
Otherwise, the pool_size_H = pool_size_W = pool_size.
conv_padding (int|list|tuple): The padding size of the Conv2d Layer. If padding is
a list or tuple, its length must be equal to the length of conv_num_filter.
Otherwise the conv_padding of all Conv2d Layers are the same. Default 1.
conv_filter_size (int|list|tuple): The filter size. If filter_size is a list or
tuple, its length must be equal to the length of conv_num_filter.
Otherwise the conv_filter_size of all Conv2d Layers are the same. Default 3.
conv_act (str): Activation type for Conv2d Layer that is not followed by BatchNorm.
Default: None.
param_attr (ParamAttr): The parameters to the Conv2d Layer. Default: None
conv_with_batchnorm (bool|list): Indicates whether to use BatchNorm after Conv2d Layer.
If conv_with_batchnorm is a list, its length must be equal to the length of
conv_num_filter. Otherwise, conv_with_batchnorm indicates whether all the
Conv2d Layer follows a BatchNorm. Default False.
conv_batchnorm_drop_rate (float|list): Indicates the drop_rate of Dropout Layer
after BatchNorm. If conv_batchnorm_drop_rate is a list, its length must be
equal to the length of conv_num_filter. Otherwise, drop_rate of all Dropout
Layers is conv_batchnorm_drop_rate. Default 0.0.
pool_stride (int|list|tuple): The pooling stride of Pool2d layer. If pool_stride
is a list or tuple, it must contain two integers, (pooling_stride_H,
pooling_stride_W). Otherwise, the pooling_stride_H = pooling_stride_W = pool_stride.
Default 1.
pool_type (str): Pooling type can be :math:`max` for max-pooling and :math:`avg` for
average-pooling. Default :math:`max`.
use_cudnn (bool): Use cudnn kernel or not, it is valid only when the cudnn
library is installed. Default: True
use_mkldnn (bool): Use mkldnn kernels or not, it is valid only when compiled
with mkldnn library. Default: False
Return:
Variable: The final result after serial computation using Convolution2d,
BatchNorm, DropOut, and Pool2d.
Examples:
.. code-block:: python
img = fluid.layers.data(name='img', shape=[1, 28, 28], dtype='float32')
conv_pool = fluid.nets.img_conv_group(input=img,
num_channels=3,
conv_padding=1,
conv_num_filter=[3, 3],
conv_filter_size=3,
conv_act="relu",
pool_size=2,
pool_stride=2)
"""
    tmp = input
    assert isinstance(conv_num_filter, list) or \
...
...
@@ -74,6 +201,7 @@ def img_conv_group(input,
        if not hasattr(obj, '__len__'):
            return [obj] * len(conv_num_filter)
        else:
            assert len(obj) == len(conv_num_filter)
            return obj

    conv_padding = __extend_list__(conv_padding)
...
...
@@ -119,6 +247,39 @@ def sequence_conv_pool(input,
                       param_attr=None,
                       act="sigmoid",
                       pool_type="max"):
"""
The sequence_conv_pool is composed of Sequence Convolution and Pooling.
Args:
input (Variable): The input of sequence_conv, which supports variable-time
length input sequence. The underlying of input is a matrix with shape
(T, N), where T is the total time steps in this mini-batch and N is
the input_hidden_size
num_filters(int): The number of filter.
filter_size (int): The filter size.
param_attr (ParamAttr): The parameters to the Sequence_conv Layer. Default: None.
act (str): Activation type for Sequence_conv Layer. Default: "sigmoid".
pool_type (str): Pooling type can be :math:`max` for max-pooling, :math:`average` for
average-pooling, :math:`sum` for sum-pooling, :math:`sqrt` for sqrt-pooling.
Default :math:`max`.
Return:
Variable: The final result after Sequence Convolution and Pooling.
Examples:
.. code-block:: python
input_dim = len(word_dict)
emb_dim = 128
hid_dim = 512
data = fluid.layers.data(name="words", shape=[1], dtype="int64", lod_level=1)
emb = fluid.layers.embedding(input=data, size=[input_dim, emb_dim], is_sparse=True)
seq_conv = fluid.nets.sequence_conv_pool(input=emb,
num_filters=hid_dim,
filter_size=3,
act="tanh",
pool_type="sqrt")
"""
    conv_out = layers.sequence_conv(
        input=input,
        num_filters=num_filters,
...
...
@@ -132,9 +293,9 @@ def sequence_conv_pool(input,
def glu(input, dim=-1):
    """
    The Gated Linear Units (GLU) is composed of split, sigmoid activation and
    element-wise multiplication. Specifically, split the input into two equal
    sized parts, :math:`a` and :math:`b`, along the given dimension and then
    compute as following:

        .. math::
...
...
@@ -147,16 +308,16 @@ def glu(input, dim=-1):
    Args:
        input (Variable): The input variable which is a Tensor or LoDTensor.
        dim (int): The dimension along which to split. If :math:`dim < 0`, the
            dimension to split along is :math:`rank(input) + dim`. Default -1.

    Returns:
        Variable: Variable with half the size of input.

    Examples:
        .. code-block:: python

            data = fluid.layers.data(name="words", shape=[3, 6, 9], dtype="float32")
            output = fluid.nets.glu(input=data, dim=1)  # shape of output: [3, 3, 9]
    """
    a, b = layers.split(input, num_or_sections=2, dim=dim)
...
...
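The split-then-gate behaviour of `glu` described above can be checked against a few lines of plain numpy; this is an independent reference of the GLU formula out = a * sigmoid(b), not code from the patch.

.. code-block:: python

    import numpy as np

    def glu_reference(x, dim=-1):
        # Split the input into two equal halves a, b along `dim`,
        # then gate: out = a * sigmoid(b).
        a, b = np.split(x, 2, axis=dim)
        return a * (1.0 / (1.0 + np.exp(-b)))

    x = np.random.rand(3, 6, 9).astype("float32")
    out = glu_reference(x, dim=1)
    print(out.shape)  # (3, 3, 9): half the size of the input along dim 1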
@@ -189,40 +350,48 @@ def scaled_dot_product_attention(queries,
<https://arxiv.org/pdf/1706.03762.pdf>`_.
Args:
    queries (Variable): The input variable which should be a 3-D Tensor.
    keys (Variable): The input variable which should be a 3-D Tensor.
    values (Variable): The input variable which should be a 3-D Tensor.
    num_heads (int): Head number to compute the scaled dot product
        attention. Default: 1.
    dropout_rate (float): The dropout rate to drop the attention weight.
        Default: 0.0.

Returns:
    Variable: A 3-D Tensor computed by multi-head scaled dot product \
        attention.

Raises:
    ValueError: If input queries, keys, values are not 3-D Tensors.

NOTES:
    1. When num_heads > 1, three linear projections are learned respectively
       to map input queries, keys and values into queries', keys' and values'.
       queries', keys' and values' have the same shapes with queries, keys
       and values.
    2. When num_heads == 1, scaled_dot_product_attention has no learnable
       parameters.

Examples:
    .. code-block:: python
queries = fluid.layers.data(name="queries",
shape=[3, 5, 9],
dtype="float32",
append_batch_size=False)
queries.stop_gradient = False
keys = fluid.layers.data(name="keys",
shape=[3, 6, 9],
dtype="float32",
append_batch_size=False)
keys.stop_gradient = False
values = fluid.layers.data(name="values",
shape=[3, 6, 10],
dtype="float32",
append_batch_size=False)
values.stop_gradient = False
contexts = fluid.nets.scaled_dot_product_attention(queries, keys, values)
contexts.shape # [3, 5, 10]
"""
    if not (len(queries.shape) == len(keys.shape) == len(values.shape) == 3):
...
...
python/paddle/fluid/parallel_executor.py
...
...
@@ -27,6 +27,40 @@ BuildStrategy = core.ParallelExecutor.BuildStrategy
class ParallelExecutor(object):
    """
ParallelExecutor can run program in parallel.
Args:
use_cuda (bool): Whether to use CUDA or not.
loss_name (str): The loss name must set in training. Default None.
main_program (Program): The program that need to run, if not provided,
then default_main_program will be used. Default None.
share_vars_from(ParallelExecutor): If provided, it will share variables
from the specified ParallelExecutor. Default None.
num_trainers(int): If greater than 1, NCCL will be initialized with
multiple rank of nodes, each node should have same number of GPUs.
Distributed training will be enabled then. Default 1.
trainer_id(int): Must be used together with num_trainers. trainer_id is the
"rank" of the current node and starts from 0. Default 0.
Returns:
ParallelExecutor: The initialized ParallelExecutor object.
Raises:
TypeError: If share_vars_from is provided, but not ParallelExecutor object.
Examples:
.. code-block:: python
train_exe = fluid.ParallelExecutor(use_cuda=True, loss_name=loss.name)
test_exe = fluid.ParallelExecutor(use_cuda=True,
main_program=test_program,
share_vars_from=train_exe)
train_loss, = train_exe.run([loss.name], feed=feed_dict)
test_loss, = test_exe.run([loss.name], feed=feed_dict)
"""
    def __init__(self,
                 use_cuda,
                 loss_name=None,
...
...
@@ -37,42 +71,6 @@ class ParallelExecutor(object):
                 num_trainers=1,
                 trainer_id=0,
                 **kwargs):
"""
ParallelExecutor can run program in parallel.
Args:
use_cuda(bool): Whether to use CUDA or not.
loss_name(str, default None): The loss name must set in training.
main_program(Program, default None): The program that need to run,
if not provided, then default_main_program will be used.
share_vars_from(ParallelExecutor, default None): If provied,
it will share variables from the specified ParallelExecutor.
num_trainers(int, default 1): If greater than 1, NCCL will be
initialized with multpile rank of nodes, each node should have
same number of GPUs. Distributed training will be enabled then.
trainer_id(int, default 0): Must use together with num_trainers.
trainer_id is the "rank" of current node starts from 0.
Returns:
A ParallelExecutor object.
Raises:
TypeError: If share_vars_from is provided, but not ParallelExecutor
object.
Examples:
.. code-block:: python
train_exe = fluid.ParallelExecutor(
use_cuda=True, loss_name=loss.name)
test_exe = fluid.ParallelExecutor(
use_cuda=True,
main_program=test_program,
share_vars_from=train_exe)
train_loss, = train_exe.run([loss.name], feed=feed_dict)
test_loss, = test_exe.run([loss.name], feed=feed_dict)
"""
        if len(kwargs) != 0:
            err_msg = ""
            for key in kwargs:
...
...
@@ -131,10 +129,16 @@ class ParallelExecutor(object):
        main = main_program
        main = main if main else framework.default_main_program()
        scope = executor.global_scope()
        # FIXME(Yancey1989): it's a temporary approach to determinate the distribute
        # train program, call self.bcast_param() at the end of each mini-batch.
        self.is_dist = True if "recv" in [
            op.type for op in main.global_block().ops
        ] else False

        if share_vars_from and not isinstance(share_vars_from, ParallelExecutor):
            raise TypeError("share_vars_from must be ParallelExecutor.")

        local_scopes = share_vars_from.executor.local_scopes(
        ) if share_vars_from else []
...
...
@@ -166,12 +170,14 @@ class ParallelExecutor(object):
element in the list will be copied to each device directly.
For example, if the feed is a dict:
>>> exe = ParallelExecutor()
>>> # the image will be splitted into devices. If there is two devices
>>> # each device will process an image with shape (24, 1, 28, 28)
>>> exe.run(feed={'image': numpy.random.random(size=(48, 1, 28, 28))})
For example, if the feed is a list:
>>> exe = ParallelExecutor()
>>> # each device will process each element in the list.
>>> # the 1st device will process an image with shape (48, 1, 28, 28)
...
...
@@ -182,18 +188,40 @@ class ParallelExecutor(object):
>>> {"image": numpy.random.random(size=(32, 1, 28, 28))},
>>> ])
        Args:
            fetch_list(list): The fetched variable names.
            feed(list|dict|None): The feed variables. If the feed is a dict,
                tensors in that dict will be splitted into each devices. If
                the feed is a list, each element of the list will be copied
                to each device. Default None.
            feed_dict: Alias for feed parameter, for backward compatibility.
                This parameter has been deprecated. Default None.

        Returns:
            List: The fetched result list.

        Raises:
            ValueError: If the feed is a list, but its length is not equal the
                length of active places, or its elements are not dict.

        NOTES:
            1. If the feed's type is dict, the number of data that feeds to
               ParallelExecutor must be bigger than active places. Otherwise,
               it will throw exception from C++ side. Special attention should be
               paid to check whether the last batch of the dataset is bigger
               than active places.
            2. If active places are more than one, the fetch results for each
               variable is a list, and each element of this list is the variable
               of the respective active place.

        Examples:
            .. code-block:: python

                pe = fluid.ParallelExecutor(use_cuda=use_cuda,
                                            loss_name=avg_cost.name,
                                            main_program=fluid.default_main_program())
                loss = pe.run(feed=feeder.feed(cur_batch),
                              fetch_list=[avg_cost.name])
        """
        if feed is None and feed_dict is not None:
            feed = feed_dict
...
...
@@ -238,9 +266,17 @@ class ParallelExecutor(object):
        fetch_var_name = '@FETCHED_VAR_NAME@'
        self.executor.run(fetch_list, fetch_var_name)
        arr = self.scope.find_var(fetch_var_name).get_lod_tensor_array()

        if self.is_dist:
            self.bcast_params()

        return [arr[i] for i in range(len(arr))]

    def bcast_params(self):
        """
        Broadcast the parameters to other devices. It is used during
        distributed training.
        """
        self.executor.bcast_params(set(self.persistable_vars))

    @property
...
...
python/paddle/fluid/param_attr.py
...
...
@@ -22,6 +22,35 @@ __all__ = [
class ParamAttr(object):
    """
    Parameter attributes object. To fine-tune the network training process,
    users can set the parameter's attributes to control training details, such
    as learning rate, regularization, trainable, do_model_average and the
    method used to initialize the parameter.
Args:
name(str): The parameter's name. Default None.
initializer(Initializer): The method to initial this parameter. Default None.
learning_rate(float): The parameter's learning rate. The learning rate when
optimize is :math:`global\_lr * parameter\_lr * scheduler\_factor`.
Default 1.0.
regularizer(WeightDecayRegularizer): Regularization factor. Default None.
trainable(bool): Whether this parameter is trainable. Default True.
gradient_clip(BaseGradientClipAttr): The method to clip this parameter's
gradient. Default None.
do_model_average(bool): Whether this parameter should do model average.
Default False.
Examples:
.. code-block:: python
w_param_attrs = fluid.ParamAttr(name="fc_weight",
learning_rate=0.5,
regularizer=fluid.L2Decay(1.0),
trainable=True)
y_predict = fluid.layers.fc(input=x, size=10, param_attr=w_param_attrs)
"""
    def __init__(self,
                 name=None,
                 initializer=None,
...
...
@@ -29,7 +58,7 @@ class ParamAttr(object):
                 regularizer=None,
                 trainable=True,
                 gradient_clip=None,
-                do_model_average=None):
+                do_model_average=False):
        self.name = name
        self.initializer = initializer
        self.learning_rate = learning_rate
...
...
@@ -39,6 +68,16 @@ class ParamAttr(object):
        self.model_average = do_model_average

    def set_default_initializer(self, initializer):
        """
        Set the default initializer, the initializer should be Constant,
        Uniform, Normal, Xavier, MSRA.

        Args:
            initializer(Initializer): the initializer to set.

        Returns:
            None
        """
        if initializer is None:
            if self.initializer is None:
                raise ValueError("ParamAttr.initializer is not set")
...
...
@@ -50,13 +89,45 @@ class ParamAttr(object):
        self.initializer = initializer

    def set_default_param_initializer(self):
        """
        Set the default initializer for the parameter with Xavier.

        Args:
            None.

        Returns:
            None.
        """
        self.set_default_initializer(Xavier())

    def set_default_bias_initializer(self):
        """
        Set the default initializer for the bias with Constant(0.0).

        Args:
            None.

        Returns:
            None.
        """
        self.set_default_initializer(Constant(0.0))

    @staticmethod
    def to_attr(arg):
        """
        Create ParamAttr[s].

        Args:
            arg: Arguments to initialize ParamAttr[s]. arg's type can be
                str, Initializer, float, WeightDecayRegularizer, BaseGradientClipAttr,
                bool, ParamAttr, or a list of above type.

        Returns:
            ParamAttr[s]: ParamAttr[s] initialized with arg.

        Raises:
            arg can not initialize a ParamAttr.
        """
        if arg is None:
            return ParamAttr()
        elif isinstance(arg, list) or isinstance(arg, tuple):
...
...
@@ -75,6 +146,15 @@ class ParamAttr(object):
        raise TypeError("{0} cast to ParamAttr".format(type(arg)))

    def to_kwargs(self, with_initializer=False):
        """
        Returns the attributes of this parameter.

        Args:
            with_initializer(bool): Whether to add initializer attr.

        Returns:
            Parameter attributes(map): The attributes of this parameter.
        """
        kwargs = {
            'name': self.name,
            'optimize_attr': {
...
...
@@ -92,9 +172,27 @@ class ParamAttr(object):
class
WeightNormParamAttr
(
ParamAttr
):
"""
Used for weight normalization. Any field in ParamAttr can also be set here.
Besides, an extra field dim can be set to indicate the dimension except
which to normalize.
Used for weight Norm. Weight Norm is a reparameterization of the weight vectors
in a neural network that decouples the length of those weight vectors from
their direction. Weight Norm has been implemented as discussed in this
paper: `Weight Normalization: A Simple Reparameterization to Accelerate
Training of Deep Neural Networks
<https://arxiv.org/pdf/1602.07868.pdf>`_.
Args:
dim(list): The parameter's name. Default None.
kwargs: Any field in ParamAttr. Default None.
Examples:
.. code-block:: python
data = fluid.layers.data(name="data", shape=[3, 32, 32], dtype="float32")
fc = fluid.layers.fc(input=data,
size=1000,
param_attr=WeightNormParamAttr(
dim=None,
name='weight_norm_param'))
"""
# List to record the parameters reparameterized by weight normalization.
# If these parameters are treated as Variable rather than Parameter,
...
...
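A small sketch of how the `to_attr` helper documented above normalizes its argument; it assumes the string case maps to the parameter name, as the docstring's list of accepted types suggests.

.. code-block:: python

    from paddle.fluid.param_attr import ParamAttr

    attr_from_name = ParamAttr.to_attr("fc_weight")   # str -> named ParamAttr
    attr_from_none = ParamAttr.to_attr(None)          # None -> default ParamAttr
    attr_passthrough = ParamAttr.to_attr(
        ParamAttr(name="fc_weight", learning_rate=0.5))  # ParamAttr passes through

    print(attr_from_name.name)   # 'fc_weight'
    print(attr_from_none.name)   # None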
python/paddle/fluid/recordio_writer.py
...
...
@@ -36,6 +36,45 @@ def convert_reader_to_recordio_file(
        compressor=core.RecordIOWriter.Compressor.Snappy,
        max_num_records=1000,
        feed_order=None):
"""
Convert a Python Reader to a recordio file.
Please see :ref:`api_guide_python_reader` and :ref:`api_guide_reader_op` for
details.
Examples:
>>> import paddle.fluid as fluid
>>> import paddle.dataset.mnist as mnist
>>> import paddle
>>>
>>> tmp_program = fluid.Program()
>>> with fluid.program_guard(tmp_program):
>>> img = fluid.layers.data(name='img', shape=[784])
>>> label = fluid.layers.data(name='label', shape=[1], dtype='int64')
>>> feeder = fluid.DataFeeder(feed_list=[img, label], place=fluid.CPUPlace())
>>> # mnist.recordio will be generated in current directory
>>> fluid.recordio_writer.convert_reader_to_recordio_file(
>>> filename="mnist.recordio",
>>> reader_creator=paddle.batch(mnist.train(), batch_size=32),
>>> feeder=feeder)
Args:
filename(str): The recordio filename.
reader_creator(callable): The Python Reader Creator. See
:ref:`api_guide_python_reader`.
feeder(DataFeeder): The DataFeeder instance. Used to convert
:code:`reader_creator` to :code: `lod_tensor`
compressor: Must in fluid.core.RecordIOWriter.Compressor.Snappy or
fluid.core.RecordIOWriter.Compressor.NoCompress. Use :code:`Snappy`
by default.
max_num_records(int): Maximum number of records in one chuck. Each record
is each return value from reader function
feed_order(list): The order of variable names that the reader returns
Returns:
int: the number of records saved.
"""
    if feed_order is None:
        feed_order = feeder.feed_names
    counter = 0
...
...
@@ -58,6 +97,17 @@ def convert_reader_to_recordio_files(
        compressor=core.RecordIOWriter.Compressor.Snappy,
        max_num_records=1000,
        feed_order=None):
"""
convert a python reader to many recordio files.
This API is basically same as :code:`convert_reader_to_recordio_file`,
instead of it will create many recordio files. Each file contains at
most :code:`batch_per_file` records.
Please reference
:ref:`api_fluid_recordio_writer_convert_reader_to_recordio_file` for more
details.
"""
    if feed_order is None:
        feed_order = feeder.feed_names
    f_name, f_ext = os.path.splitext(filename)
...
...
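To mirror the single-file example in the docstring above, a sketch of the multi-file variant follows; the `batch_per_file` keyword is an assumption read off the docstring wording ("Each file contains at most batch_per_file records"), so check the actual signature before relying on it.

.. code-block:: python

    import paddle
    import paddle.fluid as fluid
    import paddle.dataset.mnist as mnist

    tmp_program = fluid.Program()
    with fluid.program_guard(tmp_program):
        img = fluid.layers.data(name='img', shape=[784])
        label = fluid.layers.data(name='label', shape=[1], dtype='int64')
        feeder = fluid.DataFeeder(feed_list=[img, label], place=fluid.CPUPlace())
        # Writes mnist-00000.recordio, mnist-00001.recordio, ... in the
        # current directory, each holding at most batch_per_file batches.
        fluid.recordio_writer.convert_reader_to_recordio_files(
            filename="mnist.recordio",
            batch_per_file=100,
            reader_creator=paddle.batch(mnist.train(), batch_size=32),
            feeder=feeder)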
python/paddle/fluid/tests/unittests/test_bilinear_interp_op.py
...
...
@@ -15,6 +15,7 @@
import unittest
import numpy as np
from op_test import OpTest
import paddle.fluid.core as core


def bilinear_interp_np(input, out_h, out_w, out_size):
...
...
@@ -45,9 +46,9 @@ def bilinear_interp_np(input, out_h, out_w, out_size):
            out[:, :, i, j] = h2lambda*(w2lambda*input[:, :, h, w] +
                                        w1lambda*input[:, :, h, w+wid]) + \
                h1lambda*(w2lambda*input[:, :, h+hid, w] +
                          w1lambda*input[:, :, h+hid, w+wid])
-   return out.astype("float32")
+   return out.astype(input.dtype)
class TestBilinearInterpOp(OpTest):
...
...
@@ -122,5 +123,44 @@ class TestCase6(TestBilinearInterpOp):
        self.out_size = np.array([65, 129]).astype("int32")


class TestBilinearInterpOpUint8(OpTest):
    def setUp(self):
        self.out_size = None
        self.init_test_case()
        self.op_type = "bilinear_interp"
        input_np = np.random.randint(
            low=0, high=256, size=self.input_shape).astype("uint8")
        output_np = bilinear_interp_np(input_np, self.out_h, self.out_w,
                                       self.out_size)
        self.inputs = {'X': input_np}
        if self.out_size is not None:
            self.inputs['OutSize'] = self.out_size
        self.attrs = {'out_h': self.out_h, 'out_w': self.out_w}
        self.outputs = {'Out': output_np}

    def test_check_output(self):
        self.check_output_with_place(place=core.CPUPlace(), atol=1)

    def init_test_case(self):
        self.input_shape = [1, 3, 9, 6]
        self.out_h = 10
        self.out_w = 9


class TestCase1Uint8(TestBilinearInterpOpUint8):
    def init_test_case(self):
        self.input_shape = [2, 3, 128, 64]
        self.out_h = 120
        self.out_w = 50


class TestCase2Uint8(TestBilinearInterpOpUint8):
    def init_test_case(self):
        self.input_shape = [4, 1, 7, 8]
        self.out_h = 5
        self.out_w = 13
        self.out_size = np.array([6, 15]).astype("int32")


if __name__ == "__main__":
    unittest.main()
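A minimal sketch of exercising the reference helper from the test above; it assumes it is run from the unit-test directory so that `test_bilinear_interp_op` is importable, and the shapes follow `TestBilinearInterpOpUint8`.

.. code-block:: python

    import numpy as np
    from test_bilinear_interp_op import bilinear_interp_np

    x = np.random.randint(0, 256, size=(1, 3, 9, 6)).astype("uint8")
    y = bilinear_interp_np(x, 10, 9, None)  # resize H x W from 9 x 6 to 10 x 9

    print(y.shape)  # expected (1, 3, 10, 9)
    print(y.dtype)  # uint8, since the patched helper now keeps input.dtype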
python/paddle/fluid/trainer.py
...
...
@@ -33,23 +33,59 @@ __all__ = [
class BeginEpochEvent(object):
    """
    The begin of a training epoch.

    Args:
        epoch_id(int): The current epoch ID.
    """

    def __init__(self, epoch_id):
        self.epoch = epoch_id


class EndEpochEvent(object):
    """
    The end of a training epoch.

    Args:
        epoch_id(int): The current epoch ID.
    """

    def __init__(self, epoch_id):
        self.epoch = epoch_id


class BeginStepEvent(object):
    """
    The begin of a training step.

    Args:
        epoch_id(int): The current epoch ID.
        step_id(int): The current step ID.
    """

    def __init__(self, epoch_id, step_id):
        self.epoch = epoch_id
        self.step = step_id
        self.fetch_metrics = True
        """
        If fetch_metrics is true, the metrics will be fetched at the
        EndStepEvent. Default is True.
        """


class EndStepEvent(object):
    """
    The end of a training step.

    Args:
        epoch_id(int): The current epoch ID.
        step_id(int): The current step ID.
        metrics(list): A list of fetched tensors. The order of this list is the
            same as the :code:`train_func` returns.
    """

    def __init__(self, epoch_id, step_id, metrics):
        self.epoch = epoch_id
        self.step = step_id
...
...
@@ -57,6 +93,27 @@ class EndStepEvent(object):
class CheckpointConfig(object):
    """
Parameter object for :code:`fluid.io.save_checkpoint` and
:code:`fluid.Trainer`. Used to configure how checkpoints are saved.
Args:
checkpoint_dir(str): Directory path to save check point. Default is the
current directory.
max_num_checkpoints(int): The max number of local check points.
epoch_interval(int): Every number of epoch to save check point.
step_interval(int): Every number of step to save check point.
Examples:
>>> config = fluid.CheckpointConfig("./checkpoints")
>>> trainer = fluid.Trainer(train_func=train_program,
>>> place=place,
>>> optimizer_func=optimizer_func,
>>> checkpoint_config=config)
>>> trainer.train(...)
"""
    def __init__(self,
                 checkpoint_dir=None,
                 max_num_checkpoints=3,
...
...
@@ -106,11 +163,62 @@ def check_and_get_place(place):
class Trainer(object):
"""
A trainer wraps MultiGPU/MultiNode training loops and can be used to train a
simple neural network easily.
This API takes a :code:`train_func`. A :code:`train_func` is a function that
returns loss as its first return value. The rest of the return values can be
fetched by EndStepEvent.metrics.
This API also takes an :code:`optimizer_func` that will return an optimizer
instance.
For example, to train a MLP for MNIST dataset, the sample program is
>>> import paddle.fluid as fluid
>>>
>>> def mlp(image, layer_sizes=[200, 100], activation="relu", num_classes=10):
>>> hidden = image
>>> for layer_size in layer_sizes:
>>> hidden = fluid.layers.fc(input=hidden, size=layer_size, act=activation)
>>> return fluid.layers.fc(input=hidden, size=num_classes, act="softmax")
>>>
>>> def train_mnist_mlp():
>>> img = fluid.layers.data(name='image', shape=[784])
>>> label = fluid.layers.data(name='label', shape=[1], dtype='int64')
>>> prediction = mlp(img)
>>> return fluid.layers.mean(fluid.layers.cross_entropy(prediction, label))
>>>
>>> def optimizer():
>>> return fluid.optimizer.Adam()
>>>
>>> trainer = Trainer(train_func=train_mnist_mlp,
>>> optimizer_func=optimizer,
>>> place=fluid.CUDAPlace(0),
>>> parallel=True)
>>>
>>> def train_callback(event):
>>> if isinstance(event, fluid.EndStepEvent):
>>> print "Epoch ID", event.epoch, "Step ID",
\
>>> event.step, "AvgLoss", event.metrics[0]
>>> elif isinstance(event, fluid.EndEpochEvent):
>>> trainer.save_params("./model_{0}".format(event.epoch))
>>>
>>> trainer.train(num_epochs=100, event_handler=train_callback)
For more example, please see :ref:`api_guide_high_level_api`.
Args:
    train_func(callable): A function which will return loss. The loss must be
        a scalar tensor.
    optimizer_func(callable): A function that returns an Optimizer object.
    place(CUDAPlace|CPUPlace): The device place of this trainer. If
        :code:`parallel=True`, all CUDA Places will be used if :code:`place`
        is a :code:`CUDAPlace`.
    parallel(bool): True if use multiple devices.
    checkpoint_config(CheckpointConfig): Configuration about how to save
        checkpoints.
"""
    def __init__(self,
...
...
@@ -122,9 +230,6 @@ class Trainer(object):
                 checkpoint_config=None):
        self.__stop = False
        self.parallel = parallel
-       # 1. we need to generate a framework.Program by calling
-       # program_func. Reference: fluid.program_guard in
-       # test_word2vec.py

        # config for checkpoint
        # only chief worker will save variables
...
...
@@ -138,6 +243,10 @@ class Trainer(object):
        self.scope = core.Scope()

+       # 1. we need to generate a framework.Program by calling
+       # program_func. Reference: fluid.program_guard in
+       # test_word2vec.py
        self.startup_program = framework.Program()
        self.train_program = framework.Program()
...
...
@@ -280,17 +389,18 @@ class Trainer(object):
    def train(self, num_epochs, event_handler, reader=None, feed_order=None):
        """
        Start the train loop to train the model.

        Args:
            num_epochs(int): The number of epochs. An epoch will process all
                data in the reader.
            event_handler(callable): The event handler. A function with type
                (ev:Event)->void.
            reader(callable): A reader creator object. See also
                :ref:`api_guide_python_reader` .
            feed_order(list): Feeding order of reader. If None, the feeding
                order follows the defining order in the program.

        Returns:
            None
        """
        training_role = os.getenv("PADDLE_TRAINING_ROLE", "")
        if training_role == "PSERVER":
...
...
@@ -310,16 +420,24 @@ class Trainer(object):
        Test the model on given test data.

        Args:
            reader(callable): The reader that yields test data.
            feed_order(list): Feeding order of reader. If None, the feeding
                order follows the defining order in the program.
        """
        return self._test_by_executor(reader, feed_order,
                                      self.train_func_outputs)

    def save_params(self, param_path):
        # reference: save_persistables in io.py
        """
        Save all parameters into :code:`param_path`.

        Args:
            param_path(str): The path to save parameters.

        Returns:
            None
        """
        with self._prog_and_scope_guard():
            exe = executor.Executor(self.place)
            io.save_persistables(exe, dirname=param_path)
...
...
python/paddle/fluid/transpiler/distribute_transpiler.py
(diff collapsed)
python/paddle/fluid/transpiler/memory_optimization_transpiler.py
...
...
@@ -383,6 +383,16 @@ def memory_optimize(input_program, skip_opt_set=None, print_log=False, level=0):
def release_memory(input_program, skip_opt_set=None):
"""
Modify the input program and insert :code:`delete_op` to drop variables that
are no longer used as early as possible. The modification is performed in place.
Notes: This is an experimental API and could be removed in next few
releases. Users should not use this API.
Args:
input_program(Program): The program will be inserted :code:`delete_op`.
"""
    cfgs = _get_cfgs(input_program)
    for cfg in cfgs:
        cfg.release_memory(skip_opt_set=skip_opt_set)
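A sketch of where `release_memory` would be called on a toy program; the network definition is an assumption about the call site, and the top-level `fluid.release_memory` alias is assumed to be re-exported like `memory_optimize` (otherwise import it from `paddle.fluid.transpiler.memory_optimization_transpiler`).

.. code-block:: python

    import paddle.fluid as fluid

    x = fluid.layers.data(name='x', shape=[13], dtype='float32')
    y = fluid.layers.data(name='y', shape=[1], dtype='float32')
    y_pred = fluid.layers.fc(input=x, size=1)
    loss = fluid.layers.mean(
        fluid.layers.square_error_cost(input=y_pred, label=y))
    fluid.optimizer.SGD(learning_rate=0.01).minimize(loss)

    # Insert delete_op into the main program so variables that are no longer
    # needed are dropped early (experimental, as the docstring warns).
    fluid.release_memory(fluid.default_main_program())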
python/paddle/fluid/transpiler/ps_dispatcher.py
...
...
@@ -33,15 +33,21 @@ class PSDispatcher(object):
    def dispatch(self, varlist):
        """
        Args:
            varlist(list): a list of Variables

        Returns:
            a map of pserver endpoint -> varname
        """
        AssertionError("Interface has not been implemented.")


class HashName(PSDispatcher):
    """
    Hash variable names to several endpoints using python
    "hash()" function.

    Args:
        pserver_endpoints (list): list of endpoint(ip:port).
    """

    def __init__(self, pserver_endpoints):
...
...
@@ -61,7 +67,11 @@ class HashName(PSDispatcher):
class RoundRobin(PSDispatcher):
    """
    Distribute variables to several endpoints using the
    RoundRobin<https://en.wikipedia.org/wiki/Round-robin_scheduling> method.

    Args:
        pserver_endpoints (list): list of endpoint(ip:port).
    """

    def __init__(self, pserver_endpoints):
...
...
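To illustrate the two dispatchers, a small sketch with made-up endpoints and a stand-in variable class (only the `name` attribute is needed for dispatching by name):

.. code-block:: python

    from paddle.fluid.transpiler.ps_dispatcher import HashName, RoundRobin

    endpoints = ["127.0.0.1:6170", "127.0.0.1:6171"]

    class FakeVar(object):
        # Minimal stand-in for a Variable; only .name is used here.
        def __init__(self, name):
            self.name = name

    varlist = [FakeVar("w_0"), FakeVar("w_1"), FakeVar("b_0")]

    rr = RoundRobin(endpoints)
    print(rr.dispatch(varlist))  # endpoints assigned in turn

    hn = HashName(endpoints)
    print(hn.dispatch(varlist))  # endpoint picked from hash(var.name)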
python/paddle/fluid/unique_name.py
...
...
@@ -16,7 +16,7 @@ import collections
import contextlib
import sys

-__all__ = ['generate', 'switch', 'guard', 'UniqueNameGenerator']
+__all__ = ['generate', 'switch', 'guard']


class UniqueNameGenerator(object):
...
...
tools/check_ctest_hung.py
0 → 100644
# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
import sys
import re


def escape(input):
    o = input.replace("\n", "")
    o = o.replace("\r", "")
    return o


def main():
    usage = """Usage:
1. Download the Paddle_PR_CI_*.log from TeamCity
2. run: python check_ctest_hung.py Paddle_PR_CI_*.log
3. If there is hung ctest, the result likes:
Diff:  set(['test_parallel_executor_crf'])
    """
    if len(sys.argv) < 2:
        print(usage)
        exit(0)

    logfile = sys.argv[1]
    started = set()
    passed = set()
    with open(logfile, "r") as fn:
        for l in fn.readlines():
            if l.find("Test ") != -1 and \
                l.find("Passed") != -1:
                m = re.search("Test\s+#[0-9]*\:\s([a-z0-9_]+)", escape(l))
                passed.add(m.group(1))
            if l.find("Start ") != -1:
                start_parts = escape(l).split(" ")
                m = re.search("Start\s+[0-9]+\:\s([a-z0-9_]+)", escape(l))
                started.add(m.group(1))
    print "Diff: ", started - passed


if __name__ == "__main__":
    main()
.clang_format.hook → tools/codestyle/clang_format.hook
File moved.
.copyright.hook → tools/codestyle/copyright.hook
File moved.
tools/codestyle/docstring_checker.py
...
...
@@ -291,6 +291,8 @@ class DocstringChecker(BaseChecker):
True if successful otherwise False.
"""
        if node.name.startswith("__") or node.name.startswith("_"):
            return True
        find = False
        for t in node.body:
            if not isinstance(t, astroid.Return):
...
...
@@ -316,6 +318,8 @@ class DocstringChecker(BaseChecker):
Returns:
True if successful otherwise False.
"""
        if node.name.startswith("__") or node.name.startswith("_"):
            return True
        args = []
        for arg in node.args.get_children():
            if (not isinstance(arg, astroid.AssignName)) \
...
...