diff --git a/CMakeLists.txt b/CMakeLists.txt
index c86889c05c8cf0d521dce9adbf3e918ba91729a1..0ec65bac84b0b0d89123473a8941f80c90f1b339 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -53,7 +53,7 @@ option(COVERALLS_UPLOAD "Package code coverage data to coveralls" OFF)
option(ON_TRAVIS "Exclude special unit test on Travis CI" OFF)
option(WITH_C_API "Compile PaddlePaddle with C-API(Prediction)" OFF)
# TODO: Only compile PaddlePaddle fluid version by WITH_FLUID option.
-option(WITH_FLUID "Compile PaddlePaddle fluid only(TODO)" ON)
+option(WITH_FLUID "Compile PaddlePaddle fluid only(TODO)" OFF)
option(WITH_GOLANG "Compile PaddlePaddle with GOLANG" OFF)
option(GLIDE_INSTALL "Download and install go dependencies " ON)
option(USE_NNPACK "Compile PaddlePaddle with NNPACK library" OFF)
diff --git a/Dockerfile b/Dockerfile
index 60e76c7f2ede6beaca11659020d5991a75d5b741..fbec88c7966d6ea93495519843d6cda63f622661 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -53,10 +53,14 @@ RUN localedef -i en_US -f UTF-8 en_US.UTF-8
# FIXME: due to temporary ipykernel dependency issue, specify ipykernel jupyter
# version util jupyter fixes this issue.
+
+# Pin the sphinx version to 1.5.6 and remove the -U option from [pip install -U
+# sphinx-rtd-theme], since the -U option would upgrade sphinx to the newest
+# version (1.7.1 for now), which breaks the documentation build.
RUN pip install --upgrade pip && \
pip install -U wheel && \
- pip install -U docopt PyYAML sphinx && \
- pip install -U sphinx-rtd-theme==0.1.9 recommonmark
+ pip install -U docopt PyYAML sphinx==1.5.6 && \
+ pip install sphinx-rtd-theme==0.1.9 recommonmark
RUN pip install pre-commit 'ipython==5.3.0' && \
pip install 'ipykernel==4.6.0' 'jupyter==1.0.0' && \
diff --git a/doc/design/images/duplicate_op.graffle b/doc/design/images/duplicate_op.graffle
deleted file mode 100644
index 5979f792e252f028a615729215529c2be42d9165..0000000000000000000000000000000000000000
Binary files a/doc/design/images/duplicate_op.graffle and /dev/null differ
diff --git a/doc/design/images/duplicate_op.png b/doc/design/images/duplicate_op.png
deleted file mode 100644
index f299c5d37f260a1bb0daec886f0a4ee1c1f31c92..0000000000000000000000000000000000000000
Binary files a/doc/design/images/duplicate_op.png and /dev/null differ
diff --git a/doc/design/images/duplicate_op2.graffle b/doc/design/images/duplicate_op2.graffle
deleted file mode 100644
index 5cec3bc64dbd44dc99e348485969f29bd128ceb1..0000000000000000000000000000000000000000
Binary files a/doc/design/images/duplicate_op2.graffle and /dev/null differ
diff --git a/doc/design/images/duplicate_op2.png b/doc/design/images/duplicate_op2.png
deleted file mode 100644
index 21cdd5cabf1b5203e1435a75b57770d2f702fa92..0000000000000000000000000000000000000000
Binary files a/doc/design/images/duplicate_op2.png and /dev/null differ
diff --git a/doc/design/images/replica.png b/doc/design/images/replica.png
deleted file mode 100644
index ef59e56b01d792a059279e6bb9a29f3db6a59a41..0000000000000000000000000000000000000000
Binary files a/doc/design/images/replica.png and /dev/null differ
diff --git a/doc/design/images/two_phase_commit.png b/doc/design/images/two_phase_commit.png
deleted file mode 100644
index ef6f7317bd440cc7d9fe08fcbbf2b7a542f99049..0000000000000000000000000000000000000000
Binary files a/doc/design/images/two_phase_commit.png and /dev/null differ
diff --git a/doc/design/images/asgd.gif b/doc/fluid/design/algorithm/images/asgd.gif
similarity index 100%
rename from doc/design/images/asgd.gif
rename to doc/fluid/design/algorithm/images/asgd.gif
diff --git a/doc/design/images/theta_star.gif b/doc/fluid/design/algorithm/images/theta_star.gif
similarity index 100%
rename from doc/design/images/theta_star.gif
rename to doc/fluid/design/algorithm/images/theta_star.gif
diff --git a/doc/design/parameter_average.md b/doc/fluid/design/algorithm/parameter_average.md
similarity index 100%
rename from doc/design/parameter_average.md
rename to doc/fluid/design/algorithm/parameter_average.md
diff --git a/doc/design/build_system/README.md b/doc/fluid/design/concepts/README.md
similarity index 100%
rename from doc/design/build_system/README.md
rename to doc/fluid/design/concepts/README.md
diff --git a/doc/design/block.md b/doc/fluid/design/concepts/block.md
similarity index 100%
rename from doc/design/block.md
rename to doc/fluid/design/concepts/block.md
diff --git a/doc/design/cpp_data_feeding.md b/doc/fluid/design/concepts/cpp_data_feeding.md
similarity index 100%
rename from doc/design/cpp_data_feeding.md
rename to doc/fluid/design/concepts/cpp_data_feeding.md
diff --git a/doc/design/executor.md b/doc/fluid/design/concepts/executor.md
similarity index 100%
rename from doc/design/executor.md
rename to doc/fluid/design/concepts/executor.md
diff --git a/doc/design/functions_operators_layers.md b/doc/fluid/design/concepts/functions_operators_layers.md
similarity index 100%
rename from doc/design/functions_operators_layers.md
rename to doc/fluid/design/concepts/functions_operators_layers.md
diff --git a/paddle/fluid/framework/lod_tensor.md b/doc/fluid/design/concepts/lod_tensor.md
similarity index 100%
rename from paddle/fluid/framework/lod_tensor.md
rename to doc/fluid/design/concepts/lod_tensor.md
diff --git a/doc/design/program.md b/doc/fluid/design/concepts/program.md
similarity index 100%
rename from doc/design/program.md
rename to doc/fluid/design/concepts/program.md
diff --git a/doc/design/scope.md b/doc/fluid/design/concepts/scope.md
similarity index 100%
rename from doc/design/scope.md
rename to doc/fluid/design/concepts/scope.md
diff --git a/paddle/fluid/framework/tensor.md b/doc/fluid/design/concepts/tensor.md
similarity index 100%
rename from paddle/fluid/framework/tensor.md
rename to doc/fluid/design/concepts/tensor.md
diff --git a/doc/design/tensor_array.md b/doc/fluid/design/concepts/tensor_array.md
similarity index 100%
rename from doc/design/tensor_array.md
rename to doc/fluid/design/concepts/tensor_array.md
diff --git a/doc/design/var_desc.md b/doc/fluid/design/concepts/var_desc.md
similarity index 100%
rename from doc/design/var_desc.md
rename to doc/fluid/design/concepts/var_desc.md
diff --git a/paddle/fluid/framework/variable.md b/doc/fluid/design/concepts/variable.md
similarity index 100%
rename from paddle/fluid/framework/variable.md
rename to doc/fluid/design/concepts/variable.md
diff --git a/doc/design/concurrent_programming.md b/doc/fluid/design/concurrent/concurrent_programming.md
similarity index 100%
rename from doc/design/concurrent_programming.md
rename to doc/fluid/design/concurrent/concurrent_programming.md
diff --git a/doc/design/csp.md b/doc/fluid/design/concurrent/csp.md
similarity index 100%
rename from doc/design/csp.md
rename to doc/fluid/design/concurrent/csp.md
diff --git a/doc/design/parallel_do.md b/doc/fluid/design/concurrent/parallel_do.md
similarity index 100%
rename from doc/design/parallel_do.md
rename to doc/fluid/design/concurrent/parallel_do.md
diff --git a/doc/design/float16.md b/doc/fluid/design/data_type/float16.md
similarity index 100%
rename from doc/design/float16.md
rename to doc/fluid/design/data_type/float16.md
diff --git a/doc/fluid/design/dist_train/distributed_architecture.md b/doc/fluid/design/dist_train/distributed_architecture.md
index b32b00ec25269bc909b0206ffa622b5d63711155..a405cb6aaf80b9d2e8a1a9c774ca85cc7e62bbab 100644
--- a/doc/fluid/design/dist_train/distributed_architecture.md
+++ b/doc/fluid/design/dist_train/distributed_architecture.md
@@ -155,7 +155,7 @@ Cluster environment.
`RemoteExecutor.run` sends the `ProgramDesc` and
-[TrainingJob](https://github.com/PaddlePaddle/cloud/blob/develop/doc/autoscale/README.md#training-job-resource)
+[TrainingJob](https://github.com/PaddlePaddle/cloud/blob/unreleased-tpr/doc/autoscale/README.md#training-job-resource)
to a server in the cluster which executes `RemoteExecutor.listen`. This server is responsible
to start the final Kubernetes Jobs to run the different role of `ProgramDesc` from `ConfigMap`.
diff --git a/doc/design/ops/images/2_level_rnn.dot b/doc/fluid/design/dynamic_rnn/2_level_rnn.dot
similarity index 100%
rename from doc/design/ops/images/2_level_rnn.dot
rename to doc/fluid/design/dynamic_rnn/2_level_rnn.dot
diff --git a/doc/design/ops/images/2_level_rnn.png b/doc/fluid/design/dynamic_rnn/2_level_rnn.png
similarity index 100%
rename from doc/design/ops/images/2_level_rnn.png
rename to doc/fluid/design/dynamic_rnn/2_level_rnn.png
diff --git a/doc/design/ops/images/rnn.dot b/doc/fluid/design/dynamic_rnn/rnn.dot
similarity index 100%
rename from doc/design/ops/images/rnn.dot
rename to doc/fluid/design/dynamic_rnn/rnn.dot
diff --git a/doc/design/ops/images/rnn.jpg b/doc/fluid/design/dynamic_rnn/rnn.jpg
similarity index 100%
rename from doc/design/ops/images/rnn.jpg
rename to doc/fluid/design/dynamic_rnn/rnn.jpg
diff --git a/doc/design/ops/rnn.md b/doc/fluid/design/dynamic_rnn/rnn.md
similarity index 100%
rename from doc/design/ops/rnn.md
rename to doc/fluid/design/dynamic_rnn/rnn.md
diff --git a/doc/design/ops/images/rnn.png b/doc/fluid/design/dynamic_rnn/rnn.png
similarity index 100%
rename from doc/design/ops/images/rnn.png
rename to doc/fluid/design/dynamic_rnn/rnn.png
diff --git a/doc/design/ops/images/rnn_2level_data.dot b/doc/fluid/design/dynamic_rnn/rnn_2level_data.dot
similarity index 100%
rename from doc/design/ops/images/rnn_2level_data.dot
rename to doc/fluid/design/dynamic_rnn/rnn_2level_data.dot
diff --git a/doc/design/ops/images/rnn_2level_data.png b/doc/fluid/design/dynamic_rnn/rnn_2level_data.png
similarity index 100%
rename from doc/design/ops/images/rnn_2level_data.png
rename to doc/fluid/design/dynamic_rnn/rnn_2level_data.png
diff --git a/paddle/fluid/operators/op_documentation/rnn_design.md b/doc/fluid/design/dynamic_rnn/rnn_design.md
similarity index 100%
rename from paddle/fluid/operators/op_documentation/rnn_design.md
rename to doc/fluid/design/dynamic_rnn/rnn_design.md
diff --git a/doc/design/if_else_op.md b/doc/fluid/design/execution/if_else_op.md
similarity index 100%
rename from doc/design/if_else_op.md
rename to doc/fluid/design/execution/if_else_op.md
diff --git a/doc/design/switch.md b/doc/fluid/design/execution/switch.md
similarity index 100%
rename from doc/design/switch.md
rename to doc/fluid/design/execution/switch.md
diff --git a/doc/design/multi_language_interface/00.why_plain_c.md b/doc/fluid/design/interface/00.why_plain_c.md
similarity index 100%
rename from doc/design/multi_language_interface/00.why_plain_c.md
rename to doc/fluid/design/interface/00.why_plain_c.md
diff --git a/doc/design/multi_language_interface/01.inference_implementation.md b/doc/fluid/design/interface/01.inference_implementation.md
similarity index 100%
rename from doc/design/multi_language_interface/01.inference_implementation.md
rename to doc/fluid/design/interface/01.inference_implementation.md
diff --git a/paddle/fluid/memory/README.md b/doc/fluid/design/memory/README.md
similarity index 100%
rename from paddle/fluid/memory/README.md
rename to doc/fluid/design/memory/README.md
diff --git a/doc/design/images/control_flow_graph.png b/doc/fluid/design/memory/images/control_flow_graph.png
similarity index 100%
rename from doc/design/images/control_flow_graph.png
rename to doc/fluid/design/memory/images/control_flow_graph.png
diff --git a/doc/design/images/dataflow_equations.png b/doc/fluid/design/memory/images/dataflow_equations.png
similarity index 100%
rename from doc/design/images/dataflow_equations.png
rename to doc/fluid/design/memory/images/dataflow_equations.png
diff --git a/doc/design/images/deep_learning.png b/doc/fluid/design/memory/images/deep_learning.png
similarity index 100%
rename from doc/design/images/deep_learning.png
rename to doc/fluid/design/memory/images/deep_learning.png
diff --git a/doc/design/memory_optimization.md b/doc/fluid/design/memory/memory_optimization.md
similarity index 100%
rename from doc/design/memory_optimization.md
rename to doc/fluid/design/memory/memory_optimization.md
diff --git a/doc/design/backward.md b/doc/fluid/design/modules/backward.md
similarity index 100%
rename from doc/design/backward.md
rename to doc/fluid/design/modules/backward.md
diff --git a/paddle/fluid/operators/op_documentation/batch_norm_op.md b/doc/fluid/design/modules/batch_norm_op.md
similarity index 100%
rename from paddle/fluid/operators/op_documentation/batch_norm_op.md
rename to doc/fluid/design/modules/batch_norm_op.md
diff --git a/doc/design/evaluator.md b/doc/fluid/design/modules/evaluator.md
similarity index 100%
rename from doc/design/evaluator.md
rename to doc/fluid/design/modules/evaluator.md
diff --git a/paddle/fluid/operators/images/batch_norm_fork.dot b/doc/fluid/design/modules/images/batch_norm_fork.dot
similarity index 100%
rename from paddle/fluid/operators/images/batch_norm_fork.dot
rename to doc/fluid/design/modules/images/batch_norm_fork.dot
diff --git a/paddle/fluid/operators/images/batch_norm_fork.png b/doc/fluid/design/modules/images/batch_norm_fork.png
similarity index 100%
rename from paddle/fluid/operators/images/batch_norm_fork.png
rename to doc/fluid/design/modules/images/batch_norm_fork.png
diff --git a/paddle/fluid/operators/images/batch_norm_op_kernel.png b/doc/fluid/design/modules/images/batch_norm_op_kernel.png
similarity index 100%
rename from paddle/fluid/operators/images/batch_norm_op_kernel.png
rename to doc/fluid/design/modules/images/batch_norm_op_kernel.png
diff --git a/doc/design/images/feed_forward.png b/doc/fluid/design/modules/images/feed_forward.png
similarity index 100%
rename from doc/design/images/feed_forward.png
rename to doc/fluid/design/modules/images/feed_forward.png
diff --git a/doc/design/images/feed_forward_regularized.png b/doc/fluid/design/modules/images/feed_forward_regularized.png
similarity index 100%
rename from doc/design/images/feed_forward_regularized.png
rename to doc/fluid/design/modules/images/feed_forward_regularized.png
diff --git a/doc/design/images/l1_regularization.png b/doc/fluid/design/modules/images/l1_regularization.png
similarity index 100%
rename from doc/design/images/l1_regularization.png
rename to doc/fluid/design/modules/images/l1_regularization.png
diff --git a/doc/design/images/l2_regularization.png b/doc/fluid/design/modules/images/l2_regularization.png
similarity index 100%
rename from doc/design/images/l2_regularization.png
rename to doc/fluid/design/modules/images/l2_regularization.png
diff --git a/doc/design/images/loss_equation.png b/doc/fluid/design/modules/images/loss_equation.png
similarity index 100%
rename from doc/design/images/loss_equation.png
rename to doc/fluid/design/modules/images/loss_equation.png
diff --git a/doc/design/infer_var_type.md b/doc/fluid/design/modules/infer_var_type.md
similarity index 100%
rename from doc/design/infer_var_type.md
rename to doc/fluid/design/modules/infer_var_type.md
diff --git a/paddle/fluid/operators/op_documentation/net_op_design.md b/doc/fluid/design/modules/net_op_design.md
similarity index 100%
rename from paddle/fluid/operators/op_documentation/net_op_design.md
rename to doc/fluid/design/modules/net_op_design.md
diff --git a/doc/design/optimizer.md b/doc/fluid/design/modules/optimizer.md
similarity index 100%
rename from doc/design/optimizer.md
rename to doc/fluid/design/modules/optimizer.md
diff --git a/doc/design/prune.md b/doc/fluid/design/modules/prune.md
similarity index 100%
rename from doc/design/prune.md
rename to doc/fluid/design/modules/prune.md
diff --git a/doc/design/python_api.md b/doc/fluid/design/modules/python_api.md
similarity index 100%
rename from doc/design/python_api.md
rename to doc/fluid/design/modules/python_api.md
diff --git a/doc/design/register_grad_op.md b/doc/fluid/design/modules/register_grad_op.md
similarity index 100%
rename from doc/design/register_grad_op.md
rename to doc/fluid/design/modules/register_grad_op.md
diff --git a/doc/design/regularization.md b/doc/fluid/design/modules/regularization.md
similarity index 100%
rename from doc/design/regularization.md
rename to doc/fluid/design/modules/regularization.md
diff --git a/doc/design/selected_rows.md b/doc/fluid/design/modules/selected_rows.md
similarity index 100%
rename from doc/design/selected_rows.md
rename to doc/fluid/design/modules/selected_rows.md
diff --git a/doc/design/api.md b/doc/fluid/design/motivation/api.md
similarity index 100%
rename from doc/design/api.md
rename to doc/fluid/design/motivation/api.md
diff --git a/doc/design/fluid-compiler.graffle b/doc/fluid/design/motivation/fluid-compiler.graffle
similarity index 100%
rename from doc/design/fluid-compiler.graffle
rename to doc/fluid/design/motivation/fluid-compiler.graffle
diff --git a/doc/design/fluid-compiler.png b/doc/fluid/design/motivation/fluid-compiler.png
similarity index 100%
rename from doc/design/fluid-compiler.png
rename to doc/fluid/design/motivation/fluid-compiler.png
diff --git a/doc/design/fluid.md b/doc/fluid/design/motivation/fluid.md
similarity index 100%
rename from doc/design/fluid.md
rename to doc/fluid/design/motivation/fluid.md
diff --git a/doc/design/fluid_compiler.md b/doc/fluid/design/motivation/fluid_compiler.md
similarity index 100%
rename from doc/design/fluid_compiler.md
rename to doc/fluid/design/motivation/fluid_compiler.md
diff --git a/doc/design/refactorization.md b/doc/fluid/design/motivation/refactorization.md
similarity index 100%
rename from doc/design/refactorization.md
rename to doc/fluid/design/motivation/refactorization.md
diff --git a/doc/design/kernel_hint_design.md b/doc/fluid/design/muti_devices/kernel_hint_design.md
similarity index 100%
rename from doc/design/kernel_hint_design.md
rename to doc/fluid/design/muti_devices/kernel_hint_design.md
diff --git a/doc/design/kernel_selection.md b/doc/fluid/design/muti_devices/kernel_selection.md
similarity index 100%
rename from doc/design/kernel_selection.md
rename to doc/fluid/design/muti_devices/kernel_selection.md
diff --git a/doc/design/operator_kernel_type.md b/doc/fluid/design/muti_devices/operator_kernel_type.md
similarity index 100%
rename from doc/design/operator_kernel_type.md
rename to doc/fluid/design/muti_devices/operator_kernel_type.md
diff --git a/doc/design/speech/deep_speech_2.md b/doc/fluid/design/network/deep_speech_2.md
similarity index 98%
rename from doc/design/speech/deep_speech_2.md
rename to doc/fluid/design/network/deep_speech_2.md
index cfdc4d6df04344c70d3334626bd38eca997c31ff..af0c6ef36feba9e0239e7a5f81a8dc9108b2471a 100644
--- a/doc/design/speech/deep_speech_2.md
+++ b/doc/fluid/design/network/deep_speech_2.md
@@ -94,7 +94,7 @@ The classical DS2 network contains 15 layers (from bottom to top):
- **One** CTC-loss layer
-
+
Figure 1. Archetecture of Deep Speech 2 Network.
@@ -141,7 +141,7 @@ TODO by Assignees
### Beam Search with CTC and LM
-
+
Figure 2. Algorithm for CTC Beam Search Decoder.
diff --git a/doc/design/ops/images/LOD-and-shape-changes-during-decoding.jpg b/doc/fluid/design/network/images/LOD-and-shape-changes-during-decoding.jpg
similarity index 100%
rename from doc/design/ops/images/LOD-and-shape-changes-during-decoding.jpg
rename to doc/fluid/design/network/images/LOD-and-shape-changes-during-decoding.jpg
diff --git a/doc/design/speech/image/beam_search.png b/doc/fluid/design/network/images/beam_search.png
similarity index 100%
rename from doc/design/speech/image/beam_search.png
rename to doc/fluid/design/network/images/beam_search.png
diff --git a/doc/design/speech/image/ds2_network.png b/doc/fluid/design/network/images/ds2_network.png
similarity index 100%
rename from doc/design/speech/image/ds2_network.png
rename to doc/fluid/design/network/images/ds2_network.png
diff --git a/doc/design/ops/sequence_decoder.md b/doc/fluid/design/network/sequence_decoder.md
similarity index 100%
rename from doc/design/ops/sequence_decoder.md
rename to doc/fluid/design/network/sequence_decoder.md
diff --git a/doc/design/auto_gradient_check.md b/doc/fluid/design/others/auto_gradient_check.md
similarity index 100%
rename from doc/design/auto_gradient_check.md
rename to doc/fluid/design/others/auto_gradient_check.md
diff --git a/doc/design/dcgan.png b/doc/fluid/design/others/dcgan.png
similarity index 100%
rename from doc/design/dcgan.png
rename to doc/fluid/design/others/dcgan.png
diff --git a/doc/design/gan_api.md b/doc/fluid/design/others/gan_api.md
similarity index 100%
rename from doc/design/gan_api.md
rename to doc/fluid/design/others/gan_api.md
diff --git a/doc/design/graph.md b/doc/fluid/design/others/graph.md
similarity index 100%
rename from doc/design/graph.md
rename to doc/fluid/design/others/graph.md
diff --git a/doc/design/graph_survey.md b/doc/fluid/design/others/graph_survey.md
similarity index 100%
rename from doc/design/graph_survey.md
rename to doc/fluid/design/others/graph_survey.md
diff --git a/doc/design/images/graph_construction_example.bash b/doc/fluid/design/others/images/graph_construction_example.bash
similarity index 100%
rename from doc/design/images/graph_construction_example.bash
rename to doc/fluid/design/others/images/graph_construction_example.bash
diff --git a/doc/design/images/graph_construction_example.dot b/doc/fluid/design/others/images/graph_construction_example.dot
similarity index 100%
rename from doc/design/images/graph_construction_example.dot
rename to doc/fluid/design/others/images/graph_construction_example.dot
diff --git a/doc/design/images/graph_construction_example_all.png b/doc/fluid/design/others/images/graph_construction_example_all.png
similarity index 100%
rename from doc/design/images/graph_construction_example_all.png
rename to doc/fluid/design/others/images/graph_construction_example_all.png
diff --git a/doc/design/images/graph_construction_example_forward_backward.png b/doc/fluid/design/others/images/graph_construction_example_forward_backward.png
similarity index 100%
rename from doc/design/images/graph_construction_example_forward_backward.png
rename to doc/fluid/design/others/images/graph_construction_example_forward_backward.png
diff --git a/doc/design/images/graph_construction_example_forward_only.png b/doc/fluid/design/others/images/graph_construction_example_forward_only.png
similarity index 100%
rename from doc/design/images/graph_construction_example_forward_only.png
rename to doc/fluid/design/others/images/graph_construction_example_forward_only.png
diff --git a/doc/design/parameters_in_cpp.md b/doc/fluid/design/others/parameters_in_cpp.md
similarity index 100%
rename from doc/design/parameters_in_cpp.md
rename to doc/fluid/design/others/parameters_in_cpp.md
diff --git a/doc/design/simple_op_design.md b/doc/fluid/design/others/simple_op_design.md
similarity index 100%
rename from doc/design/simple_op_design.md
rename to doc/fluid/design/others/simple_op_design.md
diff --git a/doc/design/test.dot b/doc/fluid/design/others/test.dot
similarity index 100%
rename from doc/design/test.dot
rename to doc/fluid/design/others/test.dot
diff --git a/doc/design/test.dot.png b/doc/fluid/design/others/test.dot.png
similarity index 100%
rename from doc/design/test.dot.png
rename to doc/fluid/design/others/test.dot.png
diff --git a/doc/fluid/dev/api_doc_std_cn.md b/doc/fluid/dev/api_doc_std_cn.md
new file mode 100644
index 0000000000000000000000000000000000000000..5596b2653ae6ed9917f77dad08f926bcb1fb3419
--- /dev/null
+++ b/doc/fluid/dev/api_doc_std_cn.md
@@ -0,0 +1,220 @@
+# API注释撰写标准
+
+- [API注释模块](#API注释模块)
+- [格式及示例](#格式及示例)
+- [完整示例](#完整示例)
+
+
+## API注释模块
+
+API文档须包含以下几个模块(排列顺序为文档撰写顺序):
+
+- Python API Definition
+
+ API的代码定义。
+
+- Function Description
+
+ API的功能描述。描述该API的含义、作用或对输入所做的操作,及参考文献和对应链接(如果有),必要时给出公式,并解释公式中关键变量的含义。
+
+- Args Description
+
+ API参数介绍。按代码定义中的参数顺序逐个介绍,介绍内容包含数据类型、默认值(如果有)、含义等。
+
+- Returns
+
+ API返回值介绍。介绍返回值含义,必要时给出对应的形状。若返回值为包含多个参数的tuple,则按顺序逐个介绍各参数。
+
+- Raises(如果有)
+
+ 可能抛出的异常或错误及可能的产生原因,当可能抛出多种异常或错误时应分条列出。
+
+- Note(如果有)
+
+ 注意事项。当有多条注意事项时,应分条列出。
+
+- Examples
+
+ API的使用示例。
+
+
+## 格式及示例
+
+API文档须使用reStructuredText格式撰写,该格式详情请参考[链接](http://sphinx-doc-zh.readthedocs.io/en/latest/rest.html)。API文档各模块的内容格式及示例如下(以下以fc为例进行说明):
+
+- Python API Definition
+
+ - 格式:
+
+ [Python API Definition]
+
+ - 示例
+
+ ```
+ fc(input,
+ size,
+ num_flatten_dims=1,
+ param_attr=None,
+ bias_attr=None,
+ act=None,
+ name=None,
+ main_program=None,
+ startup_program=None)
+ ```
+
+- Function Description
+
+ - 格式
+
+ 本模块应包含以下内容(排列顺序为文档撰写顺序):
+
+ [Function Description]
+
+ [Formula]
+
+ [Symbols' Descriptions if necessary]
+
+ [References if necessary]
+
+ - 示例
+
+ [Function Description]
+
+ ```
+ **Fully Connected Layer**
+
+ The fully connected layer can take multiple tensors as its inputs. It
+ creates a variable called weights for each input tensor, which represents
+ a fully connected weight matrix from each input unit to each output unit.
+ The fully connected layer multiplies each input tensor with its corresponding
+ weight to produce an output Tensor. If multiple input tensors are given,
+ the results of multiple multiplications will be summed up. If bias_attr is
+ not None, a bias variable will be created and added to the output. Finally,
+ if activation is not None, it will be applied to the output as well.
+ ```
+
+ [Formula]
+
+ ```
+ This process can be formulated as follows:
+
+ .. math::
+
+ Out = Act({\sum_{i=0}^{N-1}X_iW_i + b})
+ ```
+
+ [Symbols' Descriptions if necessary]
+
+ ```
+ In the above equation:
+
+ * :math:`N`: Number of the input.
+ * :math:`X_i`: The input tensor.
+ * :math:`W`: The weights created by this layer.
+ * :math:`b`: The bias parameter created by this layer (if needed).
+ * :math:`Act`: The activation function.
+ * :math:`Out`: The output tensor.
+ ```
+
+ [References if necessary]
+
+ 因fc没有必要列出的参考文献,故该内容省略。其他情况下需明确给出对应的参考文献和对应链接,以 layer_norm 为例:
+
+ ```
+ Refer to `Layer Normalization <https://arxiv.org/abs/1607.06450>`_ for more details.
+ ```
+
+
+- Args Description
+
+ - 格式
+
+ \[Arg's Name\][(Data Type, Default Value)][Description]
+
+ - 示例
+
+ fc的部分参数注释如下:
+
+ ```
+ Args:
+ input (Variable|list of Variable): The input tensor(s) of this layer, and the dimension of
+ the input tensor(s) is at least 2.
+ param_attr (ParamAttr|list of ParamAttr, default None): The parameter attribute for learnable
+ parameters/weights of this layer.
+ name (str, default None): The name of this layer.
+ ```
+
+- Returns
+
+ - 格式
+
+ [Name][Shape]
+
+ - 示例
+
+ ```
+ Returns:
+ A tensor variable storing the transformation result.
+ ```
+
+ 当返回值为包含多个参数的tuple时,应按顺序逐个介绍各参数,以dynamic_lstm为例:
+
+ ```
+ Returns:
+ A tuple containing:
+ The hidden state of LSTM whose shape is (T X D).
+ The cell state of LSTM whose shape is (T X D).
+ ```
+
+- Raises
+
+ - 格式
+
+ [Exception Type][Condition]
+
+ - 示例
+
+ ```
+ Raises:
+ ValueError: If the rank of the input is less than 2.
+ ```
+
+- Note
+
+ - 格式
+
+ [Note]
+
+ - 示例
+
+ fc没有注意事项,故该模块省略不写。如有注意事项应明确给出,当有多条注意事项,须分条列出,以scaled\_dot\_product\_attention为例:
+
+ ```
+ Note:
+ 1. When num_heads > 1, three linear projections are learned respectively
+ to map input queries, keys and values into queries', keys' and values'.
+ queries', keys' and values' have the same shapes with queries, keys
+ and values.
+ 2. When num_heads == 1, scaled_dot_product_attention has no learnable
+ parameters.
+ ```
+
+- Examples
+
+ - 格式
+
+ \[Python Code Snippet]
+
+ - 示例
+
+ ```
+ Examples:
+ .. code-block:: python
+
+ data = fluid.layers.data(name="data", shape=[32, 32], dtype="float32")
+ fc = fluid.layers.fc(input=data, size=1000, act="tanh")
+ ```
+
+## 完整示例
+
+fc 的完整注释见[示例](src/fc.py)。
diff --git a/doc/design/ci_build_whl.png b/doc/fluid/dev/ci_build_whl.png
similarity index 100%
rename from doc/design/ci_build_whl.png
rename to doc/fluid/dev/ci_build_whl.png
diff --git a/paddle/fluid/operators/op_documentation/name_convention.md b/doc/fluid/dev/name_convention.md
similarity index 100%
rename from paddle/fluid/operators/op_documentation/name_convention.md
rename to doc/fluid/dev/name_convention.md
diff --git a/paddle/fluid/operators/op_documentation/op_markdown_format.md b/doc/fluid/dev/op_markdown_format.md
similarity index 100%
rename from paddle/fluid/operators/op_documentation/op_markdown_format.md
rename to doc/fluid/dev/op_markdown_format.md
diff --git a/doc/design/releasing_process.md b/doc/fluid/dev/releasing_process.md
similarity index 100%
rename from doc/design/releasing_process.md
rename to doc/fluid/dev/releasing_process.md
diff --git a/doc/fluid/dev/src/fc.py b/doc/fluid/dev/src/fc.py
new file mode 100644
index 0000000000000000000000000000000000000000..3b074821cc2276a29b2a8639e82199fcf4d72020
--- /dev/null
+++ b/doc/fluid/dev/src/fc.py
@@ -0,0 +1,81 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+
+def fc(input,
+ size,
+ num_flatten_dims=1,
+ param_attr=None,
+ bias_attr=None,
+ act=None,
+ name=None):
+ """
+ **Fully Connected Layer**
+
+ The fully connected layer can take multiple tensors as its inputs. It
+ creates a variable called weights for each input tensor, which represents
+ a fully connected weight matrix from each input unit to each output unit.
+    The fully connected layer multiplies each input tensor with its corresponding
+ weight to produce an output Tensor. If multiple input tensors are given,
+    the results of multiple multiplications will be summed up. If bias_attr is
+ not None, a bias variable will be created and added to the output. Finally,
+ if activation is not None, it will be applied to the output as well.
+
+ This process can be formulated as follows:
+
+ .. math::
+
+ Out = Act({\sum_{i=0}^{N-1}X_iW_i + b})
+
+ In the above equation:
+
+ * :math:`N`: Number of the input.
+ * :math:`X_i`: The input tensor.
+ * :math:`W`: The weights created by this layer.
+ * :math:`b`: The bias parameter created by this layer (if needed).
+ * :math:`Act`: The activation function.
+ * :math:`Out`: The output tensor.
+
+ Args:
+ input (Variable|list of Variable): The input tensor(s) of this layer, and the dimension of
+ the input tensor(s) is at least 2.
+ size(int): The number of output units in this layer.
+ num_flatten_dims (int, default 1): The fc layer can accept an input tensor with more than
+ two dimensions. If this happens, the multidimensional tensor will first be flattened
+ into a 2-dimensional matrix. The parameter `num_flatten_dims` determines how the input
+ tensor is flattened: the first `num_flatten_dims` (inclusive, index starts from 1)
+            dimensions will be flattened to form the first dimension of the final matrix (height of
+ the matrix), and the rest `rank(X) - num_flatten_dims` dimensions are flattened to
+ form the second dimension of the final matrix (width of the matrix). For example, suppose
+            `X` is a 5-dimensional tensor with a shape [2, 3, 4, 5, 6], and `num_flatten_dims` = 3.
+ Then, the flattened matrix will have a shape [2 x 3 x 4, 5 x 6] = [24, 30].
+ param_attr (ParamAttr|list of ParamAttr, default None): The parameter attribute for learnable
+ parameters/weights of this layer.
+ bias_attr (ParamAttr|list of ParamAttr, default None): The parameter attribute for the bias
+ of this layer. If it is set to None, no bias will be added to the output units.
+ act (str, default None): Activation to be applied to the output of this layer.
+ name (str, default None): The name of this layer.
+
+ Returns:
+ A tensor variable storing the transformation result.
+
+ Raises:
+ ValueError: If rank of the input tensor is less than 2.
+
+ Examples:
+ .. code-block:: python
+
+ data = fluid.layers.data(name="data", shape=[32, 32], dtype="float32")
+ fc = fluid.layers.fc(input=data, size=1000, act="tanh")
+ """
diff --git a/doc/design/support_new_device.md b/doc/fluid/dev/support_new_device.md
similarity index 100%
rename from doc/design/support_new_device.md
rename to doc/fluid/dev/support_new_device.md
diff --git a/doc/design/reader/README.md b/doc/fluid/getstarted/concepts/reader/README.md
similarity index 100%
rename from doc/design/reader/README.md
rename to doc/fluid/getstarted/concepts/reader/README.md
diff --git a/doc/design/model_format.md b/doc/fluid/getstarted/concepts/save_model/model_format.md
similarity index 100%
rename from doc/design/model_format.md
rename to doc/fluid/getstarted/concepts/save_model/model_format.md
diff --git a/doc/design/error_clip.md b/doc/fluid/howto/performance/error_clip.md
similarity index 100%
rename from doc/design/error_clip.md
rename to doc/fluid/howto/performance/error_clip.md
diff --git a/doc/design/images/profiler.png b/doc/fluid/howto/performance/images/profiler.png
similarity index 100%
rename from doc/design/images/profiler.png
rename to doc/fluid/howto/performance/images/profiler.png
diff --git a/doc/design/profiler.md b/doc/fluid/howto/performance/profiler.md
similarity index 100%
rename from doc/design/profiler.md
rename to doc/fluid/howto/performance/profiler.md
diff --git a/doc/design/images/multigpu_allreduce.graffle b/doc/fluid/howto/third_party/images/multigpu_allreduce.graffle
similarity index 100%
rename from doc/design/images/multigpu_allreduce.graffle
rename to doc/fluid/howto/third_party/images/multigpu_allreduce.graffle
diff --git a/doc/design/images/multigpu_allreduce.png b/doc/fluid/howto/third_party/images/multigpu_allreduce.png
similarity index 100%
rename from doc/design/images/multigpu_allreduce.png
rename to doc/fluid/howto/third_party/images/multigpu_allreduce.png
diff --git a/doc/design/images/multigpu_before_convert.graffle b/doc/fluid/howto/third_party/images/multigpu_before_convert.graffle
similarity index 100%
rename from doc/design/images/multigpu_before_convert.graffle
rename to doc/fluid/howto/third_party/images/multigpu_before_convert.graffle
diff --git a/doc/design/images/multigpu_before_convert.png b/doc/fluid/howto/third_party/images/multigpu_before_convert.png
similarity index 100%
rename from doc/design/images/multigpu_before_convert.png
rename to doc/fluid/howto/third_party/images/multigpu_before_convert.png
diff --git a/doc/design/mkl/mkldnn_fluid.md b/doc/fluid/howto/third_party/mkldnn_fluid.md
similarity index 100%
rename from doc/design/mkl/mkldnn_fluid.md
rename to doc/fluid/howto/third_party/mkldnn_fluid.md
diff --git a/doc/design/paddle_nccl.md b/doc/fluid/howto/third_party/paddle_nccl.md
similarity index 100%
rename from doc/design/paddle_nccl.md
rename to doc/fluid/howto/third_party/paddle_nccl.md
diff --git a/doc/design/cluster_train/README.md b/doc/v2/design/cluster_train/README.md
similarity index 100%
rename from doc/design/cluster_train/README.md
rename to doc/v2/design/cluster_train/README.md
diff --git a/doc/design/cluster_train/checkpointing.md b/doc/v2/design/cluster_train/checkpointing.md
similarity index 100%
rename from doc/design/cluster_train/checkpointing.md
rename to doc/v2/design/cluster_train/checkpointing.md
diff --git a/doc/design/cluster_train/data_dispatch.md b/doc/v2/design/cluster_train/data_dispatch.md
similarity index 100%
rename from doc/design/cluster_train/data_dispatch.md
rename to doc/v2/design/cluster_train/data_dispatch.md
diff --git a/doc/design/cluster_train/large_model_dist_train.md b/doc/v2/design/cluster_train/large_model_dist_train.md
similarity index 100%
rename from doc/design/cluster_train/large_model_dist_train.md
rename to doc/v2/design/cluster_train/large_model_dist_train.md
diff --git a/doc/design/cluster_train/master_server.md b/doc/v2/design/cluster_train/master_server.md
similarity index 100%
rename from doc/design/cluster_train/master_server.md
rename to doc/v2/design/cluster_train/master_server.md
diff --git a/doc/design/cluster_train/pserver_client.md b/doc/v2/design/cluster_train/pserver_client.md
similarity index 100%
rename from doc/design/cluster_train/pserver_client.md
rename to doc/v2/design/cluster_train/pserver_client.md
diff --git a/doc/design/cluster_train/remote_parameter_updater.md b/doc/v2/design/cluster_train/remote_parameter_updater.md
similarity index 100%
rename from doc/design/cluster_train/remote_parameter_updater.md
rename to doc/v2/design/cluster_train/remote_parameter_updater.md
diff --git a/doc/design/cluster_train/save_model.md b/doc/v2/design/cluster_train/save_model.md
similarity index 100%
rename from doc/design/cluster_train/save_model.md
rename to doc/v2/design/cluster_train/save_model.md
diff --git a/doc/design/cluster_train/src/checkpointing.png b/doc/v2/design/cluster_train/src/checkpointing.png
similarity index 100%
rename from doc/design/cluster_train/src/checkpointing.png
rename to doc/v2/design/cluster_train/src/checkpointing.png
diff --git a/doc/design/cluster_train/src/data_dispatch.png b/doc/v2/design/cluster_train/src/data_dispatch.png
similarity index 100%
rename from doc/design/cluster_train/src/data_dispatch.png
rename to doc/v2/design/cluster_train/src/data_dispatch.png
diff --git a/doc/design/cluster_train/src/dataset.graffle b/doc/v2/design/cluster_train/src/dataset.graffle
similarity index 100%
rename from doc/design/cluster_train/src/dataset.graffle
rename to doc/v2/design/cluster_train/src/dataset.graffle
diff --git a/doc/design/cluster_train/src/dataset.png b/doc/v2/design/cluster_train/src/dataset.png
similarity index 100%
rename from doc/design/cluster_train/src/dataset.png
rename to doc/v2/design/cluster_train/src/dataset.png
diff --git a/doc/design/cluster_train/src/file_storage.graffle b/doc/v2/design/cluster_train/src/file_storage.graffle
similarity index 100%
rename from doc/design/cluster_train/src/file_storage.graffle
rename to doc/v2/design/cluster_train/src/file_storage.graffle
diff --git a/doc/design/cluster_train/src/file_storage.png b/doc/v2/design/cluster_train/src/file_storage.png
similarity index 100%
rename from doc/design/cluster_train/src/file_storage.png
rename to doc/v2/design/cluster_train/src/file_storage.png
diff --git a/doc/design/cluster_train/src/init_lock.graffle b/doc/v2/design/cluster_train/src/init_lock.graffle
similarity index 100%
rename from doc/design/cluster_train/src/init_lock.graffle
rename to doc/v2/design/cluster_train/src/init_lock.graffle
diff --git a/doc/design/cluster_train/src/init_lock.png b/doc/v2/design/cluster_train/src/init_lock.png
similarity index 100%
rename from doc/design/cluster_train/src/init_lock.png
rename to doc/v2/design/cluster_train/src/init_lock.png
diff --git a/doc/design/cluster_train/src/paddle-cloud-in-data-center.png b/doc/v2/design/cluster_train/src/paddle-cloud-in-data-center.png
similarity index 100%
rename from doc/design/cluster_train/src/paddle-cloud-in-data-center.png
rename to doc/v2/design/cluster_train/src/paddle-cloud-in-data-center.png
diff --git a/doc/design/cluster_train/src/paddle-etcd.graffle b/doc/v2/design/cluster_train/src/paddle-etcd.graffle
similarity index 100%
rename from doc/design/cluster_train/src/paddle-etcd.graffle
rename to doc/v2/design/cluster_train/src/paddle-etcd.graffle
diff --git a/doc/design/cluster_train/src/paddle-etcd.png b/doc/v2/design/cluster_train/src/paddle-etcd.png
similarity index 100%
rename from doc/design/cluster_train/src/paddle-etcd.png
rename to doc/v2/design/cluster_train/src/paddle-etcd.png
diff --git a/doc/design/cluster_train/src/paddle-model-sharding.graffle b/doc/v2/design/cluster_train/src/paddle-model-sharding.graffle
similarity index 100%
rename from doc/design/cluster_train/src/paddle-model-sharding.graffle
rename to doc/v2/design/cluster_train/src/paddle-model-sharding.graffle
diff --git a/doc/design/cluster_train/src/paddle-model-sharding.png b/doc/v2/design/cluster_train/src/paddle-model-sharding.png
similarity index 100%
rename from doc/design/cluster_train/src/paddle-model-sharding.png
rename to doc/v2/design/cluster_train/src/paddle-model-sharding.png
diff --git a/doc/design/cluster_train/src/paddle-ps-0.png b/doc/v2/design/cluster_train/src/paddle-ps-0.png
similarity index 100%
rename from doc/design/cluster_train/src/paddle-ps-0.png
rename to doc/v2/design/cluster_train/src/paddle-ps-0.png
diff --git a/doc/design/cluster_train/src/paddle-ps-1.png b/doc/v2/design/cluster_train/src/paddle-ps-1.png
similarity index 100%
rename from doc/design/cluster_train/src/paddle-ps-1.png
rename to doc/v2/design/cluster_train/src/paddle-ps-1.png
diff --git a/doc/design/cluster_train/src/paddle-ps.graffle b/doc/v2/design/cluster_train/src/paddle-ps.graffle
similarity index 100%
rename from doc/design/cluster_train/src/paddle-ps.graffle
rename to doc/v2/design/cluster_train/src/paddle-ps.graffle
diff --git a/doc/design/cluster_train/src/paddle-task-queues.graffle b/doc/v2/design/cluster_train/src/paddle-task-queues.graffle
similarity index 100%
rename from doc/design/cluster_train/src/paddle-task-queues.graffle
rename to doc/v2/design/cluster_train/src/paddle-task-queues.graffle
diff --git a/doc/design/cluster_train/src/paddle-task-queues.png b/doc/v2/design/cluster_train/src/paddle-task-queues.png
similarity index 100%
rename from doc/design/cluster_train/src/paddle-task-queues.png
rename to doc/v2/design/cluster_train/src/paddle-task-queues.png
diff --git a/doc/design/cluster_train/src/paddle-task-states.graffle b/doc/v2/design/cluster_train/src/paddle-task-states.graffle
similarity index 100%
rename from doc/design/cluster_train/src/paddle-task-states.graffle
rename to doc/v2/design/cluster_train/src/paddle-task-states.graffle
diff --git a/doc/design/cluster_train/src/paddle-task-states.png b/doc/v2/design/cluster_train/src/paddle-task-states.png
similarity index 100%
rename from doc/design/cluster_train/src/paddle-task-states.png
rename to doc/v2/design/cluster_train/src/paddle-task-states.png
diff --git a/doc/design/cluster_train/src/pserver_init.graffle b/doc/v2/design/cluster_train/src/pserver_init.graffle
similarity index 100%
rename from doc/design/cluster_train/src/pserver_init.graffle
rename to doc/v2/design/cluster_train/src/pserver_init.graffle
diff --git a/doc/design/cluster_train/src/pserver_init.png b/doc/v2/design/cluster_train/src/pserver_init.png
similarity index 100%
rename from doc/design/cluster_train/src/pserver_init.png
rename to doc/v2/design/cluster_train/src/pserver_init.png
diff --git a/doc/design/cluster_train/src/submit-job.graffle b/doc/v2/design/cluster_train/src/submit-job.graffle
similarity index 100%
rename from doc/design/cluster_train/src/submit-job.graffle
rename to doc/v2/design/cluster_train/src/submit-job.graffle
diff --git a/doc/design/cluster_train/src/submit-job.png b/doc/v2/design/cluster_train/src/submit-job.png
similarity index 100%
rename from doc/design/cluster_train/src/submit-job.png
rename to doc/v2/design/cluster_train/src/submit-job.png
diff --git a/doc/design/cluster_train/src/trainer.graffle b/doc/v2/design/cluster_train/src/trainer.graffle
similarity index 100%
rename from doc/design/cluster_train/src/trainer.graffle
rename to doc/v2/design/cluster_train/src/trainer.graffle
diff --git a/doc/design/cluster_train/src/trainer.png b/doc/v2/design/cluster_train/src/trainer.png
similarity index 100%
rename from doc/design/cluster_train/src/trainer.png
rename to doc/v2/design/cluster_train/src/trainer.png
diff --git a/doc/design/cluster_train/submit-job.md b/doc/v2/design/cluster_train/submit-job.md
similarity index 100%
rename from doc/design/cluster_train/submit-job.md
rename to doc/v2/design/cluster_train/submit-job.md
diff --git a/doc/design/mkl/image/engine.png b/doc/v2/design/mkl/image/engine.png
similarity index 100%
rename from doc/design/mkl/image/engine.png
rename to doc/v2/design/mkl/image/engine.png
diff --git a/doc/design/mkl/image/gradients.png b/doc/v2/design/mkl/image/gradients.png
similarity index 100%
rename from doc/design/mkl/image/gradients.png
rename to doc/v2/design/mkl/image/gradients.png
diff --git a/doc/design/mkl/image/layers.png b/doc/v2/design/mkl/image/layers.png
similarity index 100%
rename from doc/design/mkl/image/layers.png
rename to doc/v2/design/mkl/image/layers.png
diff --git a/doc/design/mkl/image/matrix.png b/doc/v2/design/mkl/image/matrix.png
similarity index 100%
rename from doc/design/mkl/image/matrix.png
rename to doc/v2/design/mkl/image/matrix.png
diff --git a/doc/design/mkl/image/overview.png b/doc/v2/design/mkl/image/overview.png
similarity index 100%
rename from doc/design/mkl/image/overview.png
rename to doc/v2/design/mkl/image/overview.png
diff --git a/doc/design/mkl/mkl_packed.md b/doc/v2/design/mkl/mkl_packed.md
similarity index 100%
rename from doc/design/mkl/mkl_packed.md
rename to doc/v2/design/mkl/mkl_packed.md
diff --git a/doc/design/mkl/mkldnn.md b/doc/v2/design/mkl/mkldnn.md
similarity index 100%
rename from doc/design/mkl/mkldnn.md
rename to doc/v2/design/mkl/mkldnn.md
diff --git a/doc/v2/dev/new_layer_cn.rst b/doc/v2/dev/new_layer_cn.rst
index 0ded1c262adad44f4df000ef2933c7b68050f2fc..3115654b2bd87995fa63bb7828fd1b3039aea8cc 100644
--- a/doc/v2/dev/new_layer_cn.rst
+++ b/doc/v2/dev/new_layer_cn.rst
@@ -16,7 +16,7 @@
下图是一个全连接层的示意图。在全连接层中,每个输出节点都连接到所有的输入节点上。
-.. image:: FullyConnected.jpg
+.. image:: src/FullyConnected.jpg
:align: center
:scale: 60 %
diff --git a/doc/v2/dev/new_layer_en.rst b/doc/v2/dev/new_layer_en.rst
index 110a9fb38f890a766bb4480e91feb22d3b0838a5..b05bb45f11eb253dfb87d6283c29ec6689394d22 100644
--- a/doc/v2/dev/new_layer_en.rst
+++ b/doc/v2/dev/new_layer_en.rst
@@ -16,7 +16,7 @@ First we need to derive equations of the *forward* and *backward* part of the la
The illustration of a fully connected layer is shown in the following figure. In a fully connected layer, all output nodes are connected to all the input nodes.
-.. image:: FullyConnected.jpg
+.. image:: src/FullyConnected.jpg
:align: center
:scale: 60 %
diff --git a/doc/v2/dev/FullyConnected.jpg b/doc/v2/dev/src/FullyConnected.jpg
similarity index 100%
rename from doc/v2/dev/FullyConnected.jpg
rename to doc/v2/dev/src/FullyConnected.jpg
diff --git a/doc/v2/dev/src/doc_en.png b/doc/v2/dev/src/doc_en.png
new file mode 100644
index 0000000000000000000000000000000000000000..ed6b9178fba91a3bdf45ae797a9924f84146fbc8
Binary files /dev/null and b/doc/v2/dev/src/doc_en.png differ
diff --git a/doc/v2/dev/write_docs_cn.rst b/doc/v2/dev/write_docs_cn.rst
index f79769b810b91c6984016d95f40b89186bfb61b0..a055bb04c0c093c9159290067e5ccbd2525cd519 100644
--- a/doc/v2/dev/write_docs_cn.rst
+++ b/doc/v2/dev/write_docs_cn.rst
@@ -2,20 +2,19 @@
如何贡献文档
#############
-PaddlePaddle的文档包括英文文档 ``doc`` 和中文文档 ``doc_cn`` 两个部分。文档都是通过 `cmake`_ 驱动 `sphinx`_ 编译生成,生成后的文档分别存储在编译目录的 ``doc`` 和 ``doc_cn`` 两个子目录下。
-也可以利用PaddlePaddle 工具来编译文档,这个情况下所有的文件会存在整理过的的文件目录 .ppo_workspace/content 下
+PaddlePaddle的文档包括中英文两个部分。文档都是通过 ``cmake`` 驱动 ``sphinx`` 编译生成,也可以利用paddlepaddle.org工具来编译和预览文档。
如何构建文档
============
-PaddlePaddle的文档构建有三种方式。
+PaddlePaddle的文档构建有两种方式,分别为使用paddlepaddle.org工具和不使用paddlepaddle.org工具,两种方式都有各自的优点,前者方便预览,后者方便开发者进行调试。这两种方式中又分别有使用docker和不使用docker的两种构建方法。
使用PaddlePaddle.org工具
---------------
-这个是目前推荐的使用方法。除了可以自动编译文档,也可以直接在网页预览文档。
+------------------------
+这个是目前推荐的使用方法。除了可以自动编译文档,还可以直接在网页中预览文档,需要注意的是,采用后续说明的其它方式虽然也可以预览文档,但是文档的样式与官网文档是不一致的,使用PaddlePaddle.org工具进行编译才能产生与官网文档样式一致的预览效果。
-文件工具是使用Docker,需要在系统里先安装好Docker工具包。Docker安装请参考Docker的官网。安装好Docker之后及可用以下命令启动工具
+PaddlePaddle.org工具可以配合Docker使用,需要在系统里先安装好Docker工具包。Docker安装请参考 `Docker的官网 `_ 。安装好Docker之后即可用以下命令启动工具
.. code-block:: bash
@@ -35,7 +34,7 @@ PaddlePaddle的文档构建有三种方式。
之后再用网页连到http://localhost:8000就可以在网页上生成需要的文档
编译后的文件将被存储在工作目录 /.ppo_workspace/content。
-如果不想使用 Docker,你还可以通过运行Django框架直接激活工具的服务器。使用下面的命令来运行它。
+如果不想使用Docker,你还可以通过运行Django框架直接激活工具的服务器。使用下面的命令来运行它。
.. code-block:: bash
@@ -62,37 +61,46 @@ PaddlePaddle的文档构建有三种方式。
想了解更多PaddlePaddle.org工具的详细信息,可以 `点击这里 `_ 。
-使用Docker构建
---------------
+不使用PaddlePaddle.org工具
+--------------------------
使用Docker构建PaddlePaddle的文档,需要在系统里先安装好Docker工具包。Docker安装请参考 `Docker的官网 `_ 。安装好Docker之后可以使用源码目录下的脚本构建文档,即
-.. code-block:: bash
+[TBD]
- cd TO_YOUR_PADDLE_CLONE_PATH
- cd paddle/scripts/tools/build_docs
- sh build_docs.sh
+如果不想使用Docker,也可以使用以下命令直接构建PaddlePaddle文档,即
-编译完成之后,会在当前目录生成两个子目录\: doc(英文文档目录)和 doc_cn(中文文档目录)。
-打开浏览器访问对应目录下的index.html即可访问本地文档。
+.. code-block:: bash
-直接构建
---------
+ mkdir paddle
+ cd paddle
+ git clone https://github.com/PaddlePaddle/Paddle.git
+ mkdir -p build
+ cd build
+ cmake .. -DCMAKE_BUILD_TYPE=Release -DWITH_GPU=OFF -DWITH_MKL=OFF -DWITH_DOC=ON
-如果提示正确,可以执行以下命令编译生成文档,即
+ # 如果只需要构建使用文档,则执行以下命令
+ make -j $processors gen_proto_py
+ make -j $processors paddle_docs paddle_docs_cn
-.. code-block:: bash
+ # 如果只需要构建API,则执行以下命令
+ make -j $processors gen_proto_py framework_py_proto
+ make -j $processors copy_paddle_pybind
+ make -j $processors paddle_api_docs
+
+其中$processors代表启动和CPU核一样多的进程来并行编译,可以根据本机的CPU核数设置相应的值。
+
+编译完成后,进入 ``doc/v2`` 目录,如果选择构建文档则会在该目录下生成 ``cn/html/`` 、 ``en/html`` 两个子目录,选择构建API则会生成 ``api/en/html`` 目录,分别进入这些目录下,执行以下命令:
+
+.. code-block:: bash
- cd TO_YOUR_PADDLE_CLONE_PATH
- mkdir -p build
- cd build
- cmake .. -DCMAKE_BUILD_TYPE=Debug -DWITH_GPU=OFF -DWITH_MKL=OFF -DWITH_DOC=ON
- make gen_proto_py
- make paddle_docs paddle_docs_cn
+ python -m SimpleHTTPServer 8088
-编译完成之后,会在当前目录生成两个子目录\: doc(英文文档目录)和 doc_cn(中文文档目录)。
-打开浏览器访问对应目录下的index.html即可访问本地文档。
+在浏览器中输入http://localhost:8088就可以看到编译生成的中/英文的文档页面和英文的API页面,下图为生成的英文文档首页示例。注意,示例中由于使用了sphinx的原始主题,所以页面的风格与官网并不一致,但这并不影响开发者进行调试。
+.. image:: src/doc_en.png
+ :align: center
+ :scale: 60 %
如何书写文档
============
@@ -102,7 +110,7 @@ PaddlePaddle文档使用 `sphinx`_ 自动生成,用户可以参考sphinx教程
如何更新www.paddlepaddle.org
============================
-更新的文档以PR的形式提交到github中,提交方式参见 `贡献文档 `_ 。
+更新的文档以PR的形式提交到github中,提交方式参见 `如何贡献文档 `_ 。
目前PaddlePaddle的develop分支的文档是自动触发更新的,用户可以分别查看最新的 `中文文档 `_ 和
`英文文档 `_ 。
diff --git a/doc/v2/howto/cluster/index_en.rst b/doc/v2/howto/cluster/index_en.rst
index 2640a09dcc904619bc97c9bd3f3d81a9dc307663..c965d30d54e71339cf10d4b05f25e740c81adbf9 100644
--- a/doc/v2/howto/cluster/index_en.rst
+++ b/doc/v2/howto/cluster/index_en.rst
@@ -1,8 +1,7 @@
Distributed Training
====================
-In this section, we'll explain how to run distributed training jobs with PaddlePaddle on different types of clusters. The diagram below shows the main architecture of a distributed trainning job:
-
+The effectiveness of a deep learning model is often directly related to the scale of the data: the same model can generally achieve better results after the size of the dataset is increased. However, the data cannot fit on one single computer when its amount grows to a certain extent. At this point, using multiple computers for distributed training is a natural solution. In distributed training, the training data is divided into multiple shards, and the machines participating in the training each read their own shard of data and collaboratively update the parameters of the overall model.
.. image:: src/ps_en.png
:width: 500
@@ -10,13 +9,27 @@ In this section, we'll explain how to run distributed training jobs with PaddleP
- Trainer: each trainer reads the data shard, and train the neural network. Then the trainer will upload calculated "gradients" to parameter servers, and wait for parameters to be optimized on the parameter server side. When that finishes, the trainer download optimized parameters and continues its training.
- Parameter server: every parameter server stores part of the whole neural network model data. They will do optimization calculations when gradients are uploaded from trainers, and then send updated parameters to trainers.
-PaddlePaddle can support both synchronize stochastic gradient descent (SGD) and asynchronous SGD.
+The training of a neural network with synchronous stochastic gradient descent can be achieved through the cooperation of trainers and parameter servers.
+
+PaddlePaddle supports both synchronize stochastic gradient descent (SGD) and asynchronous SGD.
-When training with synchronize SGD, PaddlePaddle uses an internal "synchronize barrier" which makes gradients update and parameter download in strict order. On the other hand, asynchronous SGD won't wait for all trainers to finish upload at a single step, this will increase the parallelism of distributed training: parameter servers do not depend on each other, they'll do parameter optimization concurrently. Parameter servers will not wait for trainers, so trainers will also do their work concurrently. But asynchronous SGD will introduce more randomness and noises in the gradient.
+Before starting cluster training, you need to complete some preparations such as cluster configuration and PaddlePaddle installation. To understand how to configure the basic environment for distributed training, check the link below:
.. toctree::
:maxdepth: 1
preparations_en.md
+
+Cluster training has a large number of configurable parameters, such as the number of machines used, communication ports, etc. To learn how to configure the distributed training process by setting these startup parameters, check the link below:
+
+.. toctree::
+ :maxdepth: 1
+
cmd_argument_en.md
+
+PaddlePaddle is compatible with a variety of different clusters. Each type of cluster has its own advantages. To learn how to run PaddlePaddle on different types of clusters, check the link below:
+
+.. toctree::
+ :maxdepth: 1
+
multi_cluster/index_en.rst
diff --git a/paddle/CMakeLists.txt b/paddle/CMakeLists.txt
index a7b249d43bf3ad9924749d5e66618750f19d8bf7..d2a4b1335464f553a361728e64ed5ca177ca53da 100644
--- a/paddle/CMakeLists.txt
+++ b/paddle/CMakeLists.txt
@@ -1,27 +1,29 @@
-add_subdirectory(cuda)
-add_subdirectory(function)
-add_subdirectory(utils)
-add_subdirectory(math)
-add_subdirectory(gserver)
-add_subdirectory(parameter)
-add_subdirectory(testing)
-
-if(MOBILE_INFERENCE)
- add_subdirectory(capi)
-else()
- add_subdirectory(pserver)
- add_subdirectory(trainer)
- add_subdirectory(scripts)
+if(NOT WITH_FLUID)
+ add_subdirectory(cuda)
+ add_subdirectory(function)
+ add_subdirectory(utils)
+ add_subdirectory(math)
+ add_subdirectory(gserver)
+ add_subdirectory(parameter)
- if(WITH_C_API)
+ if(MOBILE_INFERENCE)
add_subdirectory(capi)
- endif()
+ else()
+ add_subdirectory(pserver)
+ add_subdirectory(trainer)
+ add_subdirectory(scripts)
- if(NOT ANDROID AND NOT IOS)
- add_subdirectory(fluid)
- endif()
+ if(WITH_C_API)
+ add_subdirectory(capi)
+ endif()
- if(WITH_SWIG_PY)
- add_subdirectory(api)
+ if(WITH_SWIG_PY)
+ add_subdirectory(api)
+ endif()
endif()
endif()
+
+add_subdirectory(testing)
+if(NOT MOBILE_INFERENCE AND NOT ANDROID AND NOT IOS)
+ add_subdirectory(fluid)
+endif()
diff --git a/paddle/fluid/framework/CMakeLists.txt b/paddle/fluid/framework/CMakeLists.txt
index 15e5574ecfd406b87db8370948352b7e736937ea..a4ea74a6d2fbc29dc33a6b57ee453f49ed36c7fa 100644
--- a/paddle/fluid/framework/CMakeLists.txt
+++ b/paddle/fluid/framework/CMakeLists.txt
@@ -103,4 +103,5 @@ cc_test(cow_ptr_tests SRCS details/cow_ptr_test.cc)
cc_test(channel_test SRCS channel_test.cc)
cc_test(tuple_test SRCS tuple_test.cc )
cc_test(concurrency_test SRCS concurrency_test.cc DEPS go_op channel_close_op channel_create_op
- channel_send_op channel_recv_op sum_op elementwise_add_op executor proto_desc)
+ channel_send_op channel_recv_op sum_op select_op elementwise_add_op compare_op
+ conditional_block_op while_op assign_op print_op executor proto_desc)
diff --git a/paddle/fluid/framework/channel.h b/paddle/fluid/framework/channel.h
index 9f8fb12098d622058a86f83c1c42a1feb1cfb2e2..adfaba26ace78f547161ad4029a741f3ca8a6764 100644
--- a/paddle/fluid/framework/channel.h
+++ b/paddle/fluid/framework/channel.h
@@ -15,23 +15,43 @@ limitations under the License. */
#pragma once
#include // for size_t
+#include
#include
#include "paddle/fluid/platform/enforce.h"
namespace paddle {
namespace framework {
+enum class ChannelAction {
+ SEND = 0,
+ RECEIVE = 1,
+ CLOSE = 2,
+};
+
// Channel is the abstract class of buffered and un-buffered channels.
template
class Channel {
public:
+ virtual bool CanSend() = 0;
+ virtual bool CanReceive() = 0;
virtual bool Send(T*) = 0;
virtual bool Receive(T*) = 0;
virtual size_t Cap() = 0;
virtual void Lock() = 0;
+
virtual void Unlock() = 0;
+ virtual bool IsClosed() = 0;
virtual void Close() = 0;
virtual ~Channel() {}
+
+ virtual void AddToSendQ(const void* referrer, T* data,
+ std::shared_ptr cond,
+ std::function cb) = 0;
+ virtual void AddToReceiveQ(const void* referrer, T* data,
+ std::shared_ptr cond,
+ std::function cb) = 0;
+ virtual void RemoveFromSendQ(const void* referrer) = 0;
+ virtual void RemoveFromReceiveQ(const void* referrer) = 0;
};
// Forward declaration of channel implementations.
@@ -80,6 +100,27 @@ class ChannelHolder {
return channel != nullptr ? channel->Receive(data) : false;
}
+ bool IsClosed() {
+ if (IsInitialized()) {
+ return holder_->IsClosed();
+ }
+ return false;
+ }
+
+ bool CanSend() {
+ if (IsInitialized()) {
+ return holder_->CanSend();
+ }
+ return false;
+ }
+
+ bool CanReceive() {
+ if (IsInitialized()) {
+ return holder_->CanReceive();
+ }
+ return false;
+ }
+
void close() {
if (IsInitialized()) holder_->Close();
}
@@ -97,6 +138,38 @@ class ChannelHolder {
if (IsInitialized()) holder_->Unlock();
}
+ template
+ void AddToSendQ(const void* referrer, T* data,
+ std::shared_ptr cond,
+ std::function cb) {
+ if (IsInitialized()) {
+ Channel* channel = static_cast*>(holder_->Ptr());
+ if (channel != nullptr) {
+ channel->AddToSendQ(referrer, data, cond, cb);
+ }
+ }
+ }
+
+ template
+ void AddToReceiveQ(const void* referrer, T* data,
+ std::shared_ptr cond,
+ std::function cb) {
+ if (IsInitialized()) {
+ Channel* channel = static_cast*>(holder_->Ptr());
+ if (channel != nullptr) {
+ channel->AddToReceiveQ(referrer, data, cond, cb);
+ }
+ }
+ }
+
+ void RemoveFromSendQ(const void* referrer) {
+ if (IsInitialized()) holder_->RemoveFromSendQ(referrer);
+ }
+
+ void RemoveFromReceiveQ(const void* referrer) {
+ if (IsInitialized()) holder_->RemoveFromReceiveQ(referrer);
+ }
+
inline bool IsInitialized() const { return holder_ != nullptr; }
inline const std::type_index Type() {
@@ -113,6 +186,11 @@ class ChannelHolder {
virtual ~Placeholder() {}
virtual const std::type_index Type() const = 0;
virtual void* Ptr() const = 0;
+ virtual bool IsClosed() = 0;
+ virtual bool CanSend() = 0;
+ virtual bool CanReceive() = 0;
+ virtual void RemoveFromSendQ(const void* referrer) = 0;
+ virtual void RemoveFromReceiveQ(const void* referrer) = 0;
virtual void Close() = 0;
virtual void Lock() = 0;
virtual void Unlock() = 0;
@@ -129,6 +207,39 @@ class ChannelHolder {
virtual void* Ptr() const { return static_cast(channel_.get()); }
+ virtual bool IsClosed() {
+ if (channel_) {
+ return channel_->IsClosed();
+ }
+ return false;
+ }
+
+ virtual bool CanSend() {
+ if (channel_) {
+ return channel_->CanSend();
+ }
+ return false;
+ }
+
+ virtual bool CanReceive() {
+ if (channel_) {
+ return channel_->CanReceive();
+ }
+ return false;
+ }
+
+ virtual void RemoveFromSendQ(const void* referrer) {
+ if (channel_) {
+ channel_->RemoveFromSendQ(referrer);
+ }
+ }
+
+ virtual void RemoveFromReceiveQ(const void* referrer) {
+ if (channel_) {
+ channel_->RemoveFromReceiveQ(referrer);
+ }
+ }
+
virtual void Close() {
if (channel_) channel_->Close();
}
diff --git a/paddle/fluid/framework/channel_impl.h b/paddle/fluid/framework/channel_impl.h
index a4561031fd8c49613269e7008ce558f25f9765e4..457abbf373d4549229e8fd8bd6b2087cc6b8f5c8 100644
--- a/paddle/fluid/framework/channel_impl.h
+++ b/paddle/fluid/framework/channel_impl.h
@@ -29,32 +29,50 @@ class ChannelImpl : public paddle::framework::Channel {
friend void paddle::framework::CloseChannel(Channel *);
public:
+ virtual bool CanSend();
+ virtual bool CanReceive();
virtual bool Send(T *);
virtual bool Receive(T *);
virtual size_t Cap() { return cap_; }
virtual void Lock();
virtual void Unlock();
+ virtual bool IsClosed();
virtual void Close();
-
ChannelImpl(size_t);
virtual ~ChannelImpl();
+ virtual void AddToSendQ(const void *referrer, T *data,
+ std::shared_ptr cond,
+ std::function cb);
+ virtual void AddToReceiveQ(const void *referrer, T *data,
+ std::shared_ptr cond,
+ std::function cb);
+
+ virtual void RemoveFromSendQ(const void *referrer);
+ virtual void RemoveFromReceiveQ(const void *referrer);
+
private:
struct QueueMessage {
T *data;
- std::condition_variable_any cond;
+ std::shared_ptr cond;
bool chan_closed = false;
bool completed = false;
+ const void *referrer; // TODO(thuan): figure out better way to do this
+ std::function callback;
- QueueMessage(T *item) : data(item) {}
+ QueueMessage(T *item)
+ : data(item), cond(std::make_shared()) {}
+
+ QueueMessage(T *item, std::shared_ptr cond)
+ : data(item), cond(cond) {}
void Wait(std::unique_lock &lock) {
- cond.wait(lock, [this]() { return completed; });
+ cond->wait(lock, [this]() { return completed; });
}
void Notify() {
completed = true;
- cond.notify_all();
+ cond->notify_all();
}
};
@@ -87,6 +105,18 @@ ChannelImpl::ChannelImpl(size_t capacity)
PADDLE_ENFORCE_GE(capacity, 0);
}
+template
+bool ChannelImpl::CanSend() {
+ std::lock_guard lock{mu_};
+ return !closed_ && (!recvq.empty() || buf_.size() < cap_);
+}
+
+template
+bool ChannelImpl::CanReceive() {
+ std::lock_guard lock{mu_};
+ return !(closed_ && buf_.empty()) && (!sendq.empty() || buf_.size() > 0);
+}
+
template
bool ChannelImpl::Send(T *item) {
send_ctr++;
@@ -105,7 +135,24 @@ bool ChannelImpl::Send(T *item) {
std::shared_ptr m = recvq.front();
recvq.pop_front();
// Do the data transfer
- *(m->data) = std::move(*item);
+ // We will do this data transfer if either of the following
+ // cases are true
+ // 1. callback == nullptr // This means it was a regular channel send
+ // 2. callback returns true
+ bool do_send = true;
+ if (m->callback != nullptr) do_send = m->callback(ChannelAction::SEND);
+ if (do_send)
+ *(m->data) = std::move(*item);
+ else
+ // We cannot do the data transfer because
+ // this QueueMessage was added by Select
+ // and some other case was executed.
+ // So call the Send function again.
+ // We do not care about notifying other
+ // because they would have been notified
+ // by the executed select case.
+ return send_return(Send(item));
+
// Wake up the blocked process and unlock
m->Notify();
lock.unlock();
@@ -150,7 +197,25 @@ bool ChannelImpl::Receive(T *item) {
std::shared_ptr m = sendq.front();
sendq.pop_front();
// Do the data transfer
- *item = std::move(*(m->data));
+ // We will do this data transfer if either of the following
+ // cases are true
+ // 1. callback == nullptr // This means it was a regular channel send
+ // 2. callback returns true
+ bool do_receive = true;
+ if (m->callback != nullptr)
+ do_receive = m->callback(ChannelAction::RECEIVE);
+ if (do_receive)
+ *item = std::move(*(m->data));
+ else
+ // We cannot do the data transfer because
+ // this QueueMessage was added by Select
+ // and some other case was executed.
+ // So call the Receive function again.
+ // We do not care about notifying other
+ // because they would have been notified
+ // by the executed select case.
+ return recv_return(Receive(item));
+
// Wake up the blocked process and unlock
m->Notify();
lock.unlock();
@@ -186,6 +251,12 @@ void ChannelImpl::Unlock() {
mu_.unlock();
}
+template
+bool ChannelImpl::IsClosed() {
+ std::lock_guard lock{mu_};
+ return closed_;
+}
+
template
void ChannelImpl::Close() {
std::unique_lock lock{mu_};
@@ -203,6 +274,12 @@ void ChannelImpl::Close() {
std::shared_ptr m = recvq.front();
recvq.pop_front();
m->chan_closed = true;
+
+ // Execute callback function (if any)
+ if (m->callback != nullptr) {
+ m->callback(ChannelAction::CLOSE);
+ }
+
m->Notify();
}
@@ -211,10 +288,70 @@ void ChannelImpl::Close() {
std::shared_ptr m = sendq.front();
sendq.pop_front();
m->chan_closed = true;
+
+ // Execute callback function (if any)
+ if (m->callback != nullptr) {
+ m->callback(ChannelAction::CLOSE);
+ }
+
m->Notify();
}
}
+template
+void ChannelImpl::AddToSendQ(
+ const void *referrer, T *data,
+ std::shared_ptr cond,
+ std::function cb) {
+ std::lock_guard lock{mu_};
+ auto m = std::make_shared(data, cond);
+ m->referrer = referrer;
+ m->callback = cb;
+ sendq.push_back(m);
+}
+
+template
+void ChannelImpl::AddToReceiveQ(
+ const void *referrer, T *data,
+ std::shared_ptr cond,
+ std::function cb) {
+ std::lock_guard lock{mu_};
+ auto m = std::make_shared(data, cond);
+ m->referrer = referrer;
+ m->callback = cb;
+ recvq.push_back(m);
+}
+
+template
+void ChannelImpl::RemoveFromSendQ(const void *referrer) {
+ std::lock_guard lock{mu_};
+
+ for (auto it = sendq.begin(); it != sendq.end();) {
+ std::shared_ptr sendMsg = (std::shared_ptr)*it;
+
+ if (sendMsg->referrer == referrer) {
+ it = sendq.erase(it);
+ } else {
+ ++it;
+ }
+ }
+}
+
+template
+void ChannelImpl::RemoveFromReceiveQ(const void *referrer) {
+ std::lock_guard lock{mu_};
+
+ for (auto it = recvq.begin(); it != recvq.end();) {
+ std::shared_ptr recvMsg = (std::shared_ptr)*it;
+
+ if (recvMsg->referrer == referrer) {
+ it = recvq.erase(it);
+ } else {
+ ++it;
+ }
+ }
+}
+
template
ChannelImpl::~ChannelImpl() {
Close();
diff --git a/paddle/fluid/framework/concurrency_test.cc b/paddle/fluid/framework/concurrency_test.cc
index 5770b0a5a18659e615e80a7c48113d8b543b69ec..25152054eb8452a9667bd65b4441665476c1d46d 100644
--- a/paddle/fluid/framework/concurrency_test.cc
+++ b/paddle/fluid/framework/concurrency_test.cc
@@ -19,7 +19,6 @@ limitations under the License. */
#include "paddle/fluid/framework/channel.h"
#include "paddle/fluid/framework/executor.h"
#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/framework/program_desc.h"
USE_NO_KERNEL_OP(go);
USE_NO_KERNEL_OP(channel_close);
@@ -27,6 +26,12 @@ USE_NO_KERNEL_OP(channel_create);
USE_NO_KERNEL_OP(channel_recv);
USE_NO_KERNEL_OP(channel_send);
USE_NO_KERNEL_OP(elementwise_add);
+USE_NO_KERNEL_OP(select);
+USE_NO_KERNEL_OP(conditional_block);
+USE_NO_KERNEL_OP(equal);
+USE_NO_KERNEL_OP(assign);
+USE_NO_KERNEL_OP(while);
+USE_NO_KERNEL_OP(print);
namespace f = paddle::framework;
namespace p = paddle::platform;
@@ -35,27 +40,15 @@ namespace paddle {
namespace framework {
template
-void CreateIntVariable(Scope &scope, p::CPUPlace &place, std::string name,
- T value) {
- // Create LoDTensor of dim [1,1]
+LoDTensor *CreateVariable(Scope &scope, p::CPUPlace &place, std::string name,
+ T value) {
+ // Create LoDTensor of dim [1]
auto var = scope.Var(name);
auto tensor = var->GetMutable();
- tensor->Resize({1, 1});
+ tensor->Resize({1});
T *expect = tensor->mutable_data(place);
expect[0] = value;
-}
-
-void InitTensorsInScope(Scope &scope, p::CPUPlace &place) {
- p::CPUDeviceContext ctx(place);
-
- // Create channel variable
- scope.Var("Channel");
-
- // Create Variables, x0 will be put into channel,
- // result will be pulled from channel
- CreateIntVariable(scope, place, "Status", false);
- CreateIntVariable(scope, place, "x0", 99);
- CreateIntVariable(scope, place, "result", 0);
+ return tensor;
}
void AddOp(const std::string &type, const VariableNameMap &inputs,
@@ -73,12 +66,116 @@ void AddOp(const std::string &type, const VariableNameMap &inputs,
op->SetAttrMap(attrs);
}
+void AddCase(ProgramDesc *program, Scope *scope, p::CPUPlace *place,
+ BlockDesc *casesBlock, int caseId, int caseType,
+ std::string caseChannel, std::string caseVarName,
+ std::function func) {
+ std::string caseCondName = std::string("caseCond") + std::to_string(caseId);
+ std::string caseCondXVarName =
+ std::string("caseCondX") + std::to_string(caseId);
+
+ BlockDesc *caseBlock = program->AppendBlock(*casesBlock);
+ func(caseBlock, scope);
+
+ CreateVariable(*scope, *place, caseCondName, false);
+ CreateVariable(*scope, *place, caseCondXVarName, caseId);
+ CreateVariable(*scope, *place, caseVarName, caseId);
+
+ scope->Var("step_scope");
+
+ AddOp("equal", {{"X", {caseCondXVarName}}, {"Y", {"caseToExecute"}}},
+ {{"Out", {caseCondName}}}, {}, casesBlock);
+
+ AddOp("conditional_block", {{"X", {caseCondName}}, {"Params", {}}},
+ {{"Out", {}}, {"Scope", {"step_scope"}}},
+ {{"sub_block", caseBlock}, {"is_scalar_condition", true}}, casesBlock);
+}
+
+void AddFibonacciSelect(Scope *scope, p::CPUPlace *place, ProgramDesc *program,
+ BlockDesc *parentBlock, std::string dataChanName,
+ std::string quitChanName) {
+ BlockDesc *whileBlock = program->AppendBlock(*parentBlock);
+
+ CreateVariable(*scope, *place, "whileExitCond", true);
+ CreateVariable(*scope, *place, "caseToExecute", -1);
+ CreateVariable(*scope, *place, "case1var", 0);
+
+ CreateVariable(*scope, *place, "xtemp", 0);
+
+ // TODO(thuan): Need to create fibXToSend, since channel send moves the actual
+ // data,
+ // which causes the data to be no longer accessible to do the fib calculation
+ // TODO(abhinav): Change channel send to do a copy instead of a move!
+ CreateVariable(*scope, *place, "fibXToSend", 0);
+
+ CreateVariable(*scope, *place, "fibX", 0);
+ CreateVariable(*scope, *place, "fibY", 1);
+ CreateVariable(*scope, *place, "quitVar", 0);
+
+ BlockDesc *casesBlock = program->AppendBlock(*whileBlock);
+ std::function f = [](BlockDesc *caseBlock) {};
+
+ // TODO(thuan): Remove this once we change channel send to do a copy instead
+ // of move
+ AddOp("assign", {{"X", {"fibX"}}}, {{"Out", {"fibXToSend"}}}, {}, whileBlock);
+
+ // Case 0: Send to dataChanName
+ std::function case0Func = [&](
+ BlockDesc *caseBlock, Scope *scope) {
+ AddOp("assign", {{"X", {"fibX"}}}, {{"Out", {"xtemp"}}}, {}, caseBlock);
+ AddOp("assign", {{"X", {"fibY"}}}, {{"Out", {"fibX"}}}, {}, caseBlock);
+ AddOp("elementwise_add", {{"X", {"xtemp"}}, {"Y", {"fibY"}}},
+ {{"Out", {"fibY"}}}, {}, caseBlock);
+ };
+ AddCase(program, scope, place, casesBlock, 0, 1, dataChanName, "fibXToSend",
+ case0Func);
+ std::string case0Config =
+ std::string("0,1,") + dataChanName + std::string(",fibXToSend");
+
+ // Case 1: Receive from quitChanName
+ std::function case2Func = [&](
+ BlockDesc *caseBlock, Scope *scope) {
+ // Exit the while loop after we receive from quit channel.
+ // We assign a false to "whileExitCond" variable, which will
+ // break out of while_op loop
+ CreateVariable(*scope, *place, "whileFalse", false);
+ AddOp("assign", {{"X", {"whileFalse"}}}, {{"Out", {"whileExitCond"}}}, {},
+ caseBlock);
+ };
+ AddCase(program, scope, place, casesBlock, 1, 2, quitChanName, "quitVar",
+ case2Func);
+ std::string case1Config =
+ std::string("1,2,") + quitChanName + std::string(",quitVar");
+
+ // Select block
+ AddOp("select", {{"X", {dataChanName, quitChanName}},
+ {"case_to_execute", {"caseToExecute"}}},
+ {}, {{"sub_block", casesBlock},
+ {"cases", std::vector{case0Config, case1Config}}},
+ whileBlock);
+
+ scope->Var("stepScopes");
+ AddOp("while",
+ {{"X", {dataChanName, quitChanName}}, {"Condition", {"whileExitCond"}}},
+ {{"Out", {}}, {"StepScopes", {"stepScopes"}}},
+ {{"sub_block", whileBlock}}, parentBlock);
+}
+
TEST(Concurrency, Go_Op) {
Scope scope;
p::CPUPlace place;
// Initialize scope variables
- InitTensorsInScope(scope, place);
+ p::CPUDeviceContext ctx(place);
+
+ // Create channel variable
+ scope.Var("Channel");
+
+ // Create Variables, x0 will be put into channel,
+ // result will be pulled from channel
+ CreateVariable(scope, place, "Status", false);
+ CreateVariable(scope, place, "x0", 99);
+ CreateVariable(scope, place, "result", 0);
framework::Executor executor(place);
ProgramDesc program;
@@ -118,5 +215,78 @@ TEST(Concurrency, Go_Op) {
auto *finalData = tensor.data();
EXPECT_EQ(finalData[0], 99);
}
+
+/**
+ * This test implements the fibonacci function using go_op and select_op
+ */
+TEST(Concurrency, Select) {
+ Scope scope;
+ p::CPUPlace place;
+
+ // Initialize scope variables
+ p::CPUDeviceContext ctx(place);
+
+ CreateVariable(scope, place, "Status", false);
+ CreateVariable(scope, place, "result", 0);
+ CreateVariable(scope, place, "currentXFib", 0);
+
+ framework::Executor executor(place);
+ ProgramDesc program;
+ BlockDesc *block = program.MutableBlock(0);
+
+ // Create channel OP
+ std::string dataChanName = "Channel";
+ scope.Var(dataChanName);
+ AddOp("channel_create", {}, {{"Out", {dataChanName}}},
+ {{"capacity", 0}, {"data_type", f::proto::VarType::LOD_TENSOR}}, block);
+
+ std::string quitChanName = "Quit";
+ scope.Var(quitChanName);
+ AddOp("channel_create", {}, {{"Out", {quitChanName}}},
+ {{"capacity", 0}, {"data_type", f::proto::VarType::LOD_TENSOR}}, block);
+
+ // Create Go Op routine, which loops 10 times over fibonacci sequence
+ CreateVariable(scope, place, "xReceiveVar", 0);
+
+ BlockDesc *goOpBlock = program.AppendBlock(program.Block(0));
+ for (int i = 0; i < 10; ++i) {
+ AddOp("channel_recv", {{"Channel", {dataChanName}}},
+ {{"Status", {"Status"}}, {"Out", {"currentXFib"}}}, {}, goOpBlock);
+ AddOp("print", {{"In", {"currentXFib"}}}, {{"Out", {"currentXFib"}}},
+ {{"first_n", 100},
+ {"summarize", -1},
+ {"print_tensor_name", false},
+ {"print_tensor_type", true},
+ {"print_tensor_shape", false},
+ {"print_tensor_lod", false},
+ {"print_phase", std::string("FORWARD")},
+ {"message", std::string("X: ")}},
+ goOpBlock);
+ }
+
+ CreateVariable(scope, place, "quitSignal", 0);
+ AddOp("channel_send", {{"Channel", {quitChanName}}, {"X", {"quitSignal"}}},
+ {{"Status", {"Status"}}}, {}, goOpBlock);
+
+ // Create Go Op
+ AddOp("go", {{"X", {dataChanName, quitChanName}}}, {},
+ {{"sub_block", goOpBlock}}, block);
+
+ AddFibonacciSelect(&scope, &place, &program, block, dataChanName,
+ quitChanName);
+
+ // Create Channel Close Op
+ AddOp("channel_close", {{"Channel", {dataChanName}}}, {}, {}, block);
+ AddOp("channel_close", {{"Channel", {quitChanName}}}, {}, {}, block);
+
+ executor.Run(program, &scope, 0, true, true);
+
+ // After we call executor.run, "result" variable should be equal to 34
+ // (which is 10 loops through fibonacci sequence)
+ const LoDTensor &tensor = (scope.FindVar("currentXFib"))->Get();
+ auto *finalData = tensor.data();
+ EXPECT_EQ(finalData[0], 34);
+}
+
} // namespace framework
} // namespace paddle
diff --git a/paddle/fluid/framework/executor.cc b/paddle/fluid/framework/executor.cc
index d70a661cf365f0e5bd4a5178ccf7e032563a4a5e..7155d5ef2febc20aaa684c04a7a59f781857c9e5 100644
--- a/paddle/fluid/framework/executor.cc
+++ b/paddle/fluid/framework/executor.cc
@@ -25,6 +25,7 @@ limitations under the License. */
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/reader.h"
#include "paddle/fluid/platform/place.h"
+#include "paddle/fluid/platform/profiler.h"
DECLARE_bool(benchmark);
DEFINE_bool(check_nan_inf, false,
@@ -33,28 +34,18 @@ DEFINE_bool(check_nan_inf, false,
namespace paddle {
namespace framework {
+namespace {
+// block id starts from 0. This id is used to represent the codeblock
+// wrapping the first block 0.
+int kProgramId = -1;
+} // namespace
struct ExecutorPrepareContext {
- ExecutorPrepareContext(const framework::ProgramDesc* prog, size_t block_id,
- bool own_program = true)
- : block_id_(block_id), own_program_(own_program) {
- if (own_program_) {
- prog_ = new ProgramDesc(*prog);
- } else {
- // If own_program_ is false, we can avoid a clone of the program.
- prog_ = prog;
- }
- }
-
- ~ExecutorPrepareContext() {
- if (own_program_) {
- delete prog_;
- }
- }
+ ExecutorPrepareContext(const framework::ProgramDesc& prog, size_t block_id)
+ : prog_(prog), block_id_(block_id) {}
- const framework::ProgramDesc* prog_;
+ const framework::ProgramDesc& prog_;
size_t block_id_;
- bool own_program_;
std::vector> ops_;
};
@@ -109,7 +100,8 @@ static void CheckTensorNANOrInf(const std::string& name,
void Executor::Run(const ProgramDesc& pdesc, Scope* scope, int block_id,
bool create_local_scope, bool create_vars) {
- auto* ctx = Prepare(pdesc, block_id, false);
+ platform::RecordBlock b(block_id);
+ auto* ctx = Prepare(pdesc, block_id);
RunPreparedContext(ctx, scope, create_local_scope, create_vars);
delete ctx;
}
@@ -200,6 +192,7 @@ void Executor::Run(const ProgramDesc& program, Scope* scope,
std::map& fetch_targets,
const std::string& feed_holder_name,
const std::string& fetch_holder_name) {
+ platform::RecordBlock b(kProgramId);
bool has_feed_ops =
has_feed_operators(program.Block(0), feed_targets, feed_holder_name);
bool has_fetch_ops =
@@ -282,8 +275,8 @@ void Executor::Run(const ProgramDesc& program, Scope* scope,
}
ExecutorPrepareContext* Executor::Prepare(const ProgramDesc& program,
- int block_id, bool own_program) {
- auto* ctx = new ExecutorPrepareContext(&program, block_id, own_program);
+ int block_id) {
+ auto* ctx = new ExecutorPrepareContext(program, block_id);
PADDLE_ENFORCE_LT(static_cast(block_id), program.Size());
auto& block = program.Block(block_id);
for (auto& op_desc : block.AllOps()) {
@@ -294,7 +287,7 @@ ExecutorPrepareContext* Executor::Prepare(const ProgramDesc& program,
void Executor::RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
bool create_local_scope, bool create_vars) {
- auto& block = ctx->prog_->Block(ctx->block_id_);
+ auto& block = ctx->prog_.Block(ctx->block_id_);
Scope* local_scope = scope;
if (create_vars) {
diff --git a/paddle/fluid/framework/executor.h b/paddle/fluid/framework/executor.h
index 2be48bd0061ce2d6860bef3eae86ec3ae7a51fcd..28ce3315154cea45412984df4daf7385ce2cf572 100644
--- a/paddle/fluid/framework/executor.h
+++ b/paddle/fluid/framework/executor.h
@@ -48,7 +48,7 @@ class Executor {
const std::string& fetch_holder_name = "fetch");
static ExecutorPrepareContext* Prepare(const ProgramDesc& program,
- int block_id, bool own_program = true);
+ int block_id);
void RunPreparedContext(ExecutorPrepareContext* ctx, Scope* scope,
bool create_local_scope = true,
diff --git a/paddle/fluid/framework/operator.cc b/paddle/fluid/framework/operator.cc
index 371c2fad97b1efd06eea9ac631122f194e65d656..b39a1164dbd9877d9f45cc6415d74f930921a42f 100644
--- a/paddle/fluid/framework/operator.cc
+++ b/paddle/fluid/framework/operator.cc
@@ -74,9 +74,6 @@ void OperatorBase::Run(const Scope& scope, const platform::Place& place) {
platform::SetDeviceId(dev_id);
#endif
}
- // profile
- auto* dev_ctx = platform::DeviceContextPool::Instance().Get(place);
- platform::RecordEvent record_event(Type(), dev_ctx);
RunImpl(scope, place);
}
@@ -445,15 +442,7 @@ class RuntimeInferShapeContext : public InferShapeContext {
}
std::vector GetRepeatedDims(const std::string& name) const override {
- Variable* var = scope_.FindVar(name);
- if (var->IsType()) {
- return var->Get().shapes();
- } else {
- PADDLE_THROW(
- "Only ReaderHolder support 'GetRepeatedDims', but Variable %s's "
- "type_id is %s.",
- name, var->Type().name());
- }
+ PADDLE_THROW("Only compile time support this method");
}
void SetDim(const std::string& name, const DDim& dim) override {
@@ -470,15 +459,7 @@ class RuntimeInferShapeContext : public InferShapeContext {
void SetRepeatedDims(const std::string& name,
const std::vector& dims) override {
- Variable* var = scope_.FindVar(name);
- if (var->IsType()) {
- var->GetMutable()->set_shapes(dims);
- } else {
- PADDLE_THROW(
- "Only ReaderHolder support 'SetRepeatedDims', but Variable %s's "
- "type_id is %s.",
- name, var->Type().name());
- }
+ PADDLE_THROW("Only compile time support this method");
}
proto::VarType::Type GetVarType(const std::string& name) const override {
@@ -501,6 +482,10 @@ void OperatorWithKernel::RunImpl(const Scope& scope,
this->InferShape(&infer_shape_ctx);
platform::DeviceContextPool& pool = platform::DeviceContextPool::Instance();
auto* dev_ctx = pool.Get(place);
+
+ // For profiling, don't move out of this function because that will result
+ // in the failure of multi-GPU profiling.
+ platform::RecordEvent record_event(Type(), dev_ctx);
// check if op[type] has kernel registered.
auto& all_op_kernels = AllOpKernels();
auto kernels_iter = all_op_kernels.find(type_);
diff --git a/paddle/fluid/framework/reader.cc b/paddle/fluid/framework/reader.cc
index 91879d6d45868bb37ca44baafb8b0e8677cd6d1a..fa00c08e0d5791ee1187aed38b4d140564b7c97d 100644
--- a/paddle/fluid/framework/reader.cc
+++ b/paddle/fluid/framework/reader.cc
@@ -16,14 +16,22 @@
namespace paddle {
namespace framework {
+ReaderBase::~ReaderBase() {}
-DDim ReaderBase::shape(size_t idx) const {
- PADDLE_ENFORCE_LT(
- idx, shapes_.size(),
- "Cannot get the %d'th shape, 'shapes_' only has %d elements.", idx,
- shapes_.size());
- return shapes_[idx];
-}
+FileReader::FileReader(const std::vector &dims) : dims_(dims) {}
+
+void FileReader::ReadNext(std::vector *out) {
+ ReadNextImpl(out);
+ PADDLE_ENFORCE_EQ(out->size(), dims_.size());
+ for (size_t i = 0; i < dims_.size(); ++i) {
+ auto &actual = out->at(i).dims();
+ auto &expect = dims_[i];
+ PADDLE_ENFORCE_EQ(actual.size(), expect.size());
+ for (int j = 0; j < actual.size(); ++j) {
+ PADDLE_ENFORCE(actual[i] == expect[i] || expect[i] == -1);
+ }
+ }
+}
} // namespace framework
} // namespace paddle
diff --git a/paddle/fluid/framework/reader.h b/paddle/fluid/framework/reader.h
index 18064ddc669aad7dda98d502119e56e7ddedcff3..3573b99becf6d657c680c5fec0bda4bdde5dd7a2 100644
--- a/paddle/fluid/framework/reader.h
+++ b/paddle/fluid/framework/reader.h
@@ -16,40 +16,29 @@
#include "paddle/fluid/framework/ddim.h"
#include "paddle/fluid/framework/lod_tensor_array.h"
+#include "paddle/fluid/platform/place.h"
+
+#include
+#include
+#include
namespace paddle {
namespace framework {
class ReaderBase {
public:
- explicit ReaderBase(const std::vector& shapes) : shapes_(shapes) {
- PADDLE_ENFORCE(!shapes_.empty());
- }
virtual void ReadNext(std::vector* out) = 0;
virtual void ReInit() = 0;
- DDim shape(size_t idx) const;
- std::vector shapes() const { return shapes_; }
- void set_shapes(const std::vector& shapes) { shapes_ = shapes; }
-
virtual bool HasNext() const = 0;
- virtual ~ReaderBase() {}
-
- protected:
- std::vector shapes_;
-};
-
-class FileReader : public ReaderBase {
- public:
- explicit FileReader(const std::vector& shapes) : ReaderBase(shapes) {}
+ virtual ~ReaderBase();
};
class DecoratedReader : public ReaderBase {
public:
- explicit DecoratedReader(ReaderBase* reader)
- : ReaderBase(reader->shapes()), reader_(reader) {
+ explicit DecoratedReader(ReaderBase* reader) : ReaderBase(), reader_(reader) {
PADDLE_ENFORCE_NOT_NULL(reader_);
}
@@ -61,6 +50,19 @@ class DecoratedReader : public ReaderBase {
ReaderBase* reader_;
};
+class FileReader : public ReaderBase {
+ public:
+ explicit FileReader(const std::vector& dims);
+
+ void ReadNext(std::vector* out) override;
+
+ protected:
+ virtual void ReadNextImpl(std::vector* out) = 0;
+
+ private:
+ std::vector dims_;
+};
+
// The ReaderHolder is used as reader' unified wrapper,
// making it easier to access different type reader in Variables.
class ReaderHolder {
@@ -78,19 +80,6 @@ class ReaderHolder {
reader_->ReInit();
}
- DDim shape(size_t idx) const {
- PADDLE_ENFORCE_NOT_NULL(reader_);
- return reader_->shape(idx);
- }
- std::vector shapes() const {
- PADDLE_ENFORCE_NOT_NULL(reader_);
- return reader_->shapes();
- }
- void set_shapes(const std::vector& shapes) {
- PADDLE_ENFORCE_NOT_NULL(reader_);
- reader_->set_shapes(shapes);
- }
-
bool HasNext() const { return reader_->HasNext(); }
private:
diff --git a/paddle/fluid/operators/CMakeLists.txt b/paddle/fluid/operators/CMakeLists.txt
index 625e0f7561899d30b40f9daa56f743a37bdaa27f..d30124d4a3b89b802a4abaae07a33b76526f163d 100644
--- a/paddle/fluid/operators/CMakeLists.txt
+++ b/paddle/fluid/operators/CMakeLists.txt
@@ -165,7 +165,6 @@ op_library(cond_op DEPS framework_proto tensor net_op)
op_library(cross_entropy_op DEPS cross_entropy)
op_library(softmax_with_cross_entropy_op DEPS cross_entropy softmax)
op_library(softmax_op DEPS softmax)
-op_library(detection_output_op DEPS softmax)
op_library(sequence_softmax_op DEPS softmax)
op_library(sum_op DEPS selected_rows_functor)
op_library(sgd_op DEPS selected_rows_functor)
@@ -203,6 +202,11 @@ op_library(save_combine_op DEPS lod_tensor)
op_library(load_combine_op DEPS lod_tensor)
op_library(concat_op DEPS concat)
+# FIXME(thuan): Move CSP operators to paddle/fluid/framework/operators/concurrency
+add_subdirectory(concurrency)
+op_library(channel_send_op DEPS concurrency)
+op_library(channel_recv_op DEPS concurrency)
+
list(REMOVE_ITEM GENERAL_OPS ${DEPS_OPS})
foreach(src ${GENERAL_OPS})
op_library(${src})
diff --git a/paddle/fluid/operators/assign_op.cc b/paddle/fluid/operators/assign_op.cc
index 39ae3c0040d04a6d901f1d6c992d547a6778c28e..d372213e1b6008b0c4227103dd40730f86a84301 100644
--- a/paddle/fluid/operators/assign_op.cc
+++ b/paddle/fluid/operators/assign_op.cc
@@ -56,6 +56,7 @@ class AssignFunctor {
private:
void copy_tensor(const framework::LoDTensor &lod_tensor,
framework::LoDTensor *out) const {
+ if (lod_tensor.numel() == 0) return;
auto &out_tensor = *out;
TensorCopy(lod_tensor, lod_tensor.place(), dev_ctx_, &out_tensor);
out_tensor.set_lod(lod_tensor.lod());
diff --git a/paddle/fluid/operators/channel_recv_op.cc b/paddle/fluid/operators/channel_recv_op.cc
index c12b88e7a91c4ea7044223464a2f902db494d1a8..844b3ae3b7bf87c9b253128165b3c938801d5d60 100644
--- a/paddle/fluid/operators/channel_recv_op.cc
+++ b/paddle/fluid/operators/channel_recv_op.cc
@@ -18,6 +18,7 @@ limitations under the License. */
#include
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/var_type.h"
+#include "paddle/fluid/operators/concurrency/channel_util.h"
#include "paddle/fluid/operators/math/math_function.h"
static constexpr char Channel[] = "Channel";
@@ -36,25 +37,6 @@ void SetReceiveStatus(const platform::Place &dev_place,
status_tensor[0] = status;
}
-bool ChannelReceive(framework::ChannelHolder *ch, framework::Variable *var) {
- // Get type of channel and use that to call mutable data for Variable
- auto type = framework::ToVarType(ch->Type());
- if (type == framework::proto::VarType_Type_LOD_TENSOR)
- return ch->Receive(var->GetMutable());
- else if (type == framework::proto::VarType_Type_LOD_RANK_TABLE)
- return ch->Receive(var->GetMutable());
- else if (type == framework::proto::VarType_Type_LOD_TENSOR_ARRAY)
- return ch->Receive(var->GetMutable());
- else if (type == framework::proto::VarType_Type_SELECTED_ROWS)
- return ch->Receive(var->GetMutable());
- else if (type == framework::proto::VarType_Type_READER)
- return ch->Receive(var->GetMutable());
- else if (type == framework::proto::VarType_Type_CHANNEL)
- return ch->Receive(var->GetMutable());
- else
- PADDLE_THROW("ChannelReceive:Unsupported type");
-}
-
class ChannelRecvOp : public framework::OperatorBase {
public:
ChannelRecvOp(const std::string &type,
@@ -81,7 +63,7 @@ class ChannelRecvOp : public framework::OperatorBase {
scope.FindVar(Input(Channel))->GetMutable();
auto output_var = scope.FindVar(Output(Out));
// Receive the data from the channel.
- bool ok = ChannelReceive(ch, output_var);
+ bool ok = concurrency::ChannelReceive(ch, output_var);
// Set the status output of the `ChannelReceive` call.
SetReceiveStatus(dev_place, *scope.FindVar(Output(Status)), ok);
diff --git a/paddle/fluid/operators/channel_send_op.cc b/paddle/fluid/operators/channel_send_op.cc
index 6d7715ad229e821f02437246e3326063cb1ee757..47cf7d7efc9996e8a8db11b79c0310f77c2435a4 100644
--- a/paddle/fluid/operators/channel_send_op.cc
+++ b/paddle/fluid/operators/channel_send_op.cc
@@ -18,6 +18,7 @@ limitations under the License. */
#include
#include "paddle/fluid/framework/op_registry.h"
#include "paddle/fluid/framework/var_type.h"
+#include "paddle/fluid/operators/concurrency/channel_util.h"
#include "paddle/fluid/operators/math/math_function.h"
static constexpr char Channel[] = "Channel";
@@ -37,24 +38,6 @@ void SetSendStatus(const platform::Place &dev_place,
status_tensor[0] = status;
}
-bool ChannelSend(framework::ChannelHolder *ch, framework::Variable *var) {
- auto type = framework::ToVarType(var->Type());
- if (type == framework::proto::VarType_Type_LOD_TENSOR)
- return ch->Send(var->GetMutable());
- else if (type == framework::proto::VarType_Type_LOD_RANK_TABLE)
- return ch->Send(var->GetMutable());
- else if (type == framework::proto::VarType_Type_LOD_TENSOR_ARRAY)
- return ch->Send(var->GetMutable());
- else if (type == framework::proto::VarType_Type_SELECTED_ROWS)
- return ch->Send(var->GetMutable());
- else if (type == framework::proto::VarType_Type_READER)
- return ch->Send(var->GetMutable());
- else if (type == framework::proto::VarType_Type_CHANNEL)
- return ch->Send(var->GetMutable());
- else
- PADDLE_THROW("ChannelSend:Unsupported type");
-}
-
class ChannelSendOp : public framework::OperatorBase {
public:
ChannelSendOp(const std::string &type,
@@ -82,7 +65,7 @@ class ChannelSendOp : public framework::OperatorBase {
auto input_var = scope.FindVar(Input(X));
// Send the input data through the channel.
- bool ok = ChannelSend(ch, input_var);
+ bool ok = concurrency::ChannelSend(ch, input_var);
// Set the status output of the `ChannelSend` call.
SetSendStatus(dev_place, *scope.FindVar(Output(Status)), ok);
diff --git a/paddle/fluid/operators/concurrency/CMakeLists.txt b/paddle/fluid/operators/concurrency/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..e4617440d152b4c15d09e81cd19c76739b95b979
--- /dev/null
+++ b/paddle/fluid/operators/concurrency/CMakeLists.txt
@@ -0,0 +1 @@
+cc_library(concurrency SRCS channel_util.cc DEPS device_context framework_proto boost eigen3)
diff --git a/paddle/fluid/operators/concurrency/channel_util.cc b/paddle/fluid/operators/concurrency/channel_util.cc
new file mode 100644
index 0000000000000000000000000000000000000000..a483af7affd824da7d18676d934dc959167ef71f
--- /dev/null
+++ b/paddle/fluid/operators/concurrency/channel_util.cc
@@ -0,0 +1,111 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#include "channel_util.h"
+#include "paddle/fluid/framework/var_type.h"
+
+namespace poc = paddle::operators::concurrency;
+
+bool poc::ChannelSend(framework::ChannelHolder *ch, framework::Variable *var) {
+ auto type = framework::ToVarType(var->Type());
+ if (type == framework::proto::VarType_Type_LOD_TENSOR)
+ return ch->Send(var->GetMutable());
+ else if (type == framework::proto::VarType_Type_LOD_RANK_TABLE)
+ return ch->Send(var->GetMutable());
+ else if (type == framework::proto::VarType_Type_LOD_TENSOR_ARRAY)
+ return ch->Send(var->GetMutable());
+ else if (type == framework::proto::VarType_Type_SELECTED_ROWS)
+ return ch->Send(var->GetMutable());
+ else if (type == framework::proto::VarType_Type_READER)
+ return ch->Send(var->GetMutable());
+ else if (type == framework::proto::VarType_Type_CHANNEL)
+ return ch->Send(var->GetMutable());
+ else
+ PADDLE_THROW("ChannelSend:Unsupported type");
+}
+
+bool poc::ChannelReceive(framework::ChannelHolder *ch,
+ framework::Variable *var) {
+ // Get type of channel and use that to call mutable data for Variable
+ auto type = framework::ToVarType(ch->Type());
+ if (type == framework::proto::VarType_Type_LOD_TENSOR)
+ return ch->Receive(var->GetMutable());
+ else if (type == framework::proto::VarType_Type_LOD_RANK_TABLE)
+ return ch->Receive(var->GetMutable());
+ else if (type == framework::proto::VarType_Type_LOD_TENSOR_ARRAY)
+ return ch->Receive(var->GetMutable());
+ else if (type == framework::proto::VarType_Type_SELECTED_ROWS)
+ return ch->Receive(var->GetMutable());
+ else if (type == framework::proto::VarType_Type_READER)
+ return ch->Receive(var->GetMutable());
+ else if (type == framework::proto::VarType_Type_CHANNEL)
+ return ch->Receive(var->GetMutable());
+ else
+ PADDLE_THROW("ChannelReceive:Unsupported type");
+}
+
+void poc::ChannelAddToSendQ(framework::ChannelHolder *ch, const void *referrer,
+ framework::Variable *var,
+ std::shared_ptr cond,
+ std::function cb) {
+ auto type = framework::ToVarType(var->Type());
+ if (type == framework::proto::VarType_Type_LOD_TENSOR) {
+ ch->AddToSendQ(referrer, var->GetMutable(), cond, cb);
+ } else if (type == framework::proto::VarType_Type_LOD_RANK_TABLE) {
+ ch->AddToSendQ(referrer, var->GetMutable(), cond,
+ cb);
+ } else if (type == framework::proto::VarType_Type_LOD_TENSOR_ARRAY) {
+ ch->AddToSendQ(referrer, var->GetMutable(), cond,
+ cb);
+ } else if (type == framework::proto::VarType_Type_SELECTED_ROWS) {
+ ch->AddToSendQ(referrer, var->GetMutable(), cond,
+ cb);
+ } else if (type == framework::proto::VarType_Type_READER) {
+ ch->AddToSendQ(referrer, var->GetMutable(), cond,
+ cb);
+ } else if (type == framework::proto::VarType_Type_CHANNEL) {
+ ch->AddToSendQ(referrer, var->GetMutable(), cond,
+ cb);
+ } else {
+ PADDLE_THROW("ChannelAddToSendQ:Unsupported type");
+ }
+}
+
+void poc::ChannelAddToReceiveQ(
+ framework::ChannelHolder *ch, const void *referrer,
+ framework::Variable *var, std::shared_ptr cond,
+ std::function cb) {
+ auto type = framework::ToVarType(var->Type());
+ if (type == framework::proto::VarType_Type_LOD_TENSOR) {
+ ch->AddToReceiveQ(referrer, var->GetMutable(), cond,
+ cb);
+ } else if (type == framework::proto::VarType_Type_LOD_RANK_TABLE) {
+ ch->AddToReceiveQ(referrer, var->GetMutable(),
+ cond, cb);
+ } else if (type == framework::proto::VarType_Type_LOD_TENSOR_ARRAY) {
+ ch->AddToReceiveQ(referrer, var->GetMutable(),
+ cond, cb);
+ } else if (type == framework::proto::VarType_Type_SELECTED_ROWS) {
+ ch->AddToReceiveQ(referrer, var->GetMutable(),
+ cond, cb);
+ } else if (type == framework::proto::VarType_Type_READER) {
+ ch->AddToReceiveQ(referrer, var->GetMutable(),
+ cond, cb);
+ } else if (type == framework::proto::VarType_Type_CHANNEL) {
+ ch->AddToReceiveQ(referrer, var->GetMutable(),
+ cond, cb);
+ } else {
+ PADDLE_THROW("ChannelAddToReceiveQ:Unsupported type");
+ }
+}
diff --git a/paddle/fluid/operators/concurrency/channel_util.h b/paddle/fluid/operators/concurrency/channel_util.h
new file mode 100644
index 0000000000000000000000000000000000000000..c3674bd9815df451751707bfa84d18dbb5fa0f6b
--- /dev/null
+++ b/paddle/fluid/operators/concurrency/channel_util.h
@@ -0,0 +1,38 @@
+/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
+
+Licensed under the Apache License, Version 2.0 (the "License");
+you may not use this file except in compliance with the License.
+You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing, software
+distributed under the License is distributed on an "AS IS" BASIS,
+WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+See the License for the specific language governing permissions and
+limitations under the License. */
+
+#pragma once
+
+#include "paddle/fluid/framework/channel.h"
+#include "paddle/fluid/framework/variable.h"
+
+namespace paddle {
+namespace operators {
+namespace concurrency {
+
+bool ChannelSend(framework::ChannelHolder *ch, framework::Variable *var);
+bool ChannelReceive(framework::ChannelHolder *ch, framework::Variable *var);
+
+void ChannelAddToSendQ(framework::ChannelHolder *ch, const void *referrer,
+                       framework::Variable *var,
+                       std::shared_ptr<std::condition_variable_any> cond,
+                       std::function<bool(framework::ChannelAction)> cb);
+void ChannelAddToReceiveQ(framework::ChannelHolder *ch, const void *referrer,
+                          framework::Variable *var,
+                          std::shared_ptr<std::condition_variable_any> cond,
+                          std::function<bool(framework::ChannelAction)> cb);
+
+} // namespace concurrency
+} // namespace operators
+} // namespace paddle
diff --git a/paddle/fluid/operators/conv_cudnn_op.cu.cc b/paddle/fluid/operators/conv_cudnn_op.cu.cc
index ff0fbf21f86269885df5491afab7443df813f13f..0ddbfdb4aa9e844adbb291e1c5612e96681831d6 100644
--- a/paddle/fluid/operators/conv_cudnn_op.cu.cc
+++ b/paddle/fluid/operators/conv_cudnn_op.cu.cc
@@ -18,6 +18,7 @@ limitations under the License. */
#include "paddle/fluid/operators/conv_op.h"
#include "paddle/fluid/platform/assert.h"
#include "paddle/fluid/platform/cudnn_helper.h"
+#include "paddle/fluid/platform/float16.h"
namespace paddle {
namespace operators {
@@ -133,7 +134,8 @@ class CUDNNConvOpKernel : public framework::OpKernel {
platform::CUDAPlace gpu = boost::get(ctx.GetPlace());
cudnn_workspace = paddle::memory::Alloc(gpu, workspace_size_in_bytes);
// ------------------- cudnn conv forward ---------------------
- T alpha = 1.0f, beta = 0.0f;
+ typename platform::CudnnDataType<T>::ScalingParamType alpha = 1.0f,
+ beta = 0.0f;
for (int i = 0; i < groups; i++) {
PADDLE_ENFORCE(platform::dynload::cudnnConvolutionForward(
handle, &alpha, cudnn_input_desc, input_data + i * group_offset_in,
@@ -280,7 +282,8 @@ class CUDNNConvGradOpKernel : public framework::OpKernel {
platform::CUDAPlace gpu = boost::get(ctx.GetPlace());
cudnn_workspace = paddle::memory::Alloc(gpu, workspace_size_in_bytes);
// ------------------- cudnn conv backward data ---------------------
- T alpha = 1.0f, beta = 0.0f;
+ typename platform::CudnnDataType<T>::ScalingParamType alpha = 1.0f,
+ beta = 0.0f;
if (input_grad) {
T* input_grad_data = input_grad->mutable_data(ctx.GetPlace());
// Because beta is zero, it is unnecessary to reset input_grad.
@@ -315,16 +318,18 @@ class CUDNNConvGradOpKernel : public framework::OpKernel {
} // namespace operators
} // namespace paddle
-REGISTER_OP_KERNEL(conv2d, CUDNN, ::paddle::platform::CUDAPlace,
+namespace plat = paddle::platform;
+REGISTER_OP_KERNEL(conv2d, CUDNN, plat::CUDAPlace,
                    paddle::operators::CUDNNConvOpKernel<float>,
-                   paddle::operators::CUDNNConvOpKernel<double>);
+                   paddle::operators::CUDNNConvOpKernel<double>,
+                   paddle::operators::CUDNNConvOpKernel<plat::float16>);
-REGISTER_OP_KERNEL(conv2d_grad, CUDNN, ::paddle::platform::CUDAPlace,
+REGISTER_OP_KERNEL(conv2d_grad, CUDNN, plat::CUDAPlace,
                    paddle::operators::CUDNNConvGradOpKernel<float>,
                    paddle::operators::CUDNNConvGradOpKernel<double>);
-REGISTER_OP_KERNEL(conv3d, CUDNN, ::paddle::platform::CUDAPlace,
+REGISTER_OP_KERNEL(conv3d, CUDNN, plat::CUDAPlace,
                    paddle::operators::CUDNNConvOpKernel<float>,
                    paddle::operators::CUDNNConvOpKernel<double>);
-REGISTER_OP_KERNEL(conv3d_grad, CUDNN, ::paddle::platform::CUDAPlace,
+REGISTER_OP_KERNEL(conv3d_grad, CUDNN, plat::CUDAPlace,
                    paddle::operators::CUDNNConvGradOpKernel<float>,
                    paddle::operators::CUDNNConvGradOpKernel<double>);
diff --git a/paddle/fluid/operators/conv_op.cc b/paddle/fluid/operators/conv_op.cc
index 4b02b80d7772fa15d2333692551da5e59d93765f..e3fc21c90f95469d646139a4454501d1c30bd51c 100644
--- a/paddle/fluid/operators/conv_op.cc
+++ b/paddle/fluid/operators/conv_op.cc
@@ -83,12 +83,23 @@ framework::OpKernelType ConvOp::GetExpectedKernelType(
}
#endif
+  auto input_data_type =
+      framework::ToDataType(ctx.Input<Tensor>("Input")->type());
+  auto filter_data_type =
+      framework::ToDataType(ctx.Input<Tensor>("Filter")->type());
+ PADDLE_ENFORCE_EQ(input_data_type, filter_data_type,
+ "input and filter data type should be consistent");
+
+ if (input_data_type == framework::proto::VarType::FP16) {
+ PADDLE_ENFORCE_EQ(library_, framework::LibraryType::kCUDNN,
+ "float16 can only be used when CUDNN is used");
+ }
+
   std::string data_format = ctx.Attr<std::string>("data_format");
// TODO(pzelazko-intel): enable MKLDNN layout when it's ready
framework::DataLayout layout_ = framework::StringToDataLayout(data_format);
-  return framework::OpKernelType(
-      framework::ToDataType(ctx.Input<Tensor>("Input")->type()), ctx.GetPlace(),
-      layout_, library_);
+ return framework::OpKernelType(input_data_type, ctx.GetPlace(), layout_,
+ library_);
}
Conv2DOpMaker::Conv2DOpMaker(OpProto* proto, OpAttrChecker* op_checker)
diff --git a/paddle/fluid/operators/detection_output_op.cc b/paddle/fluid/operators/detection_output_op.cc
deleted file mode 100644
index f7520475917ff23535f11ccfde0ee915112bba30..0000000000000000000000000000000000000000
--- a/paddle/fluid/operators/detection_output_op.cc
+++ /dev/null
@@ -1,89 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-Indicesou may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/operators/detection_output_op.h"
-namespace paddle {
-namespace operators {
-
-class DetectionOutputOpMaker : public framework::OpProtoAndCheckerMaker {
- public:
- DetectionOutputOpMaker(OpProto* proto, OpAttrChecker* op_checker)
- : OpProtoAndCheckerMaker(proto, op_checker) {
- AddInput("Loc",
- "(Tensor) The input tensor of detection_output operator."
- "The input predict locations"
- "The format of input tensor is kNCHW. Where K is priorbox point "
- "numbers,"
- "N is How many boxes are there on each point, "
- "C is 4, H and W both are 1.");
- AddInput("Conf",
- "(Tensor) The input tensor of detection_output operator."
- "The input priorbox confidence."
- "The format of input tensor is kNCHW. Where K is priorbox point "
- "numbers,"
- "N is How many boxes are there on each point, "
- "C is the number of classes, H and W both are 1.");
- AddInput("PriorBox",
- "(Tensor) The input tensor of detection_output operator."
- "The format of input tensor is the position and variance "
- "of the boxes");
- AddOutput("Out",
- "(Tensor) The output tensor of detection_output operator.");
- AddAttr("background_label_id", "(int), The background class index.");
- AddAttr("num_classes", "(int), The number of the classification.");
- AddAttr("nms_threshold",
- "(float), The Non-maximum suppression threshold.");
- AddAttr("confidence_threshold",
- "(float), The classification confidence threshold.");
- AddAttr("top_k", "(int), The bbox number kept of the layer’s output.");
- AddAttr("nms_top_k",
- "(int), The bbox number kept of the NMS’s output.");
- AddComment(R"DOC(
- detection output for SSD(single shot multibox detector)
- Apply the NMS to the output of network and compute the predict
- bounding box location. The output’s shape of this layer could
- be zero if there is no valid bounding box.
- )DOC");
- }
-};
-
-class DetectionOutputOp : public framework::OperatorWithKernel {
- public:
- using framework::OperatorWithKernel::OperatorWithKernel;
- void InferShape(framework::InferShapeContext* ctx) const override {
- PADDLE_ENFORCE(ctx->HasInput("Loc"),
- "Input(X) of DetectionOutputOp"
- "should not be null.");
- PADDLE_ENFORCE(ctx->HasInput("Conf"),
- "Input(X) of DetectionOutputOp"
- "should not be null.");
- PADDLE_ENFORCE(ctx->HasInput("PriorBox"),
- "Input(X) of DetectionOutputOp"
- "should not be null.");
- PADDLE_ENFORCE(ctx->HasOutput("Out"),
- "Output(Out) of DetectionOutputOp should not be null.");
- std::vector output_shape({1, 7});
- ctx->SetOutputDim("Out", framework::make_ddim(output_shape));
- }
-};
-} // namespace operators
-} // namespace paddle
-
-namespace ops = paddle::operators;
-REGISTER_OP_WITHOUT_GRADIENT(detection_output, ops::DetectionOutputOp,
- ops::DetectionOutputOpMaker);
-REGISTER_OP_CPU_KERNEL(
- detection_output,
- ops::DetectionOutputKernel,
- ops::DetectionOutputKernel);
diff --git a/paddle/fluid/operators/detection_output_op.cu.cc b/paddle/fluid/operators/detection_output_op.cu.cc
deleted file mode 100644
index 0f48765c9c67c1d3fa32b19d5e87b2acaa3c486a..0000000000000000000000000000000000000000
--- a/paddle/fluid/operators/detection_output_op.cu.cc
+++ /dev/null
@@ -1,21 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
-Licensed under the Apache License, Version 2.0 (the "License");
-you may not use this file except in compliance with the License.
-Indicesou may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
-Unless required by applicable law or agreed to in writing, software
-distributed under the License is distributed on an "AS IS" BASIS,
-WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-See the License for the specific language governing permissions and
-limitations under the License. */
-
-#include "paddle/fluid/operators/detection_output_op.h"
-
-namespace ops = paddle::operators;
-REGISTER_OP_CUDA_KERNEL(
- detection_output,
- ops::DetectionOutputKernel,
- ops::DetectionOutputKernel);
diff --git a/paddle/fluid/operators/detection_output_op.h b/paddle/fluid/operators/detection_output_op.h
deleted file mode 100644
index af9081c93436776b6ca6ee7139e340054111e440..0000000000000000000000000000000000000000
--- a/paddle/fluid/operators/detection_output_op.h
+++ /dev/null
@@ -1,167 +0,0 @@
-/* Copyright (c) 2016 PaddlePaddle Authors. All Rights Reserved.
-
- Licensed under the Apache License, Version 2.0 (the "License");
- you may not use this file except in compliance with the License.
- Indicesou may obtain a copy of the License at
-
- http://www.apache.org/licenses/LICENSE-2.0
-
- Unless required by applicable law or agreed to in writing, software
- distributed under the License is distributed on an "AS IS" BASIS,
- WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- See the License for the specific language governing permissions and
- limitations under the License. */
-
-#pragma once
-#include "paddle/fluid/framework/op_registry.h"
-#include "paddle/fluid/framework/tensor.h"
-#include "paddle/fluid/operators/math/detection_util.h"
-#include "paddle/fluid/operators/math/math_function.h"
-#include "paddle/fluid/operators/math/softmax.h"
-#include "paddle/fluid/operators/strided_memcpy.h"
-namespace paddle {
-namespace operators {
-template
-inline void transpose_fun(const framework::ExecutionContext& context,
- const framework::Tensor& src,
- framework::Tensor* dst) {
- int input_nums = src.dims()[0];
- int offset = 0;
- for (int j = 0; j < input_nums; ++j) {
- framework::Tensor in_p_tensor = src.Slice(j, j + 1);
- std::vector shape_vec(
- {in_p_tensor.dims()[0], in_p_tensor.dims()[1], in_p_tensor.dims()[3],
- in_p_tensor.dims()[4], in_p_tensor.dims()[2]});
- framework::DDim shape(framework::make_ddim(shape_vec));
- framework::Tensor in_p_tensor_transpose;
- in_p_tensor_transpose.mutable_data(shape, context.GetPlace());
- std::vector shape_axis({0, 1, 3, 4, 2});
- math::Transpose trans5;
- trans5(context.template device_context(), in_p_tensor,
- &in_p_tensor_transpose, shape_axis);
- auto dst_stride = framework::stride(dst->dims());
- auto src_stride = framework::stride(in_p_tensor_transpose.dims());
- StridedMemcpy(context.device_context(), in_p_tensor_transpose.data(),
- src_stride, in_p_tensor_transpose.dims(), dst_stride,
- dst->data() + offset);
- offset += in_p_tensor_transpose.dims()[4] * src_stride[4];
- }
-}
-template
-class DetectionOutputKernel : public framework::OpKernel {
- public:
- void Compute(const framework::ExecutionContext& context) const override {
- const framework::Tensor* in_loc = context.Input("Loc");
- const framework::Tensor* in_conf = context.Input("Conf");
- const framework::Tensor* in_priorbox =
- context.Input("PriorBox");
- auto* out = context.Output("Out");
- int num_classes = context.template Attr("num_classes");
- int top_k = context.template Attr("top_k");
- int nms_top_k = context.template Attr("nms_top_k");
- int background_label_id = context.template Attr("background_label_id");
- float nms_threshold = context.template Attr("nms_threshold");
- float confidence_threshold =
- context.template Attr("confidence_threshold");
- size_t batch_size = in_conf->dims()[1];
- int conf_sum_size = in_conf->numel();
- // for softmax
- std::vector conf_shape_softmax_vec(
- {conf_sum_size / num_classes, num_classes});
- framework::DDim conf_shape_softmax(
- framework::make_ddim(conf_shape_softmax_vec));
- // for knchw => nhwc
- std::vector loc_shape_vec({1, in_loc->dims()[1], in_loc->dims()[3],
- in_loc->dims()[4],
- in_loc->dims()[2] * in_loc->dims()[0]});
- std::vector conf_shape_vec(
- {1, in_conf->dims()[1], in_conf->dims()[3], in_conf->dims()[4],
- in_conf->dims()[2] * in_conf->dims()[0]});
- framework::DDim loc_shape(framework::make_ddim(loc_shape_vec));
- framework::DDim conf_shape(framework::make_ddim(conf_shape_vec));
- framework::Tensor loc_tensor;
- framework::Tensor conf_tensor;
- loc_tensor.mutable_data(loc_shape, context.GetPlace());
- conf_tensor.mutable_data(conf_shape, context.GetPlace());
- // for cpu
- framework::Tensor loc_cpu;
- framework::Tensor conf_cpu;
- framework::Tensor priorbox_cpu;
- const T* priorbox_data = in_priorbox->data();
- transpose_fun(context, *in_loc, &loc_tensor);
- transpose_fun(context, *in_conf, &conf_tensor);
- conf_tensor.Resize(conf_shape_softmax);
- math::SoftmaxFunctor()(
- context.template device_context(), &conf_tensor,
- &conf_tensor);
- T* loc_data = loc_tensor.data();
- T* conf_data = conf_tensor.data();
- if (platform::is_gpu_place(context.GetPlace())) {
- loc_cpu.mutable_data(loc_tensor.dims(), platform::CPUPlace());
- framework::TensorCopy(loc_tensor, platform::CPUPlace(),
- context.device_context(), &loc_cpu);
- loc_data = loc_cpu.data();
- conf_cpu.mutable_data(conf_tensor.dims(), platform::CPUPlace());
- framework::TensorCopy(conf_tensor, platform::CPUPlace(),
- context.device_context(), &conf_cpu);
- conf_data = conf_cpu.data();
- priorbox_cpu.mutable_data(in_priorbox->dims(), platform::CPUPlace());
- framework::TensorCopy(*in_priorbox, platform::CPUPlace(),
- context.device_context(), &priorbox_cpu);
- priorbox_data = priorbox_cpu.data();
- }
- // get decode bboxes
- size_t num_priors = in_priorbox->numel() / 8;
- std::vector>> all_decoded_bboxes;
- for (size_t n = 0; n < batch_size; ++n) {
- std::vector> decoded_bboxes;
- for (size_t i = 0; i < num_priors; ++i) {
- size_t prior_offset = i * 8;
- size_t loc_pred_offset = n * num_priors * 4 + i * 4;
- std::vector> prior_bbox_vec;
- math::GetBBoxFromPriorData(priorbox_data + prior_offset, 1,
- prior_bbox_vec);
- std::vector> prior_bbox_var;
- math::GetBBoxVarFromPriorData(priorbox_data + prior_offset, 1,
- prior_bbox_var);
- std::vector loc_pred_data;
- for (size_t j = 0; j < 4; ++j)
- loc_pred_data.push_back(*(loc_data + loc_pred_offset + j));
- math::BBox bbox = math::DecodeBBoxWithVar(
- prior_bbox_vec[0], prior_bbox_var[0], loc_pred_data);
- decoded_bboxes.push_back(bbox);
- }
- all_decoded_bboxes.push_back(decoded_bboxes);
- }
- std::vector