diff --git a/cmake/external/mkldnn.cmake b/cmake/external/mkldnn.cmake
index 5759e5c489724332793bf103b7aacf7ffb068611..83c1cf1457dfc2d98039b7a03e8a569a0352991a 100644
--- a/cmake/external/mkldnn.cmake
+++ b/cmake/external/mkldnn.cmake
@@ -53,7 +53,7 @@ ExternalProject_Add(
${EXTERNAL_PROJECT_LOG_ARGS}
DEPENDS ${MKLDNN_DEPENDS}
GIT_REPOSITORY "https://github.com/01org/mkl-dnn.git"
- GIT_TAG "v0.11"
+ GIT_TAG "v0.14"
PREFIX ${MKLDNN_SOURCES_DIR}
UPDATE_COMMAND ""
CMAKE_ARGS -DCMAKE_INSTALL_PREFIX=${MKLDNN_INSTALL_DIR}
diff --git a/doc/fluid/build_and_install/paddleci.png b/doc/fluid/build_and_install/paddleci.png
new file mode 120000
index 0000000000000000000000000000000000000000..c3eb1457acc77cab9360e654240d1e8f548035b4
--- /dev/null
+++ b/doc/fluid/build_and_install/paddleci.png
@@ -0,0 +1 @@
+../../v2/build_and_install/paddleci.png
\ No newline at end of file
diff --git a/doc/fluid/design/motivation/refactorization.md b/doc/fluid/design/motivation/refactorization.md
index 4e1d660cef6369f04db8e1e83360f6af25259f96..ad9d0f6d3f3ad9884f108826e8410871fffd51bf 100644
--- a/doc/fluid/design/motivation/refactorization.md
+++ b/doc/fluid/design/motivation/refactorization.md
@@ -125,12 +125,12 @@ Compile Time -> IR -> Runtime
## Operator/OpWithKernel/OpKernel
-![class_diagram](http://api.paddlepaddle.org/graphviz?dot=https://gist.githubusercontent.com/reyoung/53df507f6749762675dff3e7ce53372f/raw/49caf1fb70820fb4a6c217634317c9306f361f36/op_op_with_kern_class_diagram.dot)
+![class_diagram](https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/doc/fluid/images/op_op_with_kern_class_diagram.dot)
---
## Operator
-![class_diagram](http://api.paddlepaddle.org/graphviz?dot=https://gist.githubusercontent.com/reyoung/53df507f6749762675dff3e7ce53372f/raw/dd598e8f1976f5759f58af5e5ef94738a6b2e661/op.dot)
+![class_diagram](https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/doc/fluid/images/op.dot)
* `Operator` is the fundamental building block of the user interface.
* Operator stores input/output variable names and attributes.
@@ -141,7 +141,7 @@ Compile Time -> IR -> Runtime
## OpWithKernel/Kernel
-![class_diagram](http://api.paddlepaddle.org/graphviz?dot=https://gist.githubusercontent.com/reyoung/53df507f6749762675dff3e7ce53372f/raw/9d7f4eba185cf41c8e2fbfb40ae21890dbddcd39/op_with_kernel.dot)
+![class_diagram](https://raw.githubusercontent.com/PaddlePaddle/Paddle/develop/doc/fluid/images/op_with_kernel.dot)
* `OpWithKernel` inherits `Operator`.
* `OpWithKernel` contains a Kernel map.
diff --git a/doc/fluid/images/op.dot b/doc/fluid/images/op.dot
new file mode 100644
index 0000000000000000000000000000000000000000..c8ad839cb88788e9b5906402257cc7bbc3ddcb54
--- /dev/null
+++ b/doc/fluid/images/op.dot
@@ -0,0 +1,4 @@
+digraph sample {
+ graph [rankdir=TD]; node [shape=record];
+ op [label="{Operator| InferShape()=0\lRun()=0\l | map<string, string[]> inputs_\lmap<string, string[]> outputs_ \l AttributeMap attrs_\l}"];
+}
\ No newline at end of file
diff --git a/doc/fluid/images/op_op_with_kern_class_diagram.dot b/doc/fluid/images/op_op_with_kern_class_diagram.dot
new file mode 100644
index 0000000000000000000000000000000000000000..8f24e9ea83acf879c7008f2d97113c0a4cc111c3
--- /dev/null
+++ b/doc/fluid/images/op_op_with_kern_class_diagram.dot
@@ -0,0 +1,38 @@
+digraph sample {
+ graph [rankdir=TD]; node [shape=record];
+ op [label="{Operator| InferShape()=0\lRun()=0\l | map<string, string[]> inputs_\lmap<string, string[]> outputs_ \l AttributeMap attrs_\l}"];
+ op_with_kern [label="{OpWithKernel | InferShape()=0\lRun()\l | map<OpKernelKey,OpKernel>kernels_ }"]
+ op_kernel [label="{OpKernel | Compute()=0}"]
+ op_kernel_key [label="{OpKernelKey| Place place\n...}"]
+
+ op -> op_with_kern [dir=back, arrowtail=onormal]
+ op_with_kern -> op_kernel [arrowhead=vee, label="contains many"]
+
+ {
+ rank=same;
+ op_with_kern
+ op_kernel
+ }
+
+ op_kernel -> op_kernel_key [style=invis]
+
+ {
+ rank=same;
+ op_kernel
+ op_kernel_key
+ }
+
+ op_with_kern -> op_kernel_key [arrowhead=vee, label ="\nas map key"]
+
+ mul_op [label="MulOp"]
+ op_with_kern -> mul_op [dir=back, arrowtail=onormal]
+ mul_kernel [label="template <typename Place>\lclass MulOpKernel\l"]
+ op_kernel -> mul_kernel [dir=back, arrowtail=onormal]
+ mul_op -> mul_kernel [arrowhead=vee, label="register many"]
+
+ {
+ rank=same;
+ mul_op;
+ mul_kernel;
+ }
+}
\ No newline at end of file
diff --git a/doc/fluid/images/op_with_kernel.dot b/doc/fluid/images/op_with_kernel.dot
new file mode 100644
index 0000000000000000000000000000000000000000..4f5af4f7b5f5a69693a058c99eb658900136077a
--- /dev/null
+++ b/doc/fluid/images/op_with_kernel.dot
@@ -0,0 +1,26 @@
+digraph sample {
+ graph [rankdir=TD]; node [shape=record];
+ op [label="{Operator}"];
+ op_with_kern [label="{OpWithKernel | InferShape()=0\lRun()\l | map<OpKernelKey,OpKernel>kernels_ }"]
+ op_kernel [label="{OpKernel | Compute()=0}"]
+ op_kernel_key [label="{OpKernelKey| Place place\n...}"]
+
+ op -> op_with_kern [dir=back, arrowtail=onormal]
+ op_with_kern -> op_kernel [arrowhead=vee, label="contains many"]
+
+ {
+ rank=same;
+ op_with_kern
+ op_kernel
+ }
+
+ op_kernel -> op_kernel_key [style=invis]
+
+ {
+ rank=same;
+ op_kernel
+ op_kernel_key
+ }
+
+ op_with_kern -> op_kernel_key [arrowhead=vee, label ="\nas map key"]
+}
\ No newline at end of file
diff --git a/doc/v2/api/config/layer.rst b/doc/v2/api/config/layer.rst
index 29388f5005bf779a1bfa63c0d46d35996c0c792d..1a6496968cae1fef88142ba9ca3f9e63a81b196d 100644
--- a/doc/v2/api/config/layer.rst
+++ b/doc/v2/api/config/layer.rst
@@ -142,7 +142,7 @@ gated_unit
-----------
.. autoclass:: paddle.v2.layer.gated_unit
:noindex:
-
+
Recurrent Layer Group
=====================
@@ -354,7 +354,7 @@ dropout
--------
.. autoclass:: paddle.v2.layer.dropout
:noindex:
-
+
dot_prod
---------
.. autoclass:: paddle.v2.layer.dot_prod
@@ -460,6 +460,11 @@ multi_binary_label_cross_entropy_cost
.. autoclass:: paddle.v2.layer.multi_binary_label_cross_entropy_cost
:noindex:
+classification_cost
+-------------------
+.. autoclass:: paddle.v2.layer.classification_cost
+ :noindex:
+
huber_regression_cost
-------------------------
.. autoclass:: paddle.v2.layer.huber_regression_cost
@@ -534,7 +539,7 @@ detection_output
----------------
.. autoclass:: paddle.v2.layer.detection_output
:noindex:
-
+
Check Layer
============
diff --git a/doc/v2/design/mkl/mkldnn.md b/doc/v2/design/mkl/mkldnn.md
index 1bd2e7bc34ee79eb753b3520d97e5e7beca89b0b..bd5bcf6f67168c21cebb046a629b948d1661e75c 100644
--- a/doc/v2/design/mkl/mkldnn.md
+++ b/doc/v2/design/mkl/mkldnn.md
@@ -5,7 +5,7 @@
充分展现英特尔平台的优势,有效提升PaddlePaddle在英特尔架构上的性能。
-
+
Figure 1. PaddlePaddle on IA
@@ -42,16 +42,43 @@ Figure 1. PaddlePaddle on IA
MKL,MKLML以及MKL-DNN三者关系如下表:
-| Name | Open Source | License | Descriptions |
-| :---------- | :--------------- | :---------- | :------------ |
-| MKL | No | Proprietary | Accelerate math processing routines |
-| MKLML | No | Proprietary | Small package of MKL, especially for Machine Learning |
-| MKL-DNN | Yes | Apache 2.0 | Accelerate primitives processing routines especially for Deep Neural Networks |
+<table>
+<thead>
+<tr>
+<th>Name</th>
+<th>Open Source</th>
+<th>License</th>
+<th>Descriptions</th>
+</tr>
+</thead>
+<tbody>
+<tr>
+<td>MKL</td>
+<td>No</td>
+<td>Proprietary</td>
+<td>Accelerate math processing routines</td>
+</tr>
+<tr>
+<td>MKLML</td>
+<td>No</td>
+<td>Proprietary</td>
+<td>Small package of MKL, especially for Machine Learning</td>
+</tr>
+<tr>
+
+<td>MKL-DNN</td>
+<td>Yes</td>
+<td>Apache 2.0</td>
+<td>Accelerate primitives processing routines especially for Deep Neural Networks</td>
+</tr>
+</tbody>
+</table>
+
MKLML可以与MKL-DNN共同使用,以此达到最好的性能。
-
+
Figure 2. PaddlePaddle with MKL Engines
@@ -103,7 +130,7 @@ MKL-DNN的库目前只有动态库`libmkldnn.so`。
所以我们定义了一个`MKLDNNMatrix`用于管理MKL-DNN数据的不同格式以及相互之间的转换。
-
+
Figure 3. MKLDNNMatrix
@@ -113,7 +140,7 @@ Figure 3. MKLDNNMatrix
子类只需要使用定义好的接口,实现具体的函数功能即可。
-
+
Figure 4. MKLDNNLayer
@@ -150,7 +177,7 @@ Figure 4. MKLDNNLayer
所以整体上,在实现每个子类的时候就不需要关心分支的事情了。
-
+
Figure 5. Merge Gradients
diff --git a/doc/v2/images/FullyConnected.jpg b/doc/v2/images/FullyConnected.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..b2241f401434e527f95ee4e0e541a3f2ff78fd1e
Binary files /dev/null and b/doc/v2/images/FullyConnected.jpg differ
diff --git a/doc/v2/images/add_security_group.png b/doc/v2/images/add_security_group.png
new file mode 100644
index 0000000000000000000000000000000000000000..bd34f46c9b0ada7027fd53e553e7d033255d25fc
Binary files /dev/null and b/doc/v2/images/add_security_group.png differ
diff --git a/doc/v2/images/bi_lstm.jpg b/doc/v2/images/bi_lstm.jpg
new file mode 100644
index 0000000000000000000000000000000000000000..adec1606d64d6e35ffe7e62abfa9a09309b05c84
Binary files /dev/null and b/doc/v2/images/bi_lstm.jpg differ
diff --git a/doc/v2/images/checkpointing.png b/doc/v2/images/checkpointing.png
new file mode 100644
index 0000000000000000000000000000000000000000..c221e8474f90f37e31416cbb19c9452207a0d14c
Binary files /dev/null and b/doc/v2/images/checkpointing.png differ
diff --git a/doc/v2/images/create_efs.png b/doc/v2/images/create_efs.png
new file mode 100644
index 0000000000000000000000000000000000000000..e5f1526033d1daf401700989af1d25919bcb7675
Binary files /dev/null and b/doc/v2/images/create_efs.png differ
diff --git a/doc/v2/images/csr.png b/doc/v2/images/csr.png
new file mode 100644
index 0000000000000000000000000000000000000000..3dc10b8de4f6d3f517624956b1694b689405a031
Binary files /dev/null and b/doc/v2/images/csr.png differ
diff --git a/doc/v2/images/data_dispatch.png b/doc/v2/images/data_dispatch.png
new file mode 100644
index 0000000000000000000000000000000000000000..5bdcc24d6a6d193cb014f8c38b362451fded5e54
Binary files /dev/null and b/doc/v2/images/data_dispatch.png differ
diff --git a/doc/v2/images/dataset.graffle b/doc/v2/images/dataset.graffle
new file mode 100644
index 0000000000000000000000000000000000000000..c10a423ed16a23229a9ee33d11bfc82bb59646c8
Binary files /dev/null and b/doc/v2/images/dataset.graffle differ
diff --git a/doc/v2/images/dataset.png b/doc/v2/images/dataset.png
new file mode 100644
index 0000000000000000000000000000000000000000..2fb7f1cce3b6dd21489392557826e95a9f207c34
Binary files /dev/null and b/doc/v2/images/dataset.png differ
diff --git a/doc/v2/images/doc_en.png b/doc/v2/images/doc_en.png
new file mode 100644
index 0000000000000000000000000000000000000000..ed6b9178fba91a3bdf45ae797a9924f84146fbc8
Binary files /dev/null and b/doc/v2/images/doc_en.png differ
diff --git a/doc/v2/images/efs_mount.png b/doc/v2/images/efs_mount.png
new file mode 100644
index 0000000000000000000000000000000000000000..0f9e3cab98445707e5e9baa18ddabe15cdf04576
Binary files /dev/null and b/doc/v2/images/efs_mount.png differ
diff --git a/doc/v2/images/encoder-decoder-attention-model.png b/doc/v2/images/encoder-decoder-attention-model.png
new file mode 100644
index 0000000000000000000000000000000000000000..79f911d4ba12ac0c0d1a936c9df639c302786914
Binary files /dev/null and b/doc/v2/images/encoder-decoder-attention-model.png differ
diff --git a/doc/v2/images/engine.png b/doc/v2/images/engine.png
new file mode 100644
index 0000000000000000000000000000000000000000..1f5f65c2cc765a514a3ba9e7b7f468e1dc4b0c3b
Binary files /dev/null and b/doc/v2/images/engine.png differ
diff --git a/doc/v2/images/file_storage.graffle b/doc/v2/images/file_storage.graffle
new file mode 100644
index 0000000000000000000000000000000000000000..50a17e70fa255495337c529a3bf12a5c0024a5be
Binary files /dev/null and b/doc/v2/images/file_storage.graffle differ
diff --git a/doc/v2/images/file_storage.png b/doc/v2/images/file_storage.png
new file mode 100644
index 0000000000000000000000000000000000000000..fccb4e3e7e738224c7f1584326bd5f351ce799aa
Binary files /dev/null and b/doc/v2/images/file_storage.png differ
diff --git a/doc/v2/images/glossary_rnn.dot b/doc/v2/images/glossary_rnn.dot
new file mode 100644
index 0000000000000000000000000000000000000000..2cd0fb1820c44b0e8e0b869f9d39fcad27efa758
--- /dev/null
+++ b/doc/v2/images/glossary_rnn.dot
@@ -0,0 +1,42 @@
+digraph G{
+ subgraph cluster_timestep0 {
+ label="recurrent timestep i-1"
+ bgcolor=lightgray
+ node [style=filled,color=white]
+ fc0_0 [label="fc 0"]
+ fc0_1 [label="fc 1"]
+ fc0_2 [label="fc 2"]
+
+ fc0_0 -> fc0_1
+ fc0_1 -> fc0_2
+ }
+
+ subgraph cluster_timestep1 {
+ label="recurrent timestep i"
+ node [style=filled];
+ fc1_0 [label="fc 0"]
+ fc1_1 [label="fc 1"]
+ fc1_2 [label="fc 2"]
+ color=blue
+
+ fc1_0 -> fc1_1
+ fc1_1 -> fc1_2
+ }
+
+ subgraph cluster_timestep2 {
+ label="recurrent timestep i+1"
+ bgcolor=lightgray
+ node [style=filled,color=white]
+ fc2_0 [label="fc 0"]
+ fc2_1 [label="fc 1"]
+ fc2_2 [label="fc 2"]
+
+ fc2_0 -> fc2_1
+ fc2_1 -> fc2_2
+ }
+
+
+ fc0_1 -> fc1_1 [style="dotted" constraint=false]
+ fc1_1 -> fc2_1 [style="dotted" constraint=false]
+
+}
\ No newline at end of file
diff --git a/doc/v2/images/glossary_rnn_with_memory.dot b/doc/v2/images/glossary_rnn_with_memory.dot
new file mode 100644
index 0000000000000000000000000000000000000000..0f101ec2d8f15aec76c57f328046b6b55cf0c7eb
--- /dev/null
+++ b/doc/v2/images/glossary_rnn_with_memory.dot
@@ -0,0 +1,48 @@
+digraph G{
+ subgraph cluster_timestep0 {
+ label="recurrent timestep i-1"
+ bgcolor=lightgray
+ node [style=filled,color=white]
+ fc0_0 [label="fc 0"]
+ fc0_1 [label="fc 1"]
+ fc0_2 [label="fc 2"]
+ m0 [label="memory"]
+ fc0_0 -> fc0_1
+ fc0_1 -> fc0_2
+ fc0_1 -> m0
+ m0 -> fc0_1
+ }
+
+ subgraph cluster_timestep1 {
+ label="recurrent timestep i"
+ node [style=filled];
+ fc1_0 [label="fc 0"]
+ fc1_1 [label="fc 1"]
+ fc1_2 [label="fc 2"]
+ m1 [label="memory"]
+ color=blue
+ fc1_0 -> fc1_1
+ fc1_1 -> fc1_2
+ fc1_1 -> m1
+ m1 -> fc1_1
+ }
+
+ subgraph cluster_timestep2 {
+ label="recurrent timestep i+1"
+ bgcolor=lightgray
+ node [style=filled,color=white]
+ fc2_0 [label="fc 0"]
+ fc2_1 [label="fc 1"]
+ fc2_2 [label="fc 2"]
+ m2 [label="memory"]
+ fc2_0 -> fc2_1
+ fc2_1 -> fc2_2
+ fc2_1 -> m2
+ m2 -> fc2_1
+ }
+
+
+ m0 -> m1 [style="dotted" constraint=false]
+ m1 -> m2 [style="dotted" constraint=false]
+
+}
\ No newline at end of file
diff --git a/doc/v2/images/gradients.png b/doc/v2/images/gradients.png
new file mode 100644
index 0000000000000000000000000000000000000000..f031bcf8e4cec14e63075b8b9d2c7bbd9f1b1a3c
Binary files /dev/null and b/doc/v2/images/gradients.png differ
diff --git a/doc/v2/images/init_lock.graffle b/doc/v2/images/init_lock.graffle
new file mode 100644
index 0000000000000000000000000000000000000000..fa9149f21b1311eed48ef72ec55e556559d0fc94
Binary files /dev/null and b/doc/v2/images/init_lock.graffle differ
diff --git a/doc/v2/images/init_lock.png b/doc/v2/images/init_lock.png
new file mode 100644
index 0000000000000000000000000000000000000000..92404ee6d6c0f9a7727952bae3c869ba338ecd7f
Binary files /dev/null and b/doc/v2/images/init_lock.png differ
diff --git a/doc/v2/images/k8s-paddle-arch.png b/doc/v2/images/k8s-paddle-arch.png
new file mode 100644
index 0000000000000000000000000000000000000000..b3800c4fe81302d35e49f7dbacb9221c4dfa5cde
Binary files /dev/null and b/doc/v2/images/k8s-paddle-arch.png differ
diff --git a/doc/v2/images/layers.png b/doc/v2/images/layers.png
new file mode 100644
index 0000000000000000000000000000000000000000..306f79b7a844610915eb8944128f57d2b7a3065a
Binary files /dev/null and b/doc/v2/images/layers.png differ
diff --git a/doc/v2/images/managed_policy.png b/doc/v2/images/managed_policy.png
new file mode 100644
index 0000000000000000000000000000000000000000..c7ecda555b81d7750e9292a9ab72d2f517f76a2a
Binary files /dev/null and b/doc/v2/images/managed_policy.png differ
diff --git a/doc/v2/images/matrix.png b/doc/v2/images/matrix.png
new file mode 100644
index 0000000000000000000000000000000000000000..c33ce9cf0335e47cc8c1253304d0fe179186e6f2
Binary files /dev/null and b/doc/v2/images/matrix.png differ
diff --git a/doc/v2/images/nvvp1.png b/doc/v2/images/nvvp1.png
new file mode 100644
index 0000000000000000000000000000000000000000..1af23ac3c52929b2b0645d2f9fa4d4c6db1f6e77
Binary files /dev/null and b/doc/v2/images/nvvp1.png differ
diff --git a/doc/v2/images/nvvp2.png b/doc/v2/images/nvvp2.png
new file mode 100644
index 0000000000000000000000000000000000000000..177c9db708da6863d1075f3e615f5962dbe18b29
Binary files /dev/null and b/doc/v2/images/nvvp2.png differ
diff --git a/doc/v2/images/nvvp3.png b/doc/v2/images/nvvp3.png
new file mode 100644
index 0000000000000000000000000000000000000000..d8f393667d6569b6f1e61ffccac43fae5888b6db
Binary files /dev/null and b/doc/v2/images/nvvp3.png differ
diff --git a/doc/v2/images/nvvp4.png b/doc/v2/images/nvvp4.png
new file mode 100644
index 0000000000000000000000000000000000000000..51f2f3e183295de6cf8ddaf2b3b8a0862aa35f01
Binary files /dev/null and b/doc/v2/images/nvvp4.png differ
diff --git a/doc/v2/images/overview.png b/doc/v2/images/overview.png
new file mode 100644
index 0000000000000000000000000000000000000000..8fb7bbb9dd654bf363d701d0c8cd4a557043d188
Binary files /dev/null and b/doc/v2/images/overview.png differ
diff --git a/doc/v2/images/paddle-cloud-in-data-center.png b/doc/v2/images/paddle-cloud-in-data-center.png
new file mode 100644
index 0000000000000000000000000000000000000000..da5d1a77562480ad1d886f5f21dbd84001d3d508
Binary files /dev/null and b/doc/v2/images/paddle-cloud-in-data-center.png differ
diff --git a/doc/v2/images/paddle-etcd.graffle b/doc/v2/images/paddle-etcd.graffle
new file mode 100644
index 0000000000000000000000000000000000000000..f973dc9b9dbf72e9bc31e2d32822916cd281f8d9
Binary files /dev/null and b/doc/v2/images/paddle-etcd.graffle differ
diff --git a/doc/v2/images/paddle-etcd.png b/doc/v2/images/paddle-etcd.png
new file mode 100644
index 0000000000000000000000000000000000000000..57981ceb4b94f0f7d6dfa63f3d28c0402bf9cc31
Binary files /dev/null and b/doc/v2/images/paddle-etcd.png differ
diff --git a/doc/v2/images/paddle-model-sharding.graffle b/doc/v2/images/paddle-model-sharding.graffle
new file mode 100644
index 0000000000000000000000000000000000000000..fba30f0ca2b47f0d202a432821d95e55aac37ec8
Binary files /dev/null and b/doc/v2/images/paddle-model-sharding.graffle differ
diff --git a/doc/v2/images/paddle-model-sharding.png b/doc/v2/images/paddle-model-sharding.png
new file mode 100644
index 0000000000000000000000000000000000000000..8c3f6724ef46c6527e63a4cd8cb0b50fe0167124
Binary files /dev/null and b/doc/v2/images/paddle-model-sharding.png differ
diff --git a/doc/v2/images/paddle-ps-0.png b/doc/v2/images/paddle-ps-0.png
new file mode 100644
index 0000000000000000000000000000000000000000..47ef32806f182cab003da77f1556823b3f6d1721
Binary files /dev/null and b/doc/v2/images/paddle-ps-0.png differ
diff --git a/doc/v2/images/paddle-ps-1.png b/doc/v2/images/paddle-ps-1.png
new file mode 100644
index 0000000000000000000000000000000000000000..f3125db73096c52bac6e7c60e1675552857c0774
Binary files /dev/null and b/doc/v2/images/paddle-ps-1.png differ
diff --git a/doc/v2/images/paddle-ps.graffle b/doc/v2/images/paddle-ps.graffle
new file mode 100644
index 0000000000000000000000000000000000000000..0e536ffdd91cd696008b4c01bad3cb53edebdc16
Binary files /dev/null and b/doc/v2/images/paddle-ps.graffle differ
diff --git a/doc/v2/images/paddle-task-queues.graffle b/doc/v2/images/paddle-task-queues.graffle
new file mode 100644
index 0000000000000000000000000000000000000000..4263ed8bfd2ef0e55058828bf23f2fac3595e5fd
Binary files /dev/null and b/doc/v2/images/paddle-task-queues.graffle differ
diff --git a/doc/v2/images/paddle-task-queues.png b/doc/v2/images/paddle-task-queues.png
new file mode 100644
index 0000000000000000000000000000000000000000..5f980266795776752cebd0c346b85c4a75a47780
Binary files /dev/null and b/doc/v2/images/paddle-task-queues.png differ
diff --git a/doc/v2/images/paddle-task-states.graffle b/doc/v2/images/paddle-task-states.graffle
new file mode 100644
index 0000000000000000000000000000000000000000..cf1a0b9246d9386a949d2dbb8c32fe84f72eea83
Binary files /dev/null and b/doc/v2/images/paddle-task-states.graffle differ
diff --git a/doc/v2/images/paddle-task-states.png b/doc/v2/images/paddle-task-states.png
new file mode 100644
index 0000000000000000000000000000000000000000..4ae43cb66c071aee9eb90d875e2373b29af9c3e0
Binary files /dev/null and b/doc/v2/images/paddle-task-states.png differ
diff --git a/doc/v2/images/ps_cn.png b/doc/v2/images/ps_cn.png
new file mode 100644
index 0000000000000000000000000000000000000000..f9525739cc8bc6506adde642aafa0a85ae3ebebc
Binary files /dev/null and b/doc/v2/images/ps_cn.png differ
diff --git a/doc/v2/images/ps_en.png b/doc/v2/images/ps_en.png
new file mode 100644
index 0000000000000000000000000000000000000000..6537d3d56589ca9f19a77a50a970e4b5275e6ce0
Binary files /dev/null and b/doc/v2/images/ps_en.png differ
diff --git a/doc/v2/images/pserver_and_trainer.png b/doc/v2/images/pserver_and_trainer.png
new file mode 100644
index 0000000000000000000000000000000000000000..f41fe48920590333ad332bb51eb18e03dc251541
Binary files /dev/null and b/doc/v2/images/pserver_and_trainer.png differ
diff --git a/doc/v2/images/pserver_init.graffle b/doc/v2/images/pserver_init.graffle
new file mode 100644
index 0000000000000000000000000000000000000000..5f3f1f52be8aa7f9049a8fcd6b7c93c8560c1676
Binary files /dev/null and b/doc/v2/images/pserver_init.graffle differ
diff --git a/doc/v2/images/pserver_init.png b/doc/v2/images/pserver_init.png
new file mode 100644
index 0000000000000000000000000000000000000000..dfe491ff98dd7db1c336093c80964a260df2cd90
Binary files /dev/null and b/doc/v2/images/pserver_init.png differ
diff --git a/doc/v2/images/route53_create_recordset.png b/doc/v2/images/route53_create_recordset.png
new file mode 100644
index 0000000000000000000000000000000000000000..34e476c7beac30fcdde13fccc4cc8d08b4be3d35
Binary files /dev/null and b/doc/v2/images/route53_create_recordset.png differ
diff --git a/doc/v2/images/route53_create_zone.png b/doc/v2/images/route53_create_zone.png
new file mode 100644
index 0000000000000000000000000000000000000000..25b7ddb831c5cba97f4b2edddd27da3234d621af
Binary files /dev/null and b/doc/v2/images/route53_create_zone.png differ
diff --git a/doc/v2/images/sequence_data.png b/doc/v2/images/sequence_data.png
new file mode 100644
index 0000000000000000000000000000000000000000..6e47a46b8955dfe977e85898fe3c9f33ed28de7e
Binary files /dev/null and b/doc/v2/images/sequence_data.png differ
diff --git a/doc/v2/images/simple_full_hierarchical_recurrent.dot b/doc/v2/images/simple_full_hierarchical_recurrent.dot
new file mode 100644
index 0000000000000000000000000000000000000000..ff278a0323bb2c3ef07bf6f016a3a8df05783581
--- /dev/null
+++ b/doc/v2/images/simple_full_hierarchical_recurrent.dot
@@ -0,0 +1,30 @@
+digraph G {
+ rankdir=LR;
+
+ subgraph cluster_t0 {
+ a [label="4"]
+ b [label="5"]
+ c [label="2"]
+ }
+
+ subgraph cluster_t1 {
+ d [label="0"]
+ e [label="9"]
+ }
+
+ subgraph cluster_t2 {
+ f [label="8"]
+ g [label="1"]
+ h [label="4"]
+ }
+
+ a -> b;
+ b -> c;
+ c -> d [constraint=false];
+
+ d -> e;
+ e -> f [constraint=false];
+
+ f -> g;
+ g -> h;
+}
\ No newline at end of file
diff --git a/doc/v2/images/simple_full_recurrent.dot b/doc/v2/images/simple_full_recurrent.dot
new file mode 100644
index 0000000000000000000000000000000000000000..cee281fbac993afbd0cc3416570f95965cdf0a59
--- /dev/null
+++ b/doc/v2/images/simple_full_recurrent.dot
@@ -0,0 +1,19 @@
+digraph G {
+ rankdir=LR;
+ a [label="4"]
+ b [label="5"]
+ c [label="2"]
+ d [label="0"]
+ e [label="9"]
+ f [label="8"]
+ g [label="1"]
+ h [label="4"]
+
+ a -> b;
+ b -> c;
+ c -> d;
+ d -> e;
+ e -> f;
+ f -> g;
+ g -> h;
+}
\ No newline at end of file
diff --git a/doc/v2/images/submit-job.graffle b/doc/v2/images/submit-job.graffle
new file mode 100644
index 0000000000000000000000000000000000000000..677cdfb6d9a32168bf71729eb841fa1ca0dd31d6
Binary files /dev/null and b/doc/v2/images/submit-job.graffle differ
diff --git a/doc/v2/images/submit-job.png b/doc/v2/images/submit-job.png
new file mode 100644
index 0000000000000000000000000000000000000000..3046a460a7ba708079e88a560debaa215a694680
Binary files /dev/null and b/doc/v2/images/submit-job.png differ
diff --git a/doc/v2/images/trainer.graffle b/doc/v2/images/trainer.graffle
new file mode 100644
index 0000000000000000000000000000000000000000..43415ed8cf61a5acfa34f8e56b9577f338dbf254
Binary files /dev/null and b/doc/v2/images/trainer.graffle differ
diff --git a/doc/v2/images/trainer.png b/doc/v2/images/trainer.png
new file mode 100644
index 0000000000000000000000000000000000000000..6537d3d56589ca9f19a77a50a970e4b5275e6ce0
Binary files /dev/null and b/doc/v2/images/trainer.png differ
diff --git a/doc/v2/images/trainer_cn.png b/doc/v2/images/trainer_cn.png
new file mode 100644
index 0000000000000000000000000000000000000000..f9525739cc8bc6506adde642aafa0a85ae3ebebc
Binary files /dev/null and b/doc/v2/images/trainer_cn.png differ
diff --git a/doc/v2/images/worker_security_group.png b/doc/v2/images/worker_security_group.png
new file mode 100644
index 0000000000000000000000000000000000000000..57eb0265a34ad4223b69600d2a3dd355482e0bf5
Binary files /dev/null and b/doc/v2/images/worker_security_group.png differ
diff --git a/doc/v2/images/workflow_of_CAPI.png b/doc/v2/images/workflow_of_CAPI.png
new file mode 100644
index 0000000000000000000000000000000000000000..a4399ade048b3fe10d2d9c714bc34333ca068edb
Binary files /dev/null and b/doc/v2/images/workflow_of_CAPI.png differ
diff --git a/paddle/fluid/framework/details/fetch_op_handle.cc b/paddle/fluid/framework/details/fetch_op_handle.cc
index 1e8ca20b51d43554cf1898b41b31c27b90e6c642..a3cae8c64cdff8594c8971b0458c443f54375f11 100644
--- a/paddle/fluid/framework/details/fetch_op_handle.cc
+++ b/paddle/fluid/framework/details/fetch_op_handle.cc
@@ -49,7 +49,9 @@ void FetchOpHandle::RunImpl() {
platform::DeviceContextPool::Instance().Get(platform::CPUPlace());
for (auto *input : inputs_) {
auto *var = static_cast<VarHandle *>(input);
- var->generated_op_->Wait(cpu_ctx);
+ if (var->generated_op_) {
+ var->generated_op_->Wait(cpu_ctx);
+ }
}
tensors_.resize(inputs_.size());
auto *var_handle = static_cast<VarHandle *>(inputs_[0]);
diff --git a/paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc b/paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc
index b055bb48f608c9fd9cc671d175cb463d25dc489b..16aa5d067ab7a222af8fbb6ca8ec18222ecd799b 100644
--- a/paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc
+++ b/paddle/fluid/framework/details/nccl_all_reduce_op_handle.cc
@@ -36,7 +36,9 @@ void NCCLAllReduceOpHandle::RunImpl() {
// Wait input done
for (auto *in : inputs_) {
auto &p = static_cast<VarHandle *>(in)->place_;
- in->generated_op_->Wait(dev_ctxes_[p]);
+ if (in->generated_op_) {
+ in->generated_op_->Wait(dev_ctxes_[p]);
+ }
}
auto &var_name = static_cast<VarHandle *>(this->inputs_[0])->name_;
diff --git a/paddle/fluid/framework/details/send_op_handle.cc b/paddle/fluid/framework/details/send_op_handle.cc
index 0763f92171e7813ec0ee8ca4f3aa42b76205130a..bd97c5260dbba935e422793e0aa6aac8b6875627 100644
--- a/paddle/fluid/framework/details/send_op_handle.cc
+++ b/paddle/fluid/framework/details/send_op_handle.cc
@@ -32,7 +32,9 @@ void SendOpHandle::RunImpl() {
if (in->DebugString() == "dummy") { // HACK
continue;
}
- in->generated_op_->Wait(dev_ctxes_[p]);
+ if (in->generated_op_) {
+ in->generated_op_->Wait(dev_ctxes_[p]);
+ }
}
auto &tmp_scope = local_scope_->FindVar(kLocalExecScopeName)->Get<Scope *>();
// FIXME(wuyi): can not use RunAndRecordEvent here, for it will cause dead
diff --git a/paddle/fluid/inference/CMakeLists.txt b/paddle/fluid/inference/CMakeLists.txt
index 50f635a41a99b2ae292d13afde5637a3bf4e6f8c..b98aeed8a0aaabfd39560fad3c074a6668b4f024 100644
--- a/paddle/fluid/inference/CMakeLists.txt
+++ b/paddle/fluid/inference/CMakeLists.txt
@@ -20,7 +20,9 @@ if(NOT APPLE)
endif()
if(WITH_TESTING)
+ # both tests/book and analysis depends the models that generated by python/paddle/fluid/tests/book
add_subdirectory(tests/book)
+ add_subdirectory(analysis)
endif()
if (TENSORRT_FOUND)
diff --git a/paddle/fluid/inference/analysis/CMakeLists.txt b/paddle/fluid/inference/analysis/CMakeLists.txt
new file mode 100644
index 0000000000000000000000000000000000000000..de7becae4d25d48111fea8d2123bc85aef70230a
--- /dev/null
+++ b/paddle/fluid/inference/analysis/CMakeLists.txt
@@ -0,0 +1 @@
+cc_library(dot SRCS dot.cc)
diff --git a/paddle/fluid/inference/analysis/dot.cc b/paddle/fluid/inference/analysis/dot.cc
new file mode 100644
index 0000000000000000000000000000000000000000..d5471ffcb594a6915e9e65c0fee5adc5f5bdf40c
--- /dev/null
+++ b/paddle/fluid/inference/analysis/dot.cc
@@ -0,0 +1,23 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+#include "paddle/fluid/inference/analysis/dot.h"
+
+namespace paddle {
+namespace inference {
+namespace analysis {
+size_t Dot::counter = 0;
+} // namespace analysis
+} // namespace inference
+} // namespace paddle
diff --git a/paddle/fluid/inference/analysis/dot.h b/paddle/fluid/inference/analysis/dot.h
new file mode 100644
index 0000000000000000000000000000000000000000..3359987874f2d74d7e4646baa38790431c4b28fd
--- /dev/null
+++ b/paddle/fluid/inference/analysis/dot.h
@@ -0,0 +1,154 @@
+// Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+//
+// Licensed under the Apache License, Version 2.0 (the "License");
+// you may not use this file except in compliance with the License.
+// You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing, software
+// distributed under the License is distributed on an "AS IS" BASIS,
+// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+// See the License for the specific language governing permissions and
+// limitations under the License.
+
+/*
+ * This file implements some helper classes and methods for DOT programming
+ * support. It will give a visualization of the graph and that helps to debug
+ * the logics of each Pass.
+ */
+#pragma once
+
+#include <glog/logging.h>
+#include <sstream>
+#include <unordered_map>
+#include <vector>
+
+namespace paddle {
+namespace inference {
+namespace analysis {
+
+/*
+ * A Dot template that helps to build a DOT graph definition.
+ */
+class Dot {
+ public:
+ static size_t counter;
+
+ struct Attr {
+ std::string key;
+ std::string value;
+
+ Attr(const std::string& key, const std::string& value)
+ : key(key), value(value) {}
+
+ std::string repr() const {
+ std::stringstream ss;
+ ss << key << "=" << '"' << value << '"';
+ return ss.str();
+ }
+ };
+
+ struct Node {
+ std::string name;
+ std::vector<Attr> attrs;
+
+ Node(const std::string& name, const std::vector<Attr>& attrs)
+ : name(name),
+ attrs(attrs),
+ id_("node_" + std::to_string(Dot::counter++)) {}
+
+ std::string id() const { return id_; }
+
+ std::string repr() const {
+ std::stringstream ss;
+ CHECK(!name.empty());
+ ss << id_;
+ for (size_t i = 0; i < attrs.size(); i++) {
+ if (i == 0) {
+ ss << "[label=" << '"' << name << '"' << " ";
+ }
+ ss << attrs[i].repr();
+ ss << ((i < attrs.size() - 1) ? " " : "]");
+ }
+ return ss.str();
+ }
+
+ private:
+ std::string id_;
+ };
+
+ struct Edge {
+ std::string source;
+ std::string target;
+ std::vector<Attr> attrs;
+
+ Edge(const std::string& source, const std::string& target,
+ const std::vector<Attr>& attrs)
+ : source(source), target(target), attrs(attrs) {}
+
+ std::string repr() const {
+ std::stringstream ss;
+ CHECK(!source.empty());
+ CHECK(!target.empty());
+ ss << source << "->" << target;
+ for (size_t i = 0; i < attrs.size(); i++) {
+ if (i == 0) {
+ ss << "[";
+ }
+ ss << attrs[i].repr();
+ ss << ((i < attrs.size() - 1) ? " " : "]");
+ }
+ return ss.str();
+ }
+ };
+
+ Dot() = default;
+
+ explicit Dot(const std::vector<Attr>& attrs) : attrs_(attrs) {}
+
+ void AddNode(const std::string& name, const std::vector<Attr>& attrs) {
+ CHECK(!nodes_.count(name)) << "duplicate Node '" << name << "'";
+ nodes_.emplace(name, Node{name, attrs});
+ }
+
+ void AddEdge(const std::string& source, const std::string& target,
+ const std::vector<Attr>& attrs) {
+ CHECK(!source.empty());
+ CHECK(!target.empty());
+ auto sid = nodes_.at(source).id();
+ auto tid = nodes_.at(target).id();
+ edges_.emplace_back(sid, tid, attrs);
+ }
+
+ // Compile to DOT language codes.
+ std::string Build() const {
+ std::stringstream ss;
+ const std::string indent = " ";
+ ss << "digraph G {" << '\n';
+
+ // Add graph attrs
+ for (const auto& attr : attrs_) {
+ ss << indent << attr.repr() << '\n';
+ }
+ // add nodes
+ for (auto& item : nodes_) {
+ ss << indent << item.second.repr() << '\n';
+ }
+ // add edges
+ for (auto& edge : edges_) {
+ ss << indent << edge.repr() << '\n';
+ }
+ ss << "} // end G";
+ return ss.str();
+ }
+
+ private:
+ std::unordered_map<std::string, Node> nodes_;
+ std::vector<Edge> edges_;
+ std::vector<Attr> attrs_;
+};
+
+} // namespace analysis
+} // namespace inference
+} // namespace paddle
diff --git a/paddle/fluid/inference/engine.h b/paddle/fluid/inference/engine.h
index 6b0ac92fa908427a89a6a5fa74dacc3b24abd1c3..de0375551e16ec53b90414c7446234fda98bf706 100644
--- a/paddle/fluid/inference/engine.h
+++ b/paddle/fluid/inference/engine.h
@@ -19,6 +19,9 @@ limitations under the License. */
namespace paddle {
namespace inference {
+struct Buffer;
+enum class DeviceType { UNK = -1, CPU, GPU };
+
/*
* EngineBase is the base class of all inference engines. An inference engine
* takes a paddle program as input, and outputs the result in fluid Tensor
@@ -45,8 +48,20 @@ class EngineBase {
// Execute the engine, that will run the inference network.
virtual void Execute(int batch_size) = 0;
+  // Return the IO buffer that is allocated in the engine. One can read/write directly
+ // on the buffer. If the buffer's buffer is nullptr, one can also allocate
+ // memory and maintain it outside the engine.
+ virtual Buffer& buffer(const std::string& name) = 0;
+
virtual ~EngineBase() {}
}; // class EngineBase
+struct Buffer {
+ void* buffer{nullptr}; // buffer should be allocated only once.
+ int max_size; // buffer allocated space.
+ int size; // data size.
+ DeviceType device{DeviceType::UNK}; // tells which device this buffer is on.
+};
+
} // namespace inference
} // namespace paddle
diff --git a/paddle/fluid/inference/tensorrt/CMakeLists.txt b/paddle/fluid/inference/tensorrt/CMakeLists.txt
index 288789d6e484100820c937e6081701f1e9245706..677b3e04af8e7f5662a15fb32e3b03f45d262733 100644
--- a/paddle/fluid/inference/tensorrt/CMakeLists.txt
+++ b/paddle/fluid/inference/tensorrt/CMakeLists.txt
@@ -1,4 +1,5 @@
+nv_library(tensorrt_engine SRCS engine.cc DEPS framework_proto)
nv_test(test_tensorrt SRCS test_tensorrt.cc DEPS dynload_cuda device_context dynamic_loader)
-nv_test(test_tensorrt_engine SRCS test_engine.cc engine.cc DEPS dynload_cuda)
-set(ENGINE_FILE ${CMAKE_CURRENT_SOURCE_DIR}/engine.cc)
+nv_test(test_tensorrt_engine SRCS test_engine.cc DEPS dynload_cuda tensorrt_engine)
+
add_subdirectory(convert)
diff --git a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
index 3c5909c0be1c690d5148ecfb32b1b6c2dd6f3211..5178c54c08400125d190078dac6c52d021f8488b 100644
--- a/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
+++ b/paddle/fluid/inference/tensorrt/convert/CMakeLists.txt
@@ -1,4 +1,4 @@
nv_test(test_op_converter SRCS test_op_converter.cc mul_op.cc conv2d_op.cc DEPS ${FLUID_CORE_MODULES})
-nv_test(test_trt_activation_op SRCS test_activation_op.cc ${ENGINE_FILE} activation_op.cc
- DEPS ${FLUID_CORE_MODULES} activation_op)
+nv_test(test_trt_activation_op SRCS test_activation_op.cc activation_op.cc
+ DEPS ${FLUID_CORE_MODULES} activation_op tensorrt_engine)
nv_test(test_io_converter SRCS test_io_converter.cc io_converter.cc DEPS dynload_cuda dynamic_loader lod_tensor)
diff --git a/paddle/fluid/inference/tensorrt/engine.cc b/paddle/fluid/inference/tensorrt/engine.cc
index df123a59079acc5f549e733b412ab302aa397a92..1c296e33a610493b889359c43629003fd76b893c 100644
--- a/paddle/fluid/inference/tensorrt/engine.cc
+++ b/paddle/fluid/inference/tensorrt/engine.cc
@@ -30,16 +30,24 @@ void TensorRTEngine::Build(const DescType& paddle_model) {
}
void TensorRTEngine::Execute(int batch_size) {
- infer_context_->enqueue(batch_size, buffers_.data(), *stream_, nullptr);
+ std::vector buffers;
+ for (auto& buf : buffers_) {
+ PADDLE_ENFORCE_NOT_NULL(buf.buffer, "buffer should be allocated");
+ PADDLE_ENFORCE_GT(buf.max_size, 0);
+ PADDLE_ENFORCE(buf.device == DeviceType::GPU);
+ buffers.push_back(buf.buffer);
+ }
+ infer_context_->enqueue(batch_size, buffers.data(), *stream_, nullptr);
cudaStreamSynchronize(*stream_);
}
TensorRTEngine::~TensorRTEngine() {
// clean buffer
- for (auto& buffer : buffers_) {
- if (buffer != nullptr) {
- PADDLE_ENFORCE_EQ(0, cudaFree(buffer));
- buffer = nullptr;
+ for (auto& buf : buffers_) {
+ if (buf.buffer != nullptr) {
+ PADDLE_ENFORCE_EQ(0, cudaFree(buf.buffer));
+ buf.buffer = nullptr;
+ buf.max_size = 0;
}
}
}
@@ -59,7 +67,7 @@ void TensorRTEngine::FreezeNetwork() {
infer_context_.reset(infer_engine_->createExecutionContext());
// allocate GPU buffers.
- buffers_.resize(buffer_sizes_.size(), nullptr);
+ buffers_.resize(buffer_sizes_.size());
for (auto& item : buffer_sizes_) {
if (item.second == 0) {
auto slot_offset = infer_engine_->getBindingIndex(item.first.c_str());
@@ -67,7 +75,11 @@ void TensorRTEngine::FreezeNetwork() {
infer_engine_->getBindingDataType(slot_offset))] *
AccumDims(infer_engine_->getBindingDimensions(slot_offset));
}
- PADDLE_ENFORCE_EQ(0, cudaMalloc(&buffer(item.first), item.second));
+ auto& buf = buffer(item.first);
+ CHECK(buf.buffer == nullptr); // buffer should be allocated only once.
+ PADDLE_ENFORCE_EQ(0, cudaMalloc(&buf.buffer, item.second));
+ buf.size = buf.max_size = item.second;
+ buf.device = DeviceType::GPU;
}
}
@@ -113,7 +125,7 @@ void TensorRTEngine::DeclareOutput(const std::string& name) {
}
void* TensorRTEngine::GetOutputInGPU(const std::string& name) {
- return buffer(name);
+ return buffer(name).buffer;
}
void TensorRTEngine::GetOutputInCPU(const std::string& name, void* dst,
@@ -123,11 +135,13 @@ void TensorRTEngine::GetOutputInCPU(const std::string& name, void* dst,
PADDLE_ENFORCE(it != buffer_sizes_.end());
PADDLE_ENFORCE_GT(it->second, 0);
PADDLE_ENFORCE_GE(max_size, it->second);
- PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(dst, buffer(name), it->second,
+ auto& buf = buffer(name);
+ PADDLE_ENFORCE_NOT_NULL(buf.buffer, "buffer should be allocated before");
+ PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(dst, buf.buffer, it->second,
cudaMemcpyDeviceToHost, *stream_));
}
-void*& TensorRTEngine::buffer(const std::string& name) {
+Buffer& TensorRTEngine::buffer(const std::string& name) {
PADDLE_ENFORCE(infer_engine_ != nullptr, "call FreezeNetwork first.");
auto it = buffer_sizes_.find(name);
PADDLE_ENFORCE(it != buffer_sizes_.end());
@@ -137,10 +151,12 @@ void*& TensorRTEngine::buffer(const std::string& name) {
void TensorRTEngine::SetInputFromCPU(const std::string& name, void* data,
size_t size) {
- void* buf = buffer(name);
- cudaMemcpyAsync(buf, data, size, cudaMemcpyHostToDevice, *stream_);
- PADDLE_ENFORCE_EQ(
- 0, cudaMemcpyAsync(buf, data, size, cudaMemcpyHostToDevice, *stream_));
+ auto& buf = buffer(name);
+ PADDLE_ENFORCE_NOT_NULL(buf.buffer);
+ PADDLE_ENFORCE_LE(size, buf.max_size, "buffer is too small");
+ PADDLE_ENFORCE(buf.device == DeviceType::GPU);
+ PADDLE_ENFORCE_EQ(0, cudaMemcpyAsync(buf.buffer, data, size,
+ cudaMemcpyHostToDevice, *stream_));
}
void TensorRTEngine::SetITensor(const std::string& name,
diff --git a/paddle/fluid/inference/tensorrt/engine.h b/paddle/fluid/inference/tensorrt/engine.h
index ec919b943d3281dd675b15e2f14adb7b3487f46f..b8298c6059e8644327194a1fcf7a7438cc9a7286 100644
--- a/paddle/fluid/inference/tensorrt/engine.h
+++ b/paddle/fluid/inference/tensorrt/engine.h
@@ -87,7 +87,9 @@ class TensorRTEngine : public EngineBase {
// these memory directly for acceleration, for example, output the converted
// data directly to the buffer to save data copy overhead.
// NOTE this should be used after calling `FreezeNetwork`.
- void*& buffer(const std::string& name);
+ Buffer& buffer(const std::string& name) override;
+
+ cudaStream_t* stream() { return stream_; }
// Fill an input from CPU memory with name and size.
void SetInputFromCPU(const std::string& name, void* data, size_t size);
@@ -116,7 +118,7 @@ class TensorRTEngine : public EngineBase {
cudaStream_t* stream_;
nvinfer1::ILogger& logger_;
- std::vector buffers_;
+ std::vector buffers_;
// max data size for the buffers.
std::unordered_map buffer_sizes_;
std::unordered_map
diff --git a/paddle/fluid/inference/tensorrt/test_engine.cc b/paddle/fluid/inference/tensorrt/test_engine.cc
index a08b78f930d30d674247a713fadd3e42e5ada350..69dbb9a3f2b92c97813f31e179a35a753bbb62d9 100644
--- a/paddle/fluid/inference/tensorrt/test_engine.cc
+++ b/paddle/fluid/inference/tensorrt/test_engine.cc
@@ -77,6 +77,37 @@ TEST_F(TensorRTEngineTest, add_layer) {
ASSERT_EQ(y_cpu, x_v * 2 + 3);
}
+TEST_F(TensorRTEngineTest, add_layer_multi_dim) {
+ // Weight in CPU memory.
+  // It seems tensorrt FC uses col-major: [[1.0, 3.3], [1.1, 4.4]]
+ // instead of row-major, which is [[1.0, 1.1], [3.3, 4.4]]
+ float raw_weight[4] = {1.0, 1.1, 3.3, 4.4};
+ float raw_bias[2] = {1.3, 2.4};
+
+ TensorRTEngine::Weight weight(nvinfer1::DataType::kFLOAT, raw_weight, 4);
+ TensorRTEngine::Weight bias(nvinfer1::DataType::kFLOAT, raw_bias, 2);
+ auto* x = engine_->DeclareInput("x", nvinfer1::DataType::kFLOAT,
+ nvinfer1::DimsCHW{1, 2, 1});
+ auto* fc_layer = TRT_ENGINE_ADD_LAYER(engine_, FullyConnected, *x, 2,
+ weight.get(), bias.get());
+ PADDLE_ENFORCE(fc_layer != nullptr);
+
+ engine_->DeclareOutput(fc_layer, 0, "y");
+ engine_->FreezeNetwork();
+ ASSERT_EQ(engine_->engine()->getNbBindings(), 2);
+
+ float x_v[2] = {1.0, 2.0};
+ engine_->SetInputFromCPU("x", reinterpret_cast(&x_v),
+ 2 * sizeof(float));
+ engine_->Execute(1);
+
+ LOG(INFO) << "to get output";
+ float y_cpu[2] = {-1., -1.};
+ engine_->GetOutputInCPU("y", &y_cpu[0], sizeof(float) * 2);
+ ASSERT_EQ(y_cpu[0], 4.5);
+ ASSERT_EQ(y_cpu[1], 14.5);
+}
+
} // namespace tensorrt
} // namespace inference
} // namespace paddle
diff --git a/paddle/fluid/inference/tests/book/CMakeLists.txt b/paddle/fluid/inference/tests/book/CMakeLists.txt
index cc179a86256e6b552c08a091402157bdcc86b383..dbb81462b8273bd701e9c9f530eaf69817abd6a1 100644
--- a/paddle/fluid/inference/tests/book/CMakeLists.txt
+++ b/paddle/fluid/inference/tests/book/CMakeLists.txt
@@ -36,5 +36,5 @@ inference_test(label_semantic_roles)
inference_test(recognize_digits ARGS mlp conv)
inference_test(recommender_system)
#inference_test(rnn_encoder_decoder)
-inference_test(understand_sentiment ARGS conv)
+#inference_test(understand_sentiment ARGS conv)
inference_test(word2vec)
diff --git a/paddle/fluid/operators/conv_op.h b/paddle/fluid/operators/conv_op.h
index c51898abb422663a6731a17e0717c62ebf0701f8..f462f00c0803c12ee2f2b0f94dc90afdca500da3 100644
--- a/paddle/fluid/operators/conv_op.h
+++ b/paddle/fluid/operators/conv_op.h
@@ -187,7 +187,8 @@ class GemmConvKernel : public framework::OpKernel {
// gemm
Tensor out_slice = out_batch.Slice(g * out_step, (g + 1) * out_step);
Tensor filter_slice = filter.Slice(g * out_step, (g + 1) * out_step);
- blas.MatMul(filter_slice, col_matrix, &out_slice);
+ blas.MatMul(filter_slice, false, col_matrix, false, T(1.0), &out_slice,
+ T(0.0));
}
}
}
@@ -304,7 +305,8 @@ class GemmConvGradKernel : public framework::OpKernel {
col_matrix.ShareDataWith(in_grad_slice);
col_matrix.Resize(col_matrix_shape);
}
- blas.MatMul(filter_slice, true, out_grad_slice, false, &col_matrix);
+ blas.MatMul(filter_slice, true, out_grad_slice, false, T(1.0),
+ &col_matrix, T(0.0));
if (is_expand && data_dim == 2U) {
col2im(dev_ctx, col, dilations, strides,
@@ -351,8 +353,8 @@ class GemmConvGradKernel : public framework::OpKernel {
// gemm
Tensor filter_grad_slice =
filter_grad_.Slice(g * out_step, (g + 1) * out_step);
- blas.MatMul(out_grad_slice, false, col_matrix, true,
- &filter_grad_slice);
+ blas.MatMul(out_grad_slice, false, col_matrix, true, T(1.0),
+ &filter_grad_slice, T(1.0));
}
}
}
diff --git a/paddle/fluid/operators/conv_transpose_op.h b/paddle/fluid/operators/conv_transpose_op.h
index 9276e5bfef71a58741c2dfa25b31c2bd07c309b8..898121412b17cd6fbbbeb57e9d63842e592703ac 100644
--- a/paddle/fluid/operators/conv_transpose_op.h
+++ b/paddle/fluid/operators/conv_transpose_op.h
@@ -135,7 +135,8 @@ class GemmConvTransposeKernel : public framework::OpKernel {
// col_matrix = filter * input_batch
// of shape (c * k_h * k_w, h * w) or (c * k_d * k_h * k_w, d * h * w)
- blas.MatMul(filter, true, input_batch, false, &col_matrix);
+ blas.MatMul(filter, true, input_batch, false, static_cast(1.0),
+ &col_matrix, static_cast(0.0));
if (data_dim == 2U) {
// col2im: col_matrix -> dy
@@ -267,7 +268,8 @@ class GemmConvTransposeGradKernel : public framework::OpKernel {
// or
// (m, c * k_d * k_h * k_w) * (c * k_d * k_h * k_w, d * h * w) -> (m,
// d, h, w)
- blas.MatMul(filter, false, col_matrix, false, &input_grad_batch);
+ blas.MatMul(filter, false, col_matrix, false, static_cast(1.0),
+ &input_grad_batch, static_cast(0.0));
}
if (filter_grad) {
// input batch
@@ -277,7 +279,8 @@ class GemmConvTransposeGradKernel : public framework::OpKernel {
// or
// (m, d * h * w) * (d * h * w, c * k_d * k_h * k_w) -> (m, c * k_d *
// k_h * k_w)
- blas.MatMul(in_batch, false, col_matrix, true, &filter_grad_);
+ blas.MatMul(in_batch, false, col_matrix, true, static_cast(1.0),
+ &filter_grad_, static_cast(1.0));
}
}
}
diff --git a/paddle/fluid/operators/load_op.cc b/paddle/fluid/operators/load_op.cc
index 6ffe0bec5e38432676ecadfa1abbbe70a1425bb1..c6bd2bf3dfca77dc078eb04b1d90c7d90883203f 100644
--- a/paddle/fluid/operators/load_op.cc
+++ b/paddle/fluid/operators/load_op.cc
@@ -46,19 +46,6 @@ class LoadOp : public framework::OperatorBase {
auto *tensor = out_var->GetMutable();
DeserializeFromStream(fin, tensor, *dev_ctx);
-
- if (platform::is_gpu_place(place)) {
- // copy CPU to GPU
- framework::LoDTensor cpu_tensor;
- cpu_tensor.ShareDataWith(*tensor);
- cpu_tensor.set_lod(tensor->lod());
-
- // reset tensor
- out_var->Clear();
- tensor = out_var->GetMutable();
- tensor->set_lod(cpu_tensor.lod());
- TensorCopy(cpu_tensor, place, *dev_ctx, tensor);
- }
}
};
diff --git a/paddle/fluid/platform/cuda_device_function.h b/paddle/fluid/platform/cuda_device_function.h
index e81c385727be5c2ba3f02bfbd86168cb4650dfda..ecec4178f2d9937920e52eb74bf9068b84e741a0 100644
--- a/paddle/fluid/platform/cuda_device_function.h
+++ b/paddle/fluid/platform/cuda_device_function.h
@@ -63,6 +63,7 @@ __device__ T reduceSum(T val, int tid, int len) {
val += platform::CudaShuffleDownSync(mask, val, offset);
if (tid < warpSize) shm[tid] = 0;
+ __syncthreads();
if (tid % warpSize == 0) {
shm[tid / warpSize] = val;
diff --git a/paddle/fluid/platform/profiler.cc b/paddle/fluid/platform/profiler.cc
index cfddd8e8711f8005e0eff7ef7a2980f535b2f851..50bc0aba6aa0f056dc0b2d49f6b3b745433e0756 100644
--- a/paddle/fluid/platform/profiler.cc
+++ b/paddle/fluid/platform/profiler.cc
@@ -463,7 +463,7 @@ void SetProfileListener() {
std::mt19937 rng;
rng.seed(std::random_device()());
std::uniform_int_distribution dist6(
- 1, std::numeric_limits::max());
+ 1, std::numeric_limits::max());
profiler_lister_id = dist6(rng);
}
int64_t ListenerId() { return profiler_lister_id; }
diff --git a/paddle/scripts/paddle_build.sh b/paddle/scripts/paddle_build.sh
index 1595cc9e8aad4d143ca62f84f812dbc791dc1d26..c9b49adef7061d2cfa504258cfc589346c27e192 100755
--- a/paddle/scripts/paddle_build.sh
+++ b/paddle/scripts/paddle_build.sh
@@ -398,7 +398,7 @@ function gen_dockerfile() {
cat <> /paddle/build/Dockerfile <> ${PADDLE_ROOT}/build/Dockerfile <> ${PADDLE_ROOT}/build/Dockerfile <> ${PADDLE_ROOT}/build/Dockerfile < Variable(learning_rate)
self._learning_rate_map = dict()
@@ -77,7 +79,7 @@ class Optimizer(object):
name=unique_name.generate("learning_rate"),
shape=[1],
value=float(self._learning_rate),
- dtype='float32',
+ dtype='float32' if self._dtype == None else self._dtype,
persistable=True)
def global_learning_rate(self, program=None):
@@ -200,6 +202,7 @@ class Optimizer(object):
# Create any accumulators
program = loss.block.program
+ self._dtype = loss.dtype
with program_guard(program, startup_program):
global_block = framework.default_main_program().global_block()
start = len(global_block.ops)
@@ -391,7 +394,7 @@ class AdamOptimizer(Optimizer):
beta_shape = [1]
self._beta1_pow_acc = self.helper.create_global_variable(
name=unique_name.generate('beta1_pow_acc'),
- dtype='float32',
+ dtype='float32' if self._dtype == None else self._dtype,
shape=beta_shape,
lod_level=0,
persistable=True)
@@ -400,7 +403,7 @@ class AdamOptimizer(Optimizer):
self._beta2_pow_acc = self.helper.create_global_variable(
name=unique_name.generate('beta2_pow_acc'),
- dtype='float32',
+ dtype='float32' if self._dtype == None else self._dtype,
shape=beta_shape,
lod_level=0,
persistable=True)
@@ -493,7 +496,7 @@ class AdamaxOptimizer(Optimizer):
beta_shape = [1]
self._beta1_pow_acc = self.helper.create_global_variable(
name=unique_name.generate('beta1_pow_acc'),
- dtype='float32',
+ dtype='float32' if self._dtype == None else self._dtype,
shape=beta_shape,
lod_level=0,
persistable=True)
@@ -900,8 +903,10 @@ class ModelAverage(Optimizer):
# param = (sum_1 + sum_2 + sum_3) / (num_accumulates + old_num_accumulates)
tmp = layers.sum(x=[num_accumulates, old_num_accumulates])
sum = layers.sum(x=[sum_1, sum_2, sum_3])
- tmp = layers.cast(x=tmp, dtype='float32')
- sum = layers.cast(x=sum, dtype='float32')
+ tmp = layers.cast(
+ x=tmp, dtype='float32' if self._dtype == None else self._dtype)
+ sum = layers.cast(
+ x=sum, dtype='float32' if self._dtype == None else self._dtype)
layers.elementwise_div(x=sum, y=tmp, out=param)
def _add_average_restore_op(self, block, param_grad):
diff --git a/python/paddle/fluid/tests/book/image_classification/notest_image_classification_resnet.py b/python/paddle/fluid/tests/book/high-level-api/image_classification/notest_image_classification_resnet.py
similarity index 100%
rename from python/paddle/fluid/tests/book/image_classification/notest_image_classification_resnet.py
rename to python/paddle/fluid/tests/book/high-level-api/image_classification/notest_image_classification_resnet.py
diff --git a/python/paddle/fluid/tests/book/image_classification/notest_image_classification_vgg.py b/python/paddle/fluid/tests/book/high-level-api/image_classification/notest_image_classification_vgg.py
similarity index 100%
rename from python/paddle/fluid/tests/book/image_classification/notest_image_classification_vgg.py
rename to python/paddle/fluid/tests/book/high-level-api/image_classification/notest_image_classification_vgg.py
diff --git a/python/paddle/fluid/tests/book/label_semantic_roles/no_test_label_semantic_roles.py b/python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/no_test_label_semantic_roles.py
similarity index 100%
rename from python/paddle/fluid/tests/book/label_semantic_roles/no_test_label_semantic_roles.py
rename to python/paddle/fluid/tests/book/high-level-api/label_semantic_roles/no_test_label_semantic_roles.py
diff --git a/python/paddle/fluid/tests/book/notest_recognize_digits/notest_recognize_digits_conv.py b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/notest_recognize_digits_conv.py
similarity index 100%
rename from python/paddle/fluid/tests/book/notest_recognize_digits/notest_recognize_digits_conv.py
rename to python/paddle/fluid/tests/book/high-level-api/recognize_digits/notest_recognize_digits_conv.py
diff --git a/python/paddle/fluid/tests/book/notest_recognize_digits/notest_recognize_digits_mlp.py b/python/paddle/fluid/tests/book/high-level-api/recognize_digits/notest_recognize_digits_mlp.py
similarity index 100%
rename from python/paddle/fluid/tests/book/notest_recognize_digits/notest_recognize_digits_mlp.py
rename to python/paddle/fluid/tests/book/high-level-api/recognize_digits/notest_recognize_digits_mlp.py
diff --git a/python/paddle/fluid/tests/book/understand_sentiment/notest_understand_sentiment_stacked_lstm.py b/python/paddle/fluid/tests/book/high-level-api/understand_sentiment/notest_understand_sentiment_stacked_lstm.py
similarity index 100%
rename from python/paddle/fluid/tests/book/understand_sentiment/notest_understand_sentiment_stacked_lstm.py
rename to python/paddle/fluid/tests/book/high-level-api/understand_sentiment/notest_understand_sentiment_stacked_lstm.py
diff --git a/python/paddle/fluid/tests/book/word2vec/no_test_word2vec_new_api.py b/python/paddle/fluid/tests/book/high-level-api/word2vec/no_test_word2vec_new_api.py
similarity index 100%
rename from python/paddle/fluid/tests/book/word2vec/no_test_word2vec_new_api.py
rename to python/paddle/fluid/tests/book/high-level-api/word2vec/no_test_word2vec_new_api.py
diff --git a/python/paddle/fluid/tests/book/test_understand_sentiment.py b/python/paddle/fluid/tests/book/notest_understand_sentiment.py
similarity index 100%
rename from python/paddle/fluid/tests/book/test_understand_sentiment.py
rename to python/paddle/fluid/tests/book/notest_understand_sentiment.py
diff --git a/python/paddle/fluid/tests/book/test_label_semantic_roles.py b/python/paddle/fluid/tests/book/test_label_semantic_roles.py
index 50ef29c4572f1b12fe9793bbf037cd7fe71a9e53..0faba33032d5dfc0b751a5191e7b2ae0c1f172bf 100644
--- a/python/paddle/fluid/tests/book/test_label_semantic_roles.py
+++ b/python/paddle/fluid/tests/book/test_label_semantic_roles.py
@@ -36,7 +36,7 @@ depth = 8
mix_hidden_lr = 1e-3
IS_SPARSE = True
-PASS_NUM = 100
+PASS_NUM = 10
BATCH_SIZE = 10
embedding_name = 'emb'
diff --git a/python/paddle/fluid/tests/unittests/test_memory_optimization_transpiler.py b/python/paddle/fluid/tests/unittests/test_memory_optimization_transpiler.py
index f3dcca6b0107a9c4a6efcb0c0fd50324aaf92648..cfd6e63e12258a92447e68b4afbc7ead91b68cc1 100644
--- a/python/paddle/fluid/tests/unittests/test_memory_optimization_transpiler.py
+++ b/python/paddle/fluid/tests/unittests/test_memory_optimization_transpiler.py
@@ -18,7 +18,7 @@ import unittest
import paddle.fluid.layers as layers
import paddle.fluid.optimizer as optimizer
from paddle.fluid.framework import Program, program_guard
-from paddle.fluid.memory_optimization_transpiler import memory_optimize
+from paddle.fluid.transpiler import memory_optimize
class TestControlFlowGraph(unittest.TestCase):
diff --git a/python/paddle/fluid/tests/unittests/test_network_with_dtype.py b/python/paddle/fluid/tests/unittests/test_network_with_dtype.py
new file mode 100644
index 0000000000000000000000000000000000000000..baafcdbb80238385752183ee0a8ff96a5da4659c
--- /dev/null
+++ b/python/paddle/fluid/tests/unittests/test_network_with_dtype.py
@@ -0,0 +1,74 @@
+# Copyright (c) 2018 PaddlePaddle Authors. All Rights Reserved.
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import unittest
+
+import numpy as np
+import paddle
+import paddle.fluid as fluid
+import paddle.fluid.core as core
+from paddle.fluid.executor import Executor
+
+BATCH_SIZE = 20
+
+
+class TestNetWithDtype(unittest.TestCase):
+ def setUp(self):
+ self.dtype = "float64"
+ self.init_dtype()
+ self.x = fluid.layers.data(name='x', shape=[13], dtype=self.dtype)
+ self.y = fluid.layers.data(name='y', shape=[1], dtype=self.dtype)
+ y_predict = fluid.layers.fc(input=self.x, size=1, act=None)
+
+ cost = fluid.layers.square_error_cost(input=y_predict, label=self.y)
+ avg_cost = fluid.layers.mean(cost)
+ self.fetch_list = [avg_cost]
+
+ sgd_optimizer = fluid.optimizer.SGD(learning_rate=0.001)
+ sgd_optimizer.minimize(avg_cost)
+
+ def run_net_on_place(self, place):
+ train_reader = paddle.batch(
+ paddle.dataset.uci_housing.train(), batch_size=BATCH_SIZE)
+ feeder = fluid.DataFeeder(place=place, feed_list=[self.x, self.y])
+ exe = fluid.Executor(place)
+ exe.run(fluid.default_startup_program())
+ for data in train_reader():
+ exe.run(fluid.default_main_program(),
+ feed=feeder.feed(data),
+ fetch_list=self.fetch_list)
+            # the main program is runnable, the datatype is fully supported
+ break
+
+ def init_dtype(self):
+ pass
+
+ def test_cpu(self):
+ place = fluid.CPUPlace()
+ self.run_net_on_place(place)
+
+ def test_gpu(self):
+ if not core.is_compiled_with_cuda():
+ return
+ place = fluid.CUDAPlace(0)
+ self.run_net_on_place(place)
+
+
+# TODO(dzhwinter): make sure the fp16 is runnable
+# class TestFloat16(SimpleNet):
+# def init_dtype(self):
+# self.dtype = "float16"
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_parallel_executor.py b/python/paddle/fluid/tests/unittests/test_parallel_executor.py
index 9056f5e66fceb42397c9a923d802320dd772725b..4eb25a6e00b7564ac17db568ec78c1c84933af43 100644
--- a/python/paddle/fluid/tests/unittests/test_parallel_executor.py
+++ b/python/paddle/fluid/tests/unittests/test_parallel_executor.py
@@ -12,7 +12,7 @@
# See the License for the specific language governing permissions and
# limitations under the License.
-import numpy
+import numpy as np
import unittest
import paddle.fluid as fluid
@@ -243,7 +243,7 @@ class TestParallelExecutorBase(unittest.TestCase):
begin = time.time()
first_loss, = run_executor(
exe=exe, feed=feed_dict, fetch_list=[loss.name])
- first_loss = numpy.array(first_loss)
+ first_loss = np.array(first_loss)
for i in xrange(iter):
run_executor(exe=exe, feed=feed_dict, fetch_list=[])
@@ -256,7 +256,7 @@ class TestParallelExecutorBase(unittest.TestCase):
print "%.4f Instance per second" % (
(batch_size * iter + 2) / (end - begin))
- last_loss = numpy.array(last_loss)
+ last_loss = np.array(last_loss)
print first_loss, last_loss
# self.assertGreater(first_loss[0], last_loss[0])
@@ -284,8 +284,8 @@ class TestMNIST(TestParallelExecutorBase):
self.check_network_convergence(simple_fc_net)
self.check_network_convergence(simple_fc_net, allow_op_delay=True)
- img = numpy.zeros(shape=[32, 784], dtype='float32')
- label = numpy.ones(shape=[32, 1], dtype='int64')
+ img = np.zeros(shape=[32, 784], dtype='float32')
+ label = np.ones(shape=[32, 1], dtype='int64')
self.check_network_convergence(
simple_fc_net, feed_dict={"image": img,
"label": label})
@@ -294,8 +294,8 @@ class TestMNIST(TestParallelExecutorBase):
self.check_simple_fc_convergence()
def check_simple_fc_parallel_accuracy(self):
- img = numpy.zeros(shape=[32, 784], dtype='float32')
- label = numpy.ones(shape=[32, 1], dtype='int64')
+ img = np.zeros(shape=[32, 784], dtype='float32')
+ label = np.ones(shape=[32, 1], dtype='int64')
single_first_loss, single_last_loss = self.check_network_convergence(
method=simple_fc_net,
seed=1000,
@@ -319,8 +319,8 @@ class TestMNIST(TestParallelExecutorBase):
def check_batchnorm_fc_convergence(self):
self.check_network_convergence(fc_with_batchnorm)
- img = numpy.zeros(shape=[32, 784], dtype='float32')
- label = numpy.ones(shape=[32, 1], dtype='int64')
+ img = np.zeros(shape=[32, 784], dtype='float32')
+ label = np.ones(shape=[32, 1], dtype='int64')
self.check_network_convergence(
fc_with_batchnorm, feed_dict={"image": img,
"label": label})
@@ -404,9 +404,6 @@ class ModelHyperParams(object):
dropout = 0.1
-import numpy as np
-
-
def prepare_batch_input(insts, src_pad_idx, trg_pad_idx, n_head):
"""
Pad the instances to the max sequence length in batch, and generate the
@@ -533,9 +530,8 @@ class ParallelExecutorTestingDuringTraining(unittest.TestCase):
opt.minimize(loss)
batch_size = 32
- image = numpy.random.normal(size=(batch_size,
- 784)).astype('float32')
- label = numpy.random.randint(0, 10, (batch_size, 1), dtype="int64")
+ image = np.random.normal(size=(batch_size, 784)).astype('float32')
+ label = np.random.randint(0, 10, (batch_size, 1), dtype="int64")
place = fluid.CUDAPlace(0)
exe = fluid.Executor(place)
@@ -552,12 +548,12 @@ class ParallelExecutorTestingDuringTraining(unittest.TestCase):
for i in xrange(5):
test_loss, = test_exe.run([loss.name], feed=feed_dict)
- test_loss = numpy.array(test_loss)
+ test_loss = np.array(test_loss)
train_loss, = train_exe.run([loss.name], feed=feed_dict)
- train_loss = numpy.array(train_loss)
+ train_loss = np.array(train_loss)
self.assertTrue(
- numpy.allclose(
+ np.allclose(
train_loss, test_loss, atol=1e-8),
"Train loss: " + str(train_loss) + "\n Test loss:" +
str(test_loss))
@@ -712,7 +708,7 @@ class TestCRFModel(unittest.TestCase):
data = train_data()
for i in xrange(10):
cur_batch = next(data)
- print map(numpy.array,
+ print map(np.array,
pe.run(feed=feeder.feed(cur_batch),
fetch_list=[avg_cost.name]))[0]
@@ -721,3 +717,84 @@ class TestCRFModel(unittest.TestCase):
def test_update_dense_parameter(self):
self.check_network_convergence(is_sparse=False)
+
+
+# test fetch all the variables of global_block
+
+import paddle.dataset.flowers as flowers
+import math
+
+
+def Lenet(data, class_dim):
+ conv1 = fluid.layers.conv2d(data, 32, 5, 1, act=None)
+ bn1 = fluid.layers.batch_norm(conv1, act='relu')
+ pool1 = fluid.layers.pool2d(bn1, 2, 'max', 2)
+ conv2 = fluid.layers.conv2d(pool1, 50, 5, 1, act=None)
+ bn2 = fluid.layers.batch_norm(conv2, act='relu')
+ pool2 = fluid.layers.pool2d(bn2, 2, 'max', 2)
+
+ fc1 = fluid.layers.fc(pool2, size=500, act='relu')
+ fc2 = fluid.layers.fc(fc1, size=class_dim, act='softmax')
+
+ return fc2
+
+
+class TestFetchOp(unittest.TestCase):
+ def parallel_exe(self, train_inputs, seed):
+ main = fluid.Program()
+ startup = fluid.Program()
+ startup.random_seed = seed
+ with fluid.program_guard(main, startup):
+ data = fluid.layers.data(
+ name='image', shape=[3, 224, 224], dtype='float32')
+ label = fluid.layers.data(name='label', shape=[1], dtype='int64')
+ out = Lenet(data, class_dim=102)
+ loss = fluid.layers.cross_entropy(input=out, label=label)
+ loss = fluid.layers.mean(loss)
+
+ opt = fluid.optimizer.Momentum(
+ learning_rate=0.1,
+ momentum=0.9,
+ regularization=fluid.regularizer.L2Decay(1e-4))
+
+ opt.minimize(loss)
+
+            # TODO(zcd): I found that once the memory optimizer is enabled,
+            # parallel_exe doesn't fetch some variables, such as conv2d_0.b_0@GRAD,
+            # conv2d_1.b_0@GRAD. Those variables should not be pruned.
+ # fluid.memory_optimize(main)
+
+ place = fluid.CUDAPlace(0)
+ exe = fluid.Executor(place)
+ exe.run(startup)
+
+ feeder = fluid.DataFeeder(place=place, feed_list=[data, label])
+ pe = fluid.ParallelExecutor(
+ use_cuda=True, loss_name=loss.name, main_program=main)
+
+ fetch_list = []
+ all_vars = main.global_block().vars
+ for k, v in all_vars.iteritems():
+ if 'tmp' not in k and k[0] is not '_' or v.persistable:
+ fetch_list.append(k)
+
+ for data in train_inputs:
+ ret = pe.run(fetch_list, feed=feeder.feed(data))
+ for i in range(len(fetch_list)):
+ assert not math.isnan(np.sum(ret[i])) and \
+ not math.isinf(np.sum(ret[i]))
+
+ def test_update_sparse_parameter(self):
+ tst_reader = paddle.batch(flowers.test(use_xmap=False), batch_size=16)
+ tst_reader_iter = tst_reader()
+
+ iters = 3
+ train_inputs = []
+ for i in range(iters):
+ train_inputs.append(tst_reader_iter.next())
+
+ self.parallel_exe(train_inputs, seed=1)
+
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/python/paddle/fluid/tests/unittests/test_split_var.py b/python/paddle/fluid/tests/unittests/test_split_var.py
index 104ceb4fe7beb70b9016f57cef0ef895a3eb8ba6..79d387f0066672058d1640f4e5fd28ed8913fe4c 100644
--- a/python/paddle/fluid/tests/unittests/test_split_var.py
+++ b/python/paddle/fluid/tests/unittests/test_split_var.py
@@ -14,7 +14,7 @@
import math
import unittest
-from paddle.fluid.distribute_transpiler import split_dense_variable
+from paddle.fluid.transpiler.distribute_transpiler import split_dense_variable
import paddle.fluid as fluid
import paddle.fluid.core as core
import random
diff --git a/tools/aws_benchmarking/server/cluster_master.py b/tools/aws_benchmarking/server/cluster_master.py
index 1333a942bf013a8182585b56e5843803c56945b1..a9b24846544d8aca5e4c7bd5709e70564c088431 100644
--- a/tools/aws_benchmarking/server/cluster_master.py
+++ b/tools/aws_benchmarking/server/cluster_master.py
@@ -20,6 +20,7 @@ import time
import threading
import logging
import copy
+import csv
import netaddr
import boto3
@@ -136,6 +137,12 @@ parser.add_argument(
parser.add_argument(
'--master_server_ip', type=str, default="", help="master server private ip")
+parser.add_argument(
+ '--metric_data_identifier',
+ type=str,
+ default="**metrics_data: ",
+ help="key string to identify metrics data")
+
parser.add_argument(
'--no_clean_up',
type=str2bool,
@@ -155,6 +162,11 @@ logging.basicConfig(
log_files = ["master.log"]
+metrics = {}
+
+metrics_csv_file_name = "metrics.csv"
+is_metrics_file_created = False
+
def create_subnet():
# if no vpc id provided, list vpcs
@@ -329,12 +341,42 @@ def create_pservers():
cleanup(args.task_name)
+def save_metrics_data(str_msg):
+ #parse msg
+ logging.info("found metrics data, saving it to csv file")
+ global is_metrics_file_created
+ metrics_raw = str_msg.split(",")
+ with open(args.log_path + metrics_csv_file_name, 'a') as csvfile:
+ csv_fieldnames = []
+ csv_write_data = {}
+ for metric in metrics_raw:
+ metric_data = metric.split("=")
+ metric_key = metric_data[0].strip()
+ metric_val = float(metric_data[1].strip())
+ if not metric_key in metrics:
+ metrics[metric_key] = []
+ metric_repo = metrics[metric_key]
+ metric_repo.append(metric_val)
+ csv_fieldnames.append(metric_key)
+ csv_write_data[metric_key] = metric_val
+ writer = csv.DictWriter(csvfile, fieldnames=csv_fieldnames)
+ if not is_metrics_file_created:
+ writer.writeheader()
+ is_metrics_file_created = True
+ writer.writerow(csv_write_data)
+ logging.info("csv file appended")
+
+
def log_to_file(source, filename):
if not filename in log_files:
log_files.append(filename)
with open(args.log_path + filename, "a") as log_file:
for line in iter(source.readline, ""):
log_file.write(line)
+ if (line.startswith(args.metric_data_identifier)):
+ #found key data, trying to add to csv
+ line = line.replace(args.metric_data_identifier, "")
+ save_metrics_data(line)
def parse_command(command_raw, defaults={}):