From 2edeb639e27562569e1bc6571e43cf9f6259a498 Mon Sep 17 00:00:00 2001
From: "Yang Yang(Tony)"
Date: Wed, 28 Feb 2018 12:52:21 -0800
Subject: [PATCH] Update parallel_do.md (#8665)

---
 doc/design/parallel_do.md | 21 +++++++++++----------
 1 file changed, 11 insertions(+), 10 deletions(-)

diff --git a/doc/design/parallel_do.md b/doc/design/parallel_do.md
index 45f7731996..42bd136f82 100644
--- a/doc/design/parallel_do.md
+++ b/doc/design/parallel_do.md
@@ -39,15 +39,16 @@ In the backward pass
 This implementation allows to write mixed device program like this
 
 ```python
-# get embedding feature on CPU
-feature = some_cpu_only_op(data)
+W1 = fluid.tensor(size=[100,20], parameter=true)
+W2 = fluid.tensor(size=[20,15], parameter=true)
 
-gpu_places = get_place(use_gpu=True)
+data = layers.data()
+
+gpu_places = layers.get_place(use_gpu=True)
 # parallel processing on multiple GPUs
 pd = ParallelDo(gpu_places)
-with pd.do():
-    read_input(feature)
-    prediction = my_net(feature)
+with pd.do(input=data):
+    prediction = softmax(fc(fc(data, W1), W2))
     write_output(prediction)
 prediction = pd()
 loss = cross_entropy(prediction, label)
@@ -66,20 +67,20 @@ start_program
 main_program
 {
 block0 {
-  vars: data, places, w1, w2
+  vars: data, places, w1, w2, w1_grad, w2_grad,
   ops: data, get_place, parallel_do(block1),
        parallel_do_grad(block2),
        sgd(w2, w2_grad),
        sgd(w1, w1_grad)
 }
-block1 {
+block1 { # the forward pass
   parent_block: 0
   vars: data, h1, h2, loss
   ops: fc, fc, softmax
 }
-block2 {
+block2 { # the backward pass
   parent_block: 1
-  vars: data_grad, h1_grad, h2_grad, loss_gard, w1_grad, w2_grad
+  vars: data_grad, h1_grad, h2_grad, loss_gard, local_w1_grad, local_w2_grad
   ops: softmax_grad,
        fc_grad
        fc_grad
-- 
GitLab
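
The patched example and program blocks describe the intended semantics: parallel_do splits the input across the given places, runs the forward block (block1) and its gradient block (block2) once per place, and merges each place's local_w1_grad/local_w2_grad into the global w1_grad/w2_grad consumed by the sgd ops in block0. Below is a minimal NumPy sketch of those semantics for reference only; the two-layer fc + softmax network, the gradient formulas, and all names are illustrative assumptions and not the Fluid API shown in the patch.

```python
# Conceptual sketch of parallel_do / parallel_do_grad: shard the batch,
# run forward + backward per place, then merge local gradients.
import numpy as np

def fc(x, w):
    # plain linear layer, matching the "fc, fc, softmax" ops in block1
    return x @ w

def softmax(z):
    e = np.exp(z - z.max(axis=1, keepdims=True))
    return e / e.sum(axis=1, keepdims=True)

def forward_backward(data, labels, w1, w2):
    """One place's copy of block1 (forward) and block2 (local gradients)."""
    h1 = fc(data, w1)
    h2 = fc(h1, w2)
    prob = softmax(h2)
    # gradient of the summed softmax + cross-entropy loss w.r.t. the logits
    dh2 = prob - labels
    local_w2_grad = h1.T @ dh2
    dh1 = dh2 @ w2.T
    local_w1_grad = data.T @ dh1
    return prob, local_w1_grad, local_w2_grad

rng = np.random.default_rng(0)
w1 = rng.normal(size=(100, 20)) * 0.01   # stands in for fluid.tensor(size=[100,20], parameter=true)
w2 = rng.normal(size=(20, 15)) * 0.01    # stands in for fluid.tensor(size=[20,15], parameter=true)

data = rng.normal(size=(8, 100))                  # stands in for layers.data()
label = np.eye(15)[rng.integers(0, 15, size=8)]   # one-hot labels

num_places = 2                                    # stands in for gpu_places
data_shards = np.array_split(data, num_places)    # split input across places
label_shards = np.array_split(label, num_places)

# parallel_do(block1) + parallel_do_grad(block2); run serially here for clarity
w1_grad = np.zeros_like(w1)
w2_grad = np.zeros_like(w2)
for d, l in zip(data_shards, label_shards):
    _, local_w1_grad, local_w2_grad = forward_backward(d, l, w1, w2)
    w1_grad += local_w1_grad                      # merge per-place gradients
    w2_grad += local_w2_grad

# sgd(w1, w1_grad), sgd(w2, w2_grad) back in block0
lr = 0.1
w1 -= lr * w1_grad
w2 -= lr * w2_grad
```

Because the merged gradient is the sum of the per-place local gradients of a summed loss, the update is the same as running the whole batch on a single place, which is what lets block0 keep a single pair of sgd ops regardless of how many places the do-block ran on.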