diff --git a/doc/design/graph.md b/doc/design/graph.md new file mode 100644 index 0000000000000000000000000000000000000000..87f696f90f164a639ad5182823ddfb14aab7e065 --- /dev/null +++ b/doc/design/graph.md @@ -0,0 +1,51 @@ +# Design Doc: Computations as Graphs + +A primary goal of the refactoring of PaddlePaddle is a more flexible representation of deep learning computation, in particular, a graph of operators and variables, instead of sequences of layers as before. + +This document explains the construction of a graph in three steps: + +- construct the forward part +- construct the backward part +- construct the optimization part + +Let us take the problem of image classification as a simple example. The application program that trains the model looks like: + +```python +x = layer.data("images") +l = layer.data("label") +y = layer.fc(x) +cost = layer.mse(y, l) +optimize(cost) +train(cost, reader=mnist.train()) +``` + +### Forward Part + +The first four lines of the above program build the forward part of the graph. + +![](images/graph_construction_example_forward_only.png) + +In particular, the first line `x = layer.data("images")` creates the variable x and a Feed operator that copies a column from the minibatch to x. `y = layer.fc(x)` creates not only the FC operator and output variable y, but also two parameters, W and b. + +In this example, all operators are created as `OpDesc` protobuf messages, and all variables are `VarDesc`. These protobuf messages are saved in a `BlockDesc` protobuf message. + +### Backward Part + +The fifth line `optimize(cost)` calls two functions, `ConstructBackwardGraph` and `ConstructOptimizationGraph`. + +`ConstructBackwardGraph` traverses the forward graph in the `BlockDesc` protobuf message and builds the backward part. + +![](images/graph_construction_example_forward_backward.png) + +According to the chain rule of gradient computation, `ConstructBackwardGraph` would + +1. create a gradient operator G for each operator F, +1. 
take all inputs, outputs, and outputs' gradients of F as inputs of G, +1. create gradients for all inputs of F, except for those that don't have gradients, like x and l, and +1. make all these gradients the outputs of G. + +### Optimization Part + +For each parameter, like W and b created by `layer.fc`, marked as double circles in the above graphs, `ConstructOptimizationGraph` creates an optimization operator to apply its gradient. This results in the complete graph: + +![](images/graph_construction_example_all.png) diff --git a/doc/design/images/graph_construction_example.bash b/doc/design/images/graph_construction_example.bash new file mode 100755 index 0000000000000000000000000000000000000000..35e6997abd17588e17a82d448918fc1b3bd7220e --- /dev/null +++ b/doc/design/images/graph_construction_example.bash @@ -0,0 +1,11 @@ +cat ./graph_construction_example.dot | \ + sed 's/color=red/color=red, style=invis/g' | \ + sed 's/color=green/color=green, style=invis/g' | \ + dot -Tpng > graph_construction_example_forward_only.png + +cat ./graph_construction_example.dot | \ + sed 's/color=green/color=green, style=invis/g' | \ + dot -Tpng > graph_construction_example_forward_backward.png + +cat ./graph_construction_example.dot | \ + dot -Tpng > graph_construction_example_all.png diff --git a/doc/design/images/graph_construction_example.dot b/doc/design/images/graph_construction_example.dot new file mode 100644 index 0000000000000000000000000000000000000000..bedb6de0111a8ccab4030d034d65cf72705fc25a --- /dev/null +++ b/doc/design/images/graph_construction_example.dot @@ -0,0 +1,65 @@ +digraph ImageClassificationGraph { + ///////// The forward part ///////// + FeedX [label="Feed", color=blue, shape=box]; + FeedY [label="Feed", color=blue, shape=box]; + FC [label="FC", color=blue, shape=box]; + MSE [label="MSE", color=blue, shape=box]; + + x [label="x", color=blue, shape=oval]; + l [label="l", color=blue, shape=oval]; + y [label="y", color=blue, shape=oval]; + W [label="W", color=blue, 
shape=doublecircle]; + b [label="b", color=blue, shape=doublecircle]; + cost [label="cost", color=blue, shape=oval]; + + FeedX -> x -> FC -> y -> MSE -> cost [color=blue]; + FeedY -> l [color=blue]; + W -> FC [color=blue]; + b -> FC [color=blue]; + l -> MSE [color=blue]; + + ////////// The backward part ///////// + MSE_Grad [label="MSE_grad", color=red, shape=box]; + FC_Grad [label="FC_grad", color=red, shape=box]; + + d_cost [label="d cost", color=red, shape=oval]; + d_y [label="d y", color=red, shape=oval]; + d_b [label="d b", color=red, shape=oval]; + d_W [label="d W", color=red, shape=oval]; + + cost -> MSE_Grad [color=red]; + d_cost -> MSE_Grad [color=red]; + x -> MSE_Grad [color=red]; + l -> MSE_Grad [color=red]; + y -> MSE_Grad -> d_y [color=red]; + + x -> FC_Grad [color=red]; + y -> FC_Grad [color=red]; + d_y -> FC_Grad [color=red]; + W -> FC_Grad -> d_W [color=red]; + b -> FC_Grad -> d_b [color=red]; + + ////////// The optimization part ////////// + + OPT_W [label="SGD", color=green, shape=box]; + OPT_b [label="SGD", color=green, shape=box]; + + W -> OPT_W [color=green]; + b -> OPT_b [color=green]; + d_W -> OPT_W -> W [color=green]; + d_b -> OPT_b -> b [color=green]; + + ////////// Groupings ////////// + + subgraph clusterMSE { + style=invis; + MSE; + MSE_Grad; + } + + subgraph clusterFC { + style=invis; + FC; + FC_Grad; + } +} diff --git a/doc/design/images/graph_construction_example_all.png b/doc/design/images/graph_construction_example_all.png new file mode 100644 index 0000000000000000000000000000000000000000..18d8330b60e12720bb993c8cf588d64ff8db1ea9 Binary files /dev/null and b/doc/design/images/graph_construction_example_all.png differ diff --git a/doc/design/images/graph_construction_example_forward_backward.png b/doc/design/images/graph_construction_example_forward_backward.png new file mode 100644 index 0000000000000000000000000000000000000000..61c3a02a04bc8891ab5b921a889829bcce386df8 Binary files /dev/null and 
b/doc/design/images/graph_construction_example_forward_backward.png differ diff --git a/doc/design/images/graph_construction_example_forward_only.png b/doc/design/images/graph_construction_example_forward_only.png new file mode 100644 index 0000000000000000000000000000000000000000..14805df11fc09f64d6bc17f5e969f1400d615148 Binary files /dev/null and b/doc/design/images/graph_construction_example_forward_only.png differ