提交 8baf59ce 编写于 作者: _青葱's avatar _青葱

Upload image sources to github

上级 d02a68c4
...@@ -7,7 +7,9 @@ Polyak and Juditsky (1992) showed that the test performance of simple average of ...@@ -7,7 +7,9 @@ Polyak and Juditsky (1992) showed that the test performance of simple average of
Hence, to accelerate the speed of Stochastic Gradient Descent, Averaged Stochastic Gradient Descent (ASGD) was proposed in Polyak and Juditsky (1992). For ASGD, the running average of parameters obtained by SGD, is used as the estimator for <img src="./images/theta_star.gif"/><br/> . The averaging is done as follows: Hence, to accelerate the speed of Stochastic Gradient Descent, Averaged Stochastic Gradient Descent (ASGD) was proposed in Polyak and Juditsky (1992). For ASGD, the running average of parameters obtained by SGD, is used as the estimator for <img src="./images/theta_star.gif"/><br/> . The averaging is done as follows:
![](./images/asgd.gif) <p align="center">
<img src="https://github.com/PaddlePaddle/Paddle/tree/develop/doc/fluid/images/asgd.gif"><br />
</p>
We propose averaging for any optimizer similar to how ASGD performs it, as mentioned above. We propose averaging for any optimizer similar to how ASGD performs it, as mentioned above.
......
digraph G {
rnn [label="1st level RNN" shape=box]
subgraph cluster0 {
label = "time step 0"
sent0 [label="sentence"]
sent1 [label="sentence"]
rnn1 [label="2nd level RNN" shape=box]
sent0 -> rnn1
sent1 -> rnn1
}
subgraph cluster1 {
label = "time step 1"
sent2 [label="sentence"]
sent3 [label="sentence"]
rnn2 [label="2nd level RNN" shape=box]
sent2 -> rnn2
sent3 -> rnn2
}
subgraph cluster2 {
label = "time step 2"
sent4 [label="sentence"]
sent5 [label="sentence"]
rnn3 [label="2nd level RNN" shape=box]
sent4 -> rnn3
sent5 -> rnn3
}
para0 [label="paragraph info 0"]
para1 [label="paragraph info 1"]
para2 [label="paragraph info 2"]
rnn1 -> para0
rnn2 -> para1
rnn3 -> para2
para0 -> rnn
para1 -> rnn
para2 -> rnn
chapter [label="chapter info"]
rnn -> chapter
}
digraph ImageBatchNormForkGragh {
subgraph cluster_before {
Prev [label="...", shape=plaintext];
Rnn [label="rnn_op", shape=box];
BatchNorm [label="batch_norm_op", shape=box];
Fc [label="fc_op", shape=box];
After [label="...", shape=plaintext];
Prev -> Rnn -> BatchNorm -> Fc -> After;
label="original";
}
subgraph cluster_after {
Prev2 [label="...", shape=plaintext];
Rnn2 [label="rnn_op", shape=box];
BatchNorm2_1 [label="train_batch_norm_op", shape=box];
BatchNorm2_2 [label="infer_batch_norm_op", shape=box];
Fc2_1 [label="fc_op", shape=box];
Fc2_2 [label="fc_op", shape=box];
After2_1 [label="...", shape=plaintext];
After2_2 [label="...", shape=plaintext];
Prev2 -> Rnn2 -> BatchNorm2_1 -> Fc2_1 -> After2_1;
Rnn2 -> BatchNorm2_2 ->Fc2_2 ->After2_2
label="forked";
}
}
cat ./graph_construction_example.dot | \
sed 's/color=red/color=red, style=invis/g' | \
sed 's/color=green/color=green, style=invis/g' | \
dot -Tpng > graph_construction_example_forward_only.png
cat ./graph_construction_example.dot | \
sed 's/color=green/color=green, style=invis/g' | \
dot -Tpng > graph_construction_example_forward_backward.png
cat ./graph_construction_example.dot | \
dot -Tpng > graph_construction_example_all.png
digraph ImageClassificationGraph {
///////// The forward part /////////
FeedX [label="Feed", color=blue, shape=box];
FeedY [label="Feed", color=blue, shape=box];
InitW [label="Init", color=blue, shape=diamond];
Initb [label="Init", color=blue, shape=diamond];
FC [label="FC", color=blue, shape=box];
MSE [label="MSE", color=blue, shape=box];
x [label="x", color=blue, shape=oval];
l [label="l", color=blue, shape=oval];
y [label="y", color=blue, shape=oval];
W [label="W", color=blue, shape=doublecircle];
b [label="b", color=blue, shape=doublecircle];
cost [label="cost", color=blue, shape=oval];
FeedX -> x -> FC -> y -> MSE -> cost [color=blue];
FeedY -> l [color=blue];
InitW -> W [color=blue];
Initb -> b [color=blue];
W -> FC [color=blue];
b -> FC [color=blue];
l -> MSE [color=blue];
////////// The backward part /////////
MSE_Grad [label="MSE_grad", color=red, shape=box];
FC_Grad [label="FC_grad", color=red, shape=box];
d_cost [label="d cost", color=red, shape=oval];
d_y [label="d y", color=red, shape=oval];
d_b [label="d b", color=red, shape=oval];
d_W [label="d W", color=red, shape=oval];
cost -> MSE_Grad [color=red];
d_cost -> MSE_Grad [color=red];
l -> MSE_Grad [color=red];
y -> MSE_Grad -> d_y [color=red];
x -> FC_Grad [color=red];
y -> FC_Grad [color=red];
d_y -> FC_Grad [color=red];
W -> FC_Grad -> d_W [color=red];
b -> FC_Grad -> d_b [color=red];
////////// The optimizaiton part //////////
OPT_W [label="SGD", color=green, shape=box];
OPT_b [label="SGD", color=green, shape=box];
W -> OPT_W [color=green];
b -> OPT_b [color=green];
d_W -> OPT_W -> W [color=green];
d_b -> OPT_b -> b [color=green];
////////// Groupings //////////
subgraph clusterMSE {
style=invis;
MSE;
MSE_Grad;
}
subgraph clusterFC {
style=invis;
FC;
FC_Grad;
}
}
digraph G {
label = "simple RNN implementation"
ranksep=2;
//graph [nodesep=1, ranksep=1];
node[nodesep=1]
subgraph cluster0 {
label = "global scope"
rankdir = TB
W
boot_memory
input
output
}
subgraph cluster1 {
label = "step-scope 0"
rankdir = TB
memory0[label="memory"]
prememory0[label="pre-memory"]
step_input0[label="step input"]
step_output0[label="step output"]
}
subgraph cluster2 {
label = "step-scope 1"
rankdir = TB
memory1[label="memory"]
prememory1[label="pre-memory"]
step_input1[label="step input"]
step_output1[label="step output"]
}
subgraph cluster3 {
label = "step-scope 2"
rankdir = TB
memory2[label="memory"]
prememory2[label="pre-memory"]
step_input2[label="step input"]
step_output2[label="step output"]
}
stepnet [shape=box]
stepnet0 [shape=box, style=dashed]
stepnet1 [shape=box, style=dashed]
stepnet2 [shape=box, style=dashed]
edge[color=blue]
boot_memory -> prememory0 [label="init" color="blue"]
memory0 -> prememory1 [label="copy/reference" color="blue"]
memory1 -> prememory2 [label="copy/reference" color="blue"]
edge[color=black]
W -> stepnet0[constraint=false, style=dashed]
W -> stepnet1[constraint=false, style=dashed]
W -> stepnet2[constraint=false, style=dashed]
memory0 -> stepnet0[style=dashed]
prememory0 -> stepnet0 -> step_output0[style=dashed]
memory1 -> stepnet1[style=dashed]
prememory1 -> stepnet1 -> step_output1[style=dashed]
memory2 -> stepnet2[style=dashed]
prememory2 -> stepnet2 -> step_output2[style=dashed]
input -> step_input0
input -> step_input1
input -> step_input2
step_input0 -> stepnet0 [style=dashed]
step_input1 -> stepnet1[style=dashed]
step_input2 -> stepnet2[style=dashed]
step_output0 -> output
step_output1 -> output
step_output2 -> output
stepnet0 -> stepnet[style=dashed]
stepnet1 -> stepnet[style=dashed]
stepnet2 -> stepnet[style=dashed]
}
digraph G {
chapter [label="chapter"]
subgraph cluster0 {
label = "paragraph 0"
top_rnn0[label="top rnn step 0" shape=box]
p0 [label="paragraph 0"]
p1 [label="paragraph 1"]
}
subgraph cluster1{
label = "paragraph 1"
top_rnn1[label="top rnn step 1" shape=box]
p2 [label="paragraph 0"]
p3 [label="paragraph 1"]
}
subgraph cluster_p0 {
label = "sentence 0"
low_rnn0 [label="low rnn step 0" shape=box]
s00 [label="sentence 0"]
s01 [label="sentence 1"]
low_rnn0 -> s00
low_rnn0 -> s01
}
subgraph cluster_p1 {
label = "sentence 1"
low_rnn1 [label="low rnn step 1" shape=box]
s10 [label="sentence 0"]
s11 [label="sentence 1"]
low_rnn1 -> s10
low_rnn1 -> s11
}
subgraph cluster_p2 {
label = "sentence 1"
low_rnn2 [label="low rnn step 0" shape=box]
s20 [label="sentence 0"]
s21 [label="sentence 1"]
low_rnn2 -> s20
low_rnn2 -> s21
}
subgraph cluster_p3 {
label = "sentence 1"
low_rnn3 [label="low rnn step 1" shape=box]
s30 [label="sentence 0"]
s31 [label="sentence 1"]
low_rnn3 -> s30
low_rnn3 -> s31
}
chapter -> top_rnn0
chapter -> top_rnn1
top_rnn0 -> p0
top_rnn0 -> p1
top_rnn1 -> p2
top_rnn1 -> p3
p0 -> low_rnn0
p1 -> low_rnn1
p2 -> low_rnn2
p3 -> low_rnn3
}
digraph Test {
z -> generator -> G_img;
G_img -> discriminator -> D_f -> d_loss_f;
label0 -> d_loss_f -> d_loss;
img -> discriminator -> D_t -> d_loss_t;
label1 -> d_loss_t -> d_loss;
d_loss -> d_loss_t[color=red, style=dashed];
d_loss -> d_loss_f[color=red, style=dashed];
d_loss_t -> D_t[color=red, style=dashed];
d_loss_f -> D_f[color=red, style=dashed];
D_t -> discriminator[color=red, style=dashed];
D_f -> discriminator[color=red, style=dashed];
D_f -> g_loss;
label2 -> g_loss;
g_loss -> D_f[color=green, style=dashed];
D_f -> discriminator[color=green, style=dashed];
discriminator -> G_img[color=green, style=dashed];
G_img -> generator[color=green, style=dashed];
discriminator [color=red, shape=box];
generator [color=green, shape=box];
z [shape=diamond];
img [shape=diamond];
label0 [shape=diamond];
label1 [shape=diamond];
label2 [shape=diamond];
d_loss [color=red];
g_loss [color=green];
}
Markdown is supported
0% .
You are about to add 0 people to the discussion. Proceed with caution.
先完成此消息的编辑!
想要评论请 注册