/*
 * ImageClassificationGraph
 *
 * A small computation graph drawn in three colour-coded passes:
 *   blue  - forward part      (Feed -> FC -> MSE -> cost)
 *   red   - backward part     (MSE_grad / FC_grad producing the d_* nodes)
 *   green - optimization part (SGD writing back into W and b)
 *
 * Node shapes as used below: box for Feed/FC/MSE/*_grad/SGD, oval for
 * x/l/y/cost and the d_* nodes, doublecircle for W and b (the two nodes
 * that the SGD boxes write back into).
 *
 * Block comments are used for the section markers on purpose: a `//`
 * comment runs to the end of the physical line, so if this file is ever
 * collapsed onto one line it would comment out the rest of the graph,
 * including the closing brace.
 */
digraph ImageClassificationGraph {
  /* ---------- The forward part ---------- */
  FeedX [label="Feed", color=blue, shape=box];
  FeedY [label="Feed", color=blue, shape=box];
  FC [label="FC", color=blue, shape=box];
  MSE [label="MSE", color=blue, shape=box];

  x [label="x", color=blue, shape=oval];
  l [label="l", color=blue, shape=oval];
  y [label="y", color=blue, shape=oval];
  W [label="W", color=blue, shape=doublecircle];
  b [label="b", color=blue, shape=doublecircle];
  cost [label="cost", color=blue, shape=oval];

  FeedX -> x -> FC -> y -> MSE -> cost [color=blue];
  /* NOTE(review): this chain also wires l into FC; l is an input of MSE
     below as well. Confirm the l -> FC edge is intended. */
  FeedY -> l -> FC [color=blue];
  W -> FC [color=blue];
  b -> FC [color=blue];
  l -> MSE [color=blue];

  /* ---------- The backward part ---------- */
  MSE_Grad [label="MSE_grad", color=red, shape=box];
  FC_Grad [label="FC_grad", color=red, shape=box];

  d_cost [label="d cost", color=red, shape=oval];
  d_y [label="d y", color=red, shape=oval];
  d_b [label="d b", color=red, shape=oval];
  d_W [label="d W", color=red, shape=oval];

  /* Inputs of MSE_grad; its output is d_y. */
  cost -> MSE_Grad [color=red];
  d_cost -> MSE_Grad [color=red];
  x -> MSE_Grad [color=red];
  l -> MSE_Grad [color=red];
  y -> MSE_Grad -> d_y [color=red];

  /* Inputs of FC_grad; its outputs are d_W and d_b. */
  x -> FC_Grad [color=red];
  y -> FC_Grad [color=red];
  d_y -> FC_Grad [color=red];
  W -> FC_Grad -> d_W [color=red];
  b -> FC_Grad -> d_b [color=red];

  /* ---------- The optimization part ---------- */
  OPT_W [label="SGD", color=green, shape=box];
  OPT_b [label="SGD", color=green, shape=box];

  d_W -> OPT_W -> W [color=green];
  d_b -> OPT_b -> b [color=green];

  /* ---------- Groupings ---------- */
  /* Invisible clusters that group each forward node with its *_grad
     counterpart; style=invis hides the cluster border. */
  subgraph clusterMSE {
    style=invis;
    MSE;
    MSE_Grad;
  }
  subgraph clusterFC {
    style=invis;
    FC;
    FC_Grad;
  }
}