+
+
+- GoogleNet, ms / batch
+
+| Total batch size | 128 * 4 | 256 * 4 |
+|-------------------|--------------| ----------- |
+| PaddlePaddle | 1178 | 2367 |
+| TensorFlow | 1210 | 2292 |
+| Caffe | 2007 | out of memory |
+
+
+
+
+## RNN
+We benchmark an LSTM network on a text classification task.
+
+### Dataset
+- [IMDB](http://www.iro.umontreal.ca/~lisa/deep/data/imdb.pkl)
+- Sequence length is 100. PaddlePaddle supports training on variable-length sequences, but TensorFlow requires padding, so we pad all sequences to length 100 in PaddlePaddle as well for a fair comparison.
+- Dictionary size = 30000
+- Peephole connections are enabled by default in PaddlePaddle's `lstmemory`; we enable them in TensorFlow as well (see the sketch below).
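+
+For reference, here is a one-line sketch of how peepholes would be enabled in the TensorFlow LSTM cell of that era (assumed API, not the exact benchmark script):
+
+```python
+import tensorflow as tf
+
+# use_peepholes=True matches PaddlePaddle's lstmemory default
+cell = tf.nn.rnn_cell.LSTMCell(num_units=256, use_peepholes=True)
+```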
+
+### Single-GPU
+
+#### LSTM in Text Classification
+
+We test a `2 lstm layer + fc` network with different hidden sizes and batch sizes.
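+
+A minimal sketch of such a network in PaddlePaddle's config style follows; the layer names and the binary output size are illustrative assumptions, not the exact benchmark config:
+
+```python
+from paddle.trainer_config_helpers import *
+
+hidden_size = 256  # varied across runs: 256, 512, 1280
+dict_size = 30000
+
+data = data_layer(name="word", size=dict_size)
+emb = embedding_layer(input=data, size=hidden_size)
+# simple_lstm wraps lstmemory, so peephole connections are on by default
+lstm1 = simple_lstm(input=emb, size=hidden_size)
+lstm2 = simple_lstm(input=lstm1, size=hidden_size)
+pooled = pooling_layer(input=lstm2, pooling_type=MaxPooling())
+prob = fc_layer(input=pooled, size=2, act=SoftmaxActivation())
+lab = data_layer(name="label", size=2)
+outputs(classification_cost(input=prob, label=lab))
+```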
+
+- Batch size = 64, ms / batch
+
+| hidden_size | 256 | 512 | 1280 |
+|--------------|-------| -------| --------|
+| PaddlePaddle | 83 | 184 | 641 |
+| TensorFlow | 175 | 280 | 818 |
+
+- Batch size = 128, ms / batch
+
+| hidden_size | 256 | 512 | 1280 |
+|--------------|------- | -------| --------|
+| PaddlePaddle | 110 | 261 | 1007 |
+| TensorFlow | 181 | 361 | 1237 |
+
+
+- Batch size = 256, ms / batch
+
+| hidden_size | 256 | 512 | 1280 |
+|--------------|-------| -------| --------|
+| PaddlePaddle | 170 | 414 | 1655 |
+| TensorFlow | 238 | 536 | 1905 |
+
+
+
+#### Seq2Seq
+
+A benchmark for sequence-to-sequence networks will be added later.
+
+
+### Multi-GPU: 4 GPUs
+
+#### LSTM in Text Classification
+
+- hidden_size = 256, ms / batch
+
+| batch_size | 256 | 512 |
+|--------------| -------| --------|
+| PaddlePaddle | 90 | 118 |
+| TensorFlow | 226 | 118 |
+
+
+- hidden_size = 512, ms / batch
+
+| batch_size | 256 | 512 |
+|--------------| -------| --------|
+| PaddlePaddle | 189 | 268 |
+| TensorFlow | 297 | 383 |
+
+
+
+
+#### Seq2Seq
+
+A benchmark for sequence-to-sequence networks will be added later.
diff --git a/benchmark/caffe/image/alexnet.prototxt b/benchmark/caffe/image/alexnet.prototxt
new file mode 100644
index 0000000000000000000000000000000000000000..aca184ddaf2ca2b5e2bea17d131055e0621b8271
--- /dev/null
+++ b/benchmark/caffe/image/alexnet.prototxt
@@ -0,0 +1,347 @@
+name: "alexnet"
+input: "data"
+input_dim: 64
+input_dim: 3
+input_dim: 227
+input_dim: 227
+input: "label"
+input_dim: 64
+input_dim: 1
+input_dim: 1
+input_dim: 1
+force_backward: true
+layer {
+ name: "conv1"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 96
+ kernel_size: 11
+ stride: 4
+ weight_filler {
+ type: "gaussian"
+ std: 0.01
+ }
+ bias_filler {
+ type: "constant"
+ value: 0
+ }
+ }
+}
+layer {
+ name: "relu1"
+ type: "ReLU"
+ bottom: "conv1"
+ top: "conv1"
+}
+layer {
+ name: "norm1"
+ type: "LRN"
+ bottom: "conv1"
+ top: "norm1"
+ lrn_param {
+ local_size: 5
+ alpha: 0.0001
+ beta: 0.75
+ }
+}
+layer {
+ name: "pool1"
+ type: "Pooling"
+ bottom: "norm1"
+ top: "pool1"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 2
+ }
+}
+layer {
+ name: "conv2"
+ type: "Convolution"
+ bottom: "pool1"
+ top: "conv2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 2
+ kernel_size: 5
+ group: 1
+ weight_filler {
+ type: "gaussian"
+ std: 0.01
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.1
+ }
+ }
+}
+layer {
+ name: "relu2"
+ type: "ReLU"
+ bottom: "conv2"
+ top: "conv2"
+}
+layer {
+ name: "norm2"
+ type: "LRN"
+ bottom: "conv2"
+ top: "norm2"
+ lrn_param {
+ local_size: 5
+ alpha: 0.0001
+ beta: 0.75
+ }
+}
+layer {
+ name: "pool2"
+ type: "Pooling"
+ bottom: "norm2"
+ top: "pool2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 2
+ }
+}
+layer {
+ name: "conv3"
+ type: "Convolution"
+ bottom: "pool2"
+ top: "conv3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 384
+ pad: 1
+ kernel_size: 3
+ weight_filler {
+ type: "gaussian"
+ std: 0.01
+ }
+ bias_filler {
+ type: "constant"
+ value: 0
+ }
+ }
+}
+layer {
+ name: "relu3"
+ type: "ReLU"
+ bottom: "conv3"
+ top: "conv3"
+}
+layer {
+ name: "conv4"
+ type: "Convolution"
+ bottom: "conv3"
+ top: "conv4"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 384
+ pad: 1
+ kernel_size: 3
+ group: 1
+ weight_filler {
+ type: "gaussian"
+ std: 0.01
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.1
+ }
+ }
+}
+layer {
+ name: "relu4"
+ type: "ReLU"
+ bottom: "conv4"
+ top: "conv4"
+}
+layer {
+ name: "conv5"
+ type: "Convolution"
+ bottom: "conv4"
+ top: "conv5"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ group: 1
+ weight_filler {
+ type: "gaussian"
+ std: 0.01
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.1
+ }
+ }
+}
+layer {
+ name: "relu5"
+ type: "ReLU"
+ bottom: "conv5"
+ top: "conv5"
+}
+layer {
+ name: "pool5"
+ type: "Pooling"
+ bottom: "conv5"
+ top: "pool5"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 2
+ }
+}
+layer {
+ name: "fc6"
+ type: "InnerProduct"
+ bottom: "pool5"
+ top: "fc6"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ inner_product_param {
+ num_output: 4096
+ weight_filler {
+ type: "gaussian"
+ std: 0.005
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.1
+ }
+ }
+}
+layer {
+ name: "relu6"
+ type: "ReLU"
+ bottom: "fc6"
+ top: "fc6"
+}
+layer {
+ name: "drop6"
+ type: "Dropout"
+ bottom: "fc6"
+ top: "fc6"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "fc7"
+ type: "InnerProduct"
+ bottom: "fc6"
+ top: "fc7"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ inner_product_param {
+ num_output: 4096
+ weight_filler {
+ type: "gaussian"
+ std: 0.005
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.1
+ }
+ }
+}
+layer {
+ name: "relu7"
+ type: "ReLU"
+ bottom: "fc7"
+ top: "fc7"
+}
+layer {
+ name: "drop7"
+ type: "Dropout"
+ bottom: "fc7"
+ top: "fc7"
+ dropout_param {
+ dropout_ratio: 0.5
+ }
+}
+layer {
+ name: "fc8"
+ type: "InnerProduct"
+ bottom: "fc7"
+ top: "fc8"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ inner_product_param {
+ num_output: 1000
+ weight_filler {
+ type: "gaussian"
+ std: 0.01
+ }
+ bias_filler {
+ type: "constant"
+ value: 0
+ }
+ }
+}
+layer {
+ name: "loss"
+ type: "SoftmaxWithLoss"
+ bottom: "fc8"
+ bottom: "label"
+ top: "loss"
+}
diff --git a/benchmark/caffe/image/googlenet.prototxt b/benchmark/caffe/image/googlenet.prototxt
new file mode 100644
index 0000000000000000000000000000000000000000..c5f3b4fe3efcb6f7397031c086997fa914c67b7f
--- /dev/null
+++ b/benchmark/caffe/image/googlenet.prototxt
@@ -0,0 +1,2334 @@
+name: "googlenet"
+input: "data"
+input_dim: 128
+input_dim: 3
+input_dim: 224
+input_dim: 224
+input: "label"
+input_dim: 128
+input_dim: 1
+input_dim: 1
+input_dim: 1
+layer {
+ name: "conv1/7x7_s2"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1/7x7_s2"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 3
+ kernel_size: 7
+ stride: 2
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "conv1/relu_7x7"
+ type: "ReLU"
+ bottom: "conv1/7x7_s2"
+ top: "conv1/7x7_s2"
+}
+layer {
+ name: "pool1/3x3_s2"
+ type: "Pooling"
+ bottom: "conv1/7x7_s2"
+ top: "pool1/3x3_s2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 2
+ }
+}
+#layer {
+# name: "pool1/norm1"
+# type: "LRN"
+# bottom: "pool1/3x3_s2"
+# top: "pool1/norm1"
+# lrn_param {
+# local_size: 5
+# alpha: 0.0001
+# beta: 0.75
+# }
+#}
+layer {
+ name: "conv2/3x3_reduce"
+ type: "Convolution"
+# bottom: "pool1/norm1"
+ bottom: "pool1/3x3_s2"
+ top: "conv2/3x3_reduce"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "conv2/relu_3x3_reduce"
+ type: "ReLU"
+ bottom: "conv2/3x3_reduce"
+ top: "conv2/3x3_reduce"
+}
+layer {
+ name: "conv2/3x3"
+ type: "Convolution"
+ bottom: "conv2/3x3_reduce"
+ top: "conv2/3x3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 192
+ pad: 1
+ kernel_size: 3
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "conv2/relu_3x3"
+ type: "ReLU"
+ bottom: "conv2/3x3"
+ top: "conv2/3x3"
+}
+#layer {
+# name: "conv2/norm2"
+# type: "LRN"
+# bottom: "conv2/3x3"
+# top: "conv2/norm2"
+# lrn_param {
+# local_size: 5
+# alpha: 0.0001
+# beta: 0.75
+# }
+#}
+layer {
+ name: "pool2/3x3_s2"
+ type: "Pooling"
+# bottom: "conv2/norm2"
+ bottom: "conv2/3x3"
+ top: "pool2/3x3_s2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 2
+ }
+}
+layer {
+ name: "inception_3a/1x1"
+ type: "Convolution"
+ bottom: "pool2/3x3_s2"
+ top: "inception_3a/1x1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_3a/relu_1x1"
+ type: "ReLU"
+ bottom: "inception_3a/1x1"
+ top: "inception_3a/1x1"
+}
+layer {
+ name: "inception_3a/3x3_reduce"
+ type: "Convolution"
+ bottom: "pool2/3x3_s2"
+ top: "inception_3a/3x3_reduce"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 96
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_3a/relu_3x3_reduce"
+ type: "ReLU"
+ bottom: "inception_3a/3x3_reduce"
+ top: "inception_3a/3x3_reduce"
+}
+layer {
+ name: "inception_3a/3x3"
+ type: "Convolution"
+ bottom: "inception_3a/3x3_reduce"
+ top: "inception_3a/3x3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 1
+ kernel_size: 3
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_3a/relu_3x3"
+ type: "ReLU"
+ bottom: "inception_3a/3x3"
+ top: "inception_3a/3x3"
+}
+layer {
+ name: "inception_3a/5x5_reduce"
+ type: "Convolution"
+ bottom: "pool2/3x3_s2"
+ top: "inception_3a/5x5_reduce"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 16
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_3a/relu_5x5_reduce"
+ type: "ReLU"
+ bottom: "inception_3a/5x5_reduce"
+ top: "inception_3a/5x5_reduce"
+}
+layer {
+ name: "inception_3a/5x5"
+ type: "Convolution"
+ bottom: "inception_3a/5x5_reduce"
+ top: "inception_3a/5x5"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 32
+ pad: 2
+ kernel_size: 5
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_3a/relu_5x5"
+ type: "ReLU"
+ bottom: "inception_3a/5x5"
+ top: "inception_3a/5x5"
+}
+layer {
+ name: "inception_3a/pool"
+ type: "Pooling"
+ bottom: "pool2/3x3_s2"
+ top: "inception_3a/pool"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 1
+ pad: 1
+ }
+}
+layer {
+ name: "inception_3a/pool_proj"
+ type: "Convolution"
+ bottom: "inception_3a/pool"
+ top: "inception_3a/pool_proj"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 32
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_3a/relu_pool_proj"
+ type: "ReLU"
+ bottom: "inception_3a/pool_proj"
+ top: "inception_3a/pool_proj"
+}
+layer {
+ name: "inception_3a/output"
+ type: "Concat"
+ bottom: "inception_3a/1x1"
+ bottom: "inception_3a/3x3"
+ bottom: "inception_3a/5x5"
+ bottom: "inception_3a/pool_proj"
+ top: "inception_3a/output"
+}
+layer {
+ name: "inception_3b/1x1"
+ type: "Convolution"
+ bottom: "inception_3a/output"
+ top: "inception_3b/1x1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_3b/relu_1x1"
+ type: "ReLU"
+ bottom: "inception_3b/1x1"
+ top: "inception_3b/1x1"
+}
+layer {
+ name: "inception_3b/3x3_reduce"
+ type: "Convolution"
+ bottom: "inception_3a/output"
+ top: "inception_3b/3x3_reduce"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_3b/relu_3x3_reduce"
+ type: "ReLU"
+ bottom: "inception_3b/3x3_reduce"
+ top: "inception_3b/3x3_reduce"
+}
+layer {
+ name: "inception_3b/3x3"
+ type: "Convolution"
+ bottom: "inception_3b/3x3_reduce"
+ top: "inception_3b/3x3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 192
+ pad: 1
+ kernel_size: 3
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_3b/relu_3x3"
+ type: "ReLU"
+ bottom: "inception_3b/3x3"
+ top: "inception_3b/3x3"
+}
+layer {
+ name: "inception_3b/5x5_reduce"
+ type: "Convolution"
+ bottom: "inception_3a/output"
+ top: "inception_3b/5x5_reduce"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 32
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_3b/relu_5x5_reduce"
+ type: "ReLU"
+ bottom: "inception_3b/5x5_reduce"
+ top: "inception_3b/5x5_reduce"
+}
+layer {
+ name: "inception_3b/5x5"
+ type: "Convolution"
+ bottom: "inception_3b/5x5_reduce"
+ top: "inception_3b/5x5"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 96
+ pad: 2
+ kernel_size: 5
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_3b/relu_5x5"
+ type: "ReLU"
+ bottom: "inception_3b/5x5"
+ top: "inception_3b/5x5"
+}
+layer {
+ name: "inception_3b/pool"
+ type: "Pooling"
+ bottom: "inception_3a/output"
+ top: "inception_3b/pool"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 1
+ pad: 1
+ }
+}
+layer {
+ name: "inception_3b/pool_proj"
+ type: "Convolution"
+ bottom: "inception_3b/pool"
+ top: "inception_3b/pool_proj"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_3b/relu_pool_proj"
+ type: "ReLU"
+ bottom: "inception_3b/pool_proj"
+ top: "inception_3b/pool_proj"
+}
+layer {
+ name: "inception_3b/output"
+ type: "Concat"
+ bottom: "inception_3b/1x1"
+ bottom: "inception_3b/3x3"
+ bottom: "inception_3b/5x5"
+ bottom: "inception_3b/pool_proj"
+ top: "inception_3b/output"
+}
+layer {
+ name: "pool3/3x3_s2"
+ type: "Pooling"
+ bottom: "inception_3b/output"
+ top: "pool3/3x3_s2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 2
+ }
+}
+layer {
+ name: "inception_4a/1x1"
+ type: "Convolution"
+ bottom: "pool3/3x3_s2"
+ top: "inception_4a/1x1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 192
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4a/relu_1x1"
+ type: "ReLU"
+ bottom: "inception_4a/1x1"
+ top: "inception_4a/1x1"
+}
+layer {
+ name: "inception_4a/3x3_reduce"
+ type: "Convolution"
+ bottom: "pool3/3x3_s2"
+ top: "inception_4a/3x3_reduce"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 96
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4a/relu_3x3_reduce"
+ type: "ReLU"
+ bottom: "inception_4a/3x3_reduce"
+ top: "inception_4a/3x3_reduce"
+}
+layer {
+ name: "inception_4a/3x3"
+ type: "Convolution"
+ bottom: "inception_4a/3x3_reduce"
+ top: "inception_4a/3x3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 208
+ pad: 1
+ kernel_size: 3
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4a/relu_3x3"
+ type: "ReLU"
+ bottom: "inception_4a/3x3"
+ top: "inception_4a/3x3"
+}
+layer {
+ name: "inception_4a/5x5_reduce"
+ type: "Convolution"
+ bottom: "pool3/3x3_s2"
+ top: "inception_4a/5x5_reduce"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 16
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4a/relu_5x5_reduce"
+ type: "ReLU"
+ bottom: "inception_4a/5x5_reduce"
+ top: "inception_4a/5x5_reduce"
+}
+layer {
+ name: "inception_4a/5x5"
+ type: "Convolution"
+ bottom: "inception_4a/5x5_reduce"
+ top: "inception_4a/5x5"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 48
+ pad: 2
+ kernel_size: 5
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4a/relu_5x5"
+ type: "ReLU"
+ bottom: "inception_4a/5x5"
+ top: "inception_4a/5x5"
+}
+layer {
+ name: "inception_4a/pool"
+ type: "Pooling"
+ bottom: "pool3/3x3_s2"
+ top: "inception_4a/pool"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 1
+ pad: 1
+ }
+}
+layer {
+ name: "inception_4a/pool_proj"
+ type: "Convolution"
+ bottom: "inception_4a/pool"
+ top: "inception_4a/pool_proj"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4a/relu_pool_proj"
+ type: "ReLU"
+ bottom: "inception_4a/pool_proj"
+ top: "inception_4a/pool_proj"
+}
+layer {
+ name: "inception_4a/output"
+ type: "Concat"
+ bottom: "inception_4a/1x1"
+ bottom: "inception_4a/3x3"
+ bottom: "inception_4a/5x5"
+ bottom: "inception_4a/pool_proj"
+ top: "inception_4a/output"
+}
+#layer {
+# name: "loss1/ave_pool"
+# type: "Pooling"
+# bottom: "inception_4a/output"
+# top: "loss1/ave_pool"
+# pooling_param {
+# pool: AVE
+# kernel_size: 5
+# stride: 3
+# }
+#}
+#layer {
+# name: "loss1/conv"
+# type: "Convolution"
+# bottom: "loss1/ave_pool"
+# top: "loss1/conv"
+# param {
+# lr_mult: 1
+# decay_mult: 1
+# }
+# param {
+# lr_mult: 2
+# decay_mult: 0
+# }
+# convolution_param {
+# num_output: 128
+# kernel_size: 1
+# weight_filler {
+# type: "xavier"
+# }
+# bias_filler {
+# type: "constant"
+# value: 0.2
+# }
+# }
+#}
+#layer {
+# name: "loss1/relu_conv"
+# type: "ReLU"
+# bottom: "loss1/conv"
+# top: "loss1/conv"
+#}
+#layer {
+# name: "loss1/fc"
+# type: "InnerProduct"
+# bottom: "loss1/conv"
+# top: "loss1/fc"
+# param {
+# lr_mult: 1
+# decay_mult: 1
+# }
+# param {
+# lr_mult: 2
+# decay_mult: 0
+# }
+# inner_product_param {
+# num_output: 1024
+# weight_filler {
+# type: "xavier"
+# }
+# bias_filler {
+# type: "constant"
+# value: 0.2
+# }
+# }
+#}
+#layer {
+# name: "loss1/relu_fc"
+# type: "ReLU"
+# bottom: "loss1/fc"
+# top: "loss1/fc"
+#}
+#layer {
+# name: "loss1/drop_fc"
+# type: "Dropout"
+# bottom: "loss1/fc"
+# top: "loss1/fc"
+# dropout_param {
+# dropout_ratio: 0.7
+# }
+#}
+#layer {
+# name: "loss1/classifier"
+# type: "InnerProduct"
+# bottom: "loss1/fc"
+# top: "loss1/classifier"
+# param {
+# lr_mult: 1
+# decay_mult: 1
+# }
+# param {
+# lr_mult: 2
+# decay_mult: 0
+# }
+# inner_product_param {
+# num_output: 1000
+# weight_filler {
+# type: "xavier"
+# }
+# bias_filler {
+# type: "constant"
+# value: 0
+# }
+# }
+#}
+#layer {
+# name: "loss1/loss"
+# type: "SoftmaxWithLoss"
+# bottom: "loss1/classifier"
+# bottom: "label"
+# top: "loss1/loss1"
+# loss_weight: 0.3
+#}
+layer {
+ name: "inception_4b/1x1"
+ type: "Convolution"
+ bottom: "inception_4a/output"
+ top: "inception_4b/1x1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 160
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4b/relu_1x1"
+ type: "ReLU"
+ bottom: "inception_4b/1x1"
+ top: "inception_4b/1x1"
+}
+layer {
+ name: "inception_4b/3x3_reduce"
+ type: "Convolution"
+ bottom: "inception_4a/output"
+ top: "inception_4b/3x3_reduce"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 112
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4b/relu_3x3_reduce"
+ type: "ReLU"
+ bottom: "inception_4b/3x3_reduce"
+ top: "inception_4b/3x3_reduce"
+}
+layer {
+ name: "inception_4b/3x3"
+ type: "Convolution"
+ bottom: "inception_4b/3x3_reduce"
+ top: "inception_4b/3x3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 224
+ pad: 1
+ kernel_size: 3
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4b/relu_3x3"
+ type: "ReLU"
+ bottom: "inception_4b/3x3"
+ top: "inception_4b/3x3"
+}
+layer {
+ name: "inception_4b/5x5_reduce"
+ type: "Convolution"
+ bottom: "inception_4a/output"
+ top: "inception_4b/5x5_reduce"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 24
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4b/relu_5x5_reduce"
+ type: "ReLU"
+ bottom: "inception_4b/5x5_reduce"
+ top: "inception_4b/5x5_reduce"
+}
+layer {
+ name: "inception_4b/5x5"
+ type: "Convolution"
+ bottom: "inception_4b/5x5_reduce"
+ top: "inception_4b/5x5"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 2
+ kernel_size: 5
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4b/relu_5x5"
+ type: "ReLU"
+ bottom: "inception_4b/5x5"
+ top: "inception_4b/5x5"
+}
+layer {
+ name: "inception_4b/pool"
+ type: "Pooling"
+ bottom: "inception_4a/output"
+ top: "inception_4b/pool"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 1
+ pad: 1
+ }
+}
+layer {
+ name: "inception_4b/pool_proj"
+ type: "Convolution"
+ bottom: "inception_4b/pool"
+ top: "inception_4b/pool_proj"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4b/relu_pool_proj"
+ type: "ReLU"
+ bottom: "inception_4b/pool_proj"
+ top: "inception_4b/pool_proj"
+}
+layer {
+ name: "inception_4b/output"
+ type: "Concat"
+ bottom: "inception_4b/1x1"
+ bottom: "inception_4b/3x3"
+ bottom: "inception_4b/5x5"
+ bottom: "inception_4b/pool_proj"
+ top: "inception_4b/output"
+}
+layer {
+ name: "inception_4c/1x1"
+ type: "Convolution"
+ bottom: "inception_4b/output"
+ top: "inception_4c/1x1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4c/relu_1x1"
+ type: "ReLU"
+ bottom: "inception_4c/1x1"
+ top: "inception_4c/1x1"
+}
+layer {
+ name: "inception_4c/3x3_reduce"
+ type: "Convolution"
+ bottom: "inception_4b/output"
+ top: "inception_4c/3x3_reduce"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4c/relu_3x3_reduce"
+ type: "ReLU"
+ bottom: "inception_4c/3x3_reduce"
+ top: "inception_4c/3x3_reduce"
+}
+layer {
+ name: "inception_4c/3x3"
+ type: "Convolution"
+ bottom: "inception_4c/3x3_reduce"
+ top: "inception_4c/3x3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ pad: 1
+ kernel_size: 3
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4c/relu_3x3"
+ type: "ReLU"
+ bottom: "inception_4c/3x3"
+ top: "inception_4c/3x3"
+}
+layer {
+ name: "inception_4c/5x5_reduce"
+ type: "Convolution"
+ bottom: "inception_4b/output"
+ top: "inception_4c/5x5_reduce"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 24
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4c/relu_5x5_reduce"
+ type: "ReLU"
+ bottom: "inception_4c/5x5_reduce"
+ top: "inception_4c/5x5_reduce"
+}
+layer {
+ name: "inception_4c/5x5"
+ type: "Convolution"
+ bottom: "inception_4c/5x5_reduce"
+ top: "inception_4c/5x5"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 2
+ kernel_size: 5
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4c/relu_5x5"
+ type: "ReLU"
+ bottom: "inception_4c/5x5"
+ top: "inception_4c/5x5"
+}
+layer {
+ name: "inception_4c/pool"
+ type: "Pooling"
+ bottom: "inception_4b/output"
+ top: "inception_4c/pool"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 1
+ pad: 1
+ }
+}
+layer {
+ name: "inception_4c/pool_proj"
+ type: "Convolution"
+ bottom: "inception_4c/pool"
+ top: "inception_4c/pool_proj"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4c/relu_pool_proj"
+ type: "ReLU"
+ bottom: "inception_4c/pool_proj"
+ top: "inception_4c/pool_proj"
+}
+layer {
+ name: "inception_4c/output"
+ type: "Concat"
+ bottom: "inception_4c/1x1"
+ bottom: "inception_4c/3x3"
+ bottom: "inception_4c/5x5"
+ bottom: "inception_4c/pool_proj"
+ top: "inception_4c/output"
+}
+layer {
+ name: "inception_4d/1x1"
+ type: "Convolution"
+ bottom: "inception_4c/output"
+ top: "inception_4d/1x1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 112
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4d/relu_1x1"
+ type: "ReLU"
+ bottom: "inception_4d/1x1"
+ top: "inception_4d/1x1"
+}
+layer {
+ name: "inception_4d/3x3_reduce"
+ type: "Convolution"
+ bottom: "inception_4c/output"
+ top: "inception_4d/3x3_reduce"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 144
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4d/relu_3x3_reduce"
+ type: "ReLU"
+ bottom: "inception_4d/3x3_reduce"
+ top: "inception_4d/3x3_reduce"
+}
+layer {
+ name: "inception_4d/3x3"
+ type: "Convolution"
+ bottom: "inception_4d/3x3_reduce"
+ top: "inception_4d/3x3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 288
+ pad: 1
+ kernel_size: 3
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4d/relu_3x3"
+ type: "ReLU"
+ bottom: "inception_4d/3x3"
+ top: "inception_4d/3x3"
+}
+layer {
+ name: "inception_4d/5x5_reduce"
+ type: "Convolution"
+ bottom: "inception_4c/output"
+ top: "inception_4d/5x5_reduce"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 32
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4d/relu_5x5_reduce"
+ type: "ReLU"
+ bottom: "inception_4d/5x5_reduce"
+ top: "inception_4d/5x5_reduce"
+}
+layer {
+ name: "inception_4d/5x5"
+ type: "Convolution"
+ bottom: "inception_4d/5x5_reduce"
+ top: "inception_4d/5x5"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ pad: 2
+ kernel_size: 5
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4d/relu_5x5"
+ type: "ReLU"
+ bottom: "inception_4d/5x5"
+ top: "inception_4d/5x5"
+}
+layer {
+ name: "inception_4d/pool"
+ type: "Pooling"
+ bottom: "inception_4c/output"
+ top: "inception_4d/pool"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 1
+ pad: 1
+ }
+}
+layer {
+ name: "inception_4d/pool_proj"
+ type: "Convolution"
+ bottom: "inception_4d/pool"
+ top: "inception_4d/pool_proj"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 64
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4d/relu_pool_proj"
+ type: "ReLU"
+ bottom: "inception_4d/pool_proj"
+ top: "inception_4d/pool_proj"
+}
+layer {
+ name: "inception_4d/output"
+ type: "Concat"
+ bottom: "inception_4d/1x1"
+ bottom: "inception_4d/3x3"
+ bottom: "inception_4d/5x5"
+ bottom: "inception_4d/pool_proj"
+ top: "inception_4d/output"
+}
+#layer {
+# name: "loss2/ave_pool"
+# type: "Pooling"
+# bottom: "inception_4d/output"
+# top: "loss2/ave_pool"
+# pooling_param {
+# pool: AVE
+# kernel_size: 5
+# stride: 3
+# }
+#}
+#layer {
+# name: "loss2/conv"
+# type: "Convolution"
+# bottom: "loss2/ave_pool"
+# top: "loss2/conv"
+# param {
+# lr_mult: 1
+# decay_mult: 1
+# }
+# param {
+# lr_mult: 2
+# decay_mult: 0
+# }
+# convolution_param {
+# num_output: 128
+# kernel_size: 1
+# weight_filler {
+# type: "xavier"
+# }
+# bias_filler {
+# type: "constant"
+# value: 0.2
+# }
+# }
+#}
+#layer {
+# name: "loss2/relu_conv"
+# type: "ReLU"
+# bottom: "loss2/conv"
+# top: "loss2/conv"
+#}
+#layer {
+# name: "loss2/fc"
+# type: "InnerProduct"
+# bottom: "loss2/conv"
+# top: "loss2/fc"
+# param {
+# lr_mult: 1
+# decay_mult: 1
+# }
+# param {
+# lr_mult: 2
+# decay_mult: 0
+# }
+# inner_product_param {
+# num_output: 1024
+# weight_filler {
+# type: "xavier"
+# }
+# bias_filler {
+# type: "constant"
+# value: 0.2
+# }
+# }
+#}
+#layer {
+# name: "loss2/relu_fc"
+# type: "ReLU"
+# bottom: "loss2/fc"
+# top: "loss2/fc"
+#}
+#layer {
+# name: "loss2/drop_fc"
+# type: "Dropout"
+# bottom: "loss2/fc"
+# top: "loss2/fc"
+# dropout_param {
+# dropout_ratio: 0.7
+# }
+#}
+#layer {
+# name: "loss2/classifier"
+# type: "InnerProduct"
+# bottom: "loss2/fc"
+# top: "loss2/classifier"
+# param {
+# lr_mult: 1
+# decay_mult: 1
+# }
+# param {
+# lr_mult: 2
+# decay_mult: 0
+# }
+# inner_product_param {
+# num_output: 1000
+# weight_filler {
+# type: "xavier"
+# }
+# bias_filler {
+# type: "constant"
+# value: 0
+# }
+# }
+#}
+#layer {
+# name: "loss2/loss"
+# type: "SoftmaxWithLoss"
+# bottom: "loss2/classifier"
+# bottom: "label"
+# top: "loss2/loss1"
+# loss_weight: 0.3
+#}
+layer {
+ name: "inception_4e/1x1"
+ type: "Convolution"
+ bottom: "inception_4d/output"
+ top: "inception_4e/1x1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4e/relu_1x1"
+ type: "ReLU"
+ bottom: "inception_4e/1x1"
+ top: "inception_4e/1x1"
+}
+layer {
+ name: "inception_4e/3x3_reduce"
+ type: "Convolution"
+ bottom: "inception_4d/output"
+ top: "inception_4e/3x3_reduce"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 160
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4e/relu_3x3_reduce"
+ type: "ReLU"
+ bottom: "inception_4e/3x3_reduce"
+ top: "inception_4e/3x3_reduce"
+}
+layer {
+ name: "inception_4e/3x3"
+ type: "Convolution"
+ bottom: "inception_4e/3x3_reduce"
+ top: "inception_4e/3x3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 320
+ pad: 1
+ kernel_size: 3
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4e/relu_3x3"
+ type: "ReLU"
+ bottom: "inception_4e/3x3"
+ top: "inception_4e/3x3"
+}
+layer {
+ name: "inception_4e/5x5_reduce"
+ type: "Convolution"
+ bottom: "inception_4d/output"
+ top: "inception_4e/5x5_reduce"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 32
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4e/relu_5x5_reduce"
+ type: "ReLU"
+ bottom: "inception_4e/5x5_reduce"
+ top: "inception_4e/5x5_reduce"
+}
+layer {
+ name: "inception_4e/5x5"
+ type: "Convolution"
+ bottom: "inception_4e/5x5_reduce"
+ top: "inception_4e/5x5"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 2
+ kernel_size: 5
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4e/relu_5x5"
+ type: "ReLU"
+ bottom: "inception_4e/5x5"
+ top: "inception_4e/5x5"
+}
+layer {
+ name: "inception_4e/pool"
+ type: "Pooling"
+ bottom: "inception_4d/output"
+ top: "inception_4e/pool"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 1
+ pad: 1
+ }
+}
+layer {
+ name: "inception_4e/pool_proj"
+ type: "Convolution"
+ bottom: "inception_4e/pool"
+ top: "inception_4e/pool_proj"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_4e/relu_pool_proj"
+ type: "ReLU"
+ bottom: "inception_4e/pool_proj"
+ top: "inception_4e/pool_proj"
+}
+layer {
+ name: "inception_4e/output"
+ type: "Concat"
+ bottom: "inception_4e/1x1"
+ bottom: "inception_4e/3x3"
+ bottom: "inception_4e/5x5"
+ bottom: "inception_4e/pool_proj"
+ top: "inception_4e/output"
+}
+layer {
+ name: "pool4/3x3_s2"
+ type: "Pooling"
+ bottom: "inception_4e/output"
+ top: "pool4/3x3_s2"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 2
+ }
+}
+layer {
+ name: "inception_5a/1x1"
+ type: "Convolution"
+ bottom: "pool4/3x3_s2"
+ top: "inception_5a/1x1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 256
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_5a/relu_1x1"
+ type: "ReLU"
+ bottom: "inception_5a/1x1"
+ top: "inception_5a/1x1"
+}
+layer {
+ name: "inception_5a/3x3_reduce"
+ type: "Convolution"
+ bottom: "pool4/3x3_s2"
+ top: "inception_5a/3x3_reduce"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 160
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_5a/relu_3x3_reduce"
+ type: "ReLU"
+ bottom: "inception_5a/3x3_reduce"
+ top: "inception_5a/3x3_reduce"
+}
+layer {
+ name: "inception_5a/3x3"
+ type: "Convolution"
+ bottom: "inception_5a/3x3_reduce"
+ top: "inception_5a/3x3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 320
+ pad: 1
+ kernel_size: 3
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_5a/relu_3x3"
+ type: "ReLU"
+ bottom: "inception_5a/3x3"
+ top: "inception_5a/3x3"
+}
+layer {
+ name: "inception_5a/5x5_reduce"
+ type: "Convolution"
+ bottom: "pool4/3x3_s2"
+ top: "inception_5a/5x5_reduce"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 32
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_5a/relu_5x5_reduce"
+ type: "ReLU"
+ bottom: "inception_5a/5x5_reduce"
+ top: "inception_5a/5x5_reduce"
+}
+layer {
+ name: "inception_5a/5x5"
+ type: "Convolution"
+ bottom: "inception_5a/5x5_reduce"
+ top: "inception_5a/5x5"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 2
+ kernel_size: 5
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_5a/relu_5x5"
+ type: "ReLU"
+ bottom: "inception_5a/5x5"
+ top: "inception_5a/5x5"
+}
+layer {
+ name: "inception_5a/pool"
+ type: "Pooling"
+ bottom: "pool4/3x3_s2"
+ top: "inception_5a/pool"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 1
+ pad: 1
+ }
+}
+layer {
+ name: "inception_5a/pool_proj"
+ type: "Convolution"
+ bottom: "inception_5a/pool"
+ top: "inception_5a/pool_proj"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_5a/relu_pool_proj"
+ type: "ReLU"
+ bottom: "inception_5a/pool_proj"
+ top: "inception_5a/pool_proj"
+}
+layer {
+ name: "inception_5a/output"
+ type: "Concat"
+ bottom: "inception_5a/1x1"
+ bottom: "inception_5a/3x3"
+ bottom: "inception_5a/5x5"
+ bottom: "inception_5a/pool_proj"
+ top: "inception_5a/output"
+}
+layer {
+ name: "inception_5b/1x1"
+ type: "Convolution"
+ bottom: "inception_5a/output"
+ top: "inception_5b/1x1"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 384
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_5b/relu_1x1"
+ type: "ReLU"
+ bottom: "inception_5b/1x1"
+ top: "inception_5b/1x1"
+}
+layer {
+ name: "inception_5b/3x3_reduce"
+ type: "Convolution"
+ bottom: "inception_5a/output"
+ top: "inception_5b/3x3_reduce"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 192
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_5b/relu_3x3_reduce"
+ type: "ReLU"
+ bottom: "inception_5b/3x3_reduce"
+ top: "inception_5b/3x3_reduce"
+}
+layer {
+ name: "inception_5b/3x3"
+ type: "Convolution"
+ bottom: "inception_5b/3x3_reduce"
+ top: "inception_5b/3x3"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 384
+ pad: 1
+ kernel_size: 3
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_5b/relu_3x3"
+ type: "ReLU"
+ bottom: "inception_5b/3x3"
+ top: "inception_5b/3x3"
+}
+layer {
+ name: "inception_5b/5x5_reduce"
+ type: "Convolution"
+ bottom: "inception_5a/output"
+ top: "inception_5b/5x5_reduce"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 48
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_5b/relu_5x5_reduce"
+ type: "ReLU"
+ bottom: "inception_5b/5x5_reduce"
+ top: "inception_5b/5x5_reduce"
+}
+layer {
+ name: "inception_5b/5x5"
+ type: "Convolution"
+ bottom: "inception_5b/5x5_reduce"
+ top: "inception_5b/5x5"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ pad: 2
+ kernel_size: 5
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_5b/relu_5x5"
+ type: "ReLU"
+ bottom: "inception_5b/5x5"
+ top: "inception_5b/5x5"
+}
+layer {
+ name: "inception_5b/pool"
+ type: "Pooling"
+ bottom: "inception_5a/output"
+ top: "inception_5b/pool"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 1
+ pad: 1
+ }
+}
+layer {
+ name: "inception_5b/pool_proj"
+ type: "Convolution"
+ bottom: "inception_5b/pool"
+ top: "inception_5b/pool_proj"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ convolution_param {
+ num_output: 128
+ kernel_size: 1
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0.2
+ }
+ }
+}
+layer {
+ name: "inception_5b/relu_pool_proj"
+ type: "ReLU"
+ bottom: "inception_5b/pool_proj"
+ top: "inception_5b/pool_proj"
+}
+layer {
+ name: "inception_5b/output"
+ type: "Concat"
+ bottom: "inception_5b/1x1"
+ bottom: "inception_5b/3x3"
+ bottom: "inception_5b/5x5"
+ bottom: "inception_5b/pool_proj"
+ top: "inception_5b/output"
+}
+layer {
+ name: "pool5/7x7_s1"
+ type: "Pooling"
+ bottom: "inception_5b/output"
+ top: "pool5/7x7_s1"
+ pooling_param {
+ pool: AVE
+ kernel_size: 7
+ stride: 1
+ }
+}
+layer {
+ name: "pool5/drop_7x7_s1"
+ type: "Dropout"
+ bottom: "pool5/7x7_s1"
+ top: "pool5/7x7_s1"
+ dropout_param {
+ dropout_ratio: 0.4
+ }
+}
+layer {
+ name: "loss3/classifier"
+ type: "InnerProduct"
+ bottom: "pool5/7x7_s1"
+ top: "loss3/classifier"
+ param {
+ lr_mult: 1
+ decay_mult: 1
+ }
+ param {
+ lr_mult: 2
+ decay_mult: 0
+ }
+ inner_product_param {
+ num_output: 1000
+ weight_filler {
+ type: "xavier"
+ }
+ bias_filler {
+ type: "constant"
+ value: 0
+ }
+ }
+}
+layer {
+ name: "loss3/loss3"
+ type: "SoftmaxWithLoss"
+ bottom: "loss3/classifier"
+ bottom: "label"
+ top: "loss3/loss3"
+ loss_weight: 1
+}
diff --git a/benchmark/caffe/image/run.sh b/benchmark/caffe/image/run.sh
new file mode 100755
index 0000000000000000000000000000000000000000..aa9ac20ca5cc1d48a07ce39f7d6c6d70ad4121ab
--- /dev/null
+++ b/benchmark/caffe/image/run.sh
@@ -0,0 +1,30 @@
+#!/bin/bash
+set -e
+
+function test() {
+ cfg=$1
+ batch=$2
+ prefix=$3
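+  # the line right after `input: "data"` / `input: "label"` holds the batch
+  # dimension; rewrite it in place before timing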
+ sed -i "/input: \"data\"/{n;s/^input_dim.*/input_dim: $batch/g}" $cfg
+ sed -i "/input: \"label\"/{n;s/^input_dim.*/input_dim: $batch/g}" $cfg
+ caffe time --model=$cfg --iterations=50 --gpu 0 > logs/$prefix-1gpu-batch${batch}.log 2>&1
+}
+
+if [ ! -d "logs" ]; then
+ mkdir logs
+fi
+
+# alexnet
+test alexnet.prototxt 64 alexnet
+test alexnet.prototxt 128 alexnet
+test alexnet.prototxt 256 alexnet
+test alexnet.prototxt 512 alexnet
+
+# googlenet
+test googlenet.prototxt 64 googlenet
+test googlenet.prototxt 128 googlenet
+
+# small net
+test smallnet_mnist_cifar.prototxt 64 smallnet
+test smallnet_mnist_cifar.prototxt 128 smallnet
+test smallnet_mnist_cifar.prototxt 256 smallnet
+test smallnet_mnist_cifar.prototxt 512 smallnet
diff --git a/benchmark/caffe/image/run_multi.sh b/benchmark/caffe/image/run_multi.sh
new file mode 100755
index 0000000000000000000000000000000000000000..9a0a71bc185a421842265ea6d2310429adb86913
--- /dev/null
+++ b/benchmark/caffe/image/run_multi.sh
@@ -0,0 +1,24 @@
+#!/bin/bash
+set -e
+
+function test() {
+ cfg=$1
+ batch=$2
+ prefix=$3
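+  # the prototxt stores the per-GPU batch size; split the total across 4 GPUs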
+ batch_per_gpu=`expr ${batch} / 4`
+ sed -i "/input: \"data\"/{n;s/^input_dim.*/input_dim: ${batch_per_gpu}/g}" $cfg
+ sed -i "/input: \"label\"/{n;s/^input_dim.*/input_dim: ${batch_per_gpu}/g}" $cfg
+ sed -i "1c\net : \"${cfg}\"" solver.prototxt
+ caffe train --solver=solver.prototxt -gpu 0,1,2,3 > logs/${prefix}-4gpu-batch${batch}.log 2>&1
+}
+
+if [ ! -d "logs" ]; then
+ mkdir logs
+fi
+
+# alexnet
+test alexnet.prototxt 512 alexnet
+test alexnet.prototxt 1024 alexnet
+
+# googlenet
+test googlenet.prototxt 512 googlenet
diff --git a/benchmark/caffe/image/smallnet_mnist_cifar.prototxt b/benchmark/caffe/image/smallnet_mnist_cifar.prototxt
new file mode 100644
index 0000000000000000000000000000000000000000..3cb0e32bbfb9f785ece6d428356987e5503dd25d
--- /dev/null
+++ b/benchmark/caffe/image/smallnet_mnist_cifar.prototxt
@@ -0,0 +1,198 @@
+name: "mnist/cifar"
+input: "data"
+input_dim: 128
+input_dim: 3
+input_dim: 32
+input_dim: 32
+input: "label"
+input_dim: 128
+input_dim: 1
+input_dim: 1
+input_dim: 1
+layer {
+ name: "conv1"
+ type: "Convolution"
+ bottom: "data"
+ top: "conv1"
+ param {
+ lr_mult: 1
+ }
+ param {
+ lr_mult: 2
+ }
+ convolution_param {
+ num_output: 32
+ pad: 2
+ kernel_size: 5
+ stride: 1
+ weight_filler {
+ type: "gaussian"
+ std: 0.0001
+ }
+ bias_filler {
+ type: "constant"
+ }
+ }
+}
+layer {
+ name: "pool1"
+ type: "Pooling"
+ bottom: "conv1"
+ top: "pool1"
+ pooling_param {
+ pool: MAX
+ kernel_size: 3
+ stride: 2
+ }
+}
+layer {
+ name: "relu1"
+ type: "ReLU"
+ bottom: "pool1"
+ top: "pool1"
+}
+layer {
+ name: "conv2"
+ type: "Convolution"
+ bottom: "pool1"
+ top: "conv2"
+ param {
+ lr_mult: 1
+ }
+ param {
+ lr_mult: 2
+ }
+ convolution_param {
+ num_output: 32
+ pad: 2
+ kernel_size: 5
+ stride: 1
+ weight_filler {
+ type: "gaussian"
+ std: 0.01
+ }
+ bias_filler {
+ type: "constant"
+ }
+ }
+}
+layer {
+ name: "relu2"
+ type: "ReLU"
+ bottom: "conv2"
+ top: "conv2"
+}
+layer {
+ name: "pool2"
+ type: "Pooling"
+ bottom: "conv2"
+ top: "pool2"
+ pooling_param {
+ pool: AVE
+ kernel_size: 3
+ stride: 2
+ }
+}
+layer {
+ name: "conv3"
+ type: "Convolution"
+ bottom: "pool2"
+ top: "conv3"
+ param {
+ lr_mult: 1
+ }
+ param {
+ lr_mult: 2
+ }
+ convolution_param {
+ num_output: 64
+ pad: 2
+ kernel_size: 5
+ stride: 1
+ weight_filler {
+ type: "gaussian"
+ std: 0.01
+ }
+ bias_filler {
+ type: "constant"
+ }
+ }
+}
+layer {
+ name: "relu3"
+ type: "ReLU"
+ bottom: "conv3"
+ top: "conv3"
+}
+layer {
+ name: "pool3"
+ type: "Pooling"
+ bottom: "conv3"
+ top: "pool3"
+ pooling_param {
+ pool: AVE
+ kernel_size: 3
+ stride: 2
+ }
+}
+layer {
+ name: "ip1"
+ type: "InnerProduct"
+ bottom: "pool3"
+ top: "ip1"
+ param {
+ lr_mult: 1
+ }
+ param {
+ lr_mult: 2
+ }
+ inner_product_param {
+ num_output: 64
+ weight_filler {
+ type: "gaussian"
+ std: 0.1
+ }
+ bias_filler {
+ type: "constant"
+ }
+ }
+}
+layer {
+ name: "ip2"
+ type: "InnerProduct"
+ bottom: "ip1"
+ top: "ip2"
+ param {
+ lr_mult: 1
+ }
+ param {
+ lr_mult: 2
+ }
+ inner_product_param {
+ num_output: 10
+ weight_filler {
+ type: "gaussian"
+ std: 0.1
+ }
+ bias_filler {
+ type: "constant"
+ }
+ }
+}
+layer {
+ name: "accuracy"
+ type: "Accuracy"
+ bottom: "ip2"
+ bottom: "label"
+ top: "accuracy"
+ include {
+ phase: TEST
+ }
+}
+layer {
+ name: "loss"
+ type: "SoftmaxWithLoss"
+ bottom: "ip2"
+ bottom: "label"
+ top: "loss"
+}
diff --git a/benchmark/caffe/image/solver.prototxt b/benchmark/caffe/image/solver.prototxt
new file mode 100644
index 0000000000000000000000000000000000000000..61c10284e6027b4cc0b3d4c8fcf949e0a5a22a85
--- /dev/null
+++ b/benchmark/caffe/image/solver.prototxt
@@ -0,0 +1,10 @@
+net: "alexnet.prototxt"
+base_lr: 0.01
+lr_policy: "fixed"
+display: 20
+max_iter: 200
+momentum: 0.9
+weight_decay: 0.0005
+snapshot: 10000
+snapshot_prefix: "models/caffe_alexnet_train"
+solver_mode: GPU
diff --git a/benchmark/figs/alexnet-4gpu.png b/benchmark/figs/alexnet-4gpu.png
new file mode 100644
index 0000000000000000000000000000000000000000..28b95a44508f0ee7ad270c9ccdf8659009406b03
Binary files /dev/null and b/benchmark/figs/alexnet-4gpu.png differ
diff --git a/benchmark/figs/googlenet-4gpu.png b/benchmark/figs/googlenet-4gpu.png
new file mode 100644
index 0000000000000000000000000000000000000000..9b5331f05a3e54cacf949f10b6603bf627a6d106
Binary files /dev/null and b/benchmark/figs/googlenet-4gpu.png differ
diff --git a/benchmark/figs/rnn_lstm_4gpus.png b/benchmark/figs/rnn_lstm_4gpus.png
new file mode 100644
index 0000000000000000000000000000000000000000..973ce2fa5f65e9681c972d4f5bd5776b5c4aa264
Binary files /dev/null and b/benchmark/figs/rnn_lstm_4gpus.png differ
diff --git a/benchmark/figs/rnn_lstm_cls.png b/benchmark/figs/rnn_lstm_cls.png
new file mode 100644
index 0000000000000000000000000000000000000000..26d05cac11aa7ae8cdfbcd8c4401f6547a9404f6
Binary files /dev/null and b/benchmark/figs/rnn_lstm_cls.png differ
diff --git a/benchmark/paddle/image/alexnet.py b/benchmark/paddle/image/alexnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..3358d43a4b08c6a9b89d59e1a8be53ee1f12bbe0
--- /dev/null
+++ b/benchmark/paddle/image/alexnet.py
@@ -0,0 +1,64 @@
+#!/usr/bin/env python
+
+from paddle.trainer_config_helpers import *
+
+height = 227
+width = 227
+num_class = 1000
+batch_size = get_config_arg('batch_size', int, 128)
+
+args = {'height': height, 'width': width, 'color': True, 'num_class': num_class}
+define_py_data_sources2(
+ "train.list", None, module="provider", obj="process", args=args)
+
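+# lr and weight decay are scaled by batch_size, presumably to keep the
+# effective per-batch settings aligned with Caffe's solver
+# (base_lr: 0.01, weight_decay: 0.0005 in solver.prototxt)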
+settings(
+ batch_size=batch_size,
+ learning_rate=0.01 / batch_size,
+ learning_method=MomentumOptimizer(0.9),
+ regularization=L2Regularization(0.0005 * batch_size))
+
+# conv1
+net = data_layer('data', size=height * width * 3)
+net = img_conv_layer(
+ input=net,
+ filter_size=11,
+ num_channels=3,
+ num_filters=96,
+ stride=4,
+ padding=1)
+net = img_cmrnorm_layer(input=net, size=5, scale=0.0001, power=0.75)
+net = img_pool_layer(input=net, pool_size=3, stride=2)
+
+# conv2
+net = img_conv_layer(
+ input=net, filter_size=5, num_filters=256, stride=1, padding=2, groups=1)
+net = img_cmrnorm_layer(input=net, size=5, scale=0.0001, power=0.75)
+net = img_pool_layer(input=net, pool_size=3, stride=2)
+
+# conv3
+net = img_conv_layer(
+ input=net, filter_size=3, num_filters=384, stride=1, padding=1)
+# conv4
+net = img_conv_layer(
+ input=net, filter_size=3, num_filters=384, stride=1, padding=1, groups=1)
+
+# conv5
+net = img_conv_layer(
+ input=net, filter_size=3, num_filters=256, stride=1, padding=1, groups=1)
+net = img_pool_layer(input=net, pool_size=3, stride=2)
+
+net = fc_layer(
+ input=net,
+ size=4096,
+ act=ReluActivation(),
+ layer_attr=ExtraAttr(drop_rate=0.5))
+net = fc_layer(
+ input=net,
+ size=4096,
+ act=ReluActivation(),
+ layer_attr=ExtraAttr(drop_rate=0.5))
+net = fc_layer(input=net, size=1000, act=SoftmaxActivation())
+
+lab = data_layer('label', num_class)
+loss = cross_entropy(input=net, label=lab)
+outputs(loss)
diff --git a/benchmark/paddle/image/googlenet.py b/benchmark/paddle/image/googlenet.py
new file mode 100644
index 0000000000000000000000000000000000000000..bc893bab98c4d2e07c62fbd012d51a0939db4766
--- /dev/null
+++ b/benchmark/paddle/image/googlenet.py
@@ -0,0 +1,226 @@
+#!/usr/bin/env python
+from paddle.trainer_config_helpers import *
+
+height = 224
+width = 224
+num_class = 1000
+batch_size = get_config_arg('batch_size', int, 128)
+
+args = {'height': height, 'width': width, 'color': True, 'num_class': num_class}
+define_py_data_sources2(
+ "train.list", None, module="provider", obj="process", args=args)
+
+settings(
+ batch_size=batch_size,
+ learning_rate=0.01 / batch_size,
+ learning_method=MomentumOptimizer(0.9),
+ regularization=L2Regularization(0.0005 * batch_size))
+
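+# Note: inception2 builds the block from standalone conv layers, while
+# inception() below uses conv_projection so the final concat applies one
+# shared bias and ReLU; only inception() is used in the stages below.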
+def inception2(name, input, channels, \
+ filter1,
+ filter3R, filter3,
+ filter5R, filter5,
+ proj):
+
+ conv1 = name + '_1'
+ conv3r = name + '_3r'
+ conv3 = name + '_3'
+ conv5r = name + '_5r'
+ conv5 = name + '_5'
+ maxpool = name + '_max'
+ convproj = name + '_proj'
+
+ cov1 = img_conv_layer(
+ name=conv1,
+ input=input,
+ filter_size=1,
+ num_channels=channels,
+ num_filters=filter1,
+ stride=1,
+ padding=0)
+
+ cov3r = img_conv_layer(
+ name=conv3r,
+ input=input,
+ filter_size=1,
+ num_channels=channels,
+ num_filters=filter3R,
+ stride=1,
+ padding=0)
+ cov3 = img_conv_layer(
+ name=conv3,
+ input=cov3r,
+ filter_size=3,
+ num_filters=filter3,
+ stride=1,
+ padding=1)
+
+ cov5r = img_conv_layer(
+ name=conv5r,
+ input=input,
+ filter_size=1,
+ num_channels=channels,
+ num_filters=filter5R,
+ stride=1,
+ padding=0)
+ cov5 = img_conv_layer(
+ name=conv5,
+ input=cov5r,
+ filter_size=5,
+ num_filters=filter5,
+ stride=1,
+ padding=2)
+
+ pool1 = img_pool_layer(
+ name=maxpool,
+ input=input,
+ pool_size=3,
+ num_channels=channels,
+ stride=1,
+ padding=1)
+ covprj = img_conv_layer(
+ name=convproj,
+ input=pool1,
+ filter_size=1,
+ num_filters=proj,
+ stride=1,
+ padding=0)
+
+ cat = concat_layer(name=name, input=[cov1, cov3, cov5, covprj])
+ return cat
+
+def inception(name, input, channels, \
+ filter1,
+ filter3R, filter3,
+ filter5R, filter5,
+ proj):
+
+ cov1 = conv_projection(
+ input=input,
+ filter_size=1,
+ num_channels=channels,
+ num_filters=filter1,
+ stride=1,
+ padding=0)
+
+ cov3r = img_conv_layer(
+ name=name + '_3r',
+ input=input,
+ filter_size=1,
+ num_channels=channels,
+ num_filters=filter3R,
+ stride=1,
+ padding=0)
+ cov3 = conv_projection(
+ input=cov3r, filter_size=3, num_filters=filter3, stride=1, padding=1)
+
+ cov5r = img_conv_layer(
+ name=name + '_5r',
+ input=input,
+ filter_size=1,
+ num_channels=channels,
+ num_filters=filter5R,
+ stride=1,
+ padding=0)
+ cov5 = conv_projection(
+ input=cov5r, filter_size=5, num_filters=filter5, stride=1, padding=2)
+
+ pool1 = img_pool_layer(
+ name=name + '_max',
+ input=input,
+ pool_size=3,
+ num_channels=channels,
+ stride=1,
+ padding=1)
+ covprj = conv_projection(
+ input=pool1, filter_size=1, num_filters=proj, stride=1, padding=0)
+
+ cat = concat_layer(
+ name=name,
+ input=[cov1, cov3, cov5, covprj],
+ bias_attr=True,
+ act=ReluActivation())
+ return cat
+
+
+lab = data_layer(name="label", size=1000)
+data = data_layer(name="input", size=3 * height * width)
+
+# stage 1
+conv1 = img_conv_layer(
+ name="conv1",
+ input=data,
+ filter_size=7,
+ num_channels=3,
+ num_filters=64,
+ stride=2,
+ padding=3)
+pool1 = img_pool_layer(
+ name="pool1", input=conv1, pool_size=3, num_channels=64, stride=2)
+
+# stage 2
+conv2_1 = img_conv_layer(
+ name="conv2_1",
+ input=pool1,
+ filter_size=1,
+ num_filters=64,
+ stride=1,
+ padding=0)
+conv2_2 = img_conv_layer(
+ name="conv2_2",
+ input=conv2_1,
+ filter_size=3,
+ num_filters=192,
+ stride=1,
+ padding=1)
+pool2 = img_pool_layer(
+ name="pool2", input=conv2_2, pool_size=3, num_channels=192, stride=2)
+
+# stage 3
+ince3a = inception("ince3a", pool2, 192, 64, 96, 128, 16, 32, 32)
+ince3b = inception("ince3b", ince3a, 256, 128, 128, 192, 32, 96, 64)
+pool3 = img_pool_layer(
+ name="pool3", input=ince3b, num_channels=480, pool_size=3, stride=2)
+
+# stage 4
+ince4a = inception("ince4a", pool3, 480, 192, 96, 208, 16, 48, 64)
+ince4b = inception("ince4b", ince4a, 512, 160, 112, 224, 24, 64, 64)
+ince4c = inception("ince4c", ince4b, 512, 128, 128, 256, 24, 64, 64)
+ince4d = inception("ince4d", ince4c, 512, 112, 144, 288, 32, 64, 64)
+ince4e = inception("ince4e", ince4d, 528, 256, 160, 320, 32, 128, 128)
+pool4 = img_pool_layer(
+ name="pool4", input=ince4e, num_channels=832, pool_size=3, stride=2)
+
+# stage 5
+ince5a = inception("ince5a", pool4, 832, 256, 160, 320, 32, 128, 128)
+ince5b = inception("ince5b", ince5a, 832, 384, 192, 384, 48, 128, 128)
+pool5 = img_pool_layer(
+ name="pool5",
+ input=ince5b,
+ num_channels=1024,
+ pool_size=7,
+ stride=7,
+ pool_type=AvgPooling())
+
+# loss1 and loss2 are removed for all frameworks when benchmarking.
+# output 1
+# pool_o1 = img_pool_layer(name="pool_o1", input=ince4a, num_channels=512, pool_size=5, stride=3, pool_type=AvgPooling())
+# conv_o1 = img_conv_layer(name="conv_o1", input=pool_o1, filter_size=1, num_filters=128, stride=1, padding=0)
+# fc_o1 = fc_layer(name="fc_o1", input=conv_o1, size=1024, layer_attr=ExtraAttr(drop_rate=0.7), act=ReluActivation())
+# out1 = fc_layer(name="output1", input=fc_o1, size=1000, act=SoftmaxActivation())
+# loss1 = cross_entropy(name='loss1', input=out1, label=lab, coeff=0.3)
+
+# output 2
+#pool_o2 = img_pool_layer(name="pool_o2", input=ince4d, num_channels=528, pool_size=5, stride=3, pool_type=AvgPooling())
+#conv_o2 = img_conv_layer(name="conv_o2", input=pool_o2, filter_size=1, num_filters=128, stride=1, padding=0)
+#fc_o2 = fc_layer(name="fc_o2", input=conv_o2, size=1024, layer_attr=ExtraAttr(drop_rate=0.7), act=ReluActivation())
+#out2 = fc_layer(name="output2", input=fc_o2, size=1000, act=SoftmaxActivation())
+#loss2 = cross_entropy(name='loss2', input=out2, label=lab, coeff=0.3)
+
+# output 3
+dropout = dropout_layer(name="dropout", input=pool5, dropout_rate=0.4)
+out3 = fc_layer(
+ name="output3", input=dropout, size=1000, act=SoftmaxActivation())
+loss3 = cross_entropy(name='loss3', input=out3, label=lab)
+
+outputs(loss3)
diff --git a/benchmark/paddle/image/provider.py b/benchmark/paddle/image/provider.py
new file mode 100644
index 0000000000000000000000000000000000000000..1ac47212b5a75667e8e9d4465b33f575516e2836
--- /dev/null
+++ b/benchmark/paddle/image/provider.py
@@ -0,0 +1,26 @@
+import io, os
+import random
+import numpy as np
+from paddle.trainer.PyDataProvider2 import *
+
+
+def initHook(settings, height, width, color, num_class, **kwargs):
+ settings.height = height
+ settings.width = width
+ settings.color = color
+ settings.num_class = num_class
+ if settings.color:
+ settings.data_size = settings.height * settings.width * 3
+ else:
+ settings.data_size = settings.height * settings.width
+
+    settings.slots = [
+        dense_vector(settings.data_size), integer_value(settings.num_class)
+    ]
+
+
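+# The provider below yields 1024 randomly generated (image, label) pairs per
+# file entry, so the benchmark measures raw training speed with no disk I/O.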
+@provider(
+ init_hook=initHook, min_pool_size=-1, cache=CacheType.CACHE_PASS_IN_MEM)
+def process(settings, file_list):
+ for i in xrange(1024):
+        img = np.random.rand(settings.data_size)
+        # random.randint is inclusive on both ends, so draw labels from
+        # [0, num_class - 1].
+        lab = random.randint(0, settings.num_class - 1)
+ yield img.astype('float32'), int(lab)
diff --git a/benchmark/paddle/image/run.sh b/benchmark/paddle/image/run.sh
new file mode 100755
index 0000000000000000000000000000000000000000..717ed487ba7657db6535efcb1128a355a0f15eaf
--- /dev/null
+++ b/benchmark/paddle/image/run.sh
@@ -0,0 +1,51 @@
+set -e
+
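+# Usage: train <config.py> <trainer_count> <batch_size> <log_prefix>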
+function train() {
+ cfg=$1
+ thread=$2
+ bz=$3
+ args="batch_size=$3"
+ prefix=$4
+ paddle train --job=time \
+ --config=$cfg \
+ --use_gpu=True \
+ --trainer_count=$thread \
+ --log_period=10 \
+ --test_period=100 \
+ --config_args=$args \
+ > logs/$prefix-${thread}gpu-$bz.log 2>&1
+}
+
+if [ ! -f "train.list" ]; then
+ echo " " > train.list
+fi
+if [ ! -d "logs" ]; then
+ mkdir logs
+fi
+
+#========single-gpu=========#
+# alexnet
+train alexnet.py 1 64 alexnet
+train alexnet.py 1 128 alexnet
+train alexnet.py 1 256 alexnet
+train alexnet.py 1 512 alexnet
+
+# googlenet
+train googlenet.py 1 64 googlenet
+train googlenet.py 1 128 googlenet
+train googlenet.py 1 256 googlenet
+
+# smallnet
+train smallnet_mnist_cifar.py 1 64 smallnet
+train smallnet_mnist_cifar.py 1 128 smallnet
+train smallnet_mnist_cifar.py 1 256 smallnet
+train smallnet_mnist_cifar.py 1 512 smallnet
+
+
+############################
+#========multi-gpus=========#
+train alexnet.py 4 512 alexnet
+train alexnet.py 4 1024 alexnet
+
+train googlenet.py 4 512 googlenet
+train googlenet.py 4 1024 googlenet
diff --git a/benchmark/paddle/image/smallnet_mnist_cifar.py b/benchmark/paddle/image/smallnet_mnist_cifar.py
new file mode 100644
index 0000000000000000000000000000000000000000..58879c454f37991405d83bbb593bb5d1e977ff53
--- /dev/null
+++ b/benchmark/paddle/image/smallnet_mnist_cifar.py
@@ -0,0 +1,49 @@
+#!/usr/bin/env python
+
+from paddle.trainer_config_helpers import *
+
+height = 32
+width = 32
+num_class = 10
+
+batch_size = get_config_arg('batch_size', int, 128)
+
+args = {'height': height, 'width': width, 'color': True, 'num_class': num_class}
+define_py_data_sources2(
+ "train.list", None, module="provider", obj="process", args=args)
+
+settings(
+ batch_size=batch_size,
+ learning_rate=0.01 / batch_size,
+ learning_method=MomentumOptimizer(0.9),
+ regularization=L2Regularization(0.0005 * batch_size))
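+
+# The learning rate and L2 penalty are scaled by batch_size, which (assuming
+# Paddle applies them to the summed per-batch gradient) keeps the effective
+# per-sample step size and regularization constant across batch sizes.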
+
+# conv1
+net = data_layer('data', size=height * width * 3)
+net = img_conv_layer(
+ input=net,
+ filter_size=5,
+ num_channels=3,
+ num_filters=32,
+ stride=1,
+ padding=2)
+net = img_pool_layer(input=net, pool_size=3, stride=2, padding=1)
+
+# conv2
+net = img_conv_layer(
+ input=net, filter_size=5, num_filters=32, stride=1, padding=2)
+net = img_pool_layer(
+ input=net, pool_size=3, stride=2, padding=1, pool_type=AvgPooling())
+
+# conv3
+net = img_conv_layer(
+ input=net, filter_size=3, num_filters=64, stride=1, padding=1)
+net = img_pool_layer(
+ input=net, pool_size=3, stride=2, padding=1, pool_type=AvgPooling())
+
+net = fc_layer(input=net, size=64, act=ReluActivation())
+net = fc_layer(input=net, size=10, act=SoftmaxActivation())
+
+lab = data_layer('label', num_class)
+loss = classification_cost(input=net, label=lab)
+outputs(loss)
diff --git a/benchmark/paddle/rnn/imdb.py b/benchmark/paddle/rnn/imdb.py
new file mode 100755
index 0000000000000000000000000000000000000000..fc4ed4025f9ed2e0a32a1709ff8df4af53521196
--- /dev/null
+++ b/benchmark/paddle/rnn/imdb.py
@@ -0,0 +1,46 @@
+from __future__ import print_function
+import six.moves.cPickle as pickle
+import gzip
+import os
+import numpy
+
+
+def get_dataset_file(dataset, default_dataset, origin):
+ data_dir, data_file = os.path.split(dataset)
+ if (not os.path.isfile(dataset)) and data_file == default_dataset:
+ from six.moves import urllib
+ print('Downloading data from %s' % origin)
+ urllib.request.urlretrieve(origin, dataset)
+
+ return dataset
+
+
+def create_data(path="imdb.pkl"):
+
+    if not os.path.isfile('imdb.train.pkl'):
+ path = get_dataset_file(
+ path, "imdb.pkl",
+ "http://www.iro.umontreal.ca/~lisa/deep/data/imdb.pkl")
+
+ if path.endswith(".gz"):
+ f = gzip.open(path, 'rb')
+ else:
+ f = open(path, 'rb')
+
+ train_set = pickle.load(f)
+ test_set = pickle.load(f)
+ f.close()
+
+ pickle.dump(train_set, open('imdb.train.pkl', 'wb'))
+ pickle.dump(test_set, open('imdb.test.pkl', 'wb'))
+
+    if not os.path.isfile('train.list'):
+        with open('train.list', 'w') as flist:
+            flist.write('imdb.train.pkl\n')
+
+
+def main():
+ create_data('imdb.pkl')
+
+
+if __name__ == "__main__":
+ main()
diff --git a/benchmark/paddle/rnn/provider.py b/benchmark/paddle/rnn/provider.py
new file mode 100644
index 0000000000000000000000000000000000000000..928ca75daf84ccebb775364b0be0d8b3d5eebff9
--- /dev/null
+++ b/benchmark/paddle/rnn/provider.py
@@ -0,0 +1,72 @@
+import io, os
+import random
+import numpy as np
+import six.moves.cPickle as pickle
+from paddle.trainer.PyDataProvider2 import *
+
+
+def remove_unk(x, n_words):
+ return [[1 if w >= n_words else w for w in sen] for sen in x]
+
+
+# ==============================================================
+# TensorFlow requires fixed-length input, while PaddlePaddle can
+# process variable-length sequences. Padding is applied in this
+# benchmark so that both platforms receive identical input.
+# ==============================================================
+def pad_sequences(sequences,
+ maxlen=None,
+ dtype='int32',
+ padding='post',
+ truncating='post',
+ value=0.):
+ lengths = [len(s) for s in sequences]
+
+ nb_samples = len(sequences)
+ if maxlen is None:
+ maxlen = np.max(lengths)
+
+ x = (np.ones((nb_samples, maxlen)) * value).astype(dtype)
+ for idx, s in enumerate(sequences):
+ if len(s) == 0:
+ continue # empty list was found
+ if truncating == 'pre':
+ trunc = s[-maxlen:]
+ elif truncating == 'post':
+ trunc = s[:maxlen]
+ else:
+            raise ValueError("Truncating type '%s' not understood" % truncating)
+
+ if padding == 'post':
+ x[idx, :len(trunc)] = trunc
+ elif padding == 'pre':
+ x[idx, -len(trunc):] = trunc
+ else:
+ raise ValueError("Padding type '%s' not understood" % padding)
+ return x
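+
+# Example (illustrative): pad_sequences([[3, 7], [1, 2, 5]], maxlen=4) returns
+# [[3, 7, 0, 0], [1, 2, 5, 0]] with the default 'post' padding and truncation.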
+
+
+def initHook(settings, vocab_size, pad_seq, maxlen, **kwargs):
+ settings.vocab_size = vocab_size
+ settings.pad_seq = pad_seq
+ settings.maxlen = maxlen
+ settings.input_types = [
+ integer_value_sequence(vocab_size), integer_value(2)
+ ]
+
+
+@provider(
+ init_hook=initHook, min_pool_size=-1, cache=CacheType.CACHE_PASS_IN_MEM)
+def process(settings, file_name):
+    # 'file_name' avoids shadowing the Python 2 builtin 'file'.
+    with open(file_name, 'rb') as f:
+        train_set = pickle.load(f)
+ x, y = train_set
+
+ # remove unk, namely remove the words out of dictionary
+ x = remove_unk(x, settings.vocab_size)
+ if settings.pad_seq:
+ x = pad_sequences(x, maxlen=settings.maxlen, value=0.)
+
+ for i in range(len(y)):
+        yield list(map(int, x[i])), int(y[i])
diff --git a/benchmark/paddle/rnn/rnn.py b/benchmark/paddle/rnn/rnn.py
new file mode 100755
index 0000000000000000000000000000000000000000..83eb3e565473f7e7e91cddeaa3cd2aafb7e3df2c
--- /dev/null
+++ b/benchmark/paddle/rnn/rnn.py
@@ -0,0 +1,38 @@
+#!/usr/bin/env python
+
+from paddle.trainer_config_helpers import *
+import imdb
+
+num_class = 2
+vocab_size = 30000
+fixedlen = 100
+batch_size = get_config_arg('batch_size', int, 128)
+lstm_num = get_config_arg('lstm_num', int, 1)
+hidden_size = get_config_arg('hidden_size', int, 128)
+# whether to pad sequence into fixed length
+pad_seq = get_config_arg('pad_seq', bool, True)
+imdb.create_data('imdb.pkl')
+
+args = {'vocab_size': vocab_size, 'pad_seq': pad_seq, 'maxlen': fixedlen}
+define_py_data_sources2(
+ "train.list", None, module="provider", obj="process", args=args)
+
+settings(
+ batch_size=batch_size,
+ learning_rate=2e-3,
+ learning_method=AdamOptimizer(),
+ regularization=L2Regularization(8e-4),
+ gradient_clipping_threshold=25)
+
+net = data_layer('data', size=vocab_size)
+net = embedding_layer(input=net, size=128)
+
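+# Stack lstm_num LSTM layers; each layer emits a hidden_size-dimensional
+# output sequence that feeds the next, and last_seq keeps the final step.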
+for i in xrange(lstm_num):
+ net = simple_lstm(input=net, size=hidden_size)
+
+net = last_seq(input=net)
+net = fc_layer(input=net, size=num_class, act=SoftmaxActivation())
+
+lab = data_layer('label', num_class)
+loss = classification_cost(input=net, label=lab)
+outputs(loss)
diff --git a/benchmark/paddle/rnn/run.sh b/benchmark/paddle/rnn/run.sh
new file mode 100755
index 0000000000000000000000000000000000000000..e9dfeb2e525979f47e4ef48f7610dc1007900f2c
--- /dev/null
+++ b/benchmark/paddle/rnn/run.sh
@@ -0,0 +1,50 @@
+set -e
+
+function train() {
+ cfg=$1
+ thread=$2
+  # The key must be pad_seq to match get_config_arg('pad_seq', ...) in rnn.py.
+  args="lstm_num=${3},pad_seq=${4},hidden_size=${5},batch_size=${6}"
+ paddle train --job=time \
+ --config=$cfg \
+ --use_gpu=1 \
+ --trainer_count=$thread \
+ --log_period=10 \
+ --test_period=100 \
+ --num_passes=1 \
+ --feed_data=1 \
+ --config_args=$args \
+ >logs/rnn-pad${4}-${thread}gpu-lstm${3}-batch${6}-hid${5}.log 2>&1
+}
+
+if [ ! -d "logs" ]; then
+ mkdir logs
+fi
+
+## padding, single gpu
+#-----config--gpu--lstm_num--padding--hidden_size--batch_size
+## lstm_num=2, batch_size=64
+train rnn.py 1 2 1 256 64
+train rnn.py 1 2 1 512 64
+train rnn.py 1 2 1 1280 64
+
+## lstm_num=2, batch_size=128
+train rnn.py 1 2 1 256 128
+train rnn.py 1 2 1 512 128
+train rnn.py 1 2 1 1280 128
+
+## lstm_num=2, batch_size=256
+train rnn.py 1 2 1 256 256
+train rnn.py 1 2 1 512 256
+train rnn.py 1 2 1 1280 256
+
+
+#==================multi gpus=====================#
+# hidden_size=256, lstm_num=2, different batch size
+train rnn.py 4 2 1 256 128
+train rnn.py 4 2 1 256 256
+train rnn.py 4 2 1 256 512
+
+# hidden_size=512, lstm_num=2, different batch size
+train rnn.py 4 2 1 512 128
+train rnn.py 4 2 1 512 256
+train rnn.py 4 2 1 512 512
diff --git a/benchmark/tensorflow/image/alexnet.py b/benchmark/tensorflow/image/alexnet.py
new file mode 100644
index 0000000000000000000000000000000000000000..f6a39ef778e21bee7374718a1b1ddf43392825a8
--- /dev/null
+++ b/benchmark/tensorflow/image/alexnet.py
@@ -0,0 +1,298 @@
+from six.moves import xrange # pylint: disable=redefined-builtin
+from datetime import datetime
+import math
+import time
+import numpy as np
+
+import tensorflow.python.platform
+import tensorflow as tf
+
+FLAGS = tf.app.flags.FLAGS
+
+tf.app.flags.DEFINE_integer('batch_size', 128, """Batch size.""")
+tf.app.flags.DEFINE_integer('num_batches', 100, """Number of batches to run.""")
+tf.app.flags.DEFINE_boolean('forward_only', False,
+ """Only run the forward pass.""")
+tf.app.flags.DEFINE_boolean('forward_backward_only', False,
+                            """Only run the forward-backward pass.""")
+tf.app.flags.DEFINE_string('data_format', 'NCHW',
+ """The data format for Convnet operations.
+ Can be either NHWC or NCHW.
+ """)
+tf.app.flags.DEFINE_boolean('log_device_placement', False,
+ """Whether to log device placement.""")
+
+
+def _conv(name, inpOp, nIn, nOut, kH, kW, dH, dW, padType, wd=0.0005):
+ with tf.name_scope(name) as scope:
+ kernel = tf.get_variable(
+ name + '_w', [kH, kW, nIn, nOut],
+ initializer=tf.truncated_normal_initializer(
+ stddev=0.01, dtype=tf.float32),
+ dtype=tf.float32)
+
+ if wd is not None and wd > 0:
+ weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss')
+ tf.add_to_collection('losses', weight_decay)
+
+ if FLAGS.data_format == 'NCHW':
+ strides = [1, 1, dH, dW]
+ else:
+ strides = [1, dH, dW, 1]
+ conv = tf.nn.conv2d(
+ inpOp,
+ kernel,
+ strides,
+ padding=padType,
+ data_format=FLAGS.data_format)
+
+ biases = tf.get_variable(
+ name=name + '_b',
+ shape=[nOut],
+ initializer=tf.constant_initializer(
+ value=0.0, dtype=tf.float32),
+ dtype=tf.float32)
+
+ bias = tf.reshape(
+ tf.nn.bias_add(
+ conv, biases, data_format=FLAGS.data_format),
+ conv.get_shape())
+
+ conv1 = tf.nn.relu(bias, name=scope)
+ return conv1
+
+
+def _affine(name, inpOp, nIn, nOut, wd=0.0005, act=True, drop=None):
+ with tf.name_scope(name) as scope:
+ kernel = tf.get_variable(
+ name + '_w', [nIn, nOut],
+ initializer=tf.truncated_normal_initializer(
+ stddev=0.01, dtype=tf.float32),
+ dtype=tf.float32)
+
+ if wd is not None and wd > 0:
+ weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss')
+ tf.add_to_collection('losses', weight_decay)
+
+ biases = tf.get_variable(
+ name + '_b', [nOut],
+ initializer=tf.constant_initializer(
+ value=0.0, dtype=tf.float32),
+ dtype=tf.float32,
+ trainable=True)
+
+ affine1 = tf.nn.relu_layer(inpOp, kernel, biases, name=name) if act else \
+ tf.matmul(inpOp, kernel) + biases
+
+ output = tf.nn.dropout(affine1, drop) if drop else affine1
+
+ return output
+
+
+def _mpool(name, inpOp, kH, kW, dH, dW):
+ if FLAGS.data_format == 'NCHW':
+ ksize = [1, 1, kH, kW]
+ strides = [1, 1, dH, dW]
+ else:
+ ksize = [1, kH, kW, 1]
+ strides = [1, dH, dW, 1]
+ return tf.nn.max_pool(
+ inpOp,
+ ksize=ksize,
+ strides=strides,
+ padding='VALID',
+ data_format=FLAGS.data_format,
+ name=name)
+
+
+def _norm(name, l_input, lsize=4):
+ return tf.nn.lrn(l_input,
+ lsize,
+ bias=1.0,
+ alpha=0.001 / 9.0,
+ beta=0.75,
+ name=name)
+
+
+def loss(logits, labels):
+ labels = tf.cast(labels, tf.int64)
+ cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
+ logits, labels, name='cross_entropy_per_example')
+ cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
+ tf.add_to_collection('losses', cross_entropy_mean)
+
+ # The total loss is defined as the cross entropy loss plus all of the weight
+ # decay terms (L2 loss).
+ return tf.add_n(tf.get_collection('losses'), name='total_loss')
+
+
+def get_incoming_shape(incoming):
+ """ Returns the incoming data shape """
+ if isinstance(incoming, tf.Tensor):
+ return incoming.get_shape().as_list()
+    elif isinstance(incoming, (np.ndarray, list, tuple)):
+ return np.shape(incoming)
+ else:
+ raise Exception("Invalid incoming layer.")
+
+
+def inference(images):
+ conv1 = _conv('conv1', images, 3, 96, 11, 11, 4, 4, 'VALID')
+ pool1 = _mpool('pool1', conv1, 3, 3, 2, 2)
+ norm1 = _norm('norm1', pool1, lsize=5)
+ conv2 = _conv('conv2', norm1, 96, 256, 5, 5, 1, 1, 'SAME')
+ pool2 = _mpool('pool2', conv2, 3, 3, 2, 2)
+ norm2 = _norm('norm2', pool2, lsize=5)
+ conv3 = _conv('conv3', norm2, 256, 384, 3, 3, 1, 1, 'SAME')
+ conv4 = _conv('conv4', conv3, 384, 384, 3, 3, 1, 1, 'SAME')
+ conv5 = _conv('conv5', conv4, 384, 256, 3, 3, 1, 1, 'SAME')
+ pool5 = _mpool('pool5', conv5, 3, 3, 2, 2)
+ resh1 = tf.reshape(pool5, [-1, 256 * 6 * 6])
+    # 0.5 is the dropout keep probability; passed positionally it would set
+    # wd instead, so it is given by keyword.
+    affn1 = _affine('fc6', resh1, 256 * 6 * 6, 4096, drop=0.5)
+    affn2 = _affine('fc7', affn1, 4096, 4096, drop=0.5)
+ affn3 = _affine('fc8', affn2, 4096, 1000, wd=None, act=False) # last fc
+
+ return affn3
+
+
+def time_tensorflow_run(session, target, info_string):
+ num_steps_burn_in = 10
+ total_duration = 0.0
+ total_duration_squared = 0.0
+ if not isinstance(target, list):
+ target = [target]
+ target_op = tf.group(*target)
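+    # tf.group bundles the target ops so one session.run() executes them all.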
+ for i in xrange(FLAGS.num_batches + num_steps_burn_in):
+ start_time = time.time()
+ _ = session.run(target_op)
+ duration = time.time() - start_time
+        if i >= num_steps_burn_in:
+ if not i % 10:
+ print('%s: step %d, duration = %.3f' %
+ (datetime.now(), i - num_steps_burn_in, duration))
+ total_duration += duration
+ total_duration_squared += duration * duration
+ mn = total_duration / FLAGS.num_batches
+ vr = total_duration_squared / FLAGS.num_batches - mn * mn
+ sd = math.sqrt(vr)
+ print('%s: %s across %d steps, %.3f +/- %.3f sec / batch' %
+ (datetime.now(), info_string, FLAGS.num_batches, mn, sd))
+
+
+def _add_loss_summaries(total_loss):
+ """
+ Generates moving average for all losses and associated summaries for
+ visualizing the performance of the network.
+
+ Args:
+ total_loss: Total loss from loss().
+ Returns:
+ loss_averages_op: op for generating moving averages of losses.
+ """
+ # Compute the moving average of all individual losses and the total loss.
+ loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
+ losses = tf.get_collection('losses')
+ loss_averages_op = loss_averages.apply(losses + [total_loss])
+
+ # Attach a scalar summary to all individual losses and the total loss; do the
+ # same for the averaged version of the losses.
+ for l in losses + [total_loss]:
+ # Name each loss as '(raw)' and name the moving average version of the loss
+ # as the original loss name.
+ tf.scalar_summary(l.op.name + ' (raw)', l)
+ tf.scalar_summary(l.op.name, loss_averages.average(l))
+
+ return loss_averages_op
+
+
+def run_benchmark():
+ with tf.Graph().as_default():
+ with tf.device('/gpu:0'):
+ # Generate some dummy images.
+ image_size = 224
+            # Note that our padding definition is slightly different from cuda-convnet.
+ # In order to force the model to start with the same activations sizes,
+ # we add 3 to the image_size and employ VALID padding above.
+ if FLAGS.data_format == 'NCHW':
+ image_shape = [
+ FLAGS.batch_size, 3, image_size + 3, image_size + 3
+ ]
+ else:
+ image_shape = [
+ FLAGS.batch_size, image_size + 3, image_size + 3, 3
+ ]
+ images = tf.get_variable(
+ 'image',
+ image_shape,
+ initializer=tf.truncated_normal_initializer(
+ stddev=0.1, dtype=tf.float32),
+ dtype=tf.float32,
+ trainable=False)
+
+ labels = tf.get_variable(
+ 'label', [FLAGS.batch_size],
+ initializer=tf.constant_initializer(1),
+ dtype=tf.int32,
+ trainable=False)
+
+ # Build a Graph that computes the logits predictions from the
+ # inference model.
+ last_layer = inference(images)
+
+ objective = loss(last_layer, labels)
+
+            # Compute gradients with respect to all the parameters.
+ # opt = tf.train.GradientDescentOptimizer(0.001)
+ opt = tf.train.MomentumOptimizer(0.001, 0.9)
+ grads = opt.compute_gradients(objective)
+ global_step = tf.get_variable(
+ 'global_step', [],
+ initializer=tf.constant_initializer(
+ 0.0, dtype=tf.float32),
+ trainable=False,
+ dtype=tf.float32)
+ apply_gradient_op = opt.apply_gradients(
+ grads, global_step=global_step)
+
+ # Track the moving averages of all trainable variables.
+ variable_averages = tf.train.ExponentialMovingAverage(0.9,
+ global_step)
+ variables_averages_op = variable_averages.apply(
+ tf.trainable_variables())
+
+ # Build an initialization operation.
+ init = tf.initialize_all_variables()
+
+ # Start running operations on the Graph.
+ sess = tf.Session(config=tf.ConfigProto(
+ allow_soft_placement=True,
+ log_device_placement=FLAGS.log_device_placement))
+ sess.run(init)
+
+ run_forward = True
+ run_forward_backward = True
+ if FLAGS.forward_only and FLAGS.forward_backward_only:
+ raise ValueError("Cannot specify --forward_only and "
+ "--forward_backward_only at the same time.")
+ if FLAGS.forward_only:
+ run_forward_backward = False
+ elif FLAGS.forward_backward_only:
+ run_forward = False
+
+ if run_forward:
+ time_tensorflow_run(sess, last_layer, "Forward")
+
+ if run_forward_backward:
+ with tf.control_dependencies(
+ [apply_gradient_op, variables_averages_op]):
+ train_op = tf.no_op(name='train')
+ time_tensorflow_run(sess, [train_op, objective],
+ "Forward-backward")
+
+
+def main(_):
+ run_benchmark()
+
+
+if __name__ == '__main__':
+ tf.app.run()
diff --git a/benchmark/tensorflow/image/alexnet_multi_gpu.py b/benchmark/tensorflow/image/alexnet_multi_gpu.py
new file mode 100644
index 0000000000000000000000000000000000000000..7b5ee78f4dd5429abd85d75c092a6e3a2a39f922
--- /dev/null
+++ b/benchmark/tensorflow/image/alexnet_multi_gpu.py
@@ -0,0 +1,365 @@
+from six.moves import xrange # pylint: disable=redefined-builtin
+from datetime import datetime
+import math
+import re
+import time
+import numpy as np
+
+import tensorflow.python.platform
+import tensorflow as tf
+
+FLAGS = tf.app.flags.FLAGS
+
+tf.app.flags.DEFINE_integer('batch_size', 64, """Batch size.""")
+tf.app.flags.DEFINE_integer('num_batches', 100, """Number of batches to run.""")
+tf.app.flags.DEFINE_string('data_format', 'NCHW',
+ """The data format for Convnet operations.
+ Can be either NHWC or NCHW.
+ """)
+
+tf.app.flags.DEFINE_string('train_dir', '/train_model',
+ """Directory where to write event logs """
+ """and checkpoint.""")
+tf.app.flags.DEFINE_integer('num_gpus', 4, """How many GPUs to use.""")
+tf.app.flags.DEFINE_boolean('log_device_placement', False,
+ """Whether to log device placement.""")
+
+NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 50000
+NUM_EPOCHS_PER_DECAY = 50
+INITIAL_LEARNING_RATE = 0.1
+LEARNING_RATE_DECAY_FACTOR = 0.1
+TOWER_NAME = 'tower'
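+
+# With these defaults the learning rate decays by 10x every
+# NUM_EPOCHS_PER_DECAY epochs, i.e. every int(50000 / batch_size * 50) steps
+# (about 39000 at the default batch_size of 64).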
+
+
+def _conv(name, inpOp, nIn, nOut, kH, kW, dH, dW, padType, wd=0.005):
+ with tf.name_scope(name) as scope:
+ kernel = tf.get_variable(
+ name + '_w', [kH, kW, nIn, nOut],
+ initializer=tf.truncated_normal_initializer(
+ stddev=0.01, dtype=tf.float32),
+ dtype=tf.float32)
+
+ if wd is not None:
+ weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss')
+ tf.add_to_collection('losses', weight_decay)
+
+ if FLAGS.data_format == 'NCHW':
+ strides = [1, 1, dH, dW]
+ else:
+ strides = [1, dH, dW, 1]
+ conv = tf.nn.conv2d(
+ inpOp,
+ kernel,
+ strides,
+ padding=padType,
+ data_format=FLAGS.data_format)
+
+ biases = tf.get_variable(
+ name=name + '_b',
+ shape=[nOut],
+ initializer=tf.constant_initializer(
+ value=0.0, dtype=tf.float32),
+ dtype=tf.float32)
+
+ bias = tf.reshape(
+ tf.nn.bias_add(
+ conv, biases, data_format=FLAGS.data_format),
+ conv.get_shape())
+
+ conv1 = tf.nn.relu(bias, name=scope)
+ return conv1
+
+
+def _affine(name, inpOp, nIn, nOut, wd=0.005, act=True):
+ with tf.name_scope(name) as scope:
+ kernel = tf.get_variable(
+ name + '_w', [nIn, nOut],
+ initializer=tf.truncated_normal_initializer(
+ stddev=0.01, dtype=tf.float32),
+ dtype=tf.float32)
+
+ if wd is not None:
+ weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss')
+ tf.add_to_collection('losses', weight_decay)
+
+ biases = tf.get_variable(
+ name + '_b', [nOut],
+ initializer=tf.constant_initializer(
+ value=0.0, dtype=tf.float32),
+ dtype=tf.float32,
+ trainable=True)
+
+ affine1 = tf.nn.relu_layer(inpOp, kernel, biases, name=name) if act else \
+ tf.matmul(inpOp, kernel) + biases
+
+ return affine1
+
+
+def _mpool(name, inpOp, kH, kW, dH, dW):
+ if FLAGS.data_format == 'NCHW':
+ ksize = [1, 1, kH, kW]
+ strides = [1, 1, dH, dW]
+ else:
+ ksize = [1, kH, kW, 1]
+ strides = [1, dH, dW, 1]
+ return tf.nn.max_pool(
+ inpOp,
+ ksize=ksize,
+ strides=strides,
+ padding='VALID',
+ data_format=FLAGS.data_format,
+ name=name)
+
+
+def _norm(name, l_input, lsize=4):
+ return tf.nn.lrn(l_input,
+ lsize,
+ bias=1.0,
+ alpha=0.001 / 9.0,
+ beta=0.75,
+ name=name)
+
+
+def loss(logits, labels):
+ labels = tf.cast(labels, tf.int64)
+ cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
+ logits, labels, name='cross_entropy_per_example')
+ cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
+ tf.add_to_collection('losses', cross_entropy_mean)
+
+ # The total loss is defined as the cross entropy loss plus all of the weight
+ # decay terms (L2 loss).
+ return tf.add_n(tf.get_collection('losses'), name='total_loss')
+
+
+def get_incoming_shape(incoming):
+ """ Returns the incoming data shape """
+ if isinstance(incoming, tf.Tensor):
+ return incoming.get_shape().as_list()
+    elif isinstance(incoming, (np.ndarray, list, tuple)):
+ return np.shape(incoming)
+ else:
+ raise Exception("Invalid incoming layer.")
+
+
+def inference(images):
+ conv1 = _conv('conv1', images, 3, 96, 11, 11, 4, 4, 'VALID')
+ pool1 = _mpool('pool1', conv1, 3, 3, 2, 2)
+ norm1 = _norm('norm1', pool1, lsize=5)
+ conv2 = _conv('conv2', norm1, 96, 256, 5, 5, 1, 1, 'SAME')
+ pool2 = _mpool('pool2', conv2, 3, 3, 2, 2)
+ norm2 = _norm('norm2', pool2, lsize=5)
+ conv3 = _conv('conv3', norm2, 256, 384, 3, 3, 1, 1, 'SAME')
+ conv4 = _conv('conv4', conv3, 384, 384, 3, 3, 1, 1, 'SAME')
+ conv5 = _conv('conv5', conv4, 384, 256, 3, 3, 1, 1, 'SAME')
+ pool5 = _mpool('pool5', conv5, 3, 3, 2, 2)
+ resh1 = tf.reshape(pool5, [-1, 256 * 6 * 6])
+ affn1 = _affine('fc6', resh1, 256 * 6 * 6, 4096)
+ affn2 = _affine('fc7', affn1, 4096, 4096)
+ affn3 = _affine('fc8', affn2, 4096, 1000, wd=None, act=False) # last fc
+
+ return affn3
+
+
+def tower_loss(scope):
+ """Calculate the total loss on a single tower running the model.
+ Args:
+ scope: unique prefix string identifying the tower, e.g. 'tower_0'
+ Returns:
+ Tensor of shape [] containing the total loss for a batch of data
+ """
+ image_size = 224
+ if FLAGS.data_format == 'NCHW':
+ image_shape = [FLAGS.batch_size, 3, image_size + 3, image_size + 3]
+ else:
+ image_shape = [FLAGS.batch_size, image_size + 3, image_size + 3, 3]
+ images = tf.get_variable(
+ 'image',
+ image_shape,
+ initializer=tf.truncated_normal_initializer(
+ stddev=0.1, dtype=tf.float32),
+ dtype=tf.float32,
+ trainable=False)
+
+ labels = tf.get_variable(
+ 'label', [FLAGS.batch_size],
+ initializer=tf.constant_initializer(1),
+ dtype=tf.int32,
+ trainable=False)
+
+ # Build a Graph that computes the logits predictions from the
+ # inference model.
+ last_layer = inference(images)
+
+ # Build the portion of the Graph calculating the losses. Note that we will
+ # assemble the total_loss using a custom function below.
+ _ = loss(last_layer, labels)
+
+ # Assemble all of the losses for the current tower only.
+ losses = tf.get_collection('losses', scope)
+
+ # Calculate the total loss for the current tower.
+ total_loss = tf.add_n(losses, name='total_loss')
+
+ # Compute the moving average of all individual losses and the total loss.
+ loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
+ loss_averages_op = loss_averages.apply(losses + [total_loss])
+
+ # Attach a scalar summary to all individual losses and the total loss; do the
+ # same for the averaged version of the losses.
+ for l in losses + [total_loss]:
+ # Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training
+ # session. This helps the clarity of presentation on tensorboard.
+ loss_name = re.sub('%s_[0-9]*/' % TOWER_NAME, '', l.op.name)
+ # Name each loss as '(raw)' and name the moving average version of the loss
+ # as the original loss name.
+ tf.scalar_summary(loss_name + ' (raw)', l)
+ tf.scalar_summary(loss_name, loss_averages.average(l))
+
+ with tf.control_dependencies([loss_averages_op]):
+ total_loss = tf.identity(total_loss)
+ return total_loss
+
+
+def average_gradients(tower_grads):
+ """Calculate the average gradient for each shared variable across all towers.
+ Note that this function provides a synchronization point across all towers.
+ Args:
+ tower_grads: List of lists of (gradient, variable) tuples. The outer list
+ is over individual gradients. The inner list is over the gradient
+ calculation for each tower.
+ Returns:
+ List of pairs of (gradient, variable) where the gradient has been averaged
+ across all towers.
+ """
+ average_grads = []
+ for grad_and_vars in zip(*tower_grads):
+ # Note that each grad_and_vars looks like the following:
+ # ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
+ grads = []
+ for g, _ in grad_and_vars:
+ # Add 0 dimension to the gradients to represent the tower.
+ expanded_g = tf.expand_dims(g, 0)
+
+ # Append on a 'tower' dimension which we will average over below.
+ grads.append(expanded_g)
+
+ # Average over the 'tower' dimension.
+ grad = tf.concat(0, grads)
+ grad = tf.reduce_mean(grad, 0)
+
+ # Keep in mind that the Variables are redundant because they are shared
+ # across towers. So .. we will just return the first tower's pointer to
+ # the Variable.
+ v = grad_and_vars[0][1]
+ grad_and_var = (grad, v)
+ average_grads.append(grad_and_var)
+ return average_grads
+
+
+def time_tensorflow_run(session, target):
+ num_steps_burn_in = 50
+ total_duration = 0.0
+ total_duration_squared = 0.0
+ for i in xrange(FLAGS.num_batches + num_steps_burn_in):
+ start_time = time.time()
+ _, loss_value = session.run(target)
+ duration = time.time() - start_time
+        if i >= num_steps_burn_in:
+ if not i % 10:
+ num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
+ examples_per_sec = num_examples_per_step / duration
+ sec_per_batch = duration
+
+ format_str = (
+ '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
+ 'sec/batch batch_size = %d)')
+ print(format_str %
+ (datetime.now(), i - num_steps_burn_in, loss_value,
+                       examples_per_sec, sec_per_batch, num_examples_per_step))
+
+ total_duration += duration
+ total_duration_squared += duration * duration
+
+ mn = total_duration / FLAGS.num_batches
+ vr = total_duration_squared / FLAGS.num_batches - mn * mn
+ sd = math.sqrt(vr)
+ print('%s: FwdBwd across %d steps, %.3f +/- %.3f sec / batch' %
+ (datetime.now(), FLAGS.num_batches, mn, sd))
+
+
+def run_benchmark():
+ with tf.Graph().as_default(), tf.device('/cpu:0'):
+ # Create a variable to count the number of train() calls. This equals the
+ # number of batches processed * FLAGS.num_gpus.
+ global_step = tf.get_variable(
+ 'global_step', [],
+ initializer=tf.constant_initializer(0),
+ trainable=False)
+
+ # Calculate the learning rate schedule.
+ num_batches_per_epoch = (NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN /
+ FLAGS.batch_size)
+ decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)
+
+ # Decay the learning rate exponentially based on the number of steps.
+ lr = tf.train.exponential_decay(
+ INITIAL_LEARNING_RATE,
+ global_step,
+ decay_steps,
+ LEARNING_RATE_DECAY_FACTOR,
+ staircase=True)
+
+ # Create an optimizer that performs gradient descent.
+ opt = tf.train.MomentumOptimizer(lr, 0.9)
+
+ # Calculate the gradients for each model tower.
+ tower_grads = []
+ for i in xrange(FLAGS.num_gpus):
+ with tf.device('/gpu:%d' % i):
+ with tf.name_scope('%s_%d' % (TOWER_NAME, i)) as scope:
+ # Calculate the loss for one tower of the model. This function
+ # constructs the entire model but shares the variables across
+ # all towers.
+ loss = tower_loss(scope)
+
+ # Reuse variables for the next tower.
+ tf.get_variable_scope().reuse_variables()
+
+ # Retain the summaries from the final tower.
+ summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)
+
+ # Calculate the gradients for the batch of data on this tower.
+ grads = opt.compute_gradients(loss)
+
+ # Keep track of the gradients across all towers.
+ tower_grads.append(grads)
+
+ # We must calculate the mean of each gradient. Note that this is the
+ # synchronization point across all towers.
+ grads = average_gradients(tower_grads)
+
+ # Apply the gradients to adjust the shared variables.
+ apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
+
+ # Group all updates to into a single train op.
+ train_op = tf.group(apply_gradient_op)
+
+ # Build an initialization operation.
+ init = tf.initialize_all_variables()
+
+ # Start running operations on the Graph. allow_soft_placement must be set to
+ # True to build towers on GPU, as some of the ops do not have GPU
+ # implementations.
+ sess = tf.Session(config=tf.ConfigProto(
+ allow_soft_placement=True,
+ log_device_placement=FLAGS.log_device_placement))
+ sess.run(init)
+ time_tensorflow_run(sess, [train_op, loss])
+
+
+def main(_):
+ run_benchmark()
+
+
+if __name__ == '__main__':
+ tf.app.run()
diff --git a/benchmark/tensorflow/image/googlenet.py b/benchmark/tensorflow/image/googlenet.py
new file mode 100644
index 0000000000000000000000000000000000000000..decf855b54451efba5f6a7868fbcf631789f3572
--- /dev/null
+++ b/benchmark/tensorflow/image/googlenet.py
@@ -0,0 +1,311 @@
+from six.moves import xrange
+from datetime import datetime
+import math
+import time
+
+import tensorflow.python.platform
+import tensorflow as tf
+
+FLAGS = tf.app.flags.FLAGS
+
+tf.app.flags.DEFINE_integer('batch_size', 128, """Batch size.""")
+tf.app.flags.DEFINE_integer('num_batches', 100, """Number of batches to run.""")
+tf.app.flags.DEFINE_boolean('forward_only', False,
+ """Only run the forward pass.""")
+tf.app.flags.DEFINE_boolean('forward_backward_only', False,
+                            """Only run the forward-backward pass.""")
+tf.app.flags.DEFINE_string('data_format', 'NCHW',
+ """The data format for Convnet operations.
+ Can be either NHWC or NCHW.
+ """)
+tf.app.flags.DEFINE_boolean('log_device_placement', False,
+ """Whether to log device placement.""")
+
+parameters = []
+
+conv_counter = 1
+pool_counter = 1
+affine_counter = 1
+
+
+def _conv(inpOp, nIn, nOut, kH, kW, dH, dW, padType, wd=0.0005):
+ global conv_counter
+ global parameters
+ name = 'conv' + str(conv_counter)
+ conv_counter += 1
+ with tf.name_scope(name) as scope:
+ kernel = tf.Variable(
+ tf.truncated_normal(
+ [kH, kW, nIn, nOut], dtype=tf.float32, stddev=1e-1),
+ name='weights')
+
+ if wd is not None and wd > 0:
+ weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss')
+ tf.add_to_collection('losses', weight_decay)
+
+ if FLAGS.data_format == 'NCHW':
+ strides = [1, 1, dH, dW]
+ else:
+ strides = [1, dH, dW, 1]
+ conv = tf.nn.conv2d(
+ inpOp,
+ kernel,
+ strides,
+ padding=padType,
+ data_format=FLAGS.data_format)
+ biases = tf.Variable(
+ tf.constant(
+ 0.0, shape=[nOut], dtype=tf.float32),
+ trainable=True,
+ name='biases')
+ bias = tf.reshape(
+ tf.nn.bias_add(
+ conv, biases, data_format=FLAGS.data_format),
+ conv.get_shape())
+ conv1 = tf.nn.relu(bias, name=scope)
+ parameters += [kernel, biases]
+ return conv1
+
+
+def _affine(inpOp, nIn, nOut, act=True, wd=0.0005):
+ global affine_counter
+ global parameters
+ name = 'affine' + str(affine_counter)
+ affine_counter += 1
+ with tf.name_scope(name) as scope:
+ kernel = tf.Variable(
+ tf.truncated_normal(
+ [nIn, nOut], dtype=tf.float32, stddev=1e-1),
+ name='weights')
+
+ if wd is not None and wd > 0:
+ weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss')
+ tf.add_to_collection('losses', weight_decay)
+
+ biases = tf.Variable(
+ tf.constant(
+ 0.0, shape=[nOut], dtype=tf.float32),
+ trainable=True,
+ name='biases')
+ affine1 = tf.nn.relu_layer(
+ inpOp, kernel, biases,
+ name=name) if act else tf.matmul(inpOp, kernel) + biases
+ parameters += [kernel, biases]
+ return affine1
+
+
+def _mpool(inpOp, kH, kW, dH, dW, padding):
+ global pool_counter
+ global parameters
+ name = 'pool' + str(pool_counter)
+ pool_counter += 1
+ if FLAGS.data_format == 'NCHW':
+ ksize = [1, 1, kH, kW]
+ strides = [1, 1, dH, dW]
+ else:
+ ksize = [1, kH, kW, 1]
+ strides = [1, dH, dW, 1]
+ return tf.nn.max_pool(
+ inpOp,
+ ksize=ksize,
+ strides=strides,
+ padding=padding,
+ data_format=FLAGS.data_format,
+ name=name)
+
+
+def _apool(inpOp, kH, kW, dH, dW, padding):
+ global pool_counter
+ global parameters
+ name = 'pool' + str(pool_counter)
+ pool_counter += 1
+ if FLAGS.data_format == 'NCHW':
+ ksize = [1, 1, kH, kW]
+ strides = [1, 1, dH, dW]
+ else:
+ ksize = [1, kH, kW, 1]
+ strides = [1, dH, dW, 1]
+ return tf.nn.avg_pool(
+ inpOp,
+ ksize=ksize,
+ strides=strides,
+ padding=padding,
+ data_format=FLAGS.data_format,
+ name=name)
+
+
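+# Inception block: o1s = 1x1 filters; o2s1/o2s2 = 3x3 reduce and 3x3 filters;
+# o3s1/o3s2 = 5x5 reduce and 5x5 filters; o4s1 = pool kernel size and
+# o4s2 = pool-projection filters. Branches are concatenated along channels.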
+def _inception(inp, inSize, o1s, o2s1, o2s2, o3s1, o3s2, o4s1, o4s2):
+ conv1 = _conv(inp, inSize, o1s, 1, 1, 1, 1, 'VALID')
+
+ conv3_ = _conv(inp, inSize, o2s1, 1, 1, 1, 1, 'VALID')
+ conv3 = _conv(conv3_, o2s1, o2s2, 3, 3, 1, 1, 'SAME')
+
+ conv5_ = _conv(inp, inSize, o3s1, 1, 1, 1, 1, 'VALID')
+ conv5 = _conv(conv5_, o3s1, o3s2, 5, 5, 1, 1, 'SAME')
+
+ pool_ = _mpool(inp, o4s1, o4s1, 1, 1, 'SAME')
+ pool = _conv(pool_, inSize, o4s2, 1, 1, 1, 1, 'VALID')
+
+ if FLAGS.data_format == 'NCHW':
+ channel_dim = 1
+ else:
+ channel_dim = 3
+ incept = tf.concat(channel_dim, [conv1, conv3, conv5, pool])
+ return incept
+
+
+def loss(logits, labels):
+ batch_size = tf.size(labels)
+ labels = tf.expand_dims(labels, 1)
+ indices = tf.expand_dims(tf.range(0, batch_size, 1), 1)
+ concated = tf.concat(1, [indices, labels])
+ onehot_labels = tf.sparse_to_dense(concated,
+ tf.pack([batch_size, 1000]), 1.0, 0.0)
+ cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
+ logits, onehot_labels, name='xentropy')
+ loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
+ return loss
+
+
+def inference(images):
+ # stage 1
+ conv1 = _conv(images, 3, 64, 7, 7, 2, 2, 'SAME')
+ pool1 = _mpool(conv1, 3, 3, 2, 2, 'SAME')
+ # stage 2
+ conv2 = _conv(pool1, 64, 64, 1, 1, 1, 1, 'VALID')
+ conv3 = _conv(conv2, 64, 192, 3, 3, 1, 1, 'SAME')
+ pool3 = _mpool(conv3, 3, 3, 2, 2, 'SAME')
+
+ # stage 3
+ incept3a = _inception(pool3, 192, 64, 96, 128, 16, 32, 3, 32)
+ incept3b = _inception(incept3a, 256, 128, 128, 192, 32, 96, 3, 64)
+ pool4 = _mpool(incept3b, 3, 3, 2, 2, 'SAME')
+
+ # stage 4
+ incept4a = _inception(pool4, 480, 192, 96, 208, 16, 48, 3, 64)
+ incept4b = _inception(incept4a, 512, 160, 112, 224, 24, 64, 3, 64)
+ incept4c = _inception(incept4b, 512, 128, 128, 256, 24, 64, 3, 64)
+ incept4d = _inception(incept4c, 512, 112, 144, 288, 32, 64, 3, 64)
+ incept4e = _inception(incept4d, 528, 256, 160, 320, 32, 128, 3, 128)
+ pool5 = _mpool(incept4e, 3, 3, 2, 2, 'SAME')
+
+ # stage 5
+ incept5a = _inception(pool5, 832, 256, 160, 320, 32, 128, 3, 128)
+ incept5b = _inception(incept5a, 832, 384, 192, 384, 48, 128, 3, 128)
+ pool6 = _apool(incept5b, 7, 7, 1, 1, 'VALID')
+
+ # output 1
+ resh1 = tf.reshape(pool6, [-1, 1024])
+    # tf.nn.dropout takes a keep probability, so keeping 0.6 matches the 40%
+    # dropout rate used in the PaddlePaddle config.
+    drop = tf.nn.dropout(resh1, 0.6)
+    affn1 = _affine(drop, 1024, 1000, act=False)
+
+ return affn1
+
+
+def time_tensorflow_run(session, target, info_string):
+ num_steps_burn_in = 10
+ total_duration = 0.0
+ total_duration_squared = 0.0
+ if not isinstance(target, list):
+ target = [target]
+ target_op = tf.group(*target)
+ for i in range(FLAGS.num_batches + num_steps_burn_in):
+ start_time = time.time()
+ _ = session.run(target_op)
+ duration = time.time() - start_time
+        if i >= num_steps_burn_in:
+ if not i % 10:
+ print('%s: step %d, duration = %.3f' %
+ (datetime.now(), i - num_steps_burn_in, duration))
+ total_duration += duration
+ total_duration_squared += duration * duration
+ mn = total_duration / FLAGS.num_batches
+ vr = total_duration_squared / FLAGS.num_batches - mn * mn
+ sd = math.sqrt(vr)
+ print('%s: %s across %d steps, %.3f +/- %.3f sec / batch' %
+ (datetime.now(), info_string, FLAGS.num_batches, mn, sd))
+
+
+def run_benchmark():
+ global parameters
+ with tf.Graph().as_default():
+ # Generate some dummy images.
+ image_size = 224
+ if FLAGS.data_format == 'NCHW':
+ image_shape = [FLAGS.batch_size, 3, image_size, image_size]
+ else:
+ image_shape = [FLAGS.batch_size, image_size, image_size, 3]
+
+ images = tf.get_variable(
+ 'image',
+ image_shape,
+ initializer=tf.truncated_normal_initializer(
+ stddev=0.1, dtype=tf.float32),
+ dtype=tf.float32,
+ trainable=False)
+
+ labels = tf.get_variable(
+ 'label', [FLAGS.batch_size],
+ initializer=tf.constant_initializer(1),
+ dtype=tf.int32,
+ trainable=False)
+
+ # Build a Graph that computes the logits predictions from the
+ # inference model.
+ last_layer = inference(images)
+
+ objective = loss(last_layer, labels)
+
+ # Compute gradients.
+ # opt = tf.train.GradientDescentOptimizer(0.001)
+ opt = tf.train.MomentumOptimizer(0.001, 0.9)
+ grads = opt.compute_gradients(objective)
+ global_step = tf.get_variable(
+ 'global_step', [],
+ initializer=tf.constant_initializer(
+ 0.0, dtype=tf.float32),
+ trainable=False,
+ dtype=tf.float32)
+ apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
+
+ # Track the moving averages of all trainable variables.
+ variable_averages = tf.train.ExponentialMovingAverage(0.9, global_step)
+ variables_averages_op = variable_averages.apply(tf.trainable_variables(
+ ))
+
+ # Build an initialization operation.
+ init = tf.initialize_all_variables()
+
+ # Start running operations on the Graph.
+ sess = tf.Session(config=tf.ConfigProto(
+ allow_soft_placement=True,
+ log_device_placement=FLAGS.log_device_placement))
+ sess.run(init)
+
+ run_forward = True
+ run_forward_backward = True
+ if FLAGS.forward_only and FLAGS.forward_backward_only:
+ raise ValueError("Cannot specify --forward_only and "
+ "--forward_backward_only at the same time.")
+ if FLAGS.forward_only:
+ run_forward_backward = False
+ elif FLAGS.forward_backward_only:
+ run_forward = False
+
+ if run_forward:
+ # Run the forward benchmark.
+ time_tensorflow_run(sess, last_layer, "Forward")
+
+ if run_forward_backward:
+ with tf.control_dependencies(
+ [apply_gradient_op, variables_averages_op]):
+ train_op = tf.no_op(name='train')
+ time_tensorflow_run(sess, [train_op, objective], "Forward-backward")
+
+
+def main(_):
+ run_benchmark()
+
+
+if __name__ == '__main__':
+ tf.app.run()
diff --git a/benchmark/tensorflow/image/googlenet_multi_gpu.py b/benchmark/tensorflow/image/googlenet_multi_gpu.py
new file mode 100644
index 0000000000000000000000000000000000000000..31466faa37c47c66e4fe4628e28c867875e89f2e
--- /dev/null
+++ b/benchmark/tensorflow/image/googlenet_multi_gpu.py
@@ -0,0 +1,411 @@
+from six.moves import xrange # pylint: disable=redefined-builtin
+from datetime import datetime
+import math
+import re
+import time
+import numpy as np
+
+import tensorflow.python.platform
+import tensorflow as tf
+
+FLAGS = tf.app.flags.FLAGS
+
+tf.app.flags.DEFINE_integer('batch_size', 64, """Batch size.""")
+tf.app.flags.DEFINE_integer('num_batches', 100, """Number of batches to run.""")
+tf.app.flags.DEFINE_string('data_format', 'NCHW',
+ """The data format for Convnet operations.
+ Can be either NHWC or NCHW.
+ """)
+
+tf.app.flags.DEFINE_string('train_dir', '/train_model',
+ """Directory where to write event logs """
+ """and checkpoint.""")
+tf.app.flags.DEFINE_integer('num_gpus', 4, """How many GPUs to use.""")
+tf.app.flags.DEFINE_boolean('log_device_placement', False,
+ """Whether to log device placement.""")
+
+NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 50000
+NUM_EPOCHS_PER_DECAY = 50
+INITIAL_LEARNING_RATE = 0.1
+LEARNING_RATE_DECAY_FACTOR = 0.1
+TOWER_NAME = 'tower'
+
+
+def _conv(name, inpOp, nIn, nOut, kH, kW, dH, dW, padType, wd=0.005):
+ with tf.name_scope(name) as scope:
+ kernel = tf.get_variable(
+ name + '_w', [kH, kW, nIn, nOut],
+ initializer=tf.truncated_normal_initializer(
+ stddev=0.01, dtype=tf.float32),
+ dtype=tf.float32)
+
+ if wd is not None:
+ weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss')
+ tf.add_to_collection('losses', weight_decay)
+
+ if FLAGS.data_format == 'NCHW':
+ strides = [1, 1, dH, dW]
+ else:
+ strides = [1, dH, dW, 1]
+ conv = tf.nn.conv2d(
+ inpOp,
+ kernel,
+ strides,
+ padding=padType,
+ data_format=FLAGS.data_format)
+
+ biases = tf.get_variable(
+ name=name + '_b',
+ shape=[nOut],
+ initializer=tf.constant_initializer(
+ value=0.0, dtype=tf.float32),
+ dtype=tf.float32)
+
+ bias = tf.reshape(
+ tf.nn.bias_add(
+ conv, biases, data_format=FLAGS.data_format),
+ conv.get_shape())
+
+ conv1 = tf.nn.relu(bias, name=scope)
+ return conv1
+
+
+def _affine(name, inpOp, nIn, nOut, wd=0.005, act=True):
+ with tf.name_scope(name) as scope:
+ kernel = tf.get_variable(
+ name + '_w', [nIn, nOut],
+ initializer=tf.truncated_normal_initializer(
+ stddev=0.01, dtype=tf.float32),
+ dtype=tf.float32)
+
+ if wd is not None:
+ weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss')
+ tf.add_to_collection('losses', weight_decay)
+
+ biases = tf.get_variable(
+ name + '_b', [nOut],
+ initializer=tf.constant_initializer(
+ value=0.0, dtype=tf.float32),
+ dtype=tf.float32,
+ trainable=True)
+
+ affine1 = tf.nn.relu_layer(inpOp, kernel, biases, name=name) if act else \
+ tf.matmul(inpOp, kernel) + biases
+
+ return affine1
+
+
+def _mpool(name, inpOp, kH, kW, dH, dW, padding):
+ if FLAGS.data_format == 'NCHW':
+ ksize = [1, 1, kH, kW]
+ strides = [1, 1, dH, dW]
+ else:
+ ksize = [1, kH, kW, 1]
+ strides = [1, dH, dW, 1]
+ return tf.nn.max_pool(
+ inpOp,
+ ksize=ksize,
+ strides=strides,
+ padding=padding,
+ data_format=FLAGS.data_format,
+ name=name)
+
+
+def _apool(name, inpOp, kH, kW, dH, dW, padding):
+ if FLAGS.data_format == 'NCHW':
+ ksize = [1, 1, kH, kW]
+ strides = [1, 1, dH, dW]
+ else:
+ ksize = [1, kH, kW, 1]
+ strides = [1, dH, dW, 1]
+ return tf.nn.avg_pool(
+ inpOp,
+ ksize=ksize,
+ strides=strides,
+ padding=padding,
+ data_format=FLAGS.data_format,
+ name=name)
+
+
+def loss(logits, labels):
+ labels = tf.cast(labels, tf.int64)
+ cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
+ logits, labels, name='cross_entropy_per_example')
+ cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
+ tf.add_to_collection('losses', cross_entropy_mean)
+
+ # The total loss is defined as the cross entropy loss plus all of the weight
+ # decay terms (L2 loss).
+ return tf.add_n(tf.get_collection('losses'), name='total_loss')
+
+
+def get_incoming_shape(incoming):
+ """ Returns the incoming data shape """
+ if isinstance(incoming, tf.Tensor):
+ return incoming.get_shape().as_list()
+    elif isinstance(incoming, (np.ndarray, list, tuple)):
+ return np.shape(incoming)
+ else:
+ raise Exception("Invalid incoming layer.")
+
+
+def _inception(name, inp, inSize, o1s, o2s1, o2s2, o3s1, o3s2, o4s1, o4s2):
+ conv1 = _conv(name + '_1', inp, inSize, o1s, 1, 1, 1, 1, 'VALID')
+
+ conv3_ = _conv(name + '_3r', inp, inSize, o2s1, 1, 1, 1, 1, 'VALID')
+ conv3 = _conv(name + '_3', conv3_, o2s1, o2s2, 3, 3, 1, 1, 'SAME')
+
+ conv5_ = _conv(name + '_5r', inp, inSize, o3s1, 1, 1, 1, 1, 'VALID')
+    conv5 = _conv(name + '_5', conv5_, o3s1, o3s2, 5, 5, 1, 1, 'SAME')
+
+    pool_ = _mpool(name + '_pool', inp, o4s1, o4s1, 1, 1, 'SAME')
+    pool = _conv(name + '_proj', pool_, inSize, o4s2, 1, 1, 1, 1, 'VALID')
+
+ if FLAGS.data_format == 'NCHW':
+ channel_dim = 1
+ else:
+ channel_dim = 3
+ incept = tf.concat(channel_dim, [conv1, conv3, conv5, pool])
+ return incept
+
+
+def inference(images):
+ # stage 1
+ conv1 = _conv('conv1', images, 3, 64, 7, 7, 2, 2, 'SAME')
+ pool1 = _mpool('pool1', conv1, 3, 3, 2, 2, 'SAME')
+
+ # stage 2
+ conv2 = _conv('conv2', pool1, 64, 64, 1, 1, 1, 1, 'VALID')
+ conv3 = _conv('conv3', conv2, 64, 192, 3, 3, 1, 1, 'SAME')
+ pool3 = _mpool('pool3', conv3, 3, 3, 2, 2, 'SAME')
+
+ # stage 3
+ incept3a = _inception('ince3a', pool3, 192, 64, 96, 128, 16, 32, 3, 32)
+ incept3b = _inception('ince3b', incept3a, 256, 128, 128, 192, 32, 96, 3, 64)
+ pool4 = _mpool('pool4', incept3b, 3, 3, 2, 2, 'SAME')
+
+ # stage 4
+ incept4a = _inception('ince4a', pool4, 480, 192, 96, 208, 16, 48, 3, 64)
+ incept4b = _inception('ince4b', incept4a, 512, 160, 112, 224, 24, 64, 3, 64)
+ incept4c = _inception('ince4c', incept4b, 512, 128, 128, 256, 24, 64, 3, 64)
+ incept4d = _inception('ince4d', incept4c, 512, 112, 144, 288, 32, 64, 3, 64)
+ incept4e = _inception('ince4e', incept4d, 528, 256, 160, 320, 32, 128, 3,
+ 128)
+ pool5 = _mpool('pool5', incept4e, 3, 3, 2, 2, 'SAME')
+
+ # stage 5
+ incept5a = _inception('ince5a', pool5, 832, 256, 160, 320, 32, 128, 3, 128)
+ incept5b = _inception('ince5b', incept5a, 832, 384, 192, 384, 48, 128, 3,
+ 128)
+ pool6 = _apool('pool6', incept5b, 7, 7, 1, 1, 'VALID')
+
+ # output 1
+ resh1 = tf.reshape(pool6, [-1, 1024])
+    # tf.nn.dropout takes a keep probability, so keeping 0.6 matches the 40%
+    # dropout rate used in the PaddlePaddle config.
+    drop = tf.nn.dropout(resh1, 0.6)
+    affn1 = _affine('fc_out', drop, 1024, 1000, act=False)
+
+ return affn1
+
+
+def tower_loss(scope):
+ """Calculate the total loss on a single tower running the model.
+ Args:
+ scope: unique prefix string identifying the tower, e.g. 'tower_0'
+ Returns:
+ Tensor of shape [] containing the total loss for a batch of data
+ """
+ image_size = 224
+ if FLAGS.data_format == 'NCHW':
+ image_shape = [FLAGS.batch_size, 3, image_size, image_size]
+ else:
+ image_shape = [FLAGS.batch_size, image_size, image_size, 3]
+ images = tf.get_variable(
+ 'image',
+ image_shape,
+ initializer=tf.truncated_normal_initializer(
+ stddev=0.1, dtype=tf.float32),
+ dtype=tf.float32,
+ trainable=False)
+
+ labels = tf.get_variable(
+ 'label', [FLAGS.batch_size],
+ initializer=tf.constant_initializer(1),
+ dtype=tf.int32,
+ trainable=False)
+
+ # Build a Graph that computes the logits predictions from the
+ # inference model.
+ last_layer = inference(images)
+
+ # Build the portion of the Graph calculating the losses. Note that we will
+ # assemble the total_loss using a custom function below.
+ _ = loss(last_layer, labels)
+
+ # Assemble all of the losses for the current tower only.
+ losses = tf.get_collection('losses', scope)
+
+ # Calculate the total loss for the current tower.
+ total_loss = tf.add_n(losses, name='total_loss')
+
+ # Compute the moving average of all individual losses and the total loss.
+ loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
+ loss_averages_op = loss_averages.apply(losses + [total_loss])
+
+ # Attach a scalar summary to all individual losses and the total loss; do the
+ # same for the averaged version of the losses.
+ for l in losses + [total_loss]:
+ # Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training
+ # session. This helps the clarity of presentation on tensorboard.
+ loss_name = re.sub('%s_[0-9]*/' % TOWER_NAME, '', l.op.name)
+ # Name each loss as '(raw)' and name the moving average version of the loss
+ # as the original loss name.
+ tf.scalar_summary(loss_name + ' (raw)', l)
+ tf.scalar_summary(loss_name, loss_averages.average(l))
+
+ with tf.control_dependencies([loss_averages_op]):
+ total_loss = tf.identity(total_loss)
+ return total_loss
+
+
+def average_gradients(tower_grads):
+ """Calculate the average gradient for each shared variable across all towers.
+ Note that this function provides a synchronization point across all towers.
+ Args:
+ tower_grads: List of lists of (gradient, variable) tuples. The outer list
+ is over individual gradients. The inner list is over the gradient
+ calculation for each tower.
+ Returns:
+ List of pairs of (gradient, variable) where the gradient has been averaged
+ across all towers.
+ """
+ average_grads = []
+ for grad_and_vars in zip(*tower_grads):
+ # Note that each grad_and_vars looks like the following:
+ # ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
+ grads = []
+ for g, _ in grad_and_vars:
+ # Add 0 dimension to the gradients to represent the tower.
+ expanded_g = tf.expand_dims(g, 0)
+
+ # Append on a 'tower' dimension which we will average over below.
+ grads.append(expanded_g)
+
+ # Average over the 'tower' dimension.
+ grad = tf.concat(0, grads)
+ grad = tf.reduce_mean(grad, 0)
+
+ # Keep in mind that the Variables are redundant because they are shared
+ # across towers. So .. we will just return the first tower's pointer to
+ # the Variable.
+ v = grad_and_vars[0][1]
+ grad_and_var = (grad, v)
+ average_grads.append(grad_and_var)
+ return average_grads
+
+
+def time_tensorflow_run(session, target):
+ num_steps_burn_in = 50
+ total_duration = 0.0
+ total_duration_squared = 0.0
+ for i in xrange(FLAGS.num_batches + num_steps_burn_in):
+ start_time = time.time()
+ _, loss_value = session.run(target)
+ duration = time.time() - start_time
+        if i >= num_steps_burn_in:
+ if not i % 10:
+ num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
+ examples_per_sec = num_examples_per_step / duration
+ sec_per_batch = duration
+
+ format_str = (
+ '%s: step %d, loss = %.2f (%.1f examples/sec; %.3f '
+ 'sec/batch batch_size = %d)')
+ print(format_str %
+ (datetime.now(), i - num_steps_burn_in, loss_value,
+                       examples_per_sec, sec_per_batch, num_examples_per_step))
+
+ total_duration += duration
+ total_duration_squared += duration * duration
+
+ mn = total_duration / FLAGS.num_batches
+ vr = total_duration_squared / FLAGS.num_batches - mn * mn
+ sd = math.sqrt(vr)
+ print('%s: FwdBwd across %d steps, %.3f +/- %.3f sec / batch' %
+ (datetime.now(), FLAGS.num_batches, mn, sd))
+
+
+def run_benchmark():
+ with tf.Graph().as_default(), tf.device('/cpu:0'):
+ # Create a variable to count the number of train() calls. This equals the
+ # number of batches processed * FLAGS.num_gpus.
+ global_step = tf.get_variable(
+ 'global_step', [],
+ initializer=tf.constant_initializer(0),
+ trainable=False)
+
+ # Calculate the learning rate schedule.
+ num_batches_per_epoch = (NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN /
+ FLAGS.batch_size)
+ decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)
+
+ # Decay the learning rate exponentially based on the number of steps.
+ lr = tf.train.exponential_decay(
+ INITIAL_LEARNING_RATE,
+ global_step,
+ decay_steps,
+ LEARNING_RATE_DECAY_FACTOR,
+ staircase=True)
+
+ # Create an optimizer that performs gradient descent.
+ opt = tf.train.MomentumOptimizer(lr, 0.9)
+
+ # Calculate the gradients for each model tower.
+ tower_grads = []
+ for i in xrange(FLAGS.num_gpus):
+ with tf.device('/gpu:%d' % i):
+ with tf.name_scope('%s_%d' % (TOWER_NAME, i)) as scope:
+ # Calculate the loss for one tower of the model. This function
+ # constructs the entire model but shares the variables across
+ # all towers.
+ loss = tower_loss(scope)
+
+ # Reuse variables for the next tower.
+ tf.get_variable_scope().reuse_variables()
+
+ # Retain the summaries from the final tower.
+ summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)
+
+ # Calculate the gradients for the batch of data on this tower.
+ grads = opt.compute_gradients(loss)
+
+ # Keep track of the gradients across all towers.
+ tower_grads.append(grads)
+
+ # We must calculate the mean of each gradient. Note that this is the
+ # synchronization point across all towers.
+ grads = average_gradients(tower_grads)
+
+ # Apply the gradients to adjust the shared variables.
+ apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
+
+ # Group all updates to into a single train op.
+ train_op = tf.group(apply_gradient_op)
+
+ # Build an initialization operation.
+ init = tf.initialize_all_variables()
+
+ # Start running operations on the Graph. allow_soft_placement must be set to
+ # True to build towers on GPU, as some of the ops do not have GPU
+ # implementations.
+ sess = tf.Session(config=tf.ConfigProto(
+ allow_soft_placement=True,
+ log_device_placement=FLAGS.log_device_placement))
+ sess.run(init)
+ time_tensorflow_run(sess, [train_op, loss])
+
+
+def main(_):
+ run_benchmark()
+
+
+if __name__ == '__main__':
+ tf.app.run()
diff --git a/benchmark/tensorflow/image/run.sh b/benchmark/tensorflow/image/run.sh
new file mode 100755
index 0000000000000000000000000000000000000000..eade36beb9df5f8d3978939216e058203e024c1a
--- /dev/null
+++ b/benchmark/tensorflow/image/run.sh
@@ -0,0 +1,28 @@
+set -e
+
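+# Usage: test <script.py> <batch_size> <log_prefix>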
+function test() {
+ cfg=$1
+ batch_size=$2
+ prefix=$3
+ python $cfg --batch_size=$batch_size > logs/${prefix}-1gpu-${batch_size}.log 2>&1
+}
+
+if [ ! -d "logs" ]; then
+ mkdir logs
+fi
+
+# alexnet
+test alexnet.py 64 alexnet
+test alexnet.py 128 alexnet
+test alexnet.py 256 alexnet
+test alexnet.py 512 alexnet
+
+# googlenet
+test googlenet.py 64 googlenet
+test googlenet.py 128 googlenet
+
+# smallnet
+test smallnet_mnist_cifar.py 64 smallnet
+test smallnet_mnist_cifar.py 128 smallnet
+test smallnet_mnist_cifar.py 256 smallnet
+test smallnet_mnist_cifar.py 512 smallnet
diff --git a/benchmark/tensorflow/image/run_multi.sh b/benchmark/tensorflow/image/run_multi.sh
new file mode 100755
index 0000000000000000000000000000000000000000..69faa4331744f2276e7706185ae10bc507f95764
--- /dev/null
+++ b/benchmark/tensorflow/image/run_multi.sh
@@ -0,0 +1,22 @@
+set -e
+
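+# Usage: test <config_script> <num_gpus> <total_batch_size> <log_prefix>
+# The total batch size is split evenly across GPUs (--batch_size is per GPU);
+# the log file name assumes 4 GPUs.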
+function test() {
+ cfg=$1
+ num_gpu=$2
+ batch_size=$3
+ batch_per_gpu=`expr ${batch_size} / ${num_gpu}`
+ prefix=$4
+ python $cfg --num_gpus=$num_gpu --batch_size=${batch_per_gpu} > logs/${prefix}-4gpu-${batch_size}.log 2>&1
+}
+
+if [ ! -d "logs" ]; then
+ mkdir logs
+fi
+
+# alexnet
+test alexnet_multi_gpu.py 4 512 alexnet
+test alexnet_multi_gpu.py 4 1024 alexnet
+
+# googlenet
+test googlenet_multi_gpu.py 4 512 googlenet
+test googlenet_multi_gpu.py 4 1024 googlenet
diff --git a/benchmark/tensorflow/image/smallnet_mnist_cifar.py b/benchmark/tensorflow/image/smallnet_mnist_cifar.py
new file mode 100644
index 0000000000000000000000000000000000000000..1a625134a6c58586b29190ede9c66253f484d2cf
--- /dev/null
+++ b/benchmark/tensorflow/image/smallnet_mnist_cifar.py
@@ -0,0 +1,304 @@
+from six.moves import xrange # pylint: disable=redefined-builtin
+from datetime import datetime
+import math
+import time
+
+import numpy as np
+import tensorflow.python.platform
+import tensorflow as tf
+
+FLAGS = tf.app.flags.FLAGS
+
+tf.app.flags.DEFINE_integer('batch_size', 128, """Batch size.""")
+tf.app.flags.DEFINE_integer('num_batches', 100, """Number of batches to run.""")
+tf.app.flags.DEFINE_boolean('forward_only', False,
+ """Only run the forward pass.""")
+tf.app.flags.DEFINE_boolean('forward_backward_only', False,
+                            """Only run the forward-backward pass.""")
+tf.app.flags.DEFINE_string('data_format', 'NCHW',
+ """The data format for Convnet operations.
+ Can be either NHWC or NCHW.
+ """)
+tf.app.flags.DEFINE_boolean('log_device_placement', False,
+ """Whether to log device placement.""")
+
+parameters = []
+
+conv_counter = 1
+pool_counter = 1
+affine_counter = 1
+
+
+def _conv(inpOp, nIn, nOut, kH, kW, dH, dW, padType, wd=0.005, act=True):
+ global conv_counter
+ global parameters
+ name = 'conv' + str(conv_counter)
+ conv_counter += 1
+ with tf.name_scope(name) as scope:
+ kernel = tf.Variable(
+ tf.truncated_normal(
+ [kH, kW, nIn, nOut], dtype=tf.float32, stddev=1e-1),
+ name='weights')
+
+ if wd is not None:
+ weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss')
+ tf.add_to_collection('losses', weight_decay)
+
+ if FLAGS.data_format == 'NCHW':
+ strides = [1, 1, dH, dW]
+ else:
+ strides = [1, dH, dW, 1]
+ conv = tf.nn.conv2d(
+ inpOp,
+ kernel,
+ strides,
+ padding=padType,
+ data_format=FLAGS.data_format)
+ biases = tf.Variable(
+ tf.constant(
+ 0.0, shape=[nOut], dtype=tf.float32),
+ trainable=True,
+ name='biases')
+ bias = tf.reshape(
+ tf.nn.bias_add(
+ conv, biases, data_format=FLAGS.data_format),
+ conv.get_shape())
+
+ conv1 = tf.nn.relu(bias, name=scope) if act else bias
+
+ parameters += [kernel, biases]
+
+ return conv1
+
+
+def _affine(inpOp, nIn, nOut, wd=None, act=True):
+ global affine_counter
+ global parameters
+ name = 'affine' + str(affine_counter)
+ affine_counter += 1
+ with tf.name_scope(name) as scope:
+ kernel = tf.Variable(
+ tf.truncated_normal(
+ [nIn, nOut], dtype=tf.float32, stddev=1e-1),
+ name='weights')
+
+ if wd is not None:
+ weight_decay = tf.mul(tf.nn.l2_loss(kernel), wd, name='weight_loss')
+ tf.add_to_collection('losses', weight_decay)
+
+ biases = tf.Variable(
+ tf.constant(
+ 0.0, shape=[nOut], dtype=tf.float32),
+ trainable=True,
+ name='biases')
+
+ affine1 = tf.nn.relu_layer(
+ inpOp, kernel, biases,
+ name=name) if act else tf.matmul(inpOp, kernel) + biases
+
+ parameters += [kernel, biases]
+
+ return affine1
+
+
+def _mpool(inpOp, kH, kW, dH, dW, padding):
+ global pool_counter
+ global parameters
+ name = 'pool' + str(pool_counter)
+ pool_counter += 1
+ if FLAGS.data_format == 'NCHW':
+ ksize = [1, 1, kH, kW]
+ strides = [1, 1, dH, dW]
+ else:
+ ksize = [1, kH, kW, 1]
+ strides = [1, dH, dW, 1]
+ return tf.nn.max_pool(
+ inpOp,
+ ksize=ksize,
+ strides=strides,
+ padding=padding,
+ data_format=FLAGS.data_format,
+ name=name)
+
+
+def _apool(inpOp, kH, kW, dH, dW, padding):
+ global pool_counter
+ global parameters
+ name = 'pool' + str(pool_counter)
+ pool_counter += 1
+ if FLAGS.data_format == 'NCHW':
+ ksize = [1, 1, kH, kW]
+ strides = [1, 1, dH, dW]
+ else:
+ ksize = [1, kH, kW, 1]
+ strides = [1, dH, dW, 1]
+ return tf.nn.avg_pool(
+ inpOp,
+ ksize=ksize,
+ strides=strides,
+ padding=padding,
+ data_format=FLAGS.data_format,
+ name=name)
+
+
+def _norm(name, l_input, lsize=4):
+ return tf.nn.lrn(l_input,
+ lsize,
+ bias=1.0,
+ alpha=0.001 / 9.0,
+ beta=0.75,
+ name=name)
+
+
+def loss(logits, labels):
+ batch_size = tf.size(labels)
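+    # Convert the sparse label indices into dense one-hot vectors of shape
+    # [batch_size, 10] before computing the softmax cross entropy.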
+ labels = tf.expand_dims(labels, 1)
+ indices = tf.expand_dims(tf.range(0, batch_size, 1), 1)
+ concated = tf.concat(1, [indices, labels])
+ onehot_labels = tf.sparse_to_dense(concated,
+ tf.pack([batch_size, 10]), 1.0, 0.0)
+ cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
+ logits, onehot_labels, name='xentropy')
+ loss = tf.reduce_mean(cross_entropy, name='xentropy_mean')
+ return loss
+
+
+def get_incoming_shape(incoming):
+ """ Returns the incoming data shape """
+ if isinstance(incoming, tf.Tensor):
+ return incoming.get_shape().as_list()
+    elif isinstance(incoming, (np.ndarray, list, tuple)):
+ return np.shape(incoming)
+ else:
+ raise Exception("Invalid incoming layer.")
+
+
+def inference(images):
+ conv1 = _conv(images, 3, 32, 5, 5, 1, 1, 'SAME')
+ pool1 = _mpool(conv1, 3, 3, 2, 2, 'SAME')
+ conv2 = _conv(pool1, 32, 32, 5, 5, 1, 1, 'SAME')
+ pool2 = _apool(conv2, 3, 3, 2, 2, 'SAME')
+ conv3 = _conv(pool2, 32, 64, 5, 5, 1, 1, 'SAME')
+ pool3 = _apool(conv3, 3, 3, 2, 2, 'SAME')
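+    # With 32x32 inputs and SAME pooling at stride 2, the spatial size goes
+    # 32 -> 16 -> 8 -> 4, so pool3 is [batch, 64, 4, 4] in NCHW.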
+ resh1 = tf.reshape(pool3, [-1, 64 * 4 * 4])
+ affn1 = _affine(resh1, 64 * 4 * 4, 64)
+ affn2 = _affine(affn1, 64, 10, act=False)
+
+ print('conv1:', get_incoming_shape(conv1))
+ print('pool1:', get_incoming_shape(pool1))
+ print('conv2:', get_incoming_shape(conv2))
+ print('pool2:', get_incoming_shape(pool2))
+ print('conv3:', get_incoming_shape(conv3))
+ print('pool3:', get_incoming_shape(pool3))
+
+ return affn2
+
+
+def time_tensorflow_run(session, target, info_string):
+ num_steps_burn_in = 10
+ total_duration = 0.0
+ total_duration_squared = 0.0
+ if not isinstance(target, list):
+ target = [target]
+ target_op = tf.group(*target)
+ for i in xrange(FLAGS.num_batches + num_steps_burn_in):
+ start_time = time.time()
+ _ = session.run(target_op)
+ duration = time.time() - start_time
+        if i >= num_steps_burn_in:
+ if not i % 10:
+ print('%s: step %d, duration = %.3f' %
+ (datetime.now(), i - num_steps_burn_in, duration))
+ total_duration += duration
+ total_duration_squared += duration * duration
+ mn = total_duration / FLAGS.num_batches
+ vr = total_duration_squared / FLAGS.num_batches - mn * mn
+ sd = math.sqrt(vr)
+ print('%s: %s across %d steps, %.3f +/- %.3f sec / batch' %
+ (datetime.now(), info_string, FLAGS.num_batches, mn, sd))
+
+
+def run_benchmark():
+ global parameters
+ with tf.Graph().as_default():
+ # Generate some dummy images.
+ image_size = 32
+        # Note that the padding definition here differs slightly from
+        # cuda-convnet; SAME padding keeps the activation sizes aligned
+        # without resizing the input.
+ if FLAGS.data_format == 'NCHW':
+ image_shape = [FLAGS.batch_size, 3, image_size, image_size]
+ else:
+ image_shape = [FLAGS.batch_size, image_size, image_size, 3]
+
+ images = tf.get_variable(
+ 'image',
+ image_shape,
+ initializer=tf.truncated_normal_initializer(
+ stddev=0.1, dtype=tf.float32),
+ dtype=tf.float32,
+ trainable=False)
+
+ labels = tf.get_variable(
+ 'label', [FLAGS.batch_size],
+ initializer=tf.constant_initializer(1),
+ dtype=tf.int32,
+ trainable=False)
+
+ # Build a Graph that computes the logits predictions from the
+ # inference model.
+ last_layer = inference(images)
+
+ objective = loss(last_layer, labels)
+
+ # Compute gradients.
+ opt = tf.train.MomentumOptimizer(0.001, 0.9)
+ grads = opt.compute_gradients(objective)
+ global_step = tf.get_variable(
+ 'global_step', [],
+ initializer=tf.constant_initializer(
+ 0.0, dtype=tf.float32),
+ trainable=False,
+ dtype=tf.float32)
+ apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
+
+ # Track the moving averages of all trainable variables.
+ variable_averages = tf.train.ExponentialMovingAverage(0.9, global_step)
+        variables_averages_op = variable_averages.apply(tf.trainable_variables())
+
+ # Build an initialization operation.
+ init = tf.initialize_all_variables()
+
+ # Start running operations on the Graph.
+ sess = tf.Session(config=tf.ConfigProto(
+ allow_soft_placement=True,
+ log_device_placement=FLAGS.log_device_placement))
+ sess.run(init)
+
+ run_forward = True
+ run_forward_backward = True
+ if FLAGS.forward_only and FLAGS.forward_backward_only:
+ raise ValueError("Cannot specify --forward_only and "
+ "--forward_backward_only at the same time.")
+ if FLAGS.forward_only:
+ run_forward_backward = False
+ elif FLAGS.forward_backward_only:
+ run_forward = False
+
+ if run_forward:
+ # Run the forward benchmark.
+ time_tensorflow_run(sess, last_layer, "Forward")
+
+ if run_forward_backward:
+ with tf.control_dependencies(
+ [apply_gradient_op, variables_averages_op]):
+ train_op = tf.no_op(name='train')
+ time_tensorflow_run(sess, [train_op, objective], "Forward-backward")
+
+
+def main(_):
+ run_benchmark()
+
+
+if __name__ == '__main__':
+ tf.app.run()
diff --git a/benchmark/tensorflow/rnn/README.md b/benchmark/tensorflow/rnn/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..da8e7b8b07969051cbec3ac6a713eaf7fc738a55
--- /dev/null
+++ b/benchmark/tensorflow/rnn/README.md
@@ -0,0 +1,5 @@
+You should also install tflearn:
+
+```bash
+pip install -r requirements.txt
+```
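+
+The reader loads the IMDB data from `imdb.pkl` in the working directory; tflearn's `imdb.load_data` should download the file automatically on first use if it is not already present.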
diff --git a/benchmark/tensorflow/rnn/reader.py b/benchmark/tensorflow/rnn/reader.py
new file mode 100755
index 0000000000000000000000000000000000000000..f538329a15ea9ad9293c97c94340989e2c421eb2
--- /dev/null
+++ b/benchmark/tensorflow/rnn/reader.py
@@ -0,0 +1,92 @@
+import os.path
+import io
+import numpy as np
+import tensorflow as tf
+
+# tflearn
+import tflearn
+from tflearn.data_utils import to_categorical, pad_sequences
+from tflearn.datasets import imdb
+
+FLAGS = tf.app.flags.FLAGS
+
+
+class DataSet(object):
+ def __init__(self, data, labels):
+ assert data.shape[0] == labels.shape[0], (
+ 'data.shape: %s labels.shape: %s' % (data.shape, labels.shape))
+ self._num_examples = data.shape[0]
+
+ self._data = data
+ self._labels = labels
+ self._epochs_completed = 0
+ self._index_in_epoch = 0
+
+ @property
+ def data(self):
+ return self._data
+
+ @property
+ def labels(self):
+ return self._labels
+
+ @property
+ def num_examples(self):
+ return self._num_examples
+
+ @property
+ def epochs_completed(self):
+ return self._epochs_completed
+
+ def next_batch(self, batch_size):
+ assert batch_size <= self._num_examples
+
+ start = self._index_in_epoch
+ self._index_in_epoch += batch_size
+ if self._index_in_epoch > self._num_examples:
+ # Finished epoch
+ self._epochs_completed += 1
+ # Shuffle the data
+ perm = np.arange(self._num_examples)
+ np.random.shuffle(perm)
+ self._data = self._data[perm]
+ self._labels = self._labels[perm]
+ # Start next epoch
+ start = 0
+ self._index_in_epoch = batch_size
+
+ end = self._index_in_epoch
+
+ return self._data[start:end], self._labels[start:end]
+
+
+def create_datasets(file_path, vocab_size=30000, val_fraction=0.0):
+
+ # IMDB Dataset loading
+ train, test, _ = imdb.load_data(
+ path=file_path,
+ n_words=vocab_size,
+ valid_portion=val_fraction,
+ sort_by_len=False)
+ trainX, trainY = train
+ testX, testY = test
+
+ # Data preprocessing
+ # Sequence padding
+ trainX = pad_sequences(trainX, maxlen=FLAGS.max_len, value=0.)
+ testX = pad_sequences(testX, maxlen=FLAGS.max_len, value=0.)
+ # Converting labels to binary vectors
+ trainY = to_categorical(trainY, nb_classes=2)
+ testY = to_categorical(testY, nb_classes=2)
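+    # Only the training split is wrapped in a DataSet below; the padded test
+    # split is prepared but unused by these benchmarks.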
+
+ train_dataset = DataSet(trainX, trainY)
+
+ return train_dataset
+
+
+def main():
+ create_datasets('imdb.pkl')
+
+
+if __name__ == "__main__":
+ main()
diff --git a/benchmark/tensorflow/rnn/requirements.txt b/benchmark/tensorflow/rnn/requirements.txt
new file mode 100644
index 0000000000000000000000000000000000000000..4242e7d24fbbeb18e8fb9a760d76fa6d5363b03f
--- /dev/null
+++ b/benchmark/tensorflow/rnn/requirements.txt
@@ -0,0 +1 @@
+tflearn
diff --git a/benchmark/tensorflow/rnn/rnn.py b/benchmark/tensorflow/rnn/rnn.py
new file mode 100755
index 0000000000000000000000000000000000000000..f288083e13656563b511980553245142efec4e65
--- /dev/null
+++ b/benchmark/tensorflow/rnn/rnn.py
@@ -0,0 +1,223 @@
+#!/usr/bin/env python
+from six.moves import xrange # pylint: disable=redefined-builtin
+import math
+import time
+import numpy as np
+from datetime import datetime
+
+import reader
+import tensorflow as tf
+from tensorflow.python.ops import rnn
+
+FLAGS = tf.app.flags.FLAGS
+
+tf.app.flags.DEFINE_integer('batch_size', 128, """Batch size.""")
+tf.app.flags.DEFINE_integer('num_batches', 100, """Number of batches to run.""")
+tf.app.flags.DEFINE_integer('num_layers', 1, """Number of LSTM layers.""")
+tf.app.flags.DEFINE_integer('max_len', 100, """Maximum sequence length.""")
+tf.app.flags.DEFINE_boolean('forward_only', False,
+ """Only run the forward pass.""")
+tf.app.flags.DEFINE_boolean('forward_backward_only', False,
+                            """Only run the forward-backward pass.""")
+tf.app.flags.DEFINE_integer('hidden_size', 128, """LSTM hidden size.""")
+tf.app.flags.DEFINE_integer('emb_size', 128, """Word embedding size.""")
+tf.app.flags.DEFINE_boolean('log_device_placement', False,
+ """Whether to log device placement.""")
+
+VOCAB_SIZE = 30000
+NUM_CLASS = 2
+
+
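+# NOTE: this helper is not called in this script; it assumes module-level
+# x_input/y_input placeholders, which run_benchmark() defines locally instead.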
+def get_feed_dict(x_data, y_data=None):
+ feed_dict = {}
+
+ if y_data is not None:
+ feed_dict[y_input] = y_data
+
+ for i in xrange(x_data.shape[0]):
+ feed_dict[x_input[i]] = x_data[i, :, :]
+
+ return feed_dict
+
+
+def get_incoming_shape(incoming):
+ """ Returns the incoming data shape """
+ if isinstance(incoming, tf.Tensor):
+ return incoming.get_shape().as_list()
+    elif isinstance(incoming, (np.ndarray, list, tuple)):
+ return np.shape(incoming)
+ else:
+ raise Exception("Invalid incoming layer.")
+
+
+# Note: the input-to-hidden projection (input * W) is done inside LSTMCell,
+# unlike PaddlePaddle, where the projection is applied outside of `lstmemory`.
+def single_lstm(name,
+ incoming,
+ n_units,
+ use_peepholes=True,
+ return_seq=False,
+ return_state=False):
+ with tf.name_scope(name) as scope:
+ cell = tf.nn.rnn_cell.LSTMCell(n_units, use_peepholes=use_peepholes)
+ output, _cell_state = rnn.rnn(cell, incoming, dtype=tf.float32)
+ out = output if return_seq else output[-1]
+ return (out, _cell_state) if return_state else out
+
+
+def lstm(name,
+ incoming,
+ n_units,
+ use_peepholes=True,
+ return_seq=False,
+ return_state=False,
+ num_layers=1):
+ with tf.name_scope(name) as scope:
+ lstm_cell = tf.nn.rnn_cell.LSTMCell(
+ n_units, use_peepholes=use_peepholes)
+ cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * num_layers)
+ initial_state = cell.zero_state(FLAGS.batch_size, dtype=tf.float32)
+ if not isinstance(incoming, list):
+            # If the input is an embedding, the tensor shape is [None, time_step, emb_size].
+ incoming = [
+ tf.squeeze(input_, [1])
+ for input_ in tf.split(1, FLAGS.max_len, incoming)
+ ]
+ outputs, state = tf.nn.rnn(cell,
+ incoming,
+ initial_state=initial_state,
+ dtype=tf.float32)
+ out = outputs if return_seq else outputs[-1]
+        return (out, state) if return_state else out
+
+
+def embedding(name, incoming, vocab_size, emb_size):
+ with tf.name_scope(name) as scope:
+ #with tf.device("/cpu:0"):
+ embedding = tf.get_variable(
+ name + '_emb', [vocab_size, emb_size], dtype=tf.float32)
+ out = tf.nn.embedding_lookup(embedding, incoming)
+ return out
+
+
+def fc(name, inpOp, nIn, nOut, act=True):
+ with tf.name_scope(name) as scope:
+ kernel = tf.get_variable(
+ name + '_w', [nIn, nOut],
+ initializer=tf.truncated_normal_initializer(
+ stddev=0.01, dtype=tf.float32),
+ dtype=tf.float32)
+
+ biases = tf.get_variable(
+ name + '_b', [nOut],
+ initializer=tf.constant_initializer(
+ value=0.0, dtype=tf.float32),
+ dtype=tf.float32,
+ trainable=True)
+
+ net = tf.nn.relu_layer(inpOp, kernel, biases, name=name) if act else \
+ tf.matmul(inpOp, kernel) + biases
+
+ return net
+
+
+def inference(seq):
+ net = embedding('emb', seq, VOCAB_SIZE, FLAGS.emb_size)
+ print "emb:", get_incoming_shape(net)
+ net = lstm('lstm', net, FLAGS.hidden_size, num_layers=FLAGS.num_layers)
+ print "lstm:", get_incoming_shape(net)
+ net = fc('fc1', net, FLAGS.hidden_size, 2)
+ return net
+
+
+def loss(logits, labels):
+    # labels are one-hot vectors of shape [batch_size, NUM_CLASS]
+ labels = tf.cast(labels, tf.float32)
+ cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
+ logits, labels, name='cross_entropy_per_example')
+ cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
+ tf.add_to_collection('losses', cross_entropy_mean)
+ return tf.add_n(tf.get_collection('losses'), name='total_loss')
+
+
+def time_tensorflow_run(session, target, x_input, y_input, info_string):
+ num_steps_burn_in = 50
+ total_duration = 0.0
+ total_duration_squared = 0.0
+ if not isinstance(target, list):
+ target = [target]
+ target_op = tf.group(*target)
+ train_dataset = reader.create_datasets("imdb.pkl", VOCAB_SIZE)
+ for i in xrange(FLAGS.num_batches + num_steps_burn_in):
+ start_time = time.time()
+ data, label = train_dataset.next_batch(FLAGS.batch_size)
+ _ = session.run(target_op, feed_dict={x_input: data, y_input: label})
+ duration = time.time() - start_time
+        if i >= num_steps_burn_in:
+ if not i % 10:
+ print('%s: step %d, duration = %.3f' %
+ (datetime.now(), i - num_steps_burn_in, duration))
+ total_duration += duration
+ total_duration_squared += duration * duration
+ mn = total_duration / FLAGS.num_batches
+ vr = total_duration_squared / FLAGS.num_batches - mn * mn
+ sd = math.sqrt(vr)
+ print('%s: %s across %d steps, %.3f +/- %.3f sec / batch' %
+ (datetime.now(), info_string, FLAGS.num_batches, mn, sd))
+
+
+def run_benchmark():
+ with tf.Graph().as_default():
+ global_step = 0
+ with tf.device('/cpu:0'):
+ global_step = tf.Variable(0, trainable=False)
+ with tf.device('/gpu:0'):
+ #x_input = tf.placeholder(tf.int32, [None, FLAGS.max_len], name="x_input")
+ #y_input = tf.placeholder(tf.int32, [None, NUM_CLASS], name="y_input")
+ x_input = tf.placeholder(
+ tf.int32, [FLAGS.batch_size, FLAGS.max_len], name="x_input")
+ y_input = tf.placeholder(
+ tf.int32, [FLAGS.batch_size, NUM_CLASS], name="y_input")
+            # Real IMDB batches are fed into these placeholders by time_tensorflow_run.
+
+ last_layer = inference(x_input)
+
+ objective = loss(last_layer, y_input)
+ opt = tf.train.AdamOptimizer(0.001)
+ grads = opt.compute_gradients(objective)
+ apply_gradient_op = opt.apply_gradients(
+ grads, global_step=global_step)
+
+ init = tf.initialize_all_variables()
+ sess = tf.Session(config=tf.ConfigProto(
+ allow_soft_placement=True,
+ log_device_placement=FLAGS.log_device_placement))
+ sess.run(init)
+
+ run_forward = True
+ run_forward_backward = True
+ if FLAGS.forward_only and FLAGS.forward_backward_only:
+ raise ValueError("Cannot specify --forward_only and "
+ "--forward_backward_only at the same time.")
+ if FLAGS.forward_only:
+ run_forward_backward = False
+ elif FLAGS.forward_backward_only:
+ run_forward = False
+
+ if run_forward:
+ time_tensorflow_run(sess, last_layer, x_input, y_input,
+ "Forward")
+
+ if run_forward_backward:
+ with tf.control_dependencies([apply_gradient_op]):
+ train_op = tf.no_op(name='train')
+ time_tensorflow_run(sess, [train_op, objective], x_input,
+ y_input, "Forward-backward")
+
+
+def main(_):
+ run_benchmark()
+
+
+if __name__ == '__main__':
+ tf.app.run()
diff --git a/benchmark/tensorflow/rnn/rnn_multi_gpu.py b/benchmark/tensorflow/rnn/rnn_multi_gpu.py
new file mode 100755
index 0000000000000000000000000000000000000000..eabee4fa8fe6325212ace1c11be4862cd2720b08
--- /dev/null
+++ b/benchmark/tensorflow/rnn/rnn_multi_gpu.py
@@ -0,0 +1,322 @@
+#!/usr/bin/env python
+from six.moves import xrange # pylint: disable=redefined-builtin
+import re
+import math
+import time
+import numpy as np
+from datetime import datetime
+
+import reader
+import tensorflow as tf
+from tensorflow.python.ops import rnn
+
+FLAGS = tf.app.flags.FLAGS
+
+tf.app.flags.DEFINE_integer('batch_size', 64, """Batch size.""")
+tf.app.flags.DEFINE_integer('num_batches', 100, """Number of batches to run.""")
+tf.app.flags.DEFINE_integer('num_layers', 1, """Number of LSTM layers.""")
+tf.app.flags.DEFINE_integer('max_len', 100, """Maximum sequence length.""")
+tf.app.flags.DEFINE_integer('hidden_size', 128, """LSTM hidden size.""")
+tf.app.flags.DEFINE_integer('emb_size', 64, """Word embedding size.""")
+tf.app.flags.DEFINE_boolean('log_device_placement', False,
+ """Whether to log device placement.""")
+tf.app.flags.DEFINE_integer('num_gpus', 4, """How many GPUs to use.""")
+
+VOCAB_SIZE = 30000
+NUM_CLASS = 2
+
+NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN = 50000
+NUM_EPOCHS_PER_DECAY = 50
+INITIAL_LEARNING_RATE = 0.1
+LEARNING_RATE_DECAY_FACTOR = 0.1
+TOWER_NAME = 'tower'
+
+train_dataset = reader.create_datasets("imdb.pkl", VOCAB_SIZE)
+
+
+def get_incoming_shape(incoming):
+ """ Returns the incoming data shape """
+ if isinstance(incoming, tf.Tensor):
+ return incoming.get_shape().as_list()
+    elif isinstance(incoming, (np.ndarray, list, tuple)):
+ return np.shape(incoming)
+ else:
+ raise Exception("Invalid incoming layer.")
+
+
+# Note: the input-to-hidden projection (input * W) is done inside LSTMCell,
+# unlike PaddlePaddle, where the projection is applied outside of `lstmemory`.
+def single_lstm(name,
+ incoming,
+ n_units,
+ use_peepholes=True,
+ return_seq=False,
+ return_state=False):
+ with tf.name_scope(name) as scope:
+ cell = tf.nn.rnn_cell.LSTMCell(n_units, use_peepholes=use_peepholes)
+ output, _cell_state = rnn.rnn(cell, incoming, dtype=tf.float32)
+ out = output if return_seq else output[-1]
+ return (out, _cell_state) if return_state else out
+
+
+def lstm(name,
+ incoming,
+ n_units,
+ use_peepholes=True,
+ return_seq=False,
+ return_state=False,
+ num_layers=1):
+ with tf.name_scope(name) as scope:
+ lstm_cell = tf.nn.rnn_cell.LSTMCell(
+ n_units, use_peepholes=use_peepholes)
+ cell = tf.nn.rnn_cell.MultiRNNCell([lstm_cell] * num_layers)
+ initial_state = cell.zero_state(FLAGS.batch_size, dtype=tf.float32)
+ if not isinstance(incoming, list):
+            # If the input is an embedding, the tensor shape is [None, time_step, emb_size].
+ incoming = [
+ tf.squeeze(input_, [1])
+ for input_ in tf.split(1, FLAGS.max_len, incoming)
+ ]
+ outputs, state = tf.nn.rnn(cell,
+ incoming,
+ initial_state=initial_state,
+ dtype=tf.float32)
+ out = outputs if return_seq else outputs[-1]
+        return (out, state) if return_state else out
+
+
+def embedding(name, incoming, vocab_size, emb_size):
+ with tf.name_scope(name) as scope:
+ #with tf.device("/cpu:0"):
+ embedding = tf.get_variable(
+ name + '_emb', [vocab_size, emb_size], dtype=tf.float32)
+ out = tf.nn.embedding_lookup(embedding, incoming)
+ return out
+
+
+def fc(name, inpOp, nIn, nOut, act=True):
+ with tf.name_scope(name) as scope:
+ kernel = tf.get_variable(
+ name + '_w', [nIn, nOut],
+ initializer=tf.truncated_normal_initializer(
+ stddev=0.01, dtype=tf.float32),
+ dtype=tf.float32)
+
+ biases = tf.get_variable(
+ name + '_b', [nOut],
+ initializer=tf.constant_initializer(
+ value=0.0, dtype=tf.float32),
+ dtype=tf.float32,
+ trainable=True)
+
+ net = tf.nn.relu_layer(inpOp, kernel, biases, name=name) if act else \
+ tf.matmul(inpOp, kernel) + biases
+
+ return net
+
+
+def inference(seq):
+ net = embedding('emb', seq, VOCAB_SIZE, FLAGS.emb_size)
+ print "emb:", get_incoming_shape(net)
+ net = lstm('lstm', net, FLAGS.hidden_size, num_layers=FLAGS.num_layers)
+ print "lstm:", get_incoming_shape(net)
+ net = fc('fc1', net, FLAGS.hidden_size, 2)
+ return net
+
+
+def loss(logits, labels):
+    # labels are one-hot vectors of shape [batch_size, NUM_CLASS]
+ #labels = tf.cast(labels, tf.int64)
+ # cross_entropy = tf.nn.sparse_softmax_cross_entropy_with_logits(
+ # logits, labels, name='cross_entropy_per_example')
+ labels = tf.cast(labels, tf.float32)
+ cross_entropy = tf.nn.softmax_cross_entropy_with_logits(
+ logits, labels, name='cross_entropy_per_example')
+ cross_entropy_mean = tf.reduce_mean(cross_entropy, name='cross_entropy')
+ tf.add_to_collection('losses', cross_entropy_mean)
+ return tf.add_n(tf.get_collection('losses'), name='total_loss')
+
+
+def tower_loss(scope):
+ """Calculate the total loss on a single tower running the model.
+ Args:
+ scope: unique prefix string identifying the tower, e.g. 'tower_0'
+ Returns:
+ Tensor of shape [] containing the total loss for a batch of data
+ """
+ data, label = train_dataset.next_batch(FLAGS.batch_size)
+
+ # Build a Graph that computes the logits predictions from the
+ # inference model.
+ last_layer = inference(data)
+
+ # Build the portion of the Graph calculating the losses. Note that we will
+ # assemble the total_loss using a custom function below.
+    _ = loss(last_layer, label)
+
+ # Assemble all of the losses for the current tower only.
+ losses = tf.get_collection('losses', scope)
+
+ # Calculate the total loss for the current tower.
+ total_loss = tf.add_n(losses, name='total_loss')
+
+ # Compute the moving average of all individual losses and the total loss.
+ loss_averages = tf.train.ExponentialMovingAverage(0.9, name='avg')
+ loss_averages_op = loss_averages.apply(losses + [total_loss])
+
+ # Attach a scalar summary to all individual losses and the total loss; do the
+ # same for the averaged version of the losses.
+ for l in losses + [total_loss]:
+ # Remove 'tower_[0-9]/' from the name in case this is a multi-GPU training
+ # session. This helps the clarity of presentation on tensorboard.
+ loss_name = re.sub('%s_[0-9]*/' % TOWER_NAME, '', l.op.name)
+ # Name each loss as '(raw)' and name the moving average version of the loss
+ # as the original loss name.
+ tf.scalar_summary(loss_name + ' (raw)', l)
+ #tf.scalar_summary(loss_name, loss_averages.average(l))
+
+ with tf.control_dependencies([loss_averages_op]):
+ total_loss = tf.identity(total_loss)
+ return total_loss
+
+
+def average_gradients(tower_grads):
+ """Calculate the average gradient for each shared variable across all towers.
+ Note that this function provides a synchronization point across all towers.
+ Args:
+ tower_grads: List of lists of (gradient, variable) tuples. The outer list
+ is over individual gradients. The inner list is over the gradient
+ calculation for each tower.
+ Returns:
+ List of pairs of (gradient, variable) where the gradient has been averaged
+ across all towers.
+ """
+ average_grads = []
+ for grad_and_vars in zip(*tower_grads):
+ # Note that each grad_and_vars looks like the following:
+ # ((grad0_gpu0, var0_gpu0), ... , (grad0_gpuN, var0_gpuN))
+ grads = []
+ for g, _ in grad_and_vars:
+ # Add 0 dimension to the gradients to represent the tower.
+ expanded_g = tf.expand_dims(g, 0)
+
+ # Append on a 'tower' dimension which we will average over below.
+ grads.append(expanded_g)
+
+ # Average over the 'tower' dimension.
+ grad = tf.concat(0, grads)
+ grad = tf.reduce_mean(grad, 0)
+
+ # Keep in mind that the Variables are redundant because they are shared
+ # across towers. So .. we will just return the first tower's pointer to
+ # the Variable.
+ v = grad_and_vars[0][1]
+ grad_and_var = (grad, v)
+ average_grads.append(grad_and_var)
+ return average_grads
+
+
+def time_tensorflow_run(session, target):
+ num_steps_burn_in = 80
+ total_duration = 0.0
+ total_duration_squared = 0.0
+ for i in xrange(FLAGS.num_batches + num_steps_burn_in):
+ start_time = time.time()
+        _, loss_value = session.run(target)
+ duration = time.time() - start_time
+        if i >= num_steps_burn_in:
+ if not i % 10:
+ num_examples_per_step = FLAGS.batch_size * FLAGS.num_gpus
+ examples_per_sec = num_examples_per_step / duration
+ # sec_per_batch = duration / FLAGS.num_gpus
+ sec_per_batch = duration
+
+ format_str = (
+ '%s: step %d, loss= %.2f (%.1f examples/sec; %.3f '
+ 'sec/batch batch_size= %d)')
+ print(format_str %
+ (datetime.now(), i - num_steps_burn_in, loss_value,
+                       examples_per_sec, sec_per_batch, num_examples_per_step))
+
+ total_duration += duration
+ total_duration_squared += duration * duration
+
+ mn = total_duration / FLAGS.num_batches
+ vr = total_duration_squared / FLAGS.num_batches - mn * mn
+ sd = math.sqrt(vr)
+ print('%s: FwdBwd across %d steps, %.3f +/- %.3f sec / batch' %
+ (datetime.now(), FLAGS.num_batches, mn, sd))
+
+
+def run_benchmark():
+ with tf.Graph().as_default(), tf.device('/cpu:0'):
+ # Create a variable to count the number of train() calls. This equals the
+ # number of batches processed * FLAGS.num_gpus.
+ global_step = tf.get_variable(
+ 'global_step', [],
+ initializer=tf.constant_initializer(0),
+ trainable=False)
+
+ # Calculate the learning rate schedule.
+ num_batches_per_epoch = (NUM_EXAMPLES_PER_EPOCH_FOR_TRAIN /
+ FLAGS.batch_size)
+ decay_steps = int(num_batches_per_epoch * NUM_EPOCHS_PER_DECAY)
+
+ # Create an optimizer that performs gradient descent.
+ opt = tf.train.AdamOptimizer(0.001)
+
+ #train_dataset = reader.create_datasets("imdb.pkl", VOCAB_SIZE)
+
+ # Calculate the gradients for each model tower.
+ tower_grads = []
+ for i in xrange(FLAGS.num_gpus):
+ with tf.device('/gpu:%d' % i):
+ with tf.name_scope('%s_%d' % (TOWER_NAME, i)) as scope:
+ # Calculate the loss for one tower of the model. This function
+ # constructs the entire model but shares the variables across
+ # all towers.
+ loss = tower_loss(scope)
+
+ # Reuse variables for the next tower.
+ tf.get_variable_scope().reuse_variables()
+
+ # Retain the summaries from the final tower.
+ # summaries = tf.get_collection(tf.GraphKeys.SUMMARIES, scope)
+
+ # Calculate the gradients for the batch of data on this tower.
+ grads = opt.compute_gradients(loss)
+
+ # Keep track of the gradients across all towers.
+ tower_grads.append(grads)
+
+ # We must calculate the mean of each gradient. Note that this is the
+ # synchronization point across all towers.
+ grads = average_gradients(tower_grads)
+
+ # Apply the gradients to adjust the shared variables.
+ apply_gradient_op = opt.apply_gradients(grads, global_step=global_step)
+
+ # Group all updates to into a single train op.
+ train_op = tf.group(apply_gradient_op)
+
+ # Build an initialization operation.
+ init = tf.initialize_all_variables()
+
+ # Start running operations on the Graph. allow_soft_placement must be set to
+ # True to build towers on GPU, as some of the ops do not have GPU
+ # implementations.
+ sess = tf.Session(config=tf.ConfigProto(
+ allow_soft_placement=True,
+ log_device_placement=FLAGS.log_device_placement))
+ sess.run(init)
+ time_tensorflow_run(sess, [train_op, loss])
+
+
+def main(_):
+ run_benchmark()
+
+
+if __name__ == '__main__':
+ tf.app.run()
diff --git a/benchmark/tensorflow/rnn/run.sh b/benchmark/tensorflow/rnn/run.sh
new file mode 100755
index 0000000000000000000000000000000000000000..bb4c69cb95f965eff35f1c5a60376bf1e84f841b
--- /dev/null
+++ b/benchmark/tensorflow/rnn/run.sh
@@ -0,0 +1,29 @@
+set -e
+
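+# Usage: test <lstm_num> <batch_size> <hidden_size>
+# Benchmarks rnn.py (forward-backward only) and writes the log to logs/.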
+function test() {
+ lstm_num=$1
+ batch_size=$2
+ hid_size=$3
+ python rnn.py --num_layers=${lstm_num} --batch_size=$batch_size \
+ --hidden_size=${hid_size} \
+ --forward_backward_only=1 \
+ > logs/1gpu-${lstm_num}lstm-batch${batch_size}-hid${hid_size}.log 2>&1
+}
+
+if [ ! -d "logs" ]; then
+ mkdir logs
+fi
+
+#--lstm_num--batch_size--hidden_size--#
+test 2 64 256
+test 2 64 512
+test 2 64 1280
+
+test 2 128 256
+test 2 128 512
+test 2 128 1280
+
+test 2 256 256
+test 2 256 512
+test 2 256 1280
diff --git a/benchmark/tensorflow/rnn/run_multi.sh b/benchmark/tensorflow/rnn/run_multi.sh
new file mode 100755
index 0000000000000000000000000000000000000000..f7f52e01e38d304bb3bf8185c53bd0da26014d3a
--- /dev/null
+++ b/benchmark/tensorflow/rnn/run_multi.sh
@@ -0,0 +1,28 @@
+set -e
+
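+# Usage: test <num_gpus> <lstm_num> <hidden_size> <total_batch_size>
+# The total batch size is split evenly across GPUs (--batch_size is per GPU).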
+function test() {
+ num_gpu=$1
+ lstm_num=$2
+ hid_size=$3
+  batch_size=$4
+  batch_per_gpu=`expr ${batch_size} / ${num_gpu}`
+ python rnn_multi_gpu.py --num_layers=${lstm_num} --batch_size=$batch_per_gpu \
+ --num_gpus=${num_gpu} \
+ --hidden_size=${hid_size} \
+ --forward_backward_only=1 \
+ > logs/${num_gpu}gpu-${lstm_num}lstm-hid${hid_size}-batch${batch_size}.log 2>&1
+}
+
+if [ ! -d "logs" ]; then
+ mkdir logs
+fi
+
+#--num_gpus--lstm_num--hidden_size--batch_size--#
+test 4 2 256 128
+test 4 2 256 256
+test 4 2 256 512
+
+test 4 2 512 128
+test 4 2 512 256
+test 4 2 512 512
+
diff --git a/cmake/version.cmake b/cmake/version.cmake
new file mode 100644
index 0000000000000000000000000000000000000000..a0518e07e88a1ff468c301523f888c7d95e15185
--- /dev/null
+++ b/cmake/version.cmake
@@ -0,0 +1,24 @@
+# Get the latest git tag.
+set(PADDLE_VERSION $ENV{PADDLE_VERSION})
+set(tmp_version "HEAD")
+while ("${PADDLE_VERSION}" STREQUAL "")
+ execute_process(
+ COMMAND ${GIT_EXECUTABLE} describe --tags --abbrev=0 ${tmp_version}
+ WORKING_DIRECTORY ${PROJ_ROOT}
+ OUTPUT_VARIABLE GIT_TAG_NAME
+ RESULT_VARIABLE GIT_RESULT
+ ERROR_QUIET OUTPUT_STRIP_TRAILING_WHITESPACE)
+ if (NOT ${GIT_RESULT})
+    # Check that the tag is a valid version number.
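+    # e.g. tags such as v0.9.0 or v0.9.0.rc.1 match; the leading "v" is
+    # stripped to form PADDLE_VERSION.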
+ if (${GIT_TAG_NAME} MATCHES "v[0-9]+\\.[0-9]+\\.[0-9]+(\\.(a|b|rc)\\.[0-9]+)?")
+ string(REPLACE "v" "" PADDLE_VERSION ${GIT_TAG_NAME})
+ else() # otherwise, get the previous git tag name.
+ set(tmp_version "${GIT_TAG_NAME}~1")
+ endif()
+ else()
+ set(PADDLE_VERSION "0.0.0")
+    message(WARNING "Cannot determine Paddle version from git tag")
+ endif()
+endwhile()
+
+message(STATUS "Paddle version is ${PADDLE_VERSION}")
diff --git a/demo/gan/.gitignore b/demo/gan/.gitignore
new file mode 100644
index 0000000000000000000000000000000000000000..93a6f5080a16a601cffb0bff51af9aef3ba3bae7
--- /dev/null
+++ b/demo/gan/.gitignore
@@ -0,0 +1,11 @@
+output/
+uniform_params/
+cifar_params/
+mnist_params/
+*.png
+.pydevproject
+.project
+*.log
+*.pyc
+data/mnist_data/
+data/cifar-10-batches-py/
diff --git a/demo/gan/README.md b/demo/gan/README.md
new file mode 100644
index 0000000000000000000000000000000000000000..fdc970a07b488c3a4146c9baa76a133a456fc9ab
--- /dev/null
+++ b/demo/gan/README.md
@@ -0,0 +1,13 @@
+# Generative Adversarial Networks (GAN)
+
+This demo implements GAN training described in the original GAN paper (https://arxiv.org/abs/1406.2661) and DCGAN (https://arxiv.org/abs/1511.06434).
+
+The general training procedures are implemented in gan_trainer.py. The neural network configurations are specified in gan_conf.py (for synthetic data) and gan_conf_image.py (for image data).
+
+To run the model, first download the corresponding data by running the shell scripts in ./data.
+Then run the command below. The -d flag specifies the training data (cifar, mnist, or uniform), and the --use_gpu flag specifies whether to train on GPU (0 is CPU, 1 is GPU).
+
+```bash
+python gan_trainer.py -d cifar --use_gpu 1
+```
+
+The generated images will be stored in ./cifar_samples/ and the corresponding models in ./cifar_params/.
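+
+For example, to train on the synthetic 2-D uniform data on CPU instead:
+
+```bash
+python gan_trainer.py -d uniform --use_gpu 0
+```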
diff --git a/demo/gan/data/download_cifar.sh b/demo/gan/data/download_cifar.sh
new file mode 100755
index 0000000000000000000000000000000000000000..ea3be594cd08f829e94f2c692a44947baa62b759
--- /dev/null
+++ b/demo/gan/data/download_cifar.sh
@@ -0,0 +1,18 @@
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+set -e
+wget https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz
+tar zxf cifar-10-python.tar.gz
+rm cifar-10-python.tar.gz
+
diff --git a/demo/gan/data/get_mnist_data.sh b/demo/gan/data/get_mnist_data.sh
new file mode 100644
index 0000000000000000000000000000000000000000..d21bf7067135f1f8be486ef0f13fc3ec94ffc4ed
--- /dev/null
+++ b/demo/gan/data/get_mnist_data.sh
@@ -0,0 +1,19 @@
+#!/usr/bin/env sh
+# This script downloads the mnist data and unzips it.
+set -e
+DIR="$( cd "$(dirname "$0")" ; pwd -P )"
+rm -rf "$DIR/mnist_data"
+mkdir "$DIR/mnist_data"
+cd "$DIR/mnist_data"
+
+echo "Downloading..."
+
+for fname in train-images-idx3-ubyte train-labels-idx1-ubyte t10k-images-idx3-ubyte t10k-labels-idx1-ubyte
+do
+ if [ ! -e $fname ]; then
+ wget --no-check-certificate http://yann.lecun.com/exdb/mnist/${fname}.gz
+ gunzip ${fname}.gz
+ fi
+done
+
+
diff --git a/demo/gan/gan_conf.py b/demo/gan/gan_conf.py
new file mode 100644
index 0000000000000000000000000000000000000000..05eee3a9b9ce455eb3a5d47d3165ee7f42f1002e
--- /dev/null
+++ b/demo/gan/gan_conf.py
@@ -0,0 +1,134 @@
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from paddle.trainer_config_helpers import *
+
+mode = get_config_arg("mode", str, "generator")
+assert mode in set(["generator",
+ "discriminator",
+ "generator_training",
+ "discriminator_training"])
+
+is_generator_training = mode == "generator_training"
+is_discriminator_training = mode == "discriminator_training"
+is_generator = mode == "generator"
+is_discriminator = mode == "discriminator"
+
+# The network structure below follows the ref https://arxiv.org/abs/1406.2661
+# Here we used two hidden layers and batch_norm
+
+print('mode=%s' % mode)
+# the dim of the noise (z) as the input of the generator network
+noise_dim = 10
+# the dim of the hidden layer
+hidden_dim = 10
+# the dim of the generated sample
+sample_dim = 2
+
+settings(
+ batch_size=128,
+ learning_rate=1e-4,
+ learning_method=AdamOptimizer(beta1=0.5)
+)
+
+def discriminator(sample):
+ """
+    The discriminator outputs the probability that a sample comes from the
+    generator or from real data.
+    The output has two dimensions: dimension 0 is the probability that the
+    sample comes from the generator, and dimension 1 is the probability that
+    it comes from real data.
+ """
+ param_attr = ParamAttr(is_static=is_generator_training)
+ bias_attr = ParamAttr(is_static=is_generator_training,
+ initial_mean=1.0,
+ initial_std=0)
+
+ hidden = fc_layer(input=sample, name="dis_hidden", size=hidden_dim,
+ bias_attr=bias_attr,
+ param_attr=param_attr,
+ act=ReluActivation())
+
+ hidden2 = fc_layer(input=hidden, name="dis_hidden2", size=hidden_dim,
+ bias_attr=bias_attr,
+ param_attr=param_attr,
+ act=LinearActivation())
+
+ hidden_bn = batch_norm_layer(hidden2,
+ act=ReluActivation(),
+ name="dis_hidden_bn",
+ bias_attr=bias_attr,
+ param_attr=ParamAttr(is_static=is_generator_training,
+ initial_mean=1.0,
+ initial_std=0.02),
+ use_global_stats=False)
+
+ return fc_layer(input=hidden_bn, name="dis_prob", size=2,
+ bias_attr=bias_attr,
+ param_attr=param_attr,
+ act=SoftmaxActivation())
+
+def generator(noise):
+ """
+ generator generates a sample given noise
+ """
+ param_attr = ParamAttr(is_static=is_discriminator_training)
+ bias_attr = ParamAttr(is_static=is_discriminator_training,
+ initial_mean=1.0,
+ initial_std=0)
+
+ hidden = fc_layer(input=noise,
+ name="gen_layer_hidden",
+ size=hidden_dim,
+ bias_attr=bias_attr,
+ param_attr=param_attr,
+ act=ReluActivation())
+
+ hidden2 = fc_layer(input=hidden, name="gen_hidden2", size=hidden_dim,
+ bias_attr=bias_attr,
+ param_attr=param_attr,
+ act=LinearActivation())
+
+ hidden_bn = batch_norm_layer(hidden2,
+ act=ReluActivation(),
+ name="gen_layer_hidden_bn",
+ bias_attr=bias_attr,
+ param_attr=ParamAttr(is_static=is_discriminator_training,
+ initial_mean=1.0,
+ initial_std=0.02),
+ use_global_stats=False)
+
+ return fc_layer(input=hidden_bn,
+ name="gen_layer1",
+ size=sample_dim,
+ bias_attr=bias_attr,
+ param_attr=param_attr,
+ act=LinearActivation())
+
+if is_generator_training:
+ noise = data_layer(name="noise", size=noise_dim)
+ sample = generator(noise)
+
+if is_discriminator_training:
+ sample = data_layer(name="sample", size=sample_dim)
+
+if is_generator_training or is_discriminator_training:
+ label = data_layer(name="label", size=1)
+ prob = discriminator(sample)
+ cost = cross_entropy(input=prob, label=label)
+ classification_error_evaluator(input=prob, label=label, name=mode+'_error')
+ outputs(cost)
+
+if is_generator:
+ noise = data_layer(name="noise", size=noise_dim)
+ outputs(generator(noise))
diff --git a/demo/gan/gan_conf_image.py b/demo/gan/gan_conf_image.py
new file mode 100644
index 0000000000000000000000000000000000000000..dc5910e9f02d7aac59207fdaa0222d01ac3bf609
--- /dev/null
+++ b/demo/gan/gan_conf_image.py
@@ -0,0 +1,264 @@
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+from paddle.trainer_config_helpers import *
+
+mode = get_config_arg("mode", str, "generator")
+dataSource = get_config_arg("data", str, "mnist")
+assert mode in set(["generator",
+ "discriminator",
+ "generator_training",
+ "discriminator_training"])
+
+is_generator_training = mode == "generator_training"
+is_discriminator_training = mode == "discriminator_training"
+is_generator = mode == "generator"
+is_discriminator = mode == "discriminator"
+
+# The network structure below follows the dcgan paper
+# (https://arxiv.org/abs/1511.06434)
+
+print('mode=%s' % mode)
+# the dim of the noise (z) as the input of the generator network
+noise_dim = 100
+# the number of filters in the generator/discriminator layer that is
+# closest to the image
+gf_dim = 64
+df_dim = 64
+if dataSource == "mnist":
+ sample_dim = 28 # image dim
+ c_dim = 1 # image color
+else:
+ sample_dim = 32
+ c_dim = 3
+s2, s4 = int(sample_dim/2), int(sample_dim/4)
+s8, s16 = int(sample_dim/8), int(sample_dim/16)
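+# spatial sizes of the intermediate feature maps (the sample is halved per stage)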
+
+settings(
+ batch_size=128,
+ learning_rate=2e-4,
+ learning_method=AdamOptimizer(beta1=0.5)
+)
+
+def conv_bn(input, channels, imgSize, num_filters, output_x, stride, name,
+ param_attr, bias_attr, param_attr_bn, bn, trans=False,
+ act=ReluActivation()):
+
+ """
+ conv_bn is a utility function that constructs a convolution/deconv layer
+ with an optional batch_norm layer
+
+ :param bn: whether to use batch_norm_layer
+ :type bn: bool
+ :param trans: whether to use conv (False) or deconv (True)
+ :type trans: bool
+ """
+
+ # calculate the filter_size and padding size based on the given
+    # imgSize and output size
+ tmp = imgSize - (output_x - 1) * stride
+ if tmp <= 1 or tmp > 5:
+ raise ValueError("conv input-output dimension does not fit")
+ elif tmp <= 3:
+ filter_size = tmp + 2
+ padding = 1
+ else:
+ filter_size = tmp
+ padding = 0
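+    # e.g. a conv with imgSize=32, output_x=16 and stride=2 gives tmp=2, so
+    # filter_size=4 and padding=1; check: (32 + 2*1 - 4) / 2 + 1 = 16.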
+
+ print (imgSize, output_x, stride, filter_size, padding)
+
+    if trans:
+        nameApx = "_convt"
+    else:
+        nameApx = "_conv"
+
+ if bn:
+ conv = img_conv_layer(input, filter_size=filter_size,
+ num_filters=num_filters,
+ name=name + nameApx, num_channels=channels,
+ act=LinearActivation(), groups=1, stride=stride,
+ padding=padding, bias_attr=bias_attr,
+ param_attr=param_attr, shared_biases=True, layer_attr=None,
+ filter_size_y=None, stride_y=None, padding_y=None,
+ trans=trans)
+
+ conv_bn = batch_norm_layer(conv,
+ act=act,
+ name=name + nameApx + "_bn",
+ bias_attr=bias_attr,
+ param_attr=param_attr_bn,
+ use_global_stats=False)
+
+ return conv_bn
+ else:
+ conv = img_conv_layer(input, filter_size=filter_size,
+ num_filters=num_filters,
+ name=name + nameApx, num_channels=channels,
+ act=act, groups=1, stride=stride,
+ padding=padding, bias_attr=bias_attr,
+ param_attr=param_attr, shared_biases=True, layer_attr=None,
+ filter_size_y=None, stride_y=None, padding_y=None,
+ trans=trans)
+ return conv
+
+def generator(noise):
+ """
+ generator generates a sample given noise
+ """
+ param_attr = ParamAttr(is_static=is_discriminator_training,
+ initial_mean=0.0,
+ initial_std=0.02)
+ bias_attr = ParamAttr(is_static=is_discriminator_training,
+ initial_mean=0.0,
+ initial_std=0.0)
+
+ param_attr_bn=ParamAttr(is_static=is_discriminator_training,
+ initial_mean=1.0,
+ initial_std=0.02)
+
+ h1 = fc_layer(input=noise,
+ name="gen_layer_h1",
+ size=s8 * s8 * gf_dim * 4,
+ bias_attr=bias_attr,
+ param_attr=param_attr,
+ act=LinearActivation())
+
+ h1_bn = batch_norm_layer(h1,
+ act=ReluActivation(),
+ name="gen_layer_h1_bn",
+ bias_attr=bias_attr,
+ param_attr=param_attr_bn,
+ use_global_stats=False)
+
+ h2_bn = conv_bn(h1_bn,
+ channels=gf_dim*4,
+ output_x=s8,
+ num_filters=gf_dim*2,
+ imgSize=s4,
+ stride=2,
+ name="gen_layer_h2",
+ param_attr=param_attr,
+ bias_attr=bias_attr,
+ param_attr_bn=param_attr_bn,
+ bn=True,
+ trans=True)
+
+ h3_bn = conv_bn(h2_bn,
+ channels=gf_dim*2,
+ output_x=s4,
+ num_filters=gf_dim,
+ imgSize=s2,
+ stride=2,
+ name="gen_layer_h3",
+ param_attr=param_attr,
+ bias_attr=bias_attr,
+ param_attr_bn=param_attr_bn,
+ bn=True,
+ trans=True)
+
+
+ return conv_bn(h3_bn,
+ channels=gf_dim,
+ output_x=s2,
+ num_filters=c_dim,
+ imgSize=sample_dim,
+ stride=2,
+ name="gen_layer_h4",
+ param_attr=param_attr,
+ bias_attr=bias_attr,
+ param_attr_bn=param_attr_bn,
+ bn=False,
+ trans=True,
+ act=TanhActivation())
+
+
+def discriminator(sample):
+ """
+    The discriminator outputs the probability that a sample comes from the
+    generator or from real data.
+    The output has two dimensions: dimension 0 is the probability that the
+    sample comes from the generator, and dimension 1 is the probability that
+    it comes from real data.
+ """
+ param_attr = ParamAttr(is_static=is_generator_training,
+ initial_mean=0.0,
+ initial_std=0.02)
+ bias_attr = ParamAttr(is_static=is_generator_training,
+ initial_mean=0.0,
+ initial_std=0.0)
+
+ param_attr_bn=ParamAttr(is_static=is_generator_training,
+ initial_mean=1.0,
+ initial_std=0.02)
+
+ h0 = conv_bn(sample,
+ channels=c_dim,
+ imgSize=sample_dim,
+ num_filters=df_dim,
+ output_x=s2,
+ stride=2,
+ name="dis_h0",
+ param_attr=param_attr,
+ bias_attr=bias_attr,
+ param_attr_bn=param_attr_bn,
+ bn=False)
+
+ h1_bn = conv_bn(h0,
+ channels=df_dim,
+ imgSize=s2,
+ num_filters=df_dim*2,
+ output_x=s4,
+ stride=2,
+ name="dis_h1",
+ param_attr=param_attr,
+ bias_attr=bias_attr,
+ param_attr_bn=param_attr_bn,
+ bn=True)
+
+ h2_bn = conv_bn(h1_bn,
+ channels=df_dim*2,
+ imgSize=s4,
+ num_filters=df_dim*4,
+ output_x=s8,
+ stride=2,
+ name="dis_h2",
+ param_attr=param_attr,
+ bias_attr=bias_attr,
+ param_attr_bn=param_attr_bn,
+ bn=True)
+
+ return fc_layer(input=h2_bn, name="dis_prob", size=2,
+ bias_attr=bias_attr,
+ param_attr=param_attr,
+ act=SoftmaxActivation())
+
+
+
+if is_generator_training:
+ noise = data_layer(name="noise", size=noise_dim)
+ sample = generator(noise)
+
+if is_discriminator_training:
+ sample = data_layer(name="sample", size=sample_dim * sample_dim*c_dim)
+
+if is_generator_training or is_discriminator_training:
+ label = data_layer(name="label", size=1)
+ prob = discriminator(sample)
+ cost = cross_entropy(input=prob, label=label)
+ classification_error_evaluator(input=prob, label=label, name=mode+'_error')
+ outputs(cost)
+
+if is_generator:
+ noise = data_layer(name="noise", size=noise_dim)
+ outputs(generator(noise))
diff --git a/demo/gan/gan_trainer.py b/demo/gan/gan_trainer.py
new file mode 100644
index 0000000000000000000000000000000000000000..72699952b961cb5bf6ac14dd65eee1aeab5e2a7c
--- /dev/null
+++ b/demo/gan/gan_trainer.py
@@ -0,0 +1,329 @@
+# Copyright (c) 2016 Baidu, Inc. All Rights Reserved
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+import argparse
+import random
+import numpy
+import cPickle
+import sys, os
+from PIL import Image
+
+from paddle.trainer.config_parser import parse_config
+from paddle.trainer.config_parser import logger
+import py_paddle.swig_paddle as api
+import matplotlib.pyplot as plt
+
+def plot2DScatter(data, outputfile):
+ '''
+ Plot the data as a 2D scatter plot and save to outputfile
+    data needs to be two-dimensional
+ '''
+ x = data[:, 0]
+ y = data[:, 1]
+ logger.info("The mean vector is %s" % numpy.mean(data, 0))
+ logger.info("The std vector is %s" % numpy.std(data, 0))
+
+ heatmap, xedges, yedges = numpy.histogram2d(x, y, bins=50)
+ extent = [xedges[0], xedges[-1], yedges[0], yedges[-1]]
+
+ plt.clf()
+ plt.scatter(x, y)
+ plt.savefig(outputfile, bbox_inches='tight')
+
+def CHECK_EQ(a, b):
+ assert a == b, "a=%s, b=%s" % (a, b)
+
+def copy_shared_parameters(src, dst):
+ '''
+ copy the parameters from src to dst
+ :param src: the source of the parameters
+ :type src: GradientMachine
+ :param dst: the destination of the parameters
+ :type dst: GradientMachine
+ '''
+ src_params = [src.getParameter(i)
+ for i in xrange(src.getParameterSize())]
+ src_params = dict([(p.getName(), p) for p in src_params])
+
+
+ for i in xrange(dst.getParameterSize()):
+ dst_param = dst.getParameter(i)
+ src_param = src_params.get(dst_param.getName(), None)
+ if src_param is None:
+ continue
+ src_value = src_param.getBuf(api.PARAMETER_VALUE)
+ dst_value = dst_param.getBuf(api.PARAMETER_VALUE)
+ CHECK_EQ(len(src_value), len(dst_value))
+ dst_value.copyFrom(src_value)
+ dst_param.setValueUpdated()
+
+def print_parameters(src):
+ src_params = [src.getParameter(i)
+ for i in xrange(src.getParameterSize())]
+
+ print "***************"
+ for p in src_params:
+ print "Name is %s" % p.getName()
+ print "value is %s \n" % p.getBuf(api.PARAMETER_VALUE).copyToNumpyArray()
+
+def load_mnist_data(imageFile):
+ f = open(imageFile, "rb")
+ f.read(16)
+
+ # Define number of samples for train/test
+ if "train" in imageFile:
+ n = 60000
+ else:
+ n = 10000
+
+ data = numpy.fromfile(f, 'ubyte', count=n*28*28).reshape((n, 28*28))
+ data = data / 255.0 * 2.0 - 1.0
+
+ f.close()
+ return data.astype('float32')
+
+def load_cifar_data(cifar_path):
+ batch_size = 10000
+ data = numpy.zeros((5*batch_size, 32*32*3), dtype = "float32")
+ for i in range(1, 6):
+        fname = cifar_path + "/data_batch_" + str(i)
+        fo = open(fname, 'rb')
+        batch = cPickle.load(fo)
+        fo.close()
+        data[(i - 1)*batch_size:(i*batch_size), :] = batch["data"]
+
+ data = data / 255.0 * 2.0 - 1.0
+ return data
+
+# synthesize 2-D uniform data
+def load_uniform_data():
+ data = numpy.random.rand(1000000, 2).astype('float32')
+ return data
+
+def merge(images, size):
+ if images.shape[1] == 28*28:
+ h, w, c = 28, 28, 1
+ else:
+ h, w, c = 32, 32, 3
+ img = numpy.zeros((h * size[0], w * size[1], c))
+ for idx in xrange(size[0] * size[1]):
+ i = idx % size[1]
+ j = idx // size[1]
+ img[j*h:j*h+h, i*w:i*w+w, :] = \
+ ((images[idx, :].reshape((h, w, c), order="F").transpose(1, 0, 2) + 1.0) / 2.0 * 255.0)
+ return img.astype('uint8')
+
+def save_images(images, path):
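+    # Tile the first 64 samples into an 8x8 grid and save them as one image.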
+ merged_img = merge(images, [8, 8])
+ if merged_img.shape[2] == 1:
+ im = Image.fromarray(numpy.squeeze(merged_img)).convert('RGB')
+ else:
+ im = Image.fromarray(merged_img, mode="RGB")
+ im.save(path)
+
+def get_real_samples(batch_size, data_np):
+ return data_np[numpy.random.choice(data_np.shape[0], batch_size,
+ replace=False),:]
+
+def get_noise(batch_size, noise_dim):
+ return numpy.random.normal(size=(batch_size, noise_dim)).astype('float32')
+
+def get_fake_samples(generator_machine, batch_size, noise):
+ gen_inputs = api.Arguments.createArguments(1)
+ gen_inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(noise))
+ gen_outputs = api.Arguments.createArguments(0)
+ generator_machine.forward(gen_inputs, gen_outputs, api.PASS_TEST)
+ fake_samples = gen_outputs.getSlotValue(0).copyToNumpyMat()
+ return fake_samples
+
+def get_training_loss(training_machine, inputs):
+ outputs = api.Arguments.createArguments(0)
+ training_machine.forward(inputs, outputs, api.PASS_TEST)
+ loss = outputs.getSlotValue(0).copyToNumpyMat()
+ return numpy.mean(loss)
+
+def prepare_discriminator_data_batch_pos(batch_size, data_np):
+ real_samples = get_real_samples(batch_size, data_np)
+ labels = numpy.ones(batch_size, dtype='int32')
+ inputs = api.Arguments.createArguments(2)
+ inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(real_samples))
+ inputs.setSlotIds(1, api.IVector.createVectorFromNumpy(labels))
+ return inputs
+
+def prepare_discriminator_data_batch_neg(generator_machine, batch_size, noise):
+ fake_samples = get_fake_samples(generator_machine, batch_size, noise)
+ labels = numpy.zeros(batch_size, dtype='int32')
+ inputs = api.Arguments.createArguments(2)
+ inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(fake_samples))
+ inputs.setSlotIds(1, api.IVector.createVectorFromNumpy(labels))
+ return inputs
+
+def prepare_generator_data_batch(batch_size, noise):
+ label = numpy.ones(batch_size, dtype='int32')
+ inputs = api.Arguments.createArguments(2)
+ inputs.setSlotValue(0, api.Matrix.createDenseFromNumpy(noise))
+ inputs.setSlotIds(1, api.IVector.createVectorFromNumpy(label))
+ return inputs
+
+
+def find(iterable, cond):
+ for item in iterable:
+ if cond(item):
+ return item
+ return None
+
+
+def get_layer_size(model_conf, layer_name):
+ layer_conf = find(model_conf.layers, lambda x: x.name == layer_name)
+ assert layer_conf is not None, "Cannot find '%s' layer" % layer_name
+ return layer_conf.size
+
+
+def main():
+ parser = argparse.ArgumentParser()
+ parser.add_argument("-d", "--data_source", help="mnist or cifar or uniform")
+ parser.add_argument("--use_gpu", default="1",
+ help="1 means use gpu for training")
+ parser.add_argument("--gpu_id", default="0",
+ help="the gpu_id parameter")
+ args = parser.parse_args()
+ data_source = args.data_source
+ use_gpu = args.use_gpu
+ assert data_source in ["mnist", "cifar", "uniform"]
+ assert use_gpu in ["0", "1"]
+
+ if not os.path.exists("./%s_samples/" % data_source):
+ os.makedirs("./%s_samples/" % data_source)
+
+ if not os.path.exists("./%s_params/" % data_source):
+ os.makedirs("./%s_params/" % data_source)
+
+ api.initPaddle('--use_gpu=' + use_gpu, '--dot_period=10', '--log_period=100',
+ '--gpu_id=' + args.gpu_id, '--save_dir=' + "./%s_params/" % data_source)
+
+ if data_source == "uniform":
+ conf = "gan_conf.py"
+ num_iter = 10000
+ else:
+ conf = "gan_conf_image.py"
+ num_iter = 1000
+
+ gen_conf = parse_config(conf, "mode=generator_training,data=" + data_source)
+ dis_conf = parse_config(conf, "mode=discriminator_training,data=" + data_source)
+ generator_conf = parse_config(conf, "mode=generator,data=" + data_source)
+ batch_size = dis_conf.opt_config.batch_size
+ noise_dim = get_layer_size(gen_conf.model_config, "noise")
+
+ if data_source == "mnist":
+ data_np = load_mnist_data("./data/mnist_data/train-images-idx3-ubyte")
+ elif data_source == "cifar":
+ data_np = load_cifar_data("./data/cifar-10-batches-py/")
+ else:
+ data_np = load_uniform_data()
+
+ # this creates a gradient machine for discriminator
+ dis_training_machine = api.GradientMachine.createFromConfigProto(
+ dis_conf.model_config)
+ # this create a gradient machine for generator
+ gen_training_machine = api.GradientMachine.createFromConfigProto(
+ gen_conf.model_config)
+
+ # generator_machine is used to generate data only, which is used for
+ # training discriminator
+ logger.info(str(generator_conf.model_config))
+ generator_machine = api.GradientMachine.createFromConfigProto(
+ generator_conf.model_config)
+
+ dis_trainer = api.Trainer.create(
+ dis_conf, dis_training_machine)
+
+ gen_trainer = api.Trainer.create(
+ gen_conf, gen_training_machine)
+
+ dis_trainer.startTrain()
+ gen_trainer.startTrain()
+
+    # Sync parameters between the networks (GradientMachines) at the beginning
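+    # (Each GradientMachine holds its own copy of the logically shared
+    # parameters, so the copies must be kept in sync explicitly; see the
+    # TODO in the training loop below.)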
+ copy_shared_parameters(gen_training_machine, dis_training_machine)
+ copy_shared_parameters(gen_training_machine, generator_machine)
+
+    # Constrain training so that neither the discriminator nor the generator
+    # is trained for more than MAX_strike consecutive iterations
+ curr_train = "dis"
+ curr_strike = 0
+ MAX_strike = 5
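+    # For example, once the discriminator has been trained MAX_strike = 5
+    # times in a row, the generator gets the next update even if dis_loss is
+    # still the larger loss, and symmetrically for the generator.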
+
+ for train_pass in xrange(100):
+ dis_trainer.startTrainPass()
+ gen_trainer.startTrainPass()
+ for i in xrange(num_iter):
+            # Do a forward pass through the discriminator to get dis_loss
+ noise = get_noise(batch_size, noise_dim)
+ data_batch_dis_pos = prepare_discriminator_data_batch_pos(
+ batch_size, data_np)
+ dis_loss_pos = get_training_loss(dis_training_machine, data_batch_dis_pos)
+
+ data_batch_dis_neg = prepare_discriminator_data_batch_neg(
+ generator_machine, batch_size, noise)
+ dis_loss_neg = get_training_loss(dis_training_machine, data_batch_dis_neg)
+
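+            # The overall discriminator loss is the average of its loss on
+            # the real batch and on the fake batch.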
+ dis_loss = (dis_loss_pos + dis_loss_neg) / 2.0
+
+            # Do a forward pass through the generator to get gen_loss
+ data_batch_gen = prepare_generator_data_batch(
+ batch_size, noise)
+ gen_loss = get_training_loss(gen_training_machine, data_batch_gen)
+
+ if i % 100 == 0:
+ print "d_pos_loss is %s d_neg_loss is %s" % (dis_loss_pos, dis_loss_neg)
+ print "d_loss is %s g_loss is %s" % (dis_loss, gen_loss)
+
+            # Decide which network to train based on the training history
+            # and the relative magnitude of the two losses
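+            # Train the discriminator when its loss exceeds the generator's
+            # or when the generator has just had MAX_strike consecutive
+            # updates, unless the discriminator itself has just had
+            # MAX_strike consecutive updates.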
+ if (not (curr_train == "dis" and curr_strike == MAX_strike)) and \
+ ((curr_train == "gen" and curr_strike == MAX_strike) or dis_loss > gen_loss):
+ if curr_train == "dis":
+ curr_strike += 1
+ else:
+ curr_train = "dis"
+ curr_strike = 1
+ dis_trainer.trainOneDataBatch(batch_size, data_batch_dis_neg)
+ dis_trainer.trainOneDataBatch(batch_size, data_batch_dis_pos)
+ copy_shared_parameters(dis_training_machine, gen_training_machine)
+
+ else:
+ if curr_train == "gen":
+ curr_strike += 1
+ else:
+ curr_train = "gen"
+ curr_strike = 1
+ gen_trainer.trainOneDataBatch(batch_size, data_batch_gen)
+                # TODO: add an API to Paddle that allows true parameter
+                # sharing between different GradientMachines, so that we do
+                # not need to copy shared parameters.
+ copy_shared_parameters(gen_training_machine, dis_training_machine)
+ copy_shared_parameters(gen_training_machine, generator_machine)
+
+ dis_trainer.finishTrainPass()
+ gen_trainer.finishTrainPass()
+ # At the end of each pass, save the generated samples/images
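+        # Note: this reuses the noise batch from the last training iteration
+        # of the pass.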
+ fake_samples = get_fake_samples(generator_machine, batch_size, noise)
+ if data_source == "uniform":
+ plot2DScatter(fake_samples, "./%s_samples/train_pass%s.png" % (data_source, train_pass))
+ else:
+ save_images(fake_samples, "./%s_samples/train_pass%s.png" % (data_source, train_pass))
+ dis_trainer.finishTrain()
+ gen_trainer.finishTrain()
+
+if __name__ == '__main__':
+ main()
diff --git a/demo/image_classification/predict.sh b/demo/image_classification/predict.sh
old mode 100644
new mode 100755
diff --git a/demo/semantic_role_labeling/predict.sh b/demo/semantic_role_labeling/predict.sh
old mode 100644
new mode 100755
diff --git a/demo/semantic_role_labeling/test.sh b/demo/semantic_role_labeling/test.sh
old mode 100644
new mode 100755
diff --git a/demo/semantic_role_labeling/train.sh b/demo/semantic_role_labeling/train.sh
old mode 100644
new mode 100755
diff --git a/doc/CMakeLists.txt b/doc/CMakeLists.txt
index ef4e9d102d35fc95e96711175a57f7e181a946c6..efcf8b0ad3d6f2f831fe71f3c09163015cc1ac96 100644
--- a/doc/CMakeLists.txt
+++ b/doc/CMakeLists.txt
@@ -15,25 +15,11 @@ set(SPHINX_CACHE_DIR "${CMAKE_CURRENT_BINARY_DIR}/_doctrees")
# HTML output directory
set(SPHINX_HTML_DIR "${CMAKE_CURRENT_BINARY_DIR}/html")
-
-set(PADDLE_DOXYGEN_OUTPUT "${CMAKE_CURRENT_BINARY_DIR}/doxygen_xml")
-
configure_file(
"${CMAKE_CURRENT_SOURCE_DIR}/conf.py.in"
"${BINARY_BUILD_DIR}/conf.py"
@ONLY)
-configure_file(
- "${CMAKE_CURRENT_SOURCE_DIR}/Doxyfile.in"
- "${CMAKE_CURRENT_BINARY_DIR}/Doxyfile"
- @ONLY
- )
-
-add_custom_target(paddle_doxygen_docs ALL
- ${DOXYGEN_EXECUTABLE} ${CMAKE_CURRENT_BINARY_DIR}/Doxyfile
- WORKING_DIRECTORY ${CMAKE_CURRENT_BINARY_DIR}
-)
-
sphinx_add_target(paddle_docs
html
${BINARY_BUILD_DIR}
@@ -41,6 +27,5 @@ sphinx_add_target(paddle_docs
${CMAKE_CURRENT_SOURCE_DIR}
${SPHINX_HTML_DIR})
-add_dependencies(paddle_docs
- gen_proto_py
- paddle_doxygen_docs)
+add_dependencies(paddle_docs
+ gen_proto_py)
diff --git a/doc/Doxyfile.in b/doc/Doxyfile.in
deleted file mode 100644
index a1fc3801925dd340709ac77c9aa77c82051ee111..0000000000000000000000000000000000000000
--- a/doc/Doxyfile.in
+++ /dev/null
@@ -1,2384 +0,0 @@
-# Doxyfile 1.8.10
-
-# This file describes the settings to be used by the documentation system
-# doxygen (www.doxygen.org) for a project.
-#
-# All text after a double hash (##) is considered a comment and is placed in
-# front of the TAG it is preceding.
-#
-# All text after a single hash (#) is considered a comment and will be ignored.
-# The format is:
-# TAG = value [value, ...]
-# For lists, items can also be appended using:
-# TAG += value [value, ...]
-# Values that contain spaces should be placed between quotes (\" \").
-
-#---------------------------------------------------------------------------
-# Project related configuration options
-#---------------------------------------------------------------------------
-
-# This tag specifies the encoding used for all characters in the config file
-# that follow. The default is UTF-8 which is also the encoding used for all text
-# before the first occurrence of this tag. Doxygen uses libiconv (or the iconv
-# built into libc) for the transcoding. See http://www.gnu.org/software/libiconv
-# for the list of possible encodings.
-# The default value is: UTF-8.
-
-DOXYFILE_ENCODING = UTF-8
-
-# The PROJECT_NAME tag is a single word (or a sequence of words surrounded by
-# double-quotes, unless you are using Doxywizard) that should identify the
-# project for which the documentation is generated. This name is used in the
-# title of most generated pages and in a few other places.
-# The default value is: My Project.
-
-PROJECT_NAME = "paddle"
-
-# The PROJECT_NUMBER tag can be used to enter a project or revision number. This
-# could be handy for archiving the generated documentation or if some version
-# control system is used.
-
-PROJECT_NUMBER = 1.0.0
-
-# Using the PROJECT_BRIEF tag one can provide an optional one line description
-# for a project that appears at the top of each page and should give viewer a
-# quick idea about the purpose of the project. Keep the description short.
-
-PROJECT_BRIEF =
-
-# With the PROJECT_LOGO tag one can specify a logo or an icon that is included
-# in the documentation. The maximum height of the logo should not exceed 55
-# pixels and the maximum width should not exceed 200 pixels. Doxygen will copy
-# the logo to the output directory.
-
-PROJECT_LOGO =
-
-# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) path
-# into which the generated documentation will be written. If a relative path is
-# entered, it will be relative to the location where doxygen was started. If
-# left blank the current directory will be used.
-
-OUTPUT_DIRECTORY = @PADDLE_DOXYGEN_OUTPUT@
-
-# If the CREATE_SUBDIRS tag is set to YES then doxygen will create 4096 sub-
-# directories (in 2 levels) under the output directory of each output format and
-# will distribute the generated files over these directories. Enabling this
-# option can be useful when feeding doxygen a huge amount of source files, where
-# putting all generated files in the same directory would otherwise causes
-# performance problems for the file system.
-# The default value is: NO.
-
-CREATE_SUBDIRS = NO
-
-# If the ALLOW_UNICODE_NAMES tag is set to YES, doxygen will allow non-ASCII
-# characters to appear in the names of generated files. If set to NO, non-ASCII
-# characters will be escaped, for example _xE3_x81_x84 will be used for Unicode
-# U+3044.
-# The default value is: NO.
-
-ALLOW_UNICODE_NAMES = NO
-
-# The OUTPUT_LANGUAGE tag is used to specify the language in which all
-# documentation generated by doxygen is written. Doxygen will use this
-# information to generate all constant output in the proper language.
-# Possible values are: Afrikaans, Arabic, Armenian, Brazilian, Catalan, Chinese,
-# Chinese-Traditional, Croatian, Czech, Danish, Dutch, English (United States),
-# Esperanto, Farsi (Persian), Finnish, French, German, Greek, Hungarian,
-# Indonesian, Italian, Japanese, Japanese-en (Japanese with English messages),
-# Korean, Korean-en (Korean with English messages), Latvian, Lithuanian,
-# Macedonian, Norwegian, Persian (Farsi), Polish, Portuguese, Romanian, Russian,
-# Serbian, Serbian-Cyrillic, Slovak, Slovene, Spanish, Swedish, Turkish,
-# Ukrainian and Vietnamese.
-# The default value is: English.
-
-OUTPUT_LANGUAGE = English
-
-# If the BRIEF_MEMBER_DESC tag is set to YES, doxygen will include brief member
-# descriptions after the members that are listed in the file and class
-# documentation (similar to Javadoc). Set to NO to disable this.
-# The default value is: YES.
-
-BRIEF_MEMBER_DESC = YES
-
-# If the REPEAT_BRIEF tag is set to YES, doxygen will prepend the brief
-# description of a member or function before the detailed description
-#
-# Note: If both HIDE_UNDOC_MEMBERS and BRIEF_MEMBER_DESC are set to NO, the
-# brief descriptions will be completely suppressed.
-# The default value is: YES.
-
-REPEAT_BRIEF = YES
-
-# This tag implements a quasi-intelligent brief description abbreviator that is
-# used to form the text in various listings. Each string in this list, if found
-# as the leading text of the brief description, will be stripped from the text
-# and the result, after processing the whole list, is used as the annotated
-# text. Otherwise, the brief description is used as-is. If left blank, the
-# following values are used ($name is automatically replaced with the name of
-# the entity):The $name class, The $name widget, The $name file, is, provides,
-# specifies, contains, represents, a, an and the.
-
-ABBREVIATE_BRIEF =
-
-# If the ALWAYS_DETAILED_SEC and REPEAT_BRIEF tags are both set to YES then
-# doxygen will generate a detailed section even if there is only a brief
-# description.
-# The default value is: NO.
-
-ALWAYS_DETAILED_SEC = NO
-
-# If the INLINE_INHERITED_MEMB tag is set to YES, doxygen will show all
-# inherited members of a class in the documentation of that class as if those
-# members were ordinary class members. Constructors, destructors and assignment
-# operators of the base classes will not be shown.
-# The default value is: NO.
-
-INLINE_INHERITED_MEMB = NO
-
-# If the FULL_PATH_NAMES tag is set to YES, doxygen will prepend the full path
-# before files name in the file list and in the header files. If set to NO the
-# shortest path that makes the file name unique will be used
-# The default value is: YES.
-
-FULL_PATH_NAMES = YES
-
-# The STRIP_FROM_PATH tag can be used to strip a user-defined part of the path.
-# Stripping is only done if one of the specified strings matches the left-hand
-# part of the path. The tag can be used to show relative paths in the file list.
-# If left blank the directory from which doxygen is run is used as the path to
-# strip.
-#
-# Note that you can specify absolute paths here, but also relative paths, which
-# will be relative from the directory where doxygen is started.
-# This tag requires that the tag FULL_PATH_NAMES is set to YES.
-
-STRIP_FROM_PATH =
-
-# The STRIP_FROM_INC_PATH tag can be used to strip a user-defined part of the
-# path mentioned in the documentation of a class, which tells the reader which
-# header file to include in order to use a class. If left blank only the name of
-# the header file containing the class definition is used. Otherwise one should
-# specify the list of include paths that are normally passed to the compiler
-# using the -I flag.
-
-STRIP_FROM_INC_PATH =
-
-# If the SHORT_NAMES tag is set to YES, doxygen will generate much shorter (but
-# less readable) file names. This can be useful is your file systems doesn't
-# support long names like on DOS, Mac, or CD-ROM.
-# The default value is: NO.
-
-SHORT_NAMES = NO
-
-# If the JAVADOC_AUTOBRIEF tag is set to YES then doxygen will interpret the
-# first line (until the first dot) of a Javadoc-style comment as the brief
-# description. If set to NO, the Javadoc-style will behave just like regular Qt-
-# style comments (thus requiring an explicit @brief command for a brief
-# description.)
-# The default value is: NO.
-
-JAVADOC_AUTOBRIEF = NO
-
-# If the QT_AUTOBRIEF tag is set to YES then doxygen will interpret the first
-# line (until the first dot) of a Qt-style comment as the brief description. If
-# set to NO, the Qt-style will behave just like regular Qt-style comments (thus
-# requiring an explicit \brief command for a brief description.)
-# The default value is: NO.
-
-QT_AUTOBRIEF = NO
-
-# The MULTILINE_CPP_IS_BRIEF tag can be set to YES to make doxygen treat a
-# multi-line C++ special comment block (i.e. a block of //! or /// comments) as
-# a brief description. This used to be the default behavior. The new default is
-# to treat a multi-line C++ comment block as a detailed description. Set this
-# tag to YES if you prefer the old behavior instead.
-#
-# Note that setting this tag to YES also means that rational rose comments are
-# not recognized any more.
-# The default value is: NO.
-
-MULTILINE_CPP_IS_BRIEF = NO
-
-# If the INHERIT_DOCS tag is set to YES then an undocumented member inherits the
-# documentation from any documented member that it re-implements.
-# The default value is: YES.
-
-INHERIT_DOCS = YES
-
-# If the SEPARATE_MEMBER_PAGES tag is set to YES then doxygen will produce a new
-# page for each member. If set to NO, the documentation of a member will be part
-# of the file/class/namespace that contains it.
-# The default value is: NO.
-
-SEPARATE_MEMBER_PAGES = NO
-
-# The TAB_SIZE tag can be used to set the number of spaces in a tab. Doxygen
-# uses this value to replace tabs by spaces in code fragments.
-# Minimum value: 1, maximum value: 16, default value: 4.
-
-TAB_SIZE = 2
-
-# This tag can be used to specify a number of aliases that act as commands in
-# the documentation. An alias has the form:
-# name=value
-# For example adding
-# "sideeffect=@par Side Effects:\n"
-# will allow you to put the command \sideeffect (or @sideeffect) in the
-# documentation, which will result in a user-defined paragraph with heading
-# "Side Effects:". You can put \n's in the value part of an alias to insert
-# newlines.
-
-ALIASES =
-
-# This tag can be used to specify a number of word-keyword mappings (TCL only).
-# A mapping has the form "name=value". For example adding "class=itcl::class"
-# will allow you to use the command class in the itcl::class meaning.
-
-TCL_SUBST =
-
-# Set the OPTIMIZE_OUTPUT_FOR_C tag to YES if your project consists of C sources
-# only. Doxygen will then generate output that is more tailored for C. For
-# instance, some of the names that are used will be different. The list of all
-# members will be omitted, etc.
-# The default value is: NO.
-
-OPTIMIZE_OUTPUT_FOR_C = NO
-
-# Set the OPTIMIZE_OUTPUT_JAVA tag to YES if your project consists of Java or
-# Python sources only. Doxygen will then generate output that is more tailored
-# for that language. For instance, namespaces will be presented as packages,
-# qualified scopes will look different, etc.
-# The default value is: NO.
-
-OPTIMIZE_OUTPUT_JAVA = NO
-
-# Set the OPTIMIZE_FOR_FORTRAN tag to YES if your project consists of Fortran
-# sources. Doxygen will then generate output that is tailored for Fortran.
-# The default value is: NO.
-
-OPTIMIZE_FOR_FORTRAN = NO
-
-# Set the OPTIMIZE_OUTPUT_VHDL tag to YES if your project consists of VHDL
-# sources. Doxygen will then generate output that is tailored for VHDL.
-# The default value is: NO.
-
-OPTIMIZE_OUTPUT_VHDL = NO
-
-# Doxygen selects the parser to use depending on the extension of the files it
-# parses. With this tag you can assign which parser to use for a given
-# extension. Doxygen has a built-in mapping, but you can override or extend it
-# using this tag. The format is ext=language, where ext is a file extension, and
-# language is one of the parsers supported by doxygen: IDL, Java, Javascript,
-# C#, C, C++, D, PHP, Objective-C, Python, Fortran (fixed format Fortran:
-# FortranFixed, free formatted Fortran: FortranFree, unknown formatted Fortran:
-# Fortran. In the later case the parser tries to guess whether the code is fixed
-# or free formatted code, this is the default for Fortran type files), VHDL. For
-# instance to make doxygen treat .inc files as Fortran files (default is PHP),
-# and .f files as C (default is Fortran), use: inc=Fortran f=C.
-#
-# Note: For files without extension you can use no_extension as a placeholder.
-#
-# Note that for custom extensions you also need to set FILE_PATTERNS otherwise
-# the files are not read by doxygen.
-
-EXTENSION_MAPPING =
-
-# If the MARKDOWN_SUPPORT tag is enabled then doxygen pre-processes all comments
-# according to the Markdown format, which allows for more readable
-# documentation. See http://daringfireball.net/projects/markdown/ for details.
-# The output of markdown processing is further processed by doxygen, so you can
-# mix doxygen, HTML, and XML commands with Markdown formatting. Disable only in
-# case of backward compatibilities issues.
-# The default value is: YES.
-
-MARKDOWN_SUPPORT = YES
-
-# When enabled doxygen tries to link words that correspond to documented
-# classes, or namespaces to their corresponding documentation. Such a link can
-# be prevented in individual cases by putting a % sign in front of the word or
-# globally by setting AUTOLINK_SUPPORT to NO.
-# The default value is: YES.
-
-AUTOLINK_SUPPORT = YES
-
-# If you use STL classes (i.e. std::string, std::vector, etc.) but do not want
-# to include (a tag file for) the STL sources as input, then you should set this
-# tag to YES in order to let doxygen match functions declarations and
-# definitions whose arguments contain STL classes (e.g. func(std::string);
-# versus func(std::string) {}). This also make the inheritance and collaboration
-# diagrams that involve STL classes more complete and accurate.
-# The default value is: NO.
-
-BUILTIN_STL_SUPPORT = YES
-
-# If you use Microsoft's C++/CLI language, you should set this option to YES to
-# enable parsing support.
-# The default value is: NO.
-
-CPP_CLI_SUPPORT = NO
-
-# Set the SIP_SUPPORT tag to YES if your project consists of sip (see:
-# http://www.riverbankcomputing.co.uk/software/sip/intro) sources only. Doxygen
-# will parse them like normal C++ but will assume all classes use public instead
-# of private inheritance when no explicit protection keyword is present.
-# The default value is: NO.
-
-SIP_SUPPORT = NO
-
-# For Microsoft's IDL there are propget and propput attributes to indicate
-# getter and setter methods for a property. Setting this option to YES will make
-# doxygen to replace the get and set methods by a property in the documentation.
-# This will only work if the methods are indeed getting or setting a simple
-# type. If this is not the case, or you want to show the methods anyway, you
-# should set this option to NO.
-# The default value is: YES.
-
-IDL_PROPERTY_SUPPORT = YES
-
-# If member grouping is used in the documentation and the DISTRIBUTE_GROUP_DOC
-# tag is set to YES then doxygen will reuse the documentation of the first
-# member in the group (if any) for the other members of the group. By default
-# all members of a group must be documented explicitly.
-# The default value is: NO.
-
-DISTRIBUTE_GROUP_DOC = NO
-
-# If one adds a struct or class to a group and this option is enabled, then also
-# any nested class or struct is added to the same group. By default this option
-# is disabled and one has to add nested compounds explicitly via \ingroup.
-# The default value is: NO.
-
-GROUP_NESTED_COMPOUNDS = NO
-
-# Set the SUBGROUPING tag to YES to allow class member groups of the same type
-# (for instance a group of public functions) to be put as a subgroup of that
-# type (e.g. under the Public Functions section). Set it to NO to prevent
-# subgrouping. Alternatively, this can be done per class using the
-# \nosubgrouping command.
-# The default value is: YES.
-
-SUBGROUPING = YES
-
-# When the INLINE_GROUPED_CLASSES tag is set to YES, classes, structs and unions
-# are shown inside the group in which they are included (e.g. using \ingroup)
-# instead of on a separate page (for HTML and Man pages) or section (for LaTeX
-# and RTF).
-#
-# Note that this feature does not work in combination with
-# SEPARATE_MEMBER_PAGES.
-# The default value is: NO.
-
-INLINE_GROUPED_CLASSES = NO
-
-# When the INLINE_SIMPLE_STRUCTS tag is set to YES, structs, classes, and unions
-# with only public data fields or simple typedef fields will be shown inline in
-# the documentation of the scope in which they are defined (i.e. file,
-# namespace, or group documentation), provided this scope is documented. If set
-# to NO, structs, classes, and unions are shown on a separate page (for HTML and
-# Man pages) or section (for LaTeX and RTF).
-# The default value is: NO.
-
-INLINE_SIMPLE_STRUCTS = NO
-
-# When TYPEDEF_HIDES_STRUCT tag is enabled, a typedef of a struct, union, or
-# enum is documented as struct, union, or enum with the name of the typedef. So
-# typedef struct TypeS {} TypeT, will appear in the documentation as a struct
-# with name TypeT. When disabled the typedef will appear as a member of a file,
-# namespace, or class. And the struct will be named TypeS. This can typically be
-# useful for C code in case the coding convention dictates that all compound
-# types are typedef'ed and only the typedef is referenced, never the tag name.
-# The default value is: NO.
-
-TYPEDEF_HIDES_STRUCT = NO
-
-# The size of the symbol lookup cache can be set using LOOKUP_CACHE_SIZE. This
-# cache is used to resolve symbols given their name and scope. Since this can be
-# an expensive process and often the same symbol appears multiple times in the
-# code, doxygen keeps a cache of pre-resolved symbols. If the cache is too small
-# doxygen will become slower. If the cache is too large, memory is wasted. The
-# cache size is given by this formula: 2^(16+LOOKUP_CACHE_SIZE). The valid range
-# is 0..9, the default is 0, corresponding to a cache size of 2^16=65536
-# symbols. At the end of a run doxygen will report the cache usage and suggest
-# the optimal cache size from a speed point of view.
-# Minimum value: 0, maximum value: 9, default value: 0.
-
-LOOKUP_CACHE_SIZE = 0
-
-#---------------------------------------------------------------------------
-# Build related configuration options
-#---------------------------------------------------------------------------
-
-# If the EXTRACT_ALL tag is set to YES, doxygen will assume all entities in
-# documentation are documented, even if no documentation was available. Private
-# class members and static file members will be hidden unless the
-# EXTRACT_PRIVATE respectively EXTRACT_STATIC tags are set to YES.
-# Note: This will also disable the warnings about undocumented members that are
-# normally produced when WARNINGS is set to YES.
-# The default value is: NO.
-
-EXTRACT_ALL = NO
-
-# If the EXTRACT_PRIVATE tag is set to YES, all private members of a class will
-# be included in the documentation.
-# The default value is: NO.
-
-EXTRACT_PRIVATE = NO
-
-# If the EXTRACT_PACKAGE tag is set to YES, all members with package or internal
-# scope will be included in the documentation.
-# The default value is: NO.
-
-EXTRACT_PACKAGE = NO
-
-# If the EXTRACT_STATIC tag is set to YES, all static members of a file will be
-# included in the documentation.
-# The default value is: NO.
-
-EXTRACT_STATIC = NO
-
-# If the EXTRACT_LOCAL_CLASSES tag is set to YES, classes (and structs) defined
-# locally in source files will be included in the documentation. If set to NO,
-# only classes defined in header files are included. Does not have any effect
-# for Java sources.
-# The default value is: YES.
-
-EXTRACT_LOCAL_CLASSES = YES
-
-# This flag is only useful for Objective-C code. If set to YES, local methods,
-# which are defined in the implementation section but not in the interface are
-# included in the documentation. If set to NO, only methods in the interface are
-# included.
-# The default value is: NO.
-
-EXTRACT_LOCAL_METHODS = NO
-
-# If this flag is set to YES, the members of anonymous namespaces will be
-# extracted and appear in the documentation as a namespace called
-# 'anonymous_namespace{file}', where file will be replaced with the base name of
-# the file that contains the anonymous namespace. By default anonymous namespace
-# are hidden.
-# The default value is: NO.
-
-EXTRACT_ANON_NSPACES = NO
-
-# If the HIDE_UNDOC_MEMBERS tag is set to YES, doxygen will hide all
-# undocumented members inside documented classes or files. If set to NO these
-# members will be included in the various overviews, but no documentation
-# section is generated. This option has no effect if EXTRACT_ALL is enabled.
-# The default value is: NO.
-
-HIDE_UNDOC_MEMBERS = NO
-
-# If the HIDE_UNDOC_CLASSES tag is set to YES, doxygen will hide all
-# undocumented classes that are normally visible in the class hierarchy. If set
-# to NO, these classes will be included in the various overviews. This option
-# has no effect if EXTRACT_ALL is enabled.
-# The default value is: NO.
-
-HIDE_UNDOC_CLASSES = NO
-
-# If the HIDE_FRIEND_COMPOUNDS tag is set to YES, doxygen will hide all friend
-# (class|struct|union) declarations. If set to NO, these declarations will be
-# included in the documentation.
-# The default value is: NO.
-
-HIDE_FRIEND_COMPOUNDS = NO
-
-# If the HIDE_IN_BODY_DOCS tag is set to YES, doxygen will hide any
-# documentation blocks found inside the body of a function. If set to NO, these
-# blocks will be appended to the function's detailed documentation block.
-# The default value is: NO.
-
-HIDE_IN_BODY_DOCS = NO
-
-# The INTERNAL_DOCS tag determines if documentation that is typed after a
-# \internal command is included. If the tag is set to NO then the documentation
-# will be excluded. Set it to YES to include the internal documentation.
-# The default value is: NO.
-
-INTERNAL_DOCS = NO
-
-# If the CASE_SENSE_NAMES tag is set to NO then doxygen will only generate file
-# names in lower-case letters. If set to YES, upper-case letters are also
-# allowed. This is useful if you have classes or files whose names only differ
-# in case and if your file system supports case sensitive file names. Windows
-# and Mac users are advised to set this option to NO.
-# The default value is: system dependent.
-
-CASE_SENSE_NAMES = YES
-
-# If the HIDE_SCOPE_NAMES tag is set to NO then doxygen will show members with
-# their full class and namespace scopes in the documentation. If set to YES, the
-# scope will be hidden.
-# The default value is: NO.
-
-HIDE_SCOPE_NAMES = NO
-
-# If the HIDE_COMPOUND_REFERENCE tag is set to NO (default) then doxygen will
-# append additional text to a page's title, such as Class Reference. If set to
-# YES the compound reference will be hidden.
-# The default value is: NO.
-
-HIDE_COMPOUND_REFERENCE= NO
-
-# If the SHOW_INCLUDE_FILES tag is set to YES then doxygen will put a list of
-# the files that are included by a file in the documentation of that file.
-# The default value is: YES.
-
-SHOW_INCLUDE_FILES = NO
-
-# If the SHOW_GROUPED_MEMB_INC tag is set to YES then Doxygen will add for each
-# grouped member an include statement to the documentation, telling the reader
-# which file to include in order to use the member.
-# The default value is: NO.
-
-SHOW_GROUPED_MEMB_INC = NO
-
-# If the FORCE_LOCAL_INCLUDES tag is set to YES then doxygen will list include
-# files with double quotes in the documentation rather than with sharp brackets.
-# The default value is: NO.
-
-FORCE_LOCAL_INCLUDES = NO
-
-# If the INLINE_INFO tag is set to YES then a tag [inline] is inserted in the
-# documentation for inline members.
-# The default value is: YES.
-
-INLINE_INFO = YES
-
-# If the SORT_MEMBER_DOCS tag is set to YES then doxygen will sort the
-# (detailed) documentation of file and class members alphabetically by member
-# name. If set to NO, the members will appear in declaration order.
-# The default value is: YES.
-
-SORT_MEMBER_DOCS = YES
-
-# If the SORT_BRIEF_DOCS tag is set to YES then doxygen will sort the brief
-# descriptions of file, namespace and class members alphabetically by member
-# name. If set to NO, the members will appear in declaration order. Note that
-# this will also influence the order of the classes in the class list.
-# The default value is: NO.
-
-SORT_BRIEF_DOCS = NO
-
-# If the SORT_MEMBERS_CTORS_1ST tag is set to YES then doxygen will sort the
-# (brief and detailed) documentation of class members so that constructors and
-# destructors are listed first. If set to NO the constructors will appear in the
-# respective orders defined by SORT_BRIEF_DOCS and SORT_MEMBER_DOCS.
-# Note: If SORT_BRIEF_DOCS is set to NO this option is ignored for sorting brief
-# member documentation.
-# Note: If SORT_MEMBER_DOCS is set to NO this option is ignored for sorting
-# detailed member documentation.
-# The default value is: NO.
-
-SORT_MEMBERS_CTORS_1ST = NO
-
-# If the SORT_GROUP_NAMES tag is set to YES then doxygen will sort the hierarchy
-# of group names into alphabetical order. If set to NO the group names will
-# appear in their defined order.
-# The default value is: NO.
-
-SORT_GROUP_NAMES = NO
-
-# If the SORT_BY_SCOPE_NAME tag is set to YES, the class list will be sorted by
-# fully-qualified names, including namespaces. If set to NO, the class list will
-# be sorted only by class name, not including the namespace part.
-# Note: This option is not very useful if HIDE_SCOPE_NAMES is set to YES.
-# Note: This option applies only to the class list, not to the alphabetical
-# list.
-# The default value is: NO.
-
-SORT_BY_SCOPE_NAME = NO
-
-# If the STRICT_PROTO_MATCHING option is enabled and doxygen fails to do proper
-# type resolution of all parameters of a function it will reject a match between
-# the prototype and the implementation of a member function even if there is
-# only one candidate or it is obvious which candidate to choose by doing a
-# simple string match. By disabling STRICT_PROTO_MATCHING doxygen will still
-# accept a match between prototype and implementation in such cases.
-# The default value is: NO.
-
-STRICT_PROTO_MATCHING = NO
-
-# The GENERATE_TODOLIST tag can be used to enable (YES) or disable (NO) the todo
-# list. This list is created by putting \todo commands in the documentation.
-# The default value is: YES.
-
-GENERATE_TODOLIST = YES
-
-# The GENERATE_TESTLIST tag can be used to enable (YES) or disable (NO) the test
-# list. This list is created by putting \test commands in the documentation.
-# The default value is: YES.
-
-GENERATE_TESTLIST = YES
-
-# The GENERATE_BUGLIST tag can be used to enable (YES) or disable (NO) the bug
-# list. This list is created by putting \bug commands in the documentation.
-# The default value is: YES.
-
-GENERATE_BUGLIST = YES
-
-# The GENERATE_DEPRECATEDLIST tag can be used to enable (YES) or disable (NO)
-# the deprecated list. This list is created by putting \deprecated commands in
-# the documentation.
-# The default value is: YES.
-
-GENERATE_DEPRECATEDLIST= YES
-
-# The ENABLED_SECTIONS tag can be used to enable conditional documentation
-# sections, marked by \if