From bd64719a2f012af82dcac731179a998764d432b9 Mon Sep 17 00:00:00 2001
From: typhoonzero <typhoonzero1986@gmail.com>
Date: Mon, 29 Jan 2018 20:42:29 +0800
Subject: [PATCH] update for today

---
 benchmark/cluster/vgg16/README.md          | 44 +++++++++++++++-------
 benchmark/cluster/vgg16/fluid_trainer.yaml |  2 +-
 benchmark/cluster/vgg16/v2_trainer.yaml    |  2 +-
 benchmark/cluster/vgg16/vgg16_fluid.py     | 39 ++++++++++---------
 benchmark/cluster/vgg16/vgg16_v2.py        |  1 +
 5 files changed, 54 insertions(+), 34 deletions(-)

diff --git a/benchmark/cluster/vgg16/README.md b/benchmark/cluster/vgg16/README.md
index 18128e52761..c1e85a2c407 100644
--- a/benchmark/cluster/vgg16/README.md
+++ b/benchmark/cluster/vgg16/README.md
@@ -2,41 +2,57 @@
 
 ## Test Result
 
-### Single node single thread
+### Hardware Information
+
+- CPU: Intel(R) Xeon(R) CPU E5-2620 v4 @ 2.10GHz
+- CPU MHz: 2101.000
+- Cache size: 20480 KB
+
+### Single Node Single Thread
+
+- PServer Count: 10
+- Trainer Count: 20
+- Metrics: samples / sec
 
 | Batch Size | 32 | 64 | 128 | 256 |
 | -- | -- | -- | -- | -- |
-| PaddlePaddle Fluid | - | - | 16.74 | - |
-| PaddlePaddle v2 | - | - | 17.60 | - |
+| PaddlePaddle Fluid | 15.44 | 16.32 | 16.74 | 16.79 |
+| PaddlePaddle v2 | 15.97 | 17.04 | 17.60 | 17.83 |
 | TensorFlow | - | - | - | - |
 
 ### different batch size
 
 - PServer Count: 10
 - Trainer Count: 20
+- Per trainer CPU Core: 1
 - Metrics: samples / sec
 
 | Batch Size | 32 | 64 | 128 | 256 |
 | -- | -- | -- | -- | -- |
-| PaddlePaddle Fluid | - | 247.40 | - | - |
-| PaddlePaddle v2 | - | - | 256.14 | - |
+| PaddlePaddle Fluid | 190.20 | 222.15 | 247.40 | 258.18 |
+| PaddlePaddle v2 | 170.96 | 233.71 | 256.14 | 329.23 |
 | TensorFlow | - | - | - | - |
 
-### different pserver number
 
-- Trainer Count: 100
-- Batch Size: 64
-- Metrics: mini-batch / sec
+### Acceleration rate
 
-| PServer Count | 10 | 20 | 40 | 60 |
+- PServer Count: 20
+- Batch Size: 128
+- Metrics: samples / sec
+
+| Trainer Count | 20 | 40 | 80 | 100 |
 | -- | -- | -- | -- | -- |
-| PaddlePaddle Fluid | - | - | - | - |
-| PaddlePaddle v2 | - | - | - | - |
+| PaddlePaddle Fluid | 291.06 | 518.80 | 836.26 | 1019.29 |
+| PaddlePaddle v2 | 356.28 | - | - | 1041.99 |
 | TensorFlow | - | - | - | - |
 
-### Accelerate rate
+### different pserver number
 
-| Trainer Counter | 20 | 40 | 80 | 100 |
+- Trainer Count: 100
+- Batch Size: 128
+- Metrics: mini-batch / sec
+
+| PServer Count | 10 | 20 | 40 | 60 |
 | -- | -- | -- | -- | -- |
 | PaddlePaddle Fluid | - | - | - | - |
 | PaddlePaddle v2 | - | - | - | - |
diff --git a/benchmark/cluster/vgg16/fluid_trainer.yaml b/benchmark/cluster/vgg16/fluid_trainer.yaml
index 0a0ed25ebe4..2f6a87ab02a 100644
--- a/benchmark/cluster/vgg16/fluid_trainer.yaml
+++ b/benchmark/cluster/vgg16/fluid_trainer.yaml
@@ -30,7 +30,7 @@ spec:
         - name: TOPOLOGY
           value: ""
         - name: ENTRY
-          value: "MKL_NUM_THREADS=1 python /workspace/vgg16_fluid.py --local 0 --batch_size 128"
+          value: "MKL_NUM_THREADS=1 python /workspace/vgg16_fluid.py --local 0 --batch_size 256"
         - name: TRAINER_PACKAGE
           value: "/workspace"
         - name: PADDLE_INIT_PORT
diff --git a/benchmark/cluster/vgg16/v2_trainer.yaml b/benchmark/cluster/vgg16/v2_trainer.yaml
index 9d52e231f0e..997bbc81c99 100644
--- a/benchmark/cluster/vgg16/v2_trainer.yaml
+++ b/benchmark/cluster/vgg16/v2_trainer.yaml
@@ -22,7 +22,7 @@ spec:
         - name: PADDLE_JOB_NAME
           value: vgg16v2job
         - name: BATCH_SIZE
-          value: "128"
+          value: "256"
         - name: TRAINERS
           value: "20"
         - name: PSERVERS
diff --git a/benchmark/cluster/vgg16/vgg16_fluid.py b/benchmark/cluster/vgg16/vgg16_fluid.py
index 88d6d79cc06..51a01af6722 100644
--- a/benchmark/cluster/vgg16/vgg16_fluid.py
+++ b/benchmark/cluster/vgg16/vgg16_fluid.py
@@ -20,6 +20,7 @@ import numpy as np
 import paddle.v2 as paddle
 import paddle.v2.fluid as fluid
 import paddle.v2.fluid.core as core
+import paddle.v2.fluid.profiler as profiler
 import argparse
 import functools
 import os
@@ -160,24 +161,25 @@ def main():
             start_time = time.time()
             num_samples = 0
             accuracy.reset(exe)
-            for batch_id, data in enumerate(train_reader()):
-                ts = time.time()
-                img_data = np.array(
-                    map(lambda x: x[0].reshape(data_shape), data)).astype(
-                        "float32")
-                y_data = np.array(map(lambda x: x[1], data)).astype("int64")
-                y_data = y_data.reshape([-1, 1])
-
-                loss, acc = exe.run(trainer_prog,
-                                    feed={"pixel": img_data,
-                                          "label": y_data},
-                                    fetch_list=[avg_cost] + accuracy.metrics)
-                iters += 1
-                num_samples += len(data)
-                print(
-                    "Pass = %d, Iters = %d, Loss = %f, Accuracy = %f, spent %f"
-                    % (pass_id, iters, loss, acc, time.time() - ts)
-                )  # The accuracy is the accumulation of batches, but not the current batch.
+            with profiler.profiler("CPU", 'total') as prof:
+                for batch_id, data in enumerate(train_reader()):
+                    ts = time.time()
+                    img_data = np.array(
+                        map(lambda x: x[0].reshape(data_shape), data)).astype(
+                            "float32")
+                    y_data = np.array(map(lambda x: x[1], data)).astype("int64")
+                    y_data = y_data.reshape([-1, 1])
+
+                    loss, acc = exe.run(trainer_prog,
+                                        feed={"pixel": img_data,
+                                            "label": y_data},
+                                        fetch_list=[avg_cost] + accuracy.metrics)
+                    iters += 1
+                    num_samples += len(data)
+                    print(
+                        "Pass = %d, Iters = %d, Loss = %f, Accuracy = %f, spent %f"
+                        % (pass_id, iters, loss, acc, time.time() - ts)
+                    )  # The accuracy is the accumulation of batches, but not the current batch.
 
             pass_elapsed = time.time() - start_time
             pass_train_acc = accuracy.eval(exe)
@@ -211,6 +213,7 @@ def main():
         pserver_endpoints = ",".join(eplist)
         print("pserver endpoints: ", pserver_endpoints)
         trainers = int(os.getenv("TRAINERS"))  # total trainer count
+        print("trainers total: ", trainers)
         current_endpoint = os.getenv(
             "POD_IP") + ":6174"  # current pserver endpoint
         training_role = os.getenv(
diff --git a/benchmark/cluster/vgg16/vgg16_v2.py b/benchmark/cluster/vgg16/vgg16_v2.py
index 284dbec48dc..81ddeb03323 100644
--- a/benchmark/cluster/vgg16/vgg16_v2.py
+++ b/benchmark/cluster/vgg16/vgg16_v2.py
@@ -26,6 +26,7 @@ if BATCH_SIZE:
     BATCH_SIZE = int(BATCH_SIZE)
 else:
     BATCH_SIZE = 128
+print "batch_size", BATCH_SIZE
 NODE_COUNT = int(os.getenv("TRAINERS"))
 ts = 0
 
-- 
GitLab