diff --git a/benchmark/cluster/README.md b/benchmark/cluster/README.md index c2be886b34a5a7449c2afb987596e1a1b1b9791d..b619613ea7a5b6e940ec735314e8e47338b2c600 100644 --- a/benchmark/cluster/README.md +++ b/benchmark/cluster/README.md @@ -44,14 +44,14 @@ ### Measure the Performance for Different PServer Count -- Trainer Count: 60 -- Batch Size: 128 +- Trainer Count: 100 +- Batch Size: 64 - Metrics: mini-batch / sec -| PServer Count | 3 | 6 | 10 | 20 | +| PServer Count | 10 | 20 | 40 | 60 | | -- | -- | -- | -- | -- | -| PaddlePaddle Fluid | 589.1 | 592.6 | 656.4 | 655.8 | -| PaddlePaddle v2 | 412.2 | 368.4 | 346.8 | 283.2 | +| PaddlePaddle Fluid | - | - | - | - | +| PaddlePaddle v2 | - | - | - | - | | TensorFlow | - | - | - | - | ### Measure Parallel Efficiency By Increasing Trainer Count diff --git a/benchmark/cluster/vgg16/v2_pserver.yaml b/benchmark/cluster/vgg16/v2_pserver.yaml index 857e2ff4557d4a638bcdeb586d9fc8978d1183df..935cf0be3cdb0a2fd83e90c03c2240619c7c9b97 100644 --- a/benchmark/cluster/vgg16/v2_pserver.yaml +++ b/benchmark/cluster/vgg16/v2_pserver.yaml @@ -23,7 +23,7 @@ spec: - name: PADDLE_JOB_NAME value: vgg16v2job - name: TRAINERS - value: "60" + value: "20" - name: PSERVERS value: "10" - name: TOPOLOGY diff --git a/benchmark/cluster/vgg16/v2_trainer.yaml b/benchmark/cluster/vgg16/v2_trainer.yaml index be0f741b349c47e8f1978c0bc1d220284c96a53f..5189009f3e1c5dafe2900b7dce4553a39e951d60 100644 --- a/benchmark/cluster/vgg16/v2_trainer.yaml +++ b/benchmark/cluster/vgg16/v2_trainer.yaml @@ -3,8 +3,8 @@ kind: Job metadata: name: vgg16v2job-trainer spec: - parallelism: 60 - completions: 60 + parallelism: 20 + completions: 20 template: metadata: labels: @@ -24,7 +24,7 @@ spec: - name: BATCH_SIZE value: "256" - name: TRAINERS - value: "60" + value: "20" - name: PSERVERS value: "10" - name: TOPOLOGY