diff --git a/benchmark/.gitignore b/benchmark/.gitignore
index 7b66e8a5b5020fd847982db401665d24ba3a069c..fb4114356d4f37efc8ad672316fd4f99443d9fcd 100644
--- a/benchmark/.gitignore
+++ b/benchmark/.gitignore
@@ -7,3 +7,6 @@ paddle/rnn/imdb.pkl
 caffe/image/logs
 tensorflow/image/logs
 tensorflow/rnn/logs
+fluid/models/*.pyc
+fluid/logs
+fluid/nohup.out
diff --git a/benchmark/fluid/fluid_benchmark.py b/benchmark/fluid/fluid_benchmark.py
index c1d458970a58bfac2a3369e8964eb100568b28f2..9d33a841cddb8d8b8e14c00ae7e9d600d5d2eb46 100644
--- a/benchmark/fluid/fluid_benchmark.py
+++ b/benchmark/fluid/fluid_benchmark.py
@@ -40,10 +40,7 @@ def parse_args():
     parser.add_argument(
         '--batch_size', type=int, default=32, help='The minibatch size.')
     parser.add_argument(
-        '--learning_rate',
-        type=float,
-        default=0.001,
-        help='The minibatch size.')
+        '--learning_rate', type=float, default=0.001, help='The learning rate.')
     # TODO(wuyi): add "--use_fake_data" option back.
     parser.add_argument(
         '--skip_batch_num',
@@ -231,10 +228,7 @@ def train(avg_loss, infer_prog, optimizer, train_reader, test_reader, batch_acc,
                 train_losses.append(loss)
                 print("Pass: %d, Iter: %d, Loss: %f\n" %
                       (pass_id, iters, np.mean(train_losses)))
-        train_elapsed = time.time() - start_time
-        examples_per_sec = num_samples / train_elapsed
-        print('\nTotal examples: %d, total time: %.5f, %.5f examples/sec\n' %
-              (num_samples, train_elapsed, examples_per_sec))
+        print_train_time(start_time, time.time(), num_samples)
         print("Pass: %d, Loss: %f" % (pass_id, np.mean(train_losses)))
         # evaluation
         if not args.no_test and batch_acc != None:
@@ -315,10 +309,7 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader,
             if batch_id % 1 == 0:
                 print("Pass %d, batch %d, loss %s" %
                       (pass_id, batch_id, np.array(loss)))
-        train_elapsed = time.time() - start_time
-        examples_per_sec = num_samples / train_elapsed
-        print('\nTotal examples: %d, total time: %.5f, %.5f examples/sed\n' %
-              (num_samples, train_elapsed, examples_per_sec))
+        print_train_time(start_time, time.time(), num_samples)
         if not args.no_test and batch_acc != None:
             test_acc = test(startup_exe, infer_prog, test_reader, feeder,
                             batch_acc)
@@ -329,12 +320,19 @@ def train_parallel(avg_loss, infer_prog, optimizer, train_reader, test_reader,
 def print_arguments(args):
     vars(args)['use_nvprof'] = (vars(args)['use_nvprof'] and
                                 vars(args)['device'] == 'GPU')
-    print('----------- resnet Configuration Arguments -----------')
+    print('----------- Configuration Arguments -----------')
     for arg, value in sorted(vars(args).iteritems()):
         print('%s: %s' % (arg, value))
     print('------------------------------------------------')


+def print_train_time(start_time, end_time, num_samples):
+    train_elapsed = end_time - start_time
+    examples_per_sec = num_samples / train_elapsed
+    print('\nTotal examples: %d, total time: %.5f, %.5f examples/sec\n' %
+          (num_samples, train_elapsed, examples_per_sec))
+
+
 def main():
     args = parse_args()
     print_arguments(args)
@@ -342,7 +340,7 @@ def main():
     # the unique trainer id, starting from 0, needed by trainer
     # only
     nccl_id_var, num_trainers, trainer_id = (
-        None, 1, int(os.getenv("PADDLE_TRAINER_ID", "-1")))
+        None, 1, int(os.getenv("PADDLE_TRAINER_ID", "0")))

     if args.use_cprof:
         pr = cProfile.Profile()
diff --git a/benchmark/fluid/run.sh b/benchmark/fluid/run.sh
index afaab5f4de43fa7e94feeed4a1de991351c04b76..5d9b2db87135e53470b106dcd11a6bcfdc5dbda9 100644
--- a/benchmark/fluid/run.sh
+++ b/benchmark/fluid/run.sh
@@ -2,6 +2,7 @@
 # This script benchmarking the PaddlePaddle Fluid on
 # single thread single GPU.

+mkdir -p logs
 #export FLAGS_fraction_of_gpu_memory_to_use=0.0
 export CUDNN_PATH=/paddle/cudnn_v5

@@ -35,6 +36,7 @@ nohup stdbuf -oL nvidia-smi \
           --format=csv \
           --filename=mem.log \
           -l 1 &
+
 # mnist
 # mnist gpu mnist 128
 FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
@@ -43,7 +45,7 @@ FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
                --batch_size=128 \
                --skip_batch_num=5 \
                --iterations=500 \
-               2>&1 | tee -a mnist_gpu_128.log
+               2>&1 | tee -a logs/mnist_gpu_128.log

 # vgg16
 # gpu cifar10 128
@@ -53,7 +55,7 @@ FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
                --batch_size=128 \
                --skip_batch_num=5 \
                --iterations=30 \
-               2>&1 | tee -a vgg16_gpu_128.log
+               2>&1 | tee -a logs/vgg16_gpu_128.log

 # flowers gpu 128
 FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
@@ -63,28 +65,28 @@ FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
                --device=GPU \
                --batch_size=32 \
                --data_set=flowers \
                --skip_batch_num=5 \
                --iterations=30 \
-               2>&1 | tee -a vgg16_gpu_flowers_32.log
+               2>&1 | tee -a logs/vgg16_gpu_flowers_32.log
 # resnet50
 # resnet50 gpu cifar10 128
 FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
-               --model=resnet50 \
+               --model=resnet \
                --device=GPU \
                --batch_size=128 \
                --data_set=cifar10 \
                --skip_batch_num=5 \
                --iterations=30 \
-               2>&1 | tee -a resnet50_gpu_128.log
+               2>&1 | tee -a logs/resnet50_gpu_128.log
 # resnet50 gpu flowers 64
 FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
-               --model=resnet50 \
+               --model=resnet \
                --device=GPU \
                --batch_size=64 \
                --data_set=flowers \
                --skip_batch_num=5 \
                --iterations=30 \
-               2>&1 | tee -a resnet50_gpu_flowers_64.log
+               2>&1 | tee -a logs/resnet50_gpu_flowers_64.log
 # lstm
 # lstm gpu imdb 32
 # tensorflow only support batch=32
@@ -94,7 +96,7 @@ FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
                --batch_size=32 \
                --skip_batch_num=5 \
                --iterations=30 \
-               2>&1 | tee -a lstm_gpu_32.log
+               2>&1 | tee -a logs/lstm_gpu_32.log

 # seq2seq
 # seq2seq gpu wmb 128
@@ -104,4 +106,4 @@ FLAGS_benchmark=true stdbuf -oL python fluid_benchmark.py \
                --batch_size=128 \
                --skip_batch_num=5 \
                --iterations=30 \
-               2>&1 | tee -a lstm_gpu_128.log
+               2>&1 | tee -a logs/lstm_gpu_128.log
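For reference, a minimal standalone sketch of the timing refactor introduced above: `print_train_time` mirrors the helper added to `fluid_benchmark.py` (with the per-second label), while `run_pass` is a hypothetical stand-in for one training pass, not code from the patch.

```python
import time

import numpy as np


def print_train_time(start_time, end_time, num_samples):
    # Consolidated throughput report, shared by train() and train_parallel().
    train_elapsed = end_time - start_time
    examples_per_sec = num_samples / train_elapsed
    print('\nTotal examples: %d, total time: %.5f, %.5f examples/sec\n' %
          (num_samples, train_elapsed, examples_per_sec))


def run_pass(batches):
    # Hypothetical, simplified stand-in for a pass in fluid_benchmark.py:
    # count processed samples, then report once at the end of the pass.
    num_samples, start_time = 0, time.time()
    for data in batches:
        num_samples += len(data)
    print_train_time(start_time, time.time(), num_samples)


if __name__ == '__main__':
    # Ten fake batches of 128 samples each, just to exercise the report.
    run_pass([np.zeros((128, 784)) for _ in range(10)])
```

Both training loops now call the helper the same way, `print_train_time(start_time, time.time(), num_samples)`, so the throughput line is formatted in one place.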